Diffstat (limited to 'usr/src/uts/common/io')
-rw-r--r-- | usr/src/uts/common/io/audio/impl/audio_grc3.h   |   2
-rw-r--r-- | usr/src/uts/common/io/bge/bge_chip2.c           |   4
-rw-r--r-- | usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c |   6
-rw-r--r-- | usr/src/uts/common/io/mlxcx/mlxcx.c             |  45
-rw-r--r-- | usr/src/uts/common/io/mlxcx/mlxcx.h             |  13
-rw-r--r-- | usr/src/uts/common/io/mlxcx/mlxcx_gld.c         |  56
-rw-r--r-- | usr/src/uts/common/io/mlxcx/mlxcx_reg.h         |  12
-rw-r--r-- | usr/src/uts/common/io/mlxcx/mlxcx_ring.c        | 251
8 files changed, 287 insertions, 102 deletions
diff --git a/usr/src/uts/common/io/audio/impl/audio_grc3.h b/usr/src/uts/common/io/audio/impl/audio_grc3.h
index 0003dc1574..4472307edf 100644
--- a/usr/src/uts/common/io/audio/impl/audio_grc3.h
+++ b/usr/src/uts/common/io/audio/impl/audio_grc3.h
@@ -53,7 +53,7 @@ typedef struct grc3state {
 	int32_t *historyptr;
 	int32_t dummy_pad1;
 
-	int32_t history[GRC3_MAXHISTORY * 2];
+	int32_t history[GRC3_MAXHISTORY * 2 + 1];
 
 	uint32_t outsz;
 } grc3state_t;
diff --git a/usr/src/uts/common/io/bge/bge_chip2.c b/usr/src/uts/common/io/bge/bge_chip2.c
index 14797ac90f..48d7ed0e0a 100644
--- a/usr/src/uts/common/io/bge/bge_chip2.c
+++ b/usr/src/uts/common/io/bge/bge_chip2.c
@@ -1005,8 +1005,8 @@ bge_nic_get64(bge_t *bgep, bge_regno_t addr)
 #elif defined(__sparc)
 	if (DEVICE_5723_SERIES_CHIPSETS(bgep) ||
 	    DEVICE_5717_SERIES_CHIPSETS(bgep) ||
-	    DEVICE_5725_SERIES_CHIPSETS(bgep ||
-	    DEVICE_57765_SERIES_CHIPSETS(bgep))) {
+	    DEVICE_5725_SERIES_CHIPSETS(bgep) ||
+	    DEVICE_57765_SERIES_CHIPSETS(bgep)) {
 		data = ddi_get32(bgep->io_handle, PIO_ADDR(bgep, addr));
 		data <<= 32;
 		data |= ddi_get32(bgep->io_handle,
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
index 1b66e80a22..60587bcdf6 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
@@ -56,9 +56,9 @@ extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
 extern void sbd_pgr_reset(sbd_lu_t *sl);
 
-extern int HardwareAcceleratedLocking;
-extern int HardwareAcceleratedInit;
-extern int HardwareAcceleratedMove;
+extern uint8_t HardwareAcceleratedLocking;
+extern uint8_t HardwareAcceleratedInit;
+extern uint8_t HardwareAcceleratedMove;
 extern uint8_t sbd_unmap_enable;
 
 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..9fae7c5f77 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
  * before making a WQE for it.
  *
  * After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buffer_shard_t.
  *
  * Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buffer_shard_t. When the shard is in its normal state the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
  *
  * At detach/teardown time, buffers are only ever destroyed from the free list.
 *
@@ -289,18 +294,18 @@
  *                                v
  *                           +----+----+
  *                           | created |
- *                           +----+----+
- *                                |
- *                                |
- *                                | mlxcx_buf_return
- *                                |
- *                                v
- * mlxcx_buf_destroy         +----+----+
- *                 +---------|  free   |<---------------+
- *                 |         +----+----+                |
+ *                           +----+----+                 +------+
+ *                                |                      | dead |
+ *                                |                      +------+
+ *                                | mlxcx_buf_return         ^
+ *                                |                          |
+ *                                v                          | mlxcx_buf_destroy
+ * mlxcx_buf_destroy         +----+----+          +-----------+  |
+ *                 +---------|  free   |<------no-| draining? |-yes-+
+ *                 |         +----+----+          +-----------+
+ *                 |              |                 ^
  *                 |              |                 |
- *                 |              |                 | mlxcx_buf_return
- *                 v              | mlxcx_buf_take  |
+ *                 v              | mlxcx_buf_take  | mlxcx_buf_return
  *             +---+--+           v                 |
  *             | dead |       +---+---+             |
  *             +------+       | on WQ |- - - - - - >O
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
 	mlxcx_buffer_t *buf;
 
 	mutex_enter(&s->mlbs_mtx);
+
 	while (!list_is_empty(&s->mlbs_busy))
 		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
-	while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+	while (!list_is_empty(&s->mlbs_loaned))
+		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+	while ((buf = list_head(&s->mlbs_free)) != NULL)
 		mlxcx_buf_destroy(mlxp, buf);
-	}
+
 	list_destroy(&s->mlbs_free);
 	list_destroy(&s->mlbs_busy);
+	list_destroy(&s->mlbs_loaned);
 	mutex_exit(&s->mlbs_mtx);
 
 	cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
 	    offsetof(mlxcx_buffer_t, mlb_entry));
 	list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
 	    offsetof(mlxcx_buffer_t, mlb_entry));
+	list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+	    offsetof(mlxcx_buffer_t, mlb_entry));
 	cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
 
 	list_insert_tail(&mlxp->mlx_buf_shards, s);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index bf07691095..52240df3a3 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -424,11 +424,18 @@ typedef enum {
 	MLXCX_BUFFER_ON_CHAIN,
 } mlxcx_buffer_state_t;
 
+typedef enum {
+	MLXCX_SHARD_READY,
+	MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
 typedef struct mlxcx_buf_shard {
+	mlxcx_shard_state_t	mlbs_state;
 	list_node_t		mlbs_entry;
 	kmutex_t		mlbs_mtx;
 	list_t			mlbs_busy;
 	list_t			mlbs_free;
+	list_t			mlbs_loaned;
 	kcondvar_t		mlbs_free_nonempty;
 } mlxcx_buf_shard_t;
 
@@ -1171,9 +1178,11 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
 extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
 extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
 extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
 
-extern mlxcx_buffer_t *mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
-    mblk_t *, size_t);
+extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
+    mblk_t *, size_t, mlxcx_buffer_t **);
 
 extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
 extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a1d50659c1..5d15ec1fbb 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -395,6 +395,7 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
 	uint32_t chkflags = 0;
 	boolean_t ok;
 	size_t take = 0;
+	uint_t bcount;
 
 	VERIFY(mp->b_next == NULL);
 
@@ -430,8 +431,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
 		}
 	}
 
-	b = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take);
-	if (b == NULL) {
+	bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
+	if (bcount == 0) {
 		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
 		return (mp);
 	}
@@ -450,17 +451,20 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
 		return (NULL);
 	}
 
-	if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+	if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+	    MLXCX_WQ_STARTED) {
 		mutex_exit(&sq->mlwq_mtx);
 		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
 		return (NULL);
 	}
 
 	/*
-	 * Similar logic here: bufcnt is only manipulated atomically, and
-	 * bufhwm is set at startup.
+	 * If the completion queue buffer count is already at or above
+	 * the high water mark, or the addition of this new chain will
+	 * exceed the CQ ring size, then indicate we are blocked.
 	 */
-	if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm) {
+	if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm ||
+	    (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) {
 		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
 		goto blocked;
 	}
@@ -722,8 +726,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 	mlxcx_buf_shard_t *s;
 	mlxcx_buffer_t *buf;
 
+	/*
+	 * To prevent deadlocks and sleeping whilst holding either the
+	 * CQ mutex or WQ mutex, we split the stop processing into two
+	 * parts.
+	 *
+	 * With the CQ and WQ mutexes held the appropriate WQ is stopped.
+	 * The Q in the HCA is set to Reset state and flagged as no
+	 * longer started. Atomic with changing this WQ state, the buffer
+	 * shards are flagged as draining.
+	 *
+	 * Now, any requests for buffers and attempts to submit messages
+	 * will fail and once we're in this state it is safe to relinquish
+	 * the CQ and WQ mutexes, allowing us to complete the ring stop
+	 * by waiting for the buffer lists, with the exception of the
+	 * loaned list, to drain. Buffers on the loaned list are not
+	 * under our control; we will get them back when the mblk tied
+	 * to the buffer is freed.
+	 */
+
 	mutex_enter(&cq->mlcq_mtx);
 	mutex_enter(&wq->mlwq_mtx);
+
 	if (wq->mlwq_state & MLXCX_WQ_STARTED) {
 		if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
 		    !mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -740,7 +764,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 	}
 
 	ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
 
+	mlxcx_shard_draining(wq->mlwq_bufs);
+	if (wq->mlwq_foreign_bufs != NULL)
+		mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
 	if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+		mutex_exit(&wq->mlwq_mtx);
+		mutex_exit(&cq->mlcq_mtx);
+
 		/* Return any outstanding buffers to the free pool. */
 		while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
 			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -772,12 +804,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 			mutex_exit(&s->mlbs_mtx);
 		}
 
+		mutex_enter(&wq->mlwq_mtx);
 		wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+		mutex_exit(&wq->mlwq_mtx);
+	} else {
+		mutex_exit(&wq->mlwq_mtx);
+		mutex_exit(&cq->mlcq_mtx);
 	}
-	ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
-	mutex_exit(&wq->mlwq_mtx);
-	mutex_exit(&cq->mlcq_mtx);
 }
 
 static int
@@ -1134,7 +1167,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 		for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
 			mutex_enter(&sh->mlbs_mtx);
 			if (!list_is_empty(&sh->mlbs_free) ||
-			    !list_is_empty(&sh->mlbs_busy)) {
+			    !list_is_empty(&sh->mlbs_busy) ||
+			    !list_is_empty(&sh->mlbs_loaned)) {
 				allocd = B_TRUE;
 				mutex_exit(&sh->mlbs_mtx);
 				break;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
index f65280d41d..6d09abea5c 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_reg.h
@@ -390,8 +390,16 @@ typedef enum {
 	MLXCX_WQE_OP_RDMA_R		= 0x10,
 } mlxcx_wqe_opcode_t;
 
+#define	MLXCX_WQE_OCTOWORD	16
 #define	MLXCX_SQE_MAX_DS	((1 << 6) - 1)
-#define	MLXCX_SQE_MAX_PTRS	61
+/*
+ * Calculate the max number of address pointers in a single ethernet
+ * send message. This is the remainder from MLXCX_SQE_MAX_DS
+ * after accounting for the Control and Ethernet segments.
+ */
+#define	MLXCX_SQE_MAX_PTRS	(MLXCX_SQE_MAX_DS - \
+	(sizeof (mlxcx_wqe_eth_seg_t) + sizeof (mlxcx_wqe_control_seg_t)) / \
+	MLXCX_WQE_OCTOWORD)
 
 typedef enum {
 	MLXCX_SQE_FENCE_NONE		= 0x0,
@@ -2497,6 +2505,8 @@ typedef struct {
 
 #pragma pack()
 
+CTASSERT(MLXCX_SQE_MAX_PTRS > 0);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 2305f943a7..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -25,6 +25,7 @@
 #include <sys/sysmacros.h>
 #include <sys/atomic.h>
 #include <sys/cpuvar.h>
+#include <sys/sdt.h>
 
 #include <sys/pattr.h>
 #include <sys/dlpi.h>
@@ -1212,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
 	ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
 	rq->mlwq_state |= MLXCX_WQ_BUFFERS;
 
+	mlxcx_shard_ready(rq->mlwq_bufs);
+
 	for (j = 0; j < rq->mlwq_nents; ++j) {
 		if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
 			break;
@@ -1408,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
 	}
 	sq->mlwq_state |= MLXCX_WQ_BUFFERS;
 
+	mlxcx_shard_ready(sq->mlwq_bufs);
+	mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
 	if (!mlxcx_cmd_start_sq(mlxp, sq)) {
 		mutex_exit(&sq->mlwq_mtx);
 		mutex_exit(&cq->mlcq_mtx);
@@ -1567,8 +1573,8 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
 		    inlinelen);
 	}
 
-	ent0->mlsqe_control.mlcs_ds =
-	    offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16;
+	ent0->mlsqe_control.mlcs_ds = offsetof(mlxcx_sendq_ent_t, mlsqe_data) /
+	    MLXCX_WQE_OCTOWORD;
 
 	if (chkflags & HCK_IPV4_HDRCKSUM) {
 		ASSERT(mlxp->mlx_caps->mlc_checksum);
@@ -1653,7 +1659,20 @@ mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq,
 	/*
 	 * Make sure the workqueue entry is flushed out before updating
 	 * the doorbell.
+	 * If the ring has wrapped, we need to flush the front and back.
 	 */
+	if ((first + ents) > mlwq->mlwq_nents) {
+		uint_t sync_cnt = mlwq->mlwq_nents - first;
+
+		VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
+		    (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
+		    sync_cnt * sizeof (mlxcx_sendq_ent_t),
+		    DDI_DMA_SYNC_FORDEV));
+
+		ent0 = &mlwq->mlwq_send_ent[0];
+		ents -= sync_cnt;
+	}
+
 	VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle,
 	    (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent,
 	    ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV));
@@ -1785,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
 	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
 	mlxcx_t *mlxp = wq->mlwq_mlx;
 	mlxcx_buf_shard_t *s = wq->mlwq_bufs;
-	boolean_t refill;
+	boolean_t refill, draining;
 
 	do {
 		/*
-		 * Wait until there are some free buffers.
+		 * Wait here until one of 3 conditions:
+		 * 1. The shard is draining, or
+		 * 2. There are buffers on the free list, or
+		 * 3. The WQ is being shut down.
 		 */
 		mutex_enter(&s->mlbs_mtx);
-		while (list_is_empty(&s->mlbs_free) &&
-		    (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+		while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+		    list_is_empty(&s->mlbs_free) &&
+		    (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
 			cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+		}
+
+		draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
 		mutex_exit(&s->mlbs_mtx);
 
 		mutex_enter(&cq->mlcq_mtx);
 		mutex_enter(&wq->mlwq_mtx);
 
-		if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+		if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
 			refill = B_FALSE;
 			wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
 		} else {
@@ -1837,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
 	target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
 	cq = mlwq->mlwq_cq;
 
-	if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+	if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+		return;
+
+	if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
 		return;
 
 	current = cq->mlcq_bufcnt;
@@ -1869,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
 			return;
 		}
 
-		if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+		if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
 			for (i = 0; i < n; ++i)
 				mlxcx_buf_return(mlxp, b[i]);
 			return;
@@ -2044,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
 	wqe_index = buf->mlb_wqe_index;
 
 	if (!mlxcx_buf_loan(mlxp, buf)) {
-		mlxcx_warn(mlxp, "!loan failed, dropping packet");
 		mlxcx_buf_return(mlxp, buf);
 		return (NULL);
 	}
@@ -2087,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
 	mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
 	mlxcx_t *mlxp = b->mlb_mlx;
 
-	if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
-		b->mlb_mp = NULL;
-		return;
-	}
-
-	/*
-	 * The mblk for this buffer_t (in its mlb_mp field) has been used now,
-	 * so NULL it out.
-	 */
+	/* The mblk has been used now, so NULL it out. */
 	b->mlb_mp = NULL;
-	mlxcx_buf_return(mlxp, b);
+
+	if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+		mlxcx_buf_return(mlxp, b);
 }
 
 boolean_t
@@ -2163,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
 	mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (NULL);
+	}
+
 	if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		ASSERT(b->mlb_foreign);
@@ -2205,58 +2233,64 @@ copyb:
 	return (b);
 }
 
-mlxcx_buffer_t *
-mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
-    mblk_t *mpb, size_t off)
+static mlxcx_buffer_t *
+mlxcx_bind_or_copy_mblk(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+    mblk_t *mp, size_t off)
 {
-	mlxcx_buffer_t *b, *b0 = NULL;
-	boolean_t first = B_TRUE;
-	mblk_t *mp;
+	mlxcx_buffer_t *b;
 	uint8_t *rptr;
 	size_t sz;
-	size_t ncookies = 0;
 	boolean_t ret;
 
-	for (mp = mpb; mp != NULL; mp = mp->b_cont) {
-		rptr = mp->b_rptr;
-		sz = MBLKL(mp);
+	rptr = mp->b_rptr;
+	sz = MBLKL(mp);
 
-		if (off > 0)
-			ASSERT3U(off, <, sz);
-		rptr += off;
-		sz -= off;
+#ifdef DEBUG
+	if (off > 0) {
+		ASSERT3U(off, <, sz);
+	}
+#endif
 
-		if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
-			b = mlxcx_copy_data(mlxp, wq, rptr, sz);
-			if (b == NULL)
-				goto failed;
-		} else {
-			b = mlxcx_buf_take_foreign(mlxp, wq);
-			if (b == NULL)
-				goto failed;
+	rptr += off;
+	sz -= off;
+
+	if (sz < mlxp->mlx_props.mldp_tx_bind_threshold) {
+		b = mlxcx_copy_data(mlxp, wq, rptr, sz);
+	} else {
+		b = mlxcx_buf_take_foreign(mlxp, wq);
+		if (b == NULL)
+			return (NULL);
 
-			ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
-			    B_FALSE);
+		ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off,
+		    B_FALSE);
 
-			if (!ret) {
-				mlxcx_buf_return(mlxp, b);
+		if (!ret) {
+			mlxcx_buf_return(mlxp, b);
 
-				b = mlxcx_copy_data(mlxp, wq, rptr, sz);
-				if (b == NULL)
-					goto failed;
-			}
+			b = mlxcx_copy_data(mlxp, wq, rptr, sz);
 		}
+	}
+
+	return (b);
+}
+
+uint_t
+mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
+    mblk_t *mpb, size_t off, mlxcx_buffer_t **bp)
+{
+	mlxcx_buffer_t *b, *b0 = NULL;
+	boolean_t first = B_TRUE;
+	mblk_t *mp;
+	size_t offset = off;
+	size_t ncookies = 0;
+	uint_t count = 0;
+
+	for (mp = mpb; mp != NULL && ncookies <= MLXCX_SQE_MAX_PTRS;
+	    mp = mp->b_cont) {
+		b = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, offset);
+		if (b == NULL)
+			goto failed;
 
-		/*
-		 * We might overestimate here when we've copied data, since
-		 * the buffer might be longer than what we copied into it. This
-		 * is safe since it's always wrong in the conservative
-		 * direction (and we will blow up later when we actually
-		 * generate the WQE anyway).
-		 *
-		 * If the assert below ever blows, we'll have to come and fix
-		 * this up so we can transmit these packets.
-		 */
 		ncookies += b->mlb_dma.mxdb_ncookies;
 
 		if (first)
@@ -2267,23 +2301,55 @@ mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
 
 		b->mlb_tx_mp = mp;
 		b->mlb_tx_head = b0;
-		b->mlb_used = sz;
+		b->mlb_used = MBLKL(mp) - offset;
 
 		if (!first)
 			list_insert_tail(&b0->mlb_tx_chain, b);
 		first = B_FALSE;
-		off = 0;
+		offset = 0;
+
+		count++;
 	}
 
-	ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS);
+	/*
+	 * The chain of mblks has resulted in too many cookies for
+	 * a single message. This is unusual, so take the hit to tidy
+	 * up, do a pullup to a single mblk and allocate the requisite
+	 * buf.
+	 */
+	if (ncookies > MLXCX_SQE_MAX_PTRS) {
+		DTRACE_PROBE4(pullup, mlxcx_t *, mlxp, mlxcx_work_queue_t *,
+		    wq, mblk_t *, mpb, size_t, ncookies);
+
+		if (b0 != NULL)
+			mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
+
+		if ((mp = msgpullup(mpb, -1)) == NULL)
+			return (0);
+
+		b0 = mlxcx_bind_or_copy_mblk(mlxp, wq, mp, off);
+		if (b0 == NULL) {
+			freemsg(mp);
+			return (0);
+		}
+		freemsg(mpb);
+
+		b0->mlb_tx_mp = mp;
+		b0->mlb_tx_head = b0;
+		b0->mlb_used = MBLKL(mp) - off;
+
+		count = 1;
+	}
+
+	*bp = b0;
 
-	return (b0);
+	return (count);
 
 failed:
 	if (b0 != NULL)
 		mlxcx_buf_return_chain(mlxp, b0, B_TRUE);
 
-	return (NULL);
+	return (0);
 }
 
 mlxcx_buffer_t *
@@ -2293,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
 	mlxcx_buf_shard_t *s = wq->mlwq_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (NULL);
+	}
+
 	if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2314,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
 	s = wq->mlwq_bufs;
 
 	mutex_enter(&s->mlbs_mtx);
+	if (s->mlbs_state != MLXCX_SHARD_READY) {
+		mutex_exit(&s->mlbs_mtx);
+		return (0);
+	}
+
 	while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
 		ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
 		b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2327,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
 boolean_t
 mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 {
+	mlxcx_buf_shard_t *s = b->mlb_shard;
+
 	VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
 	ASSERT3P(b->mlb_mlx, ==, mlxp);
 
@@ -2339,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 
 	b->mlb_state = MLXCX_BUFFER_ON_LOAN;
 	b->mlb_wqe_index = 0;
+
+	mutex_enter(&s->mlbs_mtx);
+	list_remove(&s->mlbs_busy, b);
+	list_insert_tail(&s->mlbs_loaned, b);
+	mutex_exit(&s->mlbs_mtx);
+
 	return (B_TRUE);
 }
 
@@ -2401,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 		break;
 	case MLXCX_BUFFER_ON_LOAN:
 		ASSERT(!b->mlb_foreign);
-		list_remove(&s->mlbs_busy, b);
+		list_remove(&s->mlbs_loaned, b);
+		if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+			/*
+			 * When we're draining, e.g. during mac_stop(),
+			 * we destroy the buffer immediately rather than
+			 * recycling it. Otherwise we risk leaving it
+			 * on the free list and leaking it.
+			 */
+			list_insert_tail(&s->mlbs_free, b);
+			mlxcx_buf_destroy(mlxp, b);
+			/*
+			 * Teardown might be waiting for the loaned list to empty.
+			 */
+			cv_broadcast(&s->mlbs_free_nonempty);
+			mutex_exit(&s->mlbs_mtx);
+			return;
+		}
 		break;
 	case MLXCX_BUFFER_FREE:
 		VERIFY(0);
@@ -2414,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 	}
 
 	list_insert_tail(&s->mlbs_free, b);
-	cv_signal(&s->mlbs_free_nonempty);
+	cv_broadcast(&s->mlbs_free_nonempty);
 
 	mutex_exit(&s->mlbs_mtx);
 
@@ -2432,9 +2532,11 @@ void
 mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 {
 	mlxcx_buf_shard_t *s = b->mlb_shard;
+
 	VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
 	    b->mlb_state == MLXCX_BUFFER_INIT);
 	ASSERT(mutex_owned(&s->mlbs_mtx));
+
 	if (b->mlb_state == MLXCX_BUFFER_FREE)
 		list_remove(&s->mlbs_free, b);
 
@@ -2454,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
 
 	kmem_cache_free(mlxp->mlx_bufs_cache, b);
 }
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+	mutex_enter(&s->mlbs_mtx);
+	s->mlbs_state = MLXCX_SHARD_READY;
+	mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+	mutex_enter(&s->mlbs_mtx);
+	s->mlbs_state = MLXCX_SHARD_DRAINING;
+	cv_broadcast(&s->mlbs_free_nonempty);
+	mutex_exit(&s->mlbs_mtx);
+}