author     Paul Winder <pwinder@racktopsystems.com>   2020-04-01 13:53:23 +0100
committer  Paul Winder <paul@winders.demon.co.uk>     2020-04-30 08:09:30 +0100
commit     19325e87abbf92e73010df2342fc48019027aee1 (patch)
tree       58917ee4ba9aca6e35153ea1e74370c0900fda89 /usr/src
parent     c11c816fbf1bf201998f795321092b084ad65f58 (diff)
12481 attempting to change MTU size of aggregate over mlxcx can cause dladm to hang
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Andy Stormont <astormont@racktopsystems.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx.c        45
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx.h         9
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_gld.c    43
-rw-r--r--   usr/src/uts/common/io/mlxcx/mlxcx_ring.c  105
4 files changed, 161 insertions, 41 deletions
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index c90fa0969b..9fae7c5f77 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -273,11 +273,16 @@
* before making a WQE for it.
*
* After a completion event occurs, the packet is either discarded (and the
- * buffer_t returned to the free list), or it is readied for loaning to MAC.
+ * buffer_t returned to the free list), or it is readied for loaning to MAC
+ * and placed on the "loaned" list in the mlxcx_buf_shard_t.
*
* Once MAC and the rest of the system have finished with the packet, they call
- * freemsg() on its mblk, which will call mlxcx_buf_mp_return and return the
- * buffer_t to the free list.
+ * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
+ * the fate of the buffer_t is determined by the state of the
+ * mlxcx_buf_shard_t. When the shard is in its normal state, the buffer_t
+ * will be returned to the free list, potentially to be recycled and used
+ * again. But if the shard is draining (e.g. after a ring stop) there will be
+ * no recycling and the buffer_t is immediately destroyed.
*
* At detach/teardown time, buffers are only ever destroyed from the free list.
*
@@ -289,18 +294,18 @@
* v
* +----+----+
* | created |
- * +----+----+
- * |
- * |
- * | mlxcx_buf_return
- * |
- * v
- * mlxcx_buf_destroy +----+----+
- * +---------| free |<---------------+
- * | +----+----+ |
+ * +----+----+ +------+
+ * | | dead |
+ * | +------+
+ * | mlxcx_buf_return ^
+ * | |
+ * v | mlxcx_buf_destroy
+ * mlxcx_buf_destroy +----+----+ +-----------+ |
+ * +---------| free |<------no-| draining? |-yes-+
+ * | +----+----+ +-----------+
+ * | | ^
* | | |
- * | | | mlxcx_buf_return
- * v | mlxcx_buf_take |
+ * v | mlxcx_buf_take | mlxcx_buf_return
* +---+--+ v |
* | dead | +---+---+ |
* +------+ | on WQ |- - - - - - - - >O
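The new "draining?" decision in the diagram is easiest to read from the return side. Below is a minimal sketch (not part of the patch; it assumes the driver's own headers and omits the other buffer states) of how a loaned buffer coming back from MAC is either recycled or destroyed, mirroring mlxcx_buf_return() later in this diff:

static void
example_return_loaned(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
        mlxcx_buf_shard_t *s = b->mlb_shard;

        mutex_enter(&s->mlbs_mtx);
        b->mlb_state = MLXCX_BUFFER_FREE;
        list_remove(&s->mlbs_loaned, b);
        list_insert_tail(&s->mlbs_free, b);

        if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
                /* No recycling during a ring stop: destroy it now. */
                mlxcx_buf_destroy(mlxp, b);
        }

        /* Teardown or the refill task may be waiting on this condvar. */
        cv_broadcast(&s->mlbs_free_nonempty);
        mutex_exit(&s->mlbs_mtx);
}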
@@ -759,13 +764,19 @@ mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
mlxcx_buffer_t *buf;
mutex_enter(&s->mlbs_mtx);
+
while (!list_is_empty(&s->mlbs_busy))
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
- while ((buf = list_head(&s->mlbs_free)) != NULL) {
+
+ while (!list_is_empty(&s->mlbs_loaned))
+ cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+
+ while ((buf = list_head(&s->mlbs_free)) != NULL)
mlxcx_buf_destroy(mlxp, buf);
- }
+
list_destroy(&s->mlbs_free);
list_destroy(&s->mlbs_busy);
+ list_destroy(&s->mlbs_loaned);
mutex_exit(&s->mlbs_mtx);
cv_destroy(&s->mlbs_free_nonempty);
@@ -1336,6 +1347,8 @@ mlxcx_mlbs_create(mlxcx_t *mlxp)
offsetof(mlxcx_buffer_t, mlb_entry));
list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
offsetof(mlxcx_buffer_t, mlb_entry));
+ list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
+ offsetof(mlxcx_buffer_t, mlb_entry));
cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
list_insert_tail(&mlxp->mlx_buf_shards, s);
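Reconstructed from the hunk above for readability (a sketch, not verbatim source; the trailing list_destroy()/cv_destroy() cleanup is elided), the teardown path now waits for both in-flight and loaned buffers before freeing anything:

static void
example_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
{
        mlxcx_buffer_t *buf;

        mutex_enter(&s->mlbs_mtx);

        /* Buffers still posted to the hardware work queues. */
        while (!list_is_empty(&s->mlbs_busy))
                cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);

        /* Buffers loaned up to MAC; they come home via freemsg(). */
        while (!list_is_empty(&s->mlbs_loaned))
                cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);

        /* Everything is back on the free list; destroy it from there. */
        while ((buf = list_head(&s->mlbs_free)) != NULL)
                mlxcx_buf_destroy(mlxp, buf);

        mutex_exit(&s->mlbs_mtx);
}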
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index da048b4ac3..52240df3a3 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -424,11 +424,18 @@ typedef enum {
MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;
+typedef enum {
+ MLXCX_SHARD_READY,
+ MLXCX_SHARD_DRAINING,
+} mlxcx_shard_state_t;
+
typedef struct mlxcx_buf_shard {
+ mlxcx_shard_state_t mlbs_state;
list_node_t mlbs_entry;
kmutex_t mlbs_mtx;
list_t mlbs_busy;
list_t mlbs_free;
+ list_t mlbs_loaned;
kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;
@@ -1171,6 +1178,8 @@ extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
+extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
+extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
mblk_t *, size_t, mlxcx_buffer_t **);
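The shard state added here is what gates the buffer take paths. A minimal sketch (assuming the mlxcx_buf_take() changes shown later in this diff) of how a consumer honours it; a shard that is not READY simply refuses to hand out buffers:

static mlxcx_buffer_t *
example_buf_take(mlxcx_work_queue_t *wq)
{
        mlxcx_buf_shard_t *s = wq->mlwq_bufs;
        mlxcx_buffer_t *b = NULL;

        mutex_enter(&s->mlbs_mtx);
        if (s->mlbs_state == MLXCX_SHARD_READY &&
            (b = list_remove_head(&s->mlbs_free)) != NULL) {
                b->mlb_state = MLXCX_BUFFER_ON_WQ;
                list_insert_tail(&s->mlbs_busy, b);
        }
        mutex_exit(&s->mlbs_mtx);

        return (b);
}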
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index a08cec3980..5d15ec1fbb 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -451,7 +451,8 @@ mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
return (NULL);
}
- if (sq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
+ MLXCX_WQ_STARTED) {
mutex_exit(&sq->mlwq_mtx);
mlxcx_buf_return_chain(mlxp, b, B_FALSE);
return (NULL);
@@ -725,8 +726,28 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mlxcx_buf_shard_t *s;
mlxcx_buffer_t *buf;
+ /*
+ * To prevent deadlocks and sleeping whilst holding either the
+ * CQ mutex or WQ mutex, we split the stop processing into two
+ * parts.
+ *
+ * With the CQ and WQ mutexes held, the appropriate WQ is stopped.
+ * The Q in the HCA is set to the Reset state and flagged as no
+ * longer started. Atomically with this WQ state change, the buffer
+ * shards are flagged as draining.
+ *
+ * Now any requests for buffers and attempts to submit messages
+ * will fail, and once we're in this state it is safe to relinquish
+ * the CQ and WQ mutexes. This allows us to complete the ring stop
+ * by waiting for the buffer lists, with the exception of
+ * the loaned list, to drain. Buffers on the loaned list are
+ * not under our control; we will get them back when the mblk tied
+ * to the buffer is freed.
+ */
+
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
+
if (wq->mlwq_state & MLXCX_WQ_STARTED) {
if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
!mlxcx_cmd_stop_rq(mlxp, wq)) {
@@ -743,7 +764,15 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
}
ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);
+ mlxcx_shard_draining(wq->mlwq_bufs);
+ if (wq->mlwq_foreign_bufs != NULL)
+ mlxcx_shard_draining(wq->mlwq_foreign_bufs);
+
+
if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
+
/* Return any outstanding buffers to the free pool. */
while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
@@ -775,12 +804,13 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
mutex_exit(&s->mlbs_mtx);
}
+ mutex_enter(&wq->mlwq_mtx);
wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
+ mutex_exit(&wq->mlwq_mtx);
+ } else {
+ mutex_exit(&wq->mlwq_mtx);
+ mutex_exit(&cq->mlcq_mtx);
}
- ASSERT0(wq->mlwq_state & MLXCX_WQ_BUFFERS);
-
- mutex_exit(&wq->mlwq_mtx);
- mutex_exit(&cq->mlcq_mtx);
}
static int
@@ -1137,7 +1167,8 @@ mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
mutex_enter(&sh->mlbs_mtx);
if (!list_is_empty(&sh->mlbs_free) ||
- !list_is_empty(&sh->mlbs_busy)) {
+ !list_is_empty(&sh->mlbs_busy) ||
+ !list_is_empty(&sh->mlbs_loaned)) {
allocd = B_TRUE;
mutex_exit(&sh->mlbs_mtx);
break;
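Taken together, the mlxcx_mac_ring_stop() changes above implement the two-phase stop the new comment describes. A skeleton of the ordering (illustrative only; the buffer return loops of phase two are elided) is shown below. The point is that nothing sleeps on a shard condvar while the CQ or WQ mutex is held, which is the deadlock the comment is guarding against:

static void
example_ring_stop(mlxcx_completion_queue_t *cq, mlxcx_work_queue_t *wq)
{
        mutex_enter(&cq->mlcq_mtx);
        mutex_enter(&wq->mlwq_mtx);

        /*
         * Phase 1: stop the hardware queue (mlxcx_cmd_stop_rq()/_sq() in
         * the real code) and flag its shards as draining, atomically with
         * respect to the WQ state.
         */
        mlxcx_shard_draining(wq->mlwq_bufs);
        if (wq->mlwq_foreign_bufs != NULL)
                mlxcx_shard_draining(wq->mlwq_foreign_bufs);

        mutex_exit(&wq->mlwq_mtx);
        mutex_exit(&cq->mlcq_mtx);

        /*
         * Phase 2: with the mutexes dropped it is now safe to return
         * buffers and wait for the shards to drain.
         */
}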
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
index 492f8fd8a5..da98a5cf40 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_ring.c
@@ -1213,6 +1213,8 @@ mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS);
rq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(rq->mlwq_bufs);
+
for (j = 0; j < rq->mlwq_nents; ++j) {
if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b))
break;
@@ -1409,6 +1411,9 @@ mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
}
sq->mlwq_state |= MLXCX_WQ_BUFFERS;
+ mlxcx_shard_ready(sq->mlwq_bufs);
+ mlxcx_shard_ready(sq->mlwq_foreign_bufs);
+
if (!mlxcx_cmd_start_sq(mlxp, sq)) {
mutex_exit(&sq->mlwq_mtx);
mutex_exit(&cq->mlcq_mtx);
@@ -1799,22 +1804,29 @@ mlxcx_rq_refill_task(void *arg)
mlxcx_completion_queue_t *cq = wq->mlwq_cq;
mlxcx_t *mlxp = wq->mlwq_mlx;
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
- boolean_t refill;
+ boolean_t refill, draining;
do {
/*
- * Wait until there are some free buffers.
+ * Wait here until one of three conditions is met:
+ * 1. The shard is draining, or
+ * 2. There are buffers on the free list, or
+ * 3. The WQ is being shut down.
*/
mutex_enter(&s->mlbs_mtx);
- while (list_is_empty(&s->mlbs_free) &&
- (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
+ while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
+ list_is_empty(&s->mlbs_free) &&
+ (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0) {
cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
+ }
+
+ draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
mutex_exit(&s->mlbs_mtx);
mutex_enter(&cq->mlcq_mtx);
mutex_enter(&wq->mlwq_mtx);
- if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
+ if (draining || (cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0) {
refill = B_FALSE;
wq->mlwq_state &= ~MLXCX_WQ_REFILLING;
} else {
@@ -1851,7 +1863,10 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP;
cq = mlwq->mlwq_cq;
- if (cq->mlcq_state & MLXCX_CQ_TEARDOWN)
+ if ((mlwq->mlwq_state & MLXCX_WQ_STARTED) == 0)
+ return;
+
+ if ((cq->mlcq_state & MLXCX_CQ_TEARDOWN) != 0)
return;
current = cq->mlcq_bufcnt;
@@ -1883,7 +1898,7 @@ mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
return;
}
- if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) {
+ if ((mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) != 0) {
for (i = 0; i < n; ++i)
mlxcx_buf_return(mlxp, b[i]);
return;
@@ -2058,7 +2073,6 @@ mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq,
wqe_index = buf->mlb_wqe_index;
if (!mlxcx_buf_loan(mlxp, buf)) {
- mlxcx_warn(mlxp, "!loan failed, dropping packet");
mlxcx_buf_return(mlxp, buf);
return (NULL);
}
@@ -2101,16 +2115,11 @@ mlxcx_buf_mp_return(caddr_t arg)
mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg;
mlxcx_t *mlxp = b->mlb_mlx;
- if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) {
- b->mlb_mp = NULL;
- return;
- }
- /*
- * The mblk for this buffer_t (in its mlb_mp field) has been used now,
- * so NULL it out.
- */
+ /* The mblk has been used now, so NULL it out. */
b->mlb_mp = NULL;
- mlxcx_buf_return(mlxp, b);
+
+ if (b->mlb_state == MLXCX_BUFFER_ON_LOAN)
+ mlxcx_buf_return(mlxp, b);
}
boolean_t
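For context on this hunk: mlxcx_buf_mp_return() is the mblk free routine the driver registers with desballoc(9F) when it wraps a receive buffer in an mblk, which is how freemsg() hands the buffer back. A hedged sketch of that registration follows; the mlb_frtn field name is assumed for illustration and does not appear in this diff:

static boolean_t
example_wrap_buffer(mlxcx_buffer_t *b, uchar_t *va, size_t len)
{
        /* Registered free routine and its argument (the buffer_t itself). */
        b->mlb_frtn.free_func = mlxcx_buf_mp_return;
        b->mlb_frtn.free_arg = (caddr_t)b;

        b->mlb_mp = desballoc(va, len, 0, &b->mlb_frtn);
        return (b->mlb_mp != NULL);
}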
@@ -2177,6 +2186,11 @@ mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
ASSERT(b->mlb_foreign);
@@ -2345,6 +2359,11 @@ mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq)
mlxcx_buf_shard_t *s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (NULL);
+ }
+
if ((b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2366,6 +2385,11 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
s = wq->mlwq_bufs;
mutex_enter(&s->mlbs_mtx);
+ if (s->mlbs_state != MLXCX_SHARD_READY) {
+ mutex_exit(&s->mlbs_mtx);
+ return (0);
+ }
+
while (done < nbufs && (b = list_remove_head(&s->mlbs_free)) != NULL) {
ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
b->mlb_state = MLXCX_BUFFER_ON_WQ;
@@ -2379,6 +2403,8 @@ mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
+ mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
ASSERT3P(b->mlb_mlx, ==, mlxp);
@@ -2391,6 +2417,12 @@ mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
b->mlb_state = MLXCX_BUFFER_ON_LOAN;
b->mlb_wqe_index = 0;
+
+ mutex_enter(&s->mlbs_mtx);
+ list_remove(&s->mlbs_busy, b);
+ list_insert_tail(&s->mlbs_loaned, b);
+ mutex_exit(&s->mlbs_mtx);
+
return (B_TRUE);
}
@@ -2453,7 +2485,23 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
break;
case MLXCX_BUFFER_ON_LOAN:
ASSERT(!b->mlb_foreign);
- list_remove(&s->mlbs_busy, b);
+ list_remove(&s->mlbs_loaned, b);
+ if (s->mlbs_state == MLXCX_SHARD_DRAINING) {
+ /*
+ * When we're draining, e.g. during mac_stop(),
+ * we destroy the buffer immediately rather than
+ * recycling it. Otherwise we risk leaving it
+ * on the free list and leaking it.
+ */
+ list_insert_tail(&s->mlbs_free, b);
+ mlxcx_buf_destroy(mlxp, b);
+ /*
+ * Teardown might be waiting for the loaned list to empty.
+ */
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+ return;
+ }
break;
case MLXCX_BUFFER_FREE:
VERIFY(0);
@@ -2466,7 +2514,7 @@ mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
}
list_insert_tail(&s->mlbs_free, b);
- cv_signal(&s->mlbs_free_nonempty);
+ cv_broadcast(&s->mlbs_free_nonempty);
mutex_exit(&s->mlbs_mtx);
@@ -2484,9 +2532,11 @@ void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
mlxcx_buf_shard_t *s = b->mlb_shard;
+
VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
b->mlb_state == MLXCX_BUFFER_INIT);
ASSERT(mutex_owned(&s->mlbs_mtx));
+
if (b->mlb_state == MLXCX_BUFFER_FREE)
list_remove(&s->mlbs_free, b);
@@ -2506,3 +2556,20 @@ mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
kmem_cache_free(mlxp->mlx_bufs_cache, b);
}
+
+void
+mlxcx_shard_ready(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_READY;
+ mutex_exit(&s->mlbs_mtx);
+}
+
+void
+mlxcx_shard_draining(mlxcx_buf_shard_t *s)
+{
+ mutex_enter(&s->mlbs_mtx);
+ s->mlbs_state = MLXCX_SHARD_DRAINING;
+ cv_broadcast(&s->mlbs_free_nonempty);
+ mutex_exit(&s->mlbs_mtx);
+}
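Finally, the refill task's reworked wait loop can be read as a small helper. A sketch (a hypothetical wrapper, not in the patch) of the condition it now waits on and how the draining flag is consumed:

static boolean_t
example_wait_for_refill(mlxcx_buf_shard_t *s, mlxcx_completion_queue_t *cq)
{
        boolean_t draining;

        mutex_enter(&s->mlbs_mtx);
        while (s->mlbs_state != MLXCX_SHARD_DRAINING &&
            list_is_empty(&s->mlbs_free) &&
            (cq->mlcq_state & MLXCX_CQ_TEARDOWN) == 0)
                cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
        draining = (s->mlbs_state == MLXCX_SHARD_DRAINING);
        mutex_exit(&s->mlbs_mtx);

        /*
         * The real task re-takes the CQ and WQ mutexes next, and only
         * refills if neither draining nor teardown is in progress.
         */
        return (!draining);
}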