author:    Paul Winder <pwinder@racktopsystems.com>    2020-07-24 12:16:05 +0100
committer: Paul Winder <paul@winder.uk.net>            2020-08-26 15:21:08 +0100
commit:    e1447ca93391f31609bda487cb922dbff9dcdef5
tree:      fc963f1786bf7af1a35884957127903bfbc7ad3c
parent:    dbe930bf51e0d7458b24d30e9f25756c5da54ddf
12980 attempting to change MTU on mlxcx based aggregation can induce FMA event
12987 devo_power misconfigured in mlxcx
12988 potential hang in mlxcx when async and ring vectors end up on same CPU
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
Approved by: Garrett D'Amore <garrett@damore.org>
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.c       |  5
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx.h       |  1
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_cmd.c   |  3
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_gld.c   | 27
-rw-r--r--  usr/src/uts/common/io/mlxcx/mlxcx_intr.c  | 66
5 files changed, 90 insertions(+), 12 deletions(-)
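The substance of 12988 is the interrupt priority split done in mlxcx_intr_setup() (see the mlxcx_intr.c hunks below): the async vector must end up at a strictly higher PIL than the ring vectors so it can always preempt them. A minimal userland sketch of just that arithmetic follows; the split_priorities() helper is hypothetical, and LOCK_LEVEL here is a stand-in for the kernel constant (10 on illumos x86), not the driver's code.

	#include <stdio.h>

	/*
	 * Stand-in for the kernel's LOCK_LEVEL. Priorities at or above it
	 * cannot simply be bumped up, so the ring vectors are lowered
	 * instead. split_priorities() is an invented name for illustration.
	 */
	#define	LOCK_LEVEL	10

	static void
	split_priorities(unsigned *ring_pri, unsigned *async_pri)
	{
		*async_pri = *ring_pri;
		if (*async_pri < LOCK_LEVEL) {
			/* Room below LOCK_LEVEL: raise the async vector. */
			(*async_pri)++;
		} else {
			/* At the ceiling: lower the ring vectors instead. */
			(*ring_pri)--;
		}
	}

	int
	main(void)
	{
		for (unsigned base = 5; base <= LOCK_LEVEL; base++) {
			unsigned ring = base, async;
			split_priorities(&ring, &async);
			printf("base %2u -> ring %2u, async %2u\n",
			    base, ring, async);
		}
		return (0);
	}

Either branch preserves the invariant async > ring, which is what lets the async handler post command completions even when a ring handler is spinning on the same CPU.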
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.c b/usr/src/uts/common/io/mlxcx/mlxcx.c
index dbad9be958..6421b47126 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.c
@@ -1800,7 +1800,7 @@ mlxcx_setup_ports(mlxcx_t *mlxp)
 		p->mlx_port_event.mla_mlx = mlxp;
 		p->mlx_port_event.mla_port = p;
 		mutex_init(&p->mlx_port_event.mla_mtx, NULL,
-		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
+		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
 		p->mlp_init |= MLXCX_PORT_INIT;
 		mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER,
 		    DDI_INTR_PRI(mlxp->mlx_intr_pri));
@@ -2716,7 +2716,7 @@ mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 	for (i = 0; i <= MLXCX_FUNC_ID_MAX; i++) {
 		mlxp->mlx_npages_req[i].mla_mlx = mlxp;
 		mutex_init(&mlxp->mlx_npages_req[i].mla_mtx, NULL,
-		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
+		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
 	}

 	mlxp->mlx_attach |= MLXCX_ATTACH_ASYNC_TQ;
@@ -2913,7 +2913,6 @@ static struct dev_ops mlxcx_dev_ops = {
 	.devo_attach = mlxcx_attach,
 	.devo_detach = mlxcx_detach,
 	.devo_reset = nodev,
-	.devo_power = ddi_power,
 	.devo_quiesce = ddi_quiesce_not_supported,
 	.devo_cb_ops = &mlxcx_cb_ops
 };
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx.h b/usr/src/uts/common/io/mlxcx/mlxcx.h
index 77d36447c6..dd91ea3734 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx.h
+++ b/usr/src/uts/common/io/mlxcx/mlxcx.h
@@ -1082,6 +1082,7 @@ struct mlxcx {
 	 * Interrupts
 	 */
 	uint_t		mlx_intr_pri;
+	uint_t		mlx_async_intr_pri;
 	uint_t		mlx_intr_type;		/* always MSI-X */
 	int		mlx_intr_count;
 	size_t		mlx_intr_size;		/* allocation size */
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
index c8eb1335ea..3cb531df39 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_cmd.c
@@ -667,7 +667,8 @@ static void
 mlxcx_cmd_init(mlxcx_t *mlxp, mlxcx_cmd_t *cmd)
 {
 	bzero(cmd, sizeof (*cmd));
-	mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&cmd->mlcmd_lock, NULL, MUTEX_DRIVER,
+	    DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
 	cv_init(&cmd->mlcmd_cv, NULL, CV_DRIVER, NULL);
 	cmd->mlcmd_token = id_alloc(mlxp->mlx_cmd.mcmd_tokens);
 	cmd->mlcmd_poll = mlxp->mlx_cmd.mcmd_polled;
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
index 89645bb2b1..941eb0f9e7 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_gld.c
@@ -809,19 +809,32 @@ mlxcx_mac_ring_stop(mac_ring_driver_t rh)
 	if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
+		list_t cq_buffers;
+
+		/*
+		 * Take the buffers away from the CQ. If the CQ is being
+		 * processed and the WQ has been stopped, a completion
+		 * which does not match to a buffer will be ignored.
+		 */
+		list_create(&cq_buffers, sizeof (mlxcx_buffer_t),
+		    offsetof(mlxcx_buffer_t, mlb_cq_entry));
+
+		list_move_tail(&cq_buffers, &cq->mlcq_buffers);
+
+		mutex_enter(&cq->mlcq_bufbmtx);
+		list_move_tail(&cq_buffers, &cq->mlcq_buffers_b);
+		mutex_exit(&cq->mlcq_bufbmtx);
+
+		cq->mlcq_bufcnt = 0;
+
 		mutex_exit(&wq->mlwq_mtx);
 		mutex_exit(&cq->mlcq_mtx);

 		/* Return any outstanding buffers to the free pool. */
-		while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) {
+		while ((buf = list_remove_head(&cq_buffers)) != NULL) {
 			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
 		}
-		mutex_enter(&cq->mlcq_bufbmtx);
-		while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) {
-			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
-		}
-		mutex_exit(&cq->mlcq_bufbmtx);
-		cq->mlcq_bufcnt = 0;
+		list_destroy(&cq_buffers);

 		s = wq->mlwq_bufs;
 		mutex_enter(&s->mlbs_mtx);
diff --git a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
index f79c148d20..10d8d19b2f 100644
--- a/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
+++ b/usr/src/uts/common/io/mlxcx/mlxcx_intr.c
@@ -922,6 +922,20 @@ lookagain:
 			if (added)
 				goto lookagain;

+			/*
+			 * This check could go just after the lookagain
+			 * label, but it is a hot code path so we don't
+			 * want to unnecessarily grab a lock and check
+			 * a flag for a relatively rare event (the ring
+			 * being stopped).
+			 */
+			mutex_enter(&wq->mlwq_mtx);
+			if ((wq->mlwq_state & MLXCX_WQ_STARTED) == 0) {
+				mutex_exit(&wq->mlwq_mtx);
+				goto nextcq;
+			}
+			mutex_exit(&wq->mlwq_mtx);
+
 			buf = list_head(&mlcq->mlcq_buffers);
 			mlxcx_warn(mlxp, "got completion on CQ %x but "
 			    "no buffer matching wqe found: %x (first "
@@ -1165,6 +1179,7 @@ mlxcx_intr_setup(mlxcx_t *mlxp)

 	ret = ddi_intr_get_supported_types(dip, &types);
 	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to get supported interrupt types");
 		return (B_FALSE);
 	}

@@ -1176,6 +1191,7 @@

 	ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs);
 	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to get number of interrupts");
 		return (B_FALSE);
 	}
 	if (nintrs < 2) {
@@ -1203,10 +1219,14 @@
 	ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX,
 	    0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL);
 	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to allocate %d interrupts", navail);
 		mlxcx_intr_teardown(mlxp);
 		return (B_FALSE);
 	}
 	if (mlxp->mlx_intr_count < mlxp->mlx_intr_cq0 + 1) {
+		mlxcx_warn(mlxp, "%d MSI-X interrupts allocated, but mlxcx "
+		    "requires %d", mlxp->mlx_intr_count,
+		    mlxp->mlx_intr_cq0 + 1);
 		mlxcx_intr_teardown(mlxp);
 		return (B_FALSE);
 	}
@@ -1214,10 +1234,29 @@
 	ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0],
 	    &mlxp->mlx_intr_pri);
 	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to get interrupt priority");
 		mlxcx_intr_teardown(mlxp);
 		return (B_FALSE);
 	}

+	/*
+	 * Set the interrupt priority for the asynchronous handler higher
+	 * than the ring handlers. Some operations which issue commands,
+	 * and thus rely on the async interrupt handler for posting
+	 * completion, do so with a CQ mutex held. The CQ mutex is also
+	 * acquired during ring processing, so if the ring processing vector
+	 * happens to be assigned to the same CPU as the async vector
+	 * it can hold off the async interrupt thread and lead to a deadlock.
+	 * By assigning a higher priority to the async vector, it will
+	 * always be dispatched.
+	 */
+	mlxp->mlx_async_intr_pri = mlxp->mlx_intr_pri;
+	if (mlxp->mlx_async_intr_pri < LOCK_LEVEL) {
+		mlxp->mlx_async_intr_pri++;
+	} else {
+		mlxp->mlx_intr_pri--;
+	}
+
 	mlxp->mlx_eqs_size = mlxp->mlx_intr_count *
 	    sizeof (mlxcx_event_queue_t);
 	mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP);
@@ -1227,8 +1266,11 @@
 	 * mutex and avl tree to be init'ed - so do it now.
 	 */
 	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
+		uint_t pri = (i == 0) ? mlxp->mlx_async_intr_pri :
+		    mlxp->mlx_intr_pri;
+
 		mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER,
-		    DDI_INTR_PRI(mlxp->mlx_intr_pri));
+		    DDI_INTR_PRI(pri));
 		cv_init(&mlxp->mlx_eqs[i].mleq_cv, NULL, CV_DRIVER, NULL);

 		if (i < mlxp->mlx_intr_cq0)
@@ -1239,9 +1281,19 @@
 		    offsetof(mlxcx_completion_queue_t, mlcq_eq_entry));
 	}

+	ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[0],
+	    mlxp->mlx_async_intr_pri);
+	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to set interrupt priority to %u for "
+		    "async interrupt vector", mlxp->mlx_async_intr_pri);
+		mlxcx_intr_teardown(mlxp);
+		return (B_FALSE);
+	}
+
 	ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_async,
 	    (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]);
 	if (ret != DDI_SUCCESS) {
+		mlxcx_warn(mlxp, "Failed to add async interrupt handler");
 		mlxcx_intr_teardown(mlxp);
 		return (B_FALSE);
 	}
@@ -1268,9 +1320,21 @@
 			eqt = MLXCX_EQ_TYPE_RX;
 		}

+		ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[i],
+		    mlxp->mlx_intr_pri);
+		if (ret != DDI_SUCCESS) {
+			mlxcx_warn(mlxp, "Failed to set interrupt priority to "
+			    "%u for interrupt vector %d", mlxp->mlx_intr_pri,
+			    i);
+			mlxcx_intr_teardown(mlxp);
+			return (B_FALSE);
+		}
+
 		ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i],
 		    mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]);
 		if (ret != DDI_SUCCESS) {
+			mlxcx_warn(mlxp, "Failed to add interrupt handler %d",
+			    i);
 			mlxcx_intr_teardown(mlxp);
 			return (B_FALSE);
 		}
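The mlxcx_gld.c hunk is an instance of a common teardown pattern: detach entries from shared, lock-protected lists onto a private local list while the locks are held, then do the per-buffer work with no locks held. A self-contained sketch of that pattern under stated assumptions — a hand-rolled singly-linked list standing in for illumos list_t, a pthread mutex standing in for the kernel mutex, and all names invented for illustration:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	/*
	 * Toy model of the mlxcx_mac_ring_stop() change: buffers are moved
	 * off the shared list in O(1) while the lock is held, and are only
	 * freed after the lock is dropped.
	 */
	struct buf {
		struct buf	*next;
		int		id;
	};

	static pthread_mutex_t	list_mtx = PTHREAD_MUTEX_INITIALIZER;
	static struct buf	*shared_head;

	static void
	add_buf(int id)
	{
		struct buf *b = malloc(sizeof (*b));

		b->id = id;
		pthread_mutex_lock(&list_mtx);
		b->next = shared_head;
		shared_head = b;
		pthread_mutex_unlock(&list_mtx);
	}

	/* Detach the whole list under the lock (cf. list_move_tail). */
	static struct buf *
	drain_shared(void)
	{
		pthread_mutex_lock(&list_mtx);
		struct buf *local = shared_head;
		shared_head = NULL;
		pthread_mutex_unlock(&list_mtx);
		return (local);
	}

	int
	main(void)
	{
		struct buf *local, *next;
		int i;

		for (i = 0; i < 4; i++)
			add_buf(i);

		/* Slow per-buffer work runs with no locks held. */
		for (local = drain_shared(); local != NULL; local = next) {
			next = local->next;
			printf("returning buf %d\n", local->id);
			free(local);
		}
		return (0);
	}

After the drain, concurrent CQ processing only ever sees the shared lists empty, and the MLXCX_WQ_STARTED check added in mlxcx_intr.c discards any completion that no longer matches a buffer, as the new block comment in the diff notes.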