author     Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>    2017-02-13 14:44:28 +0100
committer  Hans Rosenfeld <hans.rosenfeld@joyent.com>         2017-09-25 11:11:07 +0200
commit     4b324362d54967a3ccfad8a9b113978ea1b38feb (patch)
tree       12a7d308f7a25d9d7a200e13b6712c52b8e184c4
parent     3c4c5929f4f5a266089758e0d018685b379767ad (diff)
download   illumos-joyent-4b324362d54967a3ccfad8a9b113978ea1b38feb.tar.gz
8628 nvme: use a semaphore to guard submission queue
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r--   usr/src/uts/common/io/nvme/nvme.c       107
-rw-r--r--   usr/src/uts/common/io/nvme/nvme_var.h      4
2 files changed, 46 insertions, 65 deletions
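
The change is easiest to follow as a pattern: the submission queue's free slots are counted by a semaphore initialized to the queue depth; admin submissions block on it (sema_p), I/O submissions try it without blocking and return EAGAIN on failure (sema_tryp), and completion processing releases a slot (sema_v). The sketch below is a minimal userland analogue of that pattern using POSIX semaphores and a pthread mutex; all names in it (queue_t, submit_admin, submit_io, complete_one, QUEUE_DEPTH) are illustrative only and are not part of the driver.

/*
 * Userland analogue of the guard this commit adds: a counting semaphore,
 * initialized to the queue depth, prevents submission queue overrun.
 * Blocking submitters wait on it, non-blocking submitters try it and
 * report EAGAIN, and completion handling frees a slot again.
 */
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

#define	QUEUE_DEPTH	16

typedef struct queue {
	sem_t		q_sema;		/* counts free submission slots */
	pthread_mutex_t	q_mutex;	/* protects the ring itself */
	unsigned	q_active;	/* commands currently in flight */
} queue_t;

static void
queue_init(queue_t *qp)
{
	(void) sem_init(&qp->q_sema, 0, QUEUE_DEPTH);
	(void) pthread_mutex_init(&qp->q_mutex, NULL);
	qp->q_active = 0;
}

/* Common tail: a slot is already reserved, so this can never overrun. */
static void
submit_common(queue_t *qp)
{
	(void) pthread_mutex_lock(&qp->q_mutex);
	qp->q_active++;		/* ...write SQ entry, ring doorbell... */
	(void) pthread_mutex_unlock(&qp->q_mutex);
}

/* Admin path: may block until a slot is free (like sema_p()). */
static void
submit_admin(queue_t *qp)
{
	(void) sem_wait(&qp->q_sema);
	submit_common(qp);
}

/* I/O path: never blocks; the caller retries later (like sema_tryp()). */
static int
submit_io(queue_t *qp)
{
	if (sem_trywait(&qp->q_sema) != 0)
		return (EAGAIN);
	submit_common(qp);
	return (0);
}

/* Completion path: free the slot again (like sema_v()). */
static void
complete_one(queue_t *qp)
{
	(void) pthread_mutex_lock(&qp->q_mutex);
	qp->q_active--;		/* ...read CQ entry, update head doorbell... */
	(void) pthread_mutex_unlock(&qp->q_mutex);
	(void) sem_post(&qp->q_sema);
}

int
main(void)
{
	queue_t q;

	queue_init(&q);
	submit_admin(&q);
	if (submit_io(&q) == EAGAIN)
		(void) printf("queue full, try again later\n");
	complete_one(&q);
	complete_one(&q);
	return (0);
}

With the slot accounting moved into the semaphore, the driver no longer needs to fail submissions when the queue is full, which is why the DDI_FAILURE paths and the n_async_resubmit_failed / n_admin_queue_full counters disappear in the diff below.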
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index b148635e59..bcbe672041 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -44,7 +44,7 @@
*
* Command Processing:
*
- * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up
+ * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
* to 65536 I/O commands. The driver will configure one I/O queue pair per
* available interrupt vector, with the queue length usually much smaller than
* the maximum of 65536. If the hardware doesn't provide enough queues, fewer
@@ -69,7 +69,8 @@
* wraps around in that time a submission may find the next array slot to still
* be used by a long-running command. In this case the array is sequentially
* searched for the next free slot. The length of the command array is the same
- * as the configured queue length.
+ * as the configured queue length. Queue overrun is prevented by the semaphore,
+ * so a command submission may block if the queue is full.
*
*
* Polled I/O Support:
@@ -257,7 +258,9 @@ static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
-static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
@@ -271,7 +274,7 @@ static int nvme_check_generic_cmd_status(nvme_cmd_t *);
static inline int nvme_check_cmd_status(nvme_cmd_t *);
static void nvme_abort_cmd(nvme_cmd_t *);
-static int nvme_async_event(nvme_t *);
+static void nvme_async_event(nvme_t *);
static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
boolean_t, uint8_t);
static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
@@ -721,6 +724,7 @@ nvme_free_qpair(nvme_qpair_t *qp)
int i;
mutex_destroy(&qp->nq_mutex);
+ sema_destroy(&qp->nq_sema);
if (qp->nq_sqdma != NULL)
nvme_free_dma(qp->nq_sqdma);
@@ -746,6 +750,7 @@ nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(nvme->n_intr_pri));
+ sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
@@ -812,18 +817,29 @@ nvme_free_cmd(nvme_cmd_t *cmd)
kmem_cache_free(nvme_cmd_cache, cmd);
}
+static void
+nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+{
+ sema_p(&qp->nq_sema);
+ nvme_submit_cmd_common(qp, cmd);
+}
+
static int
-nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
- nvme_reg_sqtdbl_t tail = { 0 };
+ if (sema_tryp(&qp->nq_sema) == 0)
+ return (EAGAIN);
- mutex_enter(&qp->nq_mutex);
+ nvme_submit_cmd_common(qp, cmd);
+ return (0);
+}
- if (qp->nq_active_cmds == qp->nq_nentry) {
- mutex_exit(&qp->nq_mutex);
- return (DDI_FAILURE);
- }
+static void
+nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+{
+ nvme_reg_sqtdbl_t tail = { 0 };
+ mutex_enter(&qp->nq_mutex);
cmd->nc_completed = B_FALSE;
/*
@@ -849,7 +865,6 @@ nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
mutex_exit(&qp->nq_mutex);
- return (DDI_SUCCESS);
}
static nvme_cmd_t *
@@ -895,6 +910,7 @@ nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
mutex_exit(&qp->nq_mutex);
+ sema_v(&qp->nq_sema);
return (cmd);
}
@@ -1363,7 +1379,6 @@ nvme_async_event_task(void *arg)
nvme_health_log_t *health_log = NULL;
size_t logsize = 0;
nvme_async_event_t event;
- int ret;
/*
* Check for errors associated with the async request itself. The only
@@ -1397,14 +1412,7 @@ nvme_async_event_task(void *arg)
/* Clear CQE and re-submit the async request. */
bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
- ret = nvme_submit_cmd(nvme->n_adminq, cmd);
-
- if (ret != DDI_SUCCESS) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to resubmit async event request");
- atomic_inc_32(&nvme->n_async_resubmit_failed);
- nvme_free_cmd(cmd);
- }
+ nvme_submit_admin_cmd(nvme->n_adminq, cmd);
switch (event.b.ae_type) {
case NVME_ASYNC_TYPE_ERROR:
@@ -1517,19 +1525,8 @@ nvme_async_event_task(void *arg)
static int
nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
{
- int ret;
-
mutex_enter(&cmd->nc_mutex);
- ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);
-
- if (ret != DDI_SUCCESS) {
- mutex_exit(&cmd->nc_mutex);
- dev_err(cmd->nc_nvme->n_dip, CE_WARN,
- "!nvme_submit_cmd failed");
- atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
- nvme_free_cmd(cmd);
- return (DDI_FAILURE);
- }
+ nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
/*
@@ -1543,26 +1540,16 @@ nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
return (DDI_SUCCESS);
}
-static int
+static void
nvme_async_event(nvme_t *nvme)
{
nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
- int ret;
cmd->nc_sqid = 0;
cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
cmd->nc_callback = nvme_async_event_task;
- ret = nvme_submit_cmd(nvme->n_adminq, cmd);
-
- if (ret != DDI_SUCCESS) {
- dev_err(nvme->n_dip, CE_WARN,
- "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
- nvme_free_cmd(cmd);
- return (DDI_FAILURE);
- }
-
- return (DDI_SUCCESS);
+ nvme_submit_admin_cmd(nvme->n_adminq, cmd);
}
static int
@@ -2379,11 +2366,7 @@ nvme_init(nvme_t *nvme)
/*
* Post an asynchronous event command to catch errors.
*/
- if (nvme_async_event(nvme) != DDI_SUCCESS) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to post async event");
- goto fail;
- }
+ nvme_async_event(nvme);
/*
* Identify Controller
@@ -2608,13 +2591,8 @@ nvme_init(nvme_t *nvme)
* Post more asynchronous events commands to reduce event reporting
* latency as suggested by the spec.
*/
- for (i = 1; i != nvme->n_async_event_limit; i++) {
- if (nvme_async_event(nvme) != DDI_SUCCESS) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to post async event %d", i);
- goto fail;
- }
- }
+ for (i = 1; i != nvme->n_async_event_limit; i++)
+ nvme_async_event(nvme);
return (DDI_SUCCESS);
@@ -3278,9 +3256,10 @@ static int
nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
{
nvme_t *nvme = ns->ns_nvme;
- nvme_cmd_t *cmd, *ret;
+ nvme_cmd_t *cmd;
nvme_qpair_t *ioq;
boolean_t poll;
+ int ret;
if (nvme->n_dead)
return (EIO);
@@ -3300,16 +3279,18 @@ nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
*/
poll = (xfer->x_flags & BD_XFER_POLL) != 0;
- if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS)
- return (EAGAIN);
+ ret = nvme_submit_io_cmd(ioq, cmd);
+
+ if (ret != 0)
+ return (ret);
if (!poll)
return (0);
do {
- ret = nvme_retrieve_cmd(nvme, ioq);
- if (ret != NULL)
- nvme_bd_xfer_done(ret);
+ cmd = nvme_retrieve_cmd(nvme, ioq);
+ if (cmd != NULL)
+ nvme_bd_xfer_done(cmd);
else
drv_usecwait(10);
} while (ioq->nq_active_cmds != 0);
diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h
index 651adaec8c..93f0cae064 100644
--- a/usr/src/uts/common/io/nvme/nvme_var.h
+++ b/usr/src/uts/common/io/nvme/nvme_var.h
@@ -12,6 +12,7 @@
/*
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 The MathWorks, Inc. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
*/
#ifndef _NVME_VAR_H
@@ -109,6 +110,7 @@ struct nvme_qpair {
int nq_phase;
kmutex_t nq_mutex;
+ ksema_t nq_sema;
};
struct nvme {
@@ -186,11 +188,9 @@ struct nvme {
uint32_t n_abort_failed;
uint32_t n_cmd_timeout;
uint32_t n_cmd_aborted;
- uint32_t n_async_resubmit_failed;
uint32_t n_wrong_logpage;
uint32_t n_unknown_logpage;
uint32_t n_too_many_cookies;
- uint32_t n_admin_queue_full;
/* errors detected by hardware */
uint32_t n_data_xfr_err;