summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Rosenfeld <rosenfeld@grumpf.hope-2000.org>2021-10-11 20:49:25 +0200
committerHans Rosenfeld <rosenfeld@grumpf.hope-2000.org>2022-04-20 16:41:47 +0200
commit353d89b0745ef752e824c1afc3f0474f66dfbd64 (patch)
tree12aba4e03777e644f3116deb01a19ba2047259b2
parentc542a624b7efda0b8123026500f05f430ff6c770 (diff)
downloadillumos-joyent-353d89b0745ef752e824c1afc3f0474f66dfbd64.tar.gz
14530 nvme should require exclusive open for attach, detach, and format ioctls
Reviewed by: Andrew Giles <agiles@tintri.com> Reviewed by: Guy Morrogh <gmorrogh@tintri.com> Reviewed by: Robert Mustacchi <rm+illumos@fingolfin.org> Approved by: Gordon Ross <gordon.w.ross@gmail.com>
-rw-r--r--usr/src/cmd/nvmeadm/nvmeadm.c96
-rw-r--r--usr/src/cmd/nvmeadm/nvmeadm.h3
-rw-r--r--usr/src/cmd/nvmeadm/nvmeadm_dev.c4
-rw-r--r--usr/src/uts/common/io/nvme/nvme.c86
-rw-r--r--usr/src/uts/common/io/nvme/nvme_var.h12
5 files changed, 134 insertions, 67 deletions
diff --git a/usr/src/cmd/nvmeadm/nvmeadm.c b/usr/src/cmd/nvmeadm/nvmeadm.c
index ee9e26c997..3de4fa0b00 100644
--- a/usr/src/cmd/nvmeadm/nvmeadm.c
+++ b/usr/src/cmd/nvmeadm/nvmeadm.c
@@ -58,9 +58,12 @@ struct nvme_feature {
nvme_version_t *);
};
-#define NVMEADM_CTRL 1
-#define NVMEADM_NS 2
-#define NVMEADM_BOTH (NVMEADM_CTRL | NVMEADM_NS)
+#define NVMEADM_F_CTRL 1
+#define NVMEADM_F_NS 2
+#define NVMEADM_F_BOTH (NVMEADM_F_CTRL | NVMEADM_F_NS)
+
+#define NVMEADM_C_MULTI 1
+#define NVMEADM_C_EXCL 2
struct nvmeadm_cmd {
char *c_name;
@@ -68,8 +71,8 @@ struct nvmeadm_cmd {
const char *c_flagdesc;
int (*c_func)(int, const nvme_process_arg_t *);
void (*c_usage)(const char *);
- boolean_t c_multi;
void (*c_optparse)(nvme_process_arg_t *);
+ int c_flags;
};
@@ -123,119 +126,131 @@ static const nvmeadm_cmd_t nvmeadm_cmds[] = {
"list controllers and namespaces",
" -p\t\tprint parsable output\n"
" -o field\tselect a field for parsable output\n",
- do_list, usage_list, B_TRUE, optparse_list
+ do_list, usage_list, optparse_list,
+ NVMEADM_C_MULTI
},
{
"identify",
"identify controllers and/or namespaces",
NULL,
- do_identify, usage_identify, B_TRUE
+ do_identify, usage_identify, NULL,
+ NVMEADM_C_MULTI
},
{
"get-logpage",
"get a log page from controllers and/or namespaces",
NULL,
- do_get_logpage, usage_get_logpage, B_TRUE
+ do_get_logpage, usage_get_logpage, NULL,
+ NVMEADM_C_MULTI
},
{
"get-features",
"get features from controllers and/or namespaces",
NULL,
- do_get_features, usage_get_features, B_TRUE
+ do_get_features, usage_get_features, NULL,
+ NVMEADM_C_MULTI
},
{
"format",
"format namespace(s) of a controller",
NULL,
- do_format, usage_format, B_FALSE
+ do_format, usage_format, NULL,
+ NVMEADM_C_EXCL
},
{
"secure-erase",
"secure erase namespace(s) of a controller",
" -c Do a cryptographic erase.",
- do_secure_erase, usage_secure_erase, B_FALSE
+ do_secure_erase, usage_secure_erase, NULL,
+ NVMEADM_C_EXCL
},
{
"detach",
"detach blkdev(4D) from namespace(s) of a controller",
NULL,
- do_attach_detach, usage_attach_detach, B_FALSE
+ do_attach_detach, usage_attach_detach, NULL,
+ NVMEADM_C_EXCL
},
{
"attach",
"attach blkdev(4D) to namespace(s) of a controller",
NULL,
- do_attach_detach, usage_attach_detach, B_FALSE
+ do_attach_detach, usage_attach_detach, NULL,
+ NVMEADM_C_EXCL
},
{
"list-firmware",
"list firmware on a controller",
NULL,
- do_get_logpage_fwslot, usage_firmware_list, B_FALSE
+ do_get_logpage_fwslot, usage_firmware_list, NULL,
+ 0
},
{
"load-firmware",
"load firmware to a controller",
NULL,
- do_firmware_load, usage_firmware_load, B_FALSE
+ do_firmware_load, usage_firmware_load, NULL,
+ 0
},
{
"commit-firmware",
"commit downloaded firmware to a slot of a controller",
NULL,
- do_firmware_commit, usage_firmware_commit, B_FALSE
+ do_firmware_commit, usage_firmware_commit, NULL,
+ 0
},
{
"activate-firmware",
"activate a firmware slot of a controller",
NULL,
- do_firmware_activate, usage_firmware_activate, B_FALSE
+ do_firmware_activate, usage_firmware_activate, NULL,
+ 0
},
{
NULL, NULL, NULL,
- NULL, NULL, B_FALSE
+ NULL, NULL, NULL, 0
}
};
static const nvme_feature_t features[] = {
{ "Arbitration", "",
- NVME_FEAT_ARBITRATION, 0, NVMEADM_CTRL,
+ NVME_FEAT_ARBITRATION, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_arbitration },
{ "Power Management", "",
- NVME_FEAT_POWER_MGMT, 0, NVMEADM_CTRL,
+ NVME_FEAT_POWER_MGMT, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_power_mgmt },
{ "LBA Range Type", "range",
- NVME_FEAT_LBA_RANGE, NVME_LBA_RANGE_BUFSIZE, NVMEADM_NS,
+ NVME_FEAT_LBA_RANGE, NVME_LBA_RANGE_BUFSIZE, NVMEADM_F_NS,
do_get_feat_common, nvme_print_feat_lba_range },
{ "Temperature Threshold", "",
- NVME_FEAT_TEMPERATURE, 0, NVMEADM_CTRL,
+ NVME_FEAT_TEMPERATURE, 0, NVMEADM_F_CTRL,
do_get_feat_temp_thresh, nvme_print_feat_temperature },
{ "Error Recovery", "",
- NVME_FEAT_ERROR, 0, NVMEADM_CTRL,
+ NVME_FEAT_ERROR, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_error },
{ "Volatile Write Cache", "cache",
- NVME_FEAT_WRITE_CACHE, 0, NVMEADM_CTRL,
+ NVME_FEAT_WRITE_CACHE, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_write_cache },
{ "Number of Queues", "queues",
- NVME_FEAT_NQUEUES, 0, NVMEADM_CTRL,
+ NVME_FEAT_NQUEUES, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_nqueues },
{ "Interrupt Coalescing", "coalescing",
- NVME_FEAT_INTR_COAL, 0, NVMEADM_CTRL,
+ NVME_FEAT_INTR_COAL, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_intr_coal },
{ "Interrupt Vector Configuration", "vector",
- NVME_FEAT_INTR_VECT, 0, NVMEADM_CTRL,
+ NVME_FEAT_INTR_VECT, 0, NVMEADM_F_CTRL,
do_get_feat_intr_vect, nvme_print_feat_intr_vect },
{ "Write Atomicity", "atomicity",
- NVME_FEAT_WRITE_ATOM, 0, NVMEADM_CTRL,
+ NVME_FEAT_WRITE_ATOM, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_write_atom },
{ "Asynchronous Event Configuration", "event",
- NVME_FEAT_ASYNC_EVENT, 0, NVMEADM_CTRL,
+ NVME_FEAT_ASYNC_EVENT, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_async_event },
{ "Autonomous Power State Transition", "",
- NVME_FEAT_AUTO_PST, NVME_AUTO_PST_BUFSIZE, NVMEADM_CTRL,
+ NVME_FEAT_AUTO_PST, NVME_AUTO_PST_BUFSIZE, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_auto_pst },
{ "Software Progress Marker", "progress",
- NVME_FEAT_PROGRESS, 0, NVMEADM_CTRL,
+ NVME_FEAT_PROGRESS, 0, NVMEADM_F_CTRL,
do_get_feat_common, nvme_print_feat_progress },
{ NULL, NULL, 0, 0, B_FALSE, NULL }
};
@@ -294,6 +309,7 @@ main(int argc, char **argv)
npa.npa_cmd = cmd;
npa.npa_interactive = B_TRUE;
+ npa.npa_excl = ((cmd->c_flags & NVMEADM_C_EXCL) != 0);
optind++;
@@ -334,7 +350,7 @@ main(int argc, char **argv)
* aren't allowed to do that.
*/
if (ctrl != NULL && strchr(ctrl, ',') != NULL &&
- cmd->c_multi == B_FALSE) {
+ (cmd->c_flags & NVMEADM_C_MULTI) == 0) {
warnx("%s not allowed on multiple controllers",
cmd->c_name);
usage(cmd);
@@ -513,7 +529,7 @@ nvme_process(di_node_t node, di_minor_t minor, void *arg)
if (!nvme_match(npa))
return (DI_WALK_CONTINUE);
- if ((fd = nvme_open(minor)) < 0)
+ if ((fd = nvme_open(minor, npa->npa_excl)) < 0)
return (DI_WALK_CONTINUE);
npa->npa_found++;
@@ -851,9 +867,9 @@ usage_get_features(const char *c_name)
for (feat = &features[0]; feat->f_feature != 0; feat++) {
char *type;
- if ((feat->f_getflags & NVMEADM_BOTH) == NVMEADM_BOTH)
+ if ((feat->f_getflags & NVMEADM_F_BOTH) == NVMEADM_F_BOTH)
type = "both";
- else if ((feat->f_getflags & NVMEADM_CTRL) != 0)
+ else if ((feat->f_getflags & NVMEADM_F_CTRL) != 0)
type = "controller only";
else
type = "namespace only";
@@ -1065,9 +1081,9 @@ do_get_features(int fd, const nvme_process_arg_t *npa)
(void) printf("%s: Get Features\n", npa->npa_name);
for (feat = &features[0]; feat->f_feature != 0; feat++) {
if ((npa->npa_isns &&
- (feat->f_getflags & NVMEADM_NS) == 0) ||
+ (feat->f_getflags & NVMEADM_F_NS) == 0) ||
(!npa->npa_isns &&
- (feat->f_getflags & NVMEADM_CTRL) == 0))
+ (feat->f_getflags & NVMEADM_F_CTRL) == 0))
continue;
(void) feat->f_get(fd, feat, npa);
@@ -1101,11 +1117,12 @@ do_get_features(int fd, const nvme_process_arg_t *npa)
}
if ((npa->npa_isns &&
- (feat->f_getflags & NVMEADM_NS) == 0) ||
+ (feat->f_getflags & NVMEADM_F_NS) == 0) ||
(!npa->npa_isns &&
- (feat->f_getflags & NVMEADM_CTRL) == 0)) {
+ (feat->f_getflags & NVMEADM_F_CTRL) == 0)) {
warnx("feature %s %s supported for namespaces",
- feat->f_name, (feat->f_getflags & NVMEADM_NS) != 0 ?
+ feat->f_name,
+ (feat->f_getflags & NVMEADM_F_NS) != 0 ?
"only" : "not");
continue;
}
@@ -1248,6 +1265,7 @@ do_attach_detach(int fd, const nvme_process_arg_t *npa)
ns_npa.npa_name = npa->npa_name;
ns_npa.npa_isns = B_TRUE;
ns_npa.npa_cmd = npa->npa_cmd;
+ ns_npa.npa_excl = npa->npa_excl;
nvme_walk(&ns_npa, npa->npa_node);
diff --git a/usr/src/cmd/nvmeadm/nvmeadm.h b/usr/src/cmd/nvmeadm/nvmeadm.h
index e06cd93189..e6c16a8252 100644
--- a/usr/src/cmd/nvmeadm/nvmeadm.h
+++ b/usr/src/cmd/nvmeadm/nvmeadm.h
@@ -41,6 +41,7 @@ struct nvme_process_arg {
char *npa_name;
char *npa_nsid;
int npa_found;
+ boolean_t npa_excl;
boolean_t npa_isns;
boolean_t npa_ignored;
boolean_t npa_interactive;
@@ -101,7 +102,7 @@ extern void nvme_print_feat_progress(uint64_t, void *, size_t,
extern const char *nvme_fw_error(int, int);
/* device node functions */
-extern int nvme_open(di_minor_t);
+extern int nvme_open(di_minor_t, boolean_t);
extern void nvme_close(int);
extern nvme_version_t *nvme_version(int);
extern nvme_capabilities_t *nvme_capabilities(int);
diff --git a/usr/src/cmd/nvmeadm/nvmeadm_dev.c b/usr/src/cmd/nvmeadm/nvmeadm_dev.c
index 6c837d0d8c..ce86a8d164 100644
--- a/usr/src/cmd/nvmeadm/nvmeadm_dev.c
+++ b/usr/src/cmd/nvmeadm/nvmeadm_dev.c
@@ -241,7 +241,7 @@ nvme_is_ignored_ns(int fd)
}
int
-nvme_open(di_minor_t minor)
+nvme_open(di_minor_t minor, boolean_t excl)
{
char *devpath, *path;
int fd;
@@ -256,7 +256,7 @@ nvme_open(di_minor_t minor)
di_devfs_path_free(devpath);
- fd = open(path, O_RDWR);
+ fd = open(path, O_RDWR | (excl ? O_EXCL: 0));
if (fd < 0) {
if (debug)
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index c1c808ed34..328dae2744 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -109,6 +109,17 @@
* minor nodes are open(9E), close(9E), and ioctl(9E). This serves as the
* interface for the nvmeadm(8) utility.
*
+ * Exclusive opens are required for certain ioctl(9E) operations that alter
+ * controller and/or namespace state. While different namespaces may be opened
+ * exclusively in parallel, an exclusive open of the controller minor node
+ * requires that no namespaces are currently open (exclusive or otherwise).
+ * Opening any namespace minor node (exclusive or otherwise) will fail while
+ * the controller minor node is opened exclusively by any other thread. Thus it
+ * is possible for one thread at a time to open the controller minor node
+ * exclusively, and keep it open while opening any namespace minor node of the
+ * same controller, exclusively or otherwise.
+ *
+ *
*
* Blkdev Interface:
*
@@ -194,8 +205,9 @@
* mutex is non-contentious but is required for implementation completeness
* and safety.
*
- * Each minor node has its own nm_mutex, which protects the open count nm_ocnt
- * and exclusive-open flag nm_oexcl.
+ * There is one mutex n_minor_mutex which protects all open flags nm_open and
+ * exclusive-open thread pointers nm_oexcl of each minor node associated with a
+ * controller and its namespaces.
*
*
* Quiesce / Fast Reboot:
@@ -3167,8 +3179,6 @@ nvme_init(nvme_t *nvme)
nvme->n_namespace_count, KM_SLEEP);
for (i = 0; i != nvme->n_namespace_count; i++) {
- mutex_init(&nvme->n_ns[i].ns_minor.nm_mutex, NULL, MUTEX_DRIVER,
- NULL);
nvme->n_ns[i].ns_ignore = B_TRUE;
if (nvme_init_ns(nvme, i + 1) != DDI_SUCCESS)
goto fail;
@@ -3551,7 +3561,8 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
goto fail;
}
- mutex_init(&nvme->n_minor.nm_mutex, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&nvme->n_minor_mutex, NULL, MUTEX_DRIVER, NULL);
+ nvme->n_progress |= NVME_MUTEX_INIT;
nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
@@ -3772,12 +3783,10 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
ddi_remove_minor_node(dip, "devctl");
- mutex_destroy(&nvme->n_minor.nm_mutex);
if (nvme->n_ns) {
for (i = 0; i != nvme->n_namespace_count; i++) {
ddi_remove_minor_node(dip, nvme->n_ns[i].ns_name);
- mutex_destroy(&nvme->n_ns[i].ns_minor.nm_mutex);
if (nvme->n_ns[i].ns_bd_hdl) {
(void) bd_detach_handle(
@@ -3808,6 +3817,10 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
taskq_wait(nvme->n_cq[i]->ncq_cmd_taskq);
}
+ if (nvme->n_progress & NVME_MUTEX_INIT) {
+ mutex_destroy(&nvme->n_minor_mutex);
+ }
+
if (nvme->n_ioq_count > 0) {
for (i = 1; i != nvme->n_ioq_count + 1; i++) {
if (nvme->n_ioq[i] != NULL) {
@@ -4333,26 +4346,46 @@ nvme_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
if (nvme->n_dead)
return (EIO);
- nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
+ mutex_enter(&nvme->n_minor_mutex);
- mutex_enter(&nm->nm_mutex);
- if (nm->nm_oexcl) {
+ /*
+ * First check the devctl node and error out if it's been opened
+ * exclusively already by any other thread.
+ */
+ if (nvme->n_minor.nm_oexcl != NULL &&
+ nvme->n_minor.nm_oexcl != curthread) {
rv = EBUSY;
goto out;
}
+ nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
+
if (flag & FEXCL) {
- if (nm->nm_ocnt != 0) {
+ if (nm->nm_oexcl != NULL || nm->nm_open) {
rv = EBUSY;
goto out;
}
- nm->nm_oexcl = B_TRUE;
+
+ /*
+ * If at least one namespace is already open, fail the
+ * exclusive open of the devctl node.
+ */
+ if (nsid == 0) {
+ for (int i = 0; i != nvme->n_namespace_count; i++) {
+ if (nvme->n_ns[i].ns_minor.nm_open) {
+ rv = EBUSY;
+ goto out;
+ }
+ }
+ }
+
+ nm->nm_oexcl = curthread;
}
- nm->nm_ocnt++;
+ nm->nm_open = B_TRUE;
out:
- mutex_exit(&nm->nm_mutex);
+ mutex_exit(&nvme->n_minor_mutex);
return (rv);
}
@@ -4380,13 +4413,15 @@ nvme_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
- mutex_enter(&nm->nm_mutex);
- if (nm->nm_oexcl)
- nm->nm_oexcl = B_FALSE;
+ mutex_enter(&nvme->n_minor_mutex);
+ if (nm->nm_oexcl != NULL) {
+ ASSERT(nm->nm_oexcl == curthread);
+ nm->nm_oexcl = NULL;
+ }
- ASSERT(nm->nm_ocnt > 0);
- nm->nm_ocnt--;
- mutex_exit(&nm->nm_mutex);
+ ASSERT(nm->nm_open);
+ nm->nm_open = B_FALSE;
+ mutex_exit(&nvme->n_minor_mutex);
return (0);
}
@@ -4767,10 +4802,15 @@ nvme_ioctl_format(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
_NOTE(ARGUNUSED(mode));
nvme_format_nvm_t frmt = { 0 };
int c_nsid = nsid != 0 ? nsid - 1 : 0;
+ nvme_minor_state_t *nm;
if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
return (EPERM);
+ nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[c_nsid].ns_minor;
+ if (nm->nm_oexcl != curthread)
+ return (EACCES);
+
frmt.r = nioc->n_arg & 0xffffffff;
/*
@@ -4832,6 +4872,9 @@ nvme_ioctl_detach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
if (nsid == 0)
return (EINVAL);
+ if (nvme->n_ns[nsid - 1].ns_minor.nm_oexcl != curthread)
+ return (EACCES);
+
if (nvme->n_ns[nsid - 1].ns_ignore)
return (0);
@@ -4856,6 +4899,9 @@ nvme_ioctl_attach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
if (nsid == 0)
return (EINVAL);
+ if (nvme->n_ns[nsid - 1].ns_minor.nm_oexcl != curthread)
+ return (EACCES);
+
/*
* Identify namespace again, free old identify data.
*/
diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h
index 75039d4bc7..aa351d863f 100644
--- a/usr/src/uts/common/io/nvme/nvme_var.h
+++ b/usr/src/uts/common/io/nvme/nvme_var.h
@@ -10,12 +10,11 @@
*/
/*
- * Copyright 2018 Nexenta Systems, Inc.
* Copyright 2016 The MathWorks, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
- * Copyright 2019 Western Digital Corporation.
* Copyright 2021 Oxide Computer Company.
* Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
*/
#ifndef _NVME_VAR_H
@@ -41,6 +40,7 @@ extern "C" {
#define NVME_CTRL_LIMITS 0x8
#define NVME_INTERRUPTS 0x10
#define NVME_UFM_INIT 0x20
+#define NVME_MUTEX_INIT 0x40
#define NVME_MIN_ADMIN_QUEUE_LEN 16
#define NVME_MIN_IO_QUEUE_LEN 16
@@ -61,9 +61,8 @@ typedef struct nvme_qpair nvme_qpair_t;
typedef struct nvme_task_arg nvme_task_arg_t;
struct nvme_minor_state {
- kmutex_t nm_mutex;
- boolean_t nm_oexcl;
- uint_t nm_ocnt;
+ kthread_t *nm_oexcl;
+ boolean_t nm_open;
};
struct nvme_dma {
@@ -214,6 +213,9 @@ struct nvme {
ksema_t n_abort_sema;
+ /* protects minor node operations */
+ kmutex_t n_minor_mutex;
+
/* state for devctl minor node */
nvme_minor_state_t n_minor;