summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Rosenfeld <rosenfeld@grumpf.hope-2000.org>2022-05-03 16:48:51 +0200
committerHans Rosenfeld <rosenfeld@grumpf.hope-2000.org>2022-06-01 16:16:18 +0200
commit3281964baf91e4061898a5490ee91e126e81116c (patch)
treeae0e7aa7ae4a73d1cb4cfbddfedd85c4bb008747
parente309284527479df5fbac1270f2abd4a739f1ab72 (diff)
downloadillumos-joyent-3281964baf91e4061898a5490ee91e126e81116c.tar.gz
14684 want nvme-config-list property
Reviewed by: Andrew Giles <agiles@tintri.com> Reviewed by: Guy Morrogh <gmorrogh@tintri.com> Reviewed by: Toomas Soome <tsoome@me.com> Reviewed by: C Fraire <cfraire@me.com> Approved by: Dan McDonald <danmcd@mnx.io>
-rw-r--r--usr/src/cmd/nvmeadm/nvmeadm_print.c2
-rw-r--r--usr/src/man/man4d/nvme.4d99
-rw-r--r--usr/src/uts/common/io/nvme/nvme.c204
-rw-r--r--usr/src/uts/common/io/nvme/nvme.conf26
4 files changed, 324 insertions, 7 deletions
diff --git a/usr/src/cmd/nvmeadm/nvmeadm_print.c b/usr/src/cmd/nvmeadm/nvmeadm_print.c
index 43c15925b2..dacdbfad70 100644
--- a/usr/src/cmd/nvmeadm/nvmeadm_print.c
+++ b/usr/src/cmd/nvmeadm/nvmeadm_print.c
@@ -245,7 +245,7 @@ nvme_print(int indent, const char *name, int index, const char *fmt, ...)
int
nvme_strlen(const char *str, int len)
{
- if (len < 0)
+ if (len <= 0)
return (0);
while (str[--len] == ' ')
diff --git a/usr/src/man/man4d/nvme.4d b/usr/src/man/man4d/nvme.4d
index 877b7c4081..db507b2c49 100644
--- a/usr/src/man/man4d/nvme.4d
+++ b/usr/src/man/man4d/nvme.4d
@@ -9,11 +9,10 @@
.\" http://www.illumos.org/license/CDDL.
.\"
.\"
-.\" Copyright 2016 Nexenta Systems, Inc. All rights reserved.
.\" Copyright (c) 2018, Joyent, Inc.
-.\" Copyright 2019, Western Digital Corporation
+.\" Copyright 2022 Tintri by DDN, Inc. All rights reserved.
.\"
-.Dd January 22, 2022
+.Dd May 11, 2022
.Dt NVME 4D
.Os
.Sh NAME
@@ -108,7 +107,99 @@ The number of queues created will not exceed the number of interrupt vectors,
.Em max-submission-queues ,
or the drive's hardware limitation.
.El
-.
+.Pp
+In addition to the above properties, some device-specific tunables can be
+configured in
+.Pa nvme.conf
+using the
+.Va nvme-config-list
+global property.
+The value of this property is a list of triplets.
+The formal syntax is:
+.Bl -column "nvme-config-list" -offset indent
+.It nvme-config-list ::= <triplet>
+.Op \&, <triplet> Ns
+*
+;
+.It <triplet> ::=
+.Qq <model>
+\&,
+.Qq <rev-list>
+\&,
+.Qq <tuple-list>
+.It <rev-list> ::=
+.Op <fwrev> Oo \&, <fwrev> Oc Ns *
+.It <tuple-list> ::=
+<tunable>
+.Op \&, <tunable> Ns
+*
+.It <tunable> ::=
+.Qq <name>
+\&:
+.Qq <value>
+.El
+.Pp
+The
+.Va <model>
+and
+.Va <fwrev>
+are the strings that are returned in the
+.Qq Model Number
+and
+.Qq Firmware Revision
+fields, respectively, in the
+.Qq Identify Controller
+data structure as returned by the
+.Qq IDENTIFY
+command.
+Specifying a
+.Va <rev-list>
+is optional; an empty string
+.Po Qo Qc Pc
+may be used instead.
+.Pp
+The
+.Va <tuple-list>
+contains one or more tunables to apply to all controllers that match the
+specified model number and optional firmware revisions.
+Each
+.Va <tunable>
+is a
+.Va <name>
+:
+.Va <value>
+pair.
+Supported tunable names are:
+.Bl -tag -width Va
+.It Va ignore-unknown-vendor-status
+Similar to the global property, this property accepts a boolean value specified
+as either
+.Qq on ,
+.Qq off ,
+.Qq true ,
+or
+.Qq false
+to enable or disable the driver continuing operation even if it receives an
+unknown vendor command status from a controller.
+.It Va min-phys-block-size
+Same as the global property, this is the minimum physical block size that is
+reported to
+.Xr blkdev 4D .
+This value must be a power of 2 greater than or equal to 512.
+If the device reports a best block size larger than what is
+specified here, the driver will ignore the value specified here.
+.It Va volatile-write-cache
+Similar to the global property
+.Va volatile-write-cache-enable ,
+this property accepts a boolean value specified as either
+.Qq on ,
+.Qq off ,
+.Qq true ,
+or
+.Qq false
+to enable or disable the volatile write cache, if the hardware supports it.
+By default the volatile write cache is enabled.
+.El
.Sh FILES
.Bl -tag -compact -width Pa
.It Pa /dev/dsk/cntnd0sn
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index 3730b31abe..e65a7a4139 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -272,6 +272,31 @@
* can be less than max-submission-queues, in which case the completion
* queues are shared.
*
+ * In addition to the above properties, some device-specific tunables can be
+ * configured using the nvme-config-list global property. The value of this
+ * property is a list of triplets. The formal syntax is:
+ *
+ * nvme-config-list ::= <triplet> [, <triplet>]* ;
+ * <triplet> ::= "<model>" , "<rev-list>" , "<tuple-list>"
+ * <rev-list> ::= [ <fwrev> [, <fwrev>]*]
+ * <tuple-list> ::= <tunable> [, <tunable>]*
+ * <tunable> ::= <name> : <value>
+ *
+ * The <model> and <fwrev> are the strings in nvme_identify_ctrl_t`id_model and
+ * nvme_identify_ctrl_t`id_fwrev, respectively. The <tuple-list> contains one
+ * or more tunables to apply to all controllers that match the specified model
+ * number and, optionally, firmware revision. Each <tunable> is a
+ * <name> : <value> pair. Supported tunables are:
+ *
+ * - ignore-unknown-vendor-status: can be set to "on" to not handle any vendor
+ * specific command status as a fatal error leading to the device being faulted
+ *
+ * - min-phys-block-size: the minimum physical block size to report to blkdev,
+ * which is among other things the basis for ZFS vdev ashift
+ *
+ * - volatile-write-cache: can be set to "on" or "off" to enable or disable the
+ * volatile write cache, if present
+ *
*
* TODO:
* - figure out sane default for I/O queue depth reported to blkdev
@@ -2698,6 +2723,180 @@ nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce)
}
}
/*
 * Return the length of the first 'len' bytes of 'str' once trailing spaces
 * have been removed.
 *
 * 'str' is treated as a fixed-width field rather than a C string: it is never
 * scanned for a terminating NUL, only the bytes str[0] .. str[len - 1] are
 * examined. This driver uses it on the id_model and id_fwrev identify fields.
 *
 * Returns 0 if 'len' is not positive or if the field consists solely of
 * spaces; otherwise the index one past the last non-space byte.
 */
static int
nvme_strlen(const char *str, int len)
{
	if (len <= 0)
		return (0);

	/*
	 * Stop at the start of the field. Without the bound, a field made up
	 * entirely of spaces would walk 'len' below zero and read memory in
	 * front of 'str'.
	 */
	while (len > 0 && str[len - 1] == ' ')
		len--;

	return (len);
}
+
+static void
+nvme_config_min_block_size(nvme_t *nvme, char *model, char *val)
+{
+ ulong_t bsize = 0;
+ char *msg = "";
+
+ if (ddi_strtoul(val, NULL, 0, &bsize) != 0)
+ goto err;
+
+ if (!ISP2(bsize)) {
+ msg = ": not a power of 2";
+ goto err;
+ }
+
+ if (bsize < NVME_DEFAULT_MIN_BLOCK_SIZE) {
+ msg = ": too low";
+ goto err;
+ }
+
+ nvme->n_min_block_size = bsize;
+ return;
+
+err:
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme-config-list: ignoring invalid min-phys-block-size '%s' "
+ "for model '%s'%s", val, model, msg);
+
+ nvme->n_min_block_size = NVME_DEFAULT_MIN_BLOCK_SIZE;
+}
+
+static void
+nvme_config_boolean(nvme_t *nvme, char *model, char *name, char *val,
+ boolean_t *b)
+{
+ if (strcmp(val, "on") == 0 ||
+ strcmp(val, "true") == 0)
+ *b = B_TRUE;
+ else if (strcmp(val, "off") == 0 ||
+ strcmp(val, "false") == 0)
+ *b = B_FALSE;
+ else
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme-config-list: invalid value for %s '%s'"
+ " for model '%s', ignoring", name, val, model);
+}
+
+static void
+nvme_config_list(nvme_t *nvme)
+{
+ char **config_list;
+ uint_t nelem;
+ int rv, i;
+
+ /*
+ * We're following the pattern of 'sd-config-list' here, but extend it.
+ * Instead of two we have three separate strings for "model", "fwrev",
+ * and "name-value-list".
+ */
+ rv = ddi_prop_lookup_string_array(DDI_DEV_T_ANY, nvme->n_dip,
+ DDI_PROP_DONTPASS, "nvme-config-list", &config_list, &nelem);
+
+ if (rv != DDI_PROP_SUCCESS) {
+ if (rv == DDI_PROP_CANNOT_DECODE) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme-config-list: cannot be decoded");
+ }
+
+ return;
+ }
+
+ if ((nelem % 3) != 0) {
+ dev_err(nvme->n_dip, CE_WARN, "!nvme-config-list: must be "
+ "triplets of <model>/<fwrev>/<name-value-list> strings ");
+ goto out;
+ }
+
+ for (i = 0; i < nelem; i += 3) {
+ char *model = config_list[i];
+ char *fwrev = config_list[i + 1];
+ char *nvp, *save_nv;
+ int id_model_len, id_fwrev_len;
+
+ id_model_len = nvme_strlen(nvme->n_idctl->id_model,
+ sizeof (nvme->n_idctl->id_model));
+
+ if (strlen(model) != id_model_len)
+ continue;
+
+ if (strncmp(model, nvme->n_idctl->id_model, id_model_len) != 0)
+ continue;
+
+ id_fwrev_len = nvme_strlen(nvme->n_idctl->id_fwrev,
+ sizeof (nvme->n_idctl->id_fwrev));
+
+ if (strlen(fwrev) != 0) {
+ boolean_t match = B_FALSE;
+ char *fwr, *last_fw;
+
+ for (fwr = strtok_r(fwrev, ",", &last_fw);
+ fwr != NULL;
+ fwr = strtok_r(NULL, ",", &last_fw)) {
+ if (strlen(fwr) != id_fwrev_len)
+ continue;
+
+ if (strncmp(fwr, nvme->n_idctl->id_fwrev,
+ id_fwrev_len) == 0)
+ match = B_TRUE;
+ }
+
+ if (!match)
+ continue;
+ }
+
+ /*
+ * We should now have a comma-separated list of name:value
+ * pairs.
+ */
+ for (nvp = strtok_r(config_list[i + 2], ",", &save_nv);
+ nvp != NULL; nvp = strtok_r(NULL, ",", &save_nv)) {
+ char *name = nvp;
+ char *val = strchr(nvp, ':');
+
+ if (val == NULL || name == val) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme-config-list: <name-value-list> "
+ "for model '%s' is malformed", model);
+ goto out;
+ }
+
+ /*
+ * Null-terminate 'name', move 'val' past ':' sep.
+ */
+ *val++ = '\0';
+
+ /*
+ * Process the name:val pairs that we know about.
+ */
+ if (strcmp(name, "ignore-unknown-vendor-status") == 0) {
+ nvme_config_boolean(nvme, model, name, val,
+ &nvme->n_ignore_unknown_vendor_status);
+ } else if (strcmp(name, "min-phys-block-size") == 0) {
+ nvme_config_min_block_size(nvme, model, val);
+ } else if (strcmp(name, "volatile-write-cache") == 0) {
+ nvme_config_boolean(nvme, model, name, val,
+ &nvme->n_write_cache_enabled);
+ } else {
+ /*
+ * Unknown 'name'.
+ */
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme-config-list: unknown config '%s' "
+ "for model '%s', ignoring", name, model);
+ }
+ }
+ }
+
+out:
+ ddi_prop_free(config_list);
+}
static void
nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
@@ -3089,6 +3288,11 @@ nvme_init(nvme_t *nvme)
}
/*
+ * Process nvme-config-list (if present) in nvme.conf.
+ */
+ nvme_config_list(nvme);
+
+ /*
* Get Vendor & Product ID
*/
bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
diff --git a/usr/src/uts/common/io/nvme/nvme.conf b/usr/src/uts/common/io/nvme/nvme.conf
index 982be2d538..dd6592e1a5 100644
--- a/usr/src/uts/common/io/nvme/nvme.conf
+++ b/usr/src/uts/common/io/nvme/nvme.conf
@@ -8,8 +8,7 @@
# http://www.illumos.org/license/CDDL.
#
#
-# Copyright 2016 Nexenta Systems, Inc. All rights reserved.
-# Copyright 2019 Western Digital Corporation
+# Copyright 2022 Tintri by DDN, Inc. All rights reserved.
#
#
@@ -67,3 +66,26 @@
# be a power of 2 greater than or equal to 512.
#
#min-phys-block-size=512;
+
+#
+# Permit configuration of drive- and firmware-specific tunables. See nvme(4D)
+# for a detailed explanation.
+#
+# Supported tunables are:
+#
+# * ignore-unknown-vendor-status:{on,off,true,false}
+# The driver currently does not support any vendor-specific extensions to
+# the specification. By default it will fault the device if it receives a
+# vendor-specific command status. Setting this to "on" or "true" will
+# disable this behaviour.
+#
+# * min-phys-block-size:<size>
+# The minimum physical block size to be reported to blkdev. This value
+# must be a power of 2 greater than or equal to 512.
+#
+# * volatile-write-cache:{on,off,true,false}
+# Enable or disable the Volatile Write Cache, if present.
+#
+#nvme-config-list=
+# "SOME DRIVE MODEL", "FWVER1,FWVER2",
+# "min-phys-block-size:8192";