diff options
author | Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> | 2022-05-03 16:48:51 +0200 |
---|---|---|
committer | Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> | 2022-06-01 16:16:18 +0200 |
commit | 3281964baf91e4061898a5490ee91e126e81116c (patch) | |
tree | ae0e7aa7ae4a73d1cb4cfbddfedd85c4bb008747 | |
parent | e309284527479df5fbac1270f2abd4a739f1ab72 (diff) | |
download | illumos-joyent-3281964baf91e4061898a5490ee91e126e81116c.tar.gz |
14684 want nvme-config-list property
Reviewed by: Andrew Giles <agiles@tintri.com>
Reviewed by: Guy Morrogh <gmorrogh@tintri.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: C Fraire <cfraire@me.com>
Approved by: Dan McDonald <danmcd@mnx.io>
-rw-r--r-- | usr/src/cmd/nvmeadm/nvmeadm_print.c | 2 | ||||
-rw-r--r-- | usr/src/man/man4d/nvme.4d | 99 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme.c | 204 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme.conf | 26 |
4 files changed, 324 insertions, 7 deletions
diff --git a/usr/src/cmd/nvmeadm/nvmeadm_print.c b/usr/src/cmd/nvmeadm/nvmeadm_print.c index 43c15925b2..dacdbfad70 100644 --- a/usr/src/cmd/nvmeadm/nvmeadm_print.c +++ b/usr/src/cmd/nvmeadm/nvmeadm_print.c @@ -245,7 +245,7 @@ nvme_print(int indent, const char *name, int index, const char *fmt, ...) int nvme_strlen(const char *str, int len) { - if (len < 0) + if (len <= 0) return (0); while (str[--len] == ' ') diff --git a/usr/src/man/man4d/nvme.4d b/usr/src/man/man4d/nvme.4d index 877b7c4081..db507b2c49 100644 --- a/usr/src/man/man4d/nvme.4d +++ b/usr/src/man/man4d/nvme.4d @@ -9,11 +9,10 @@ .\" http://www.illumos.org/license/CDDL. .\" .\" -.\" Copyright 2016 Nexenta Systems, Inc. All rights reserved. .\" Copyright (c) 2018, Joyent, Inc. -.\" Copyright 2019, Western Digital Corporation +.\" Copyright 2022 Tintri by DDN, Inc. All rights reserved. .\" -.Dd January 22, 2022 +.Dd May 11, 2022 .Dt NVME 4D .Os .Sh NAME @@ -108,7 +107,99 @@ The number of queues created will not exceed the number of interrupt vectors, .Em max-submission-queues , or the drive's hardware limitation. .El -. +.Pp +In addition to the above properties, some device-specific tunables can be +configured in +.Pa nvme.conf +using the +.Va nvme-config-list +global property. +The value of this property is a list of triplets. +The formal syntax is: +.Bl -column "nvme-config-list" -offset indent +.It nvme-config-list ::= <triplet> +.Op \&, <triplet> Ns +* +; +.It <triplet> ::= +.Qq <model> +\&, +.Qq <rev-list> +\&, +.Qq <tuple-list> +.It <rev-list> ::= +.Op <fwrev> Oo \&, <fwrev> Oc Ns * +.It <tuple-list> ::= +<tunable> +.Op \&, <tunable> Ns +* +.It <tunable> ::= +.Qq <name> +\&: +.Qq <value> +.El +.Pp +The +.Va <model> +and +.Va <fwrev> +are the strings that are returned in the +.Qq Model Number +and +.Qq Firmware Revision +fields, respectively, in the +.Qq Identify Controller +data structure as returned by the +.Qq IDENTIFY +command. 
+Specifying a +.Va <rev-list> +is optional; an empty string +.Po Qo Qc Pc +may be used instead. +.Pp +The +.Va <tuple-list> +contains one or more tunables to apply to all controllers that match the +specified model number and optional firmware revisions. +Each +.Va <tunable> +is a +.Va <name> +: +.Va <value> +pair. +Supported tunable names are: +.Bl -tag -width Va +.It Va ignore-unknown-vendor-status +Similar to the global property, this property accepts a boolean value specified +as either +.Qq on , +.Qq off , +.Qq true , +or +.Qq false +to enable or disable the driver continuing operation even if it receives an +unknown vendor command status from a controller. +.It Va min-phys-block-size +Same as the global property, this is the minimum physical block size that is +reported to +.Xr blkdev 4D . +This value must be a power of 2 greater than or equal to 512. +If the device reports a best block size larger than what is +specified here, the driver will ignore the value specified here. +.It Va volatile-write-cache +Similar to the global property +.Va volatile-write-cache-enable , +this property accepts a boolean value specified as either +.Qq on , +.Qq off , +.Qq true , +or +.Qq false +to enable or disable the volatile write cache, if the hardware supports it. +By default the volatile write cache is enabled. +.El .Sh FILES .Bl -tag -compact -width Pa .It Pa /dev/dsk/cntnd0sn diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c index 3730b31abe..e65a7a4139 100644 --- a/usr/src/uts/common/io/nvme/nvme.c +++ b/usr/src/uts/common/io/nvme/nvme.c @@ -272,6 +272,31 @@ * can be less than max-submission-queues, in which case the completion * queues are shared. * + * In addition to the above properties, some device-specific tunables can be + * configured using the nvme-config-list global property. The value of this + * property is a list of triplets.
The formal syntax is: + * + * nvme-config-list ::= <triplet> [, <triplet>]* ; + * <triplet> ::= "<model>" , "<rev-list>" , "<tuple-list>" + * <rev-list> ::= [ <fwrev> [, <fwrev>]*] + * <tuple-list> ::= <tunable> [, <tunable>]* + * <tunable> ::= <name> : <value> + * + * The <model> and <fwrev> are the strings in nvme_identify_ctrl_t`id_model and + * nvme_identify_ctrl_t`id_fwrev, respectively. The <tuple-list> + * contains one or more tunables to apply to all controllers that match the + * specified model number and optionally firmware revision. Each <tunable> is a + * <name> : <value> pair. Supported tunables are: + * + * - ignore-unknown-vendor-status: can be set to "on" to not handle any vendor- + * specific command status as a fatal error leading to device faulting + * + * - min-phys-block-size: the minimum physical block size to report to blkdev, + * which is among other things the basis for ZFS vdev ashift + * + * - volatile-write-cache: can be set to "on" or "off" to enable or disable the + * volatile write cache, if present + * * * TODO: * - figure out sane default for I/O queue depth reported to blkdev @@ -2698,6 +2723,180 @@ nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce) } } +/* + * Return length of string without trailing spaces.
+ */ +static int +nvme_strlen(const char *str, int len) +{ + if (len <= 0) + return (0); + + while (str[--len] == ' ') + ; + + return (++len); +} + +static void +nvme_config_min_block_size(nvme_t *nvme, char *model, char *val) +{ + ulong_t bsize = 0; + char *msg = ""; + + if (ddi_strtoul(val, NULL, 0, &bsize) != 0) + goto err; + + if (!ISP2(bsize)) { + msg = ": not a power of 2"; + goto err; + } + + if (bsize < NVME_DEFAULT_MIN_BLOCK_SIZE) { + msg = ": too low"; + goto err; + } + + nvme->n_min_block_size = bsize; + return; + +err: + dev_err(nvme->n_dip, CE_WARN, + "!nvme-config-list: ignoring invalid min-phys-block-size '%s' " + "for model '%s'%s", val, model, msg); + + nvme->n_min_block_size = NVME_DEFAULT_MIN_BLOCK_SIZE; +} + +static void +nvme_config_boolean(nvme_t *nvme, char *model, char *name, char *val, + boolean_t *b) +{ + if (strcmp(val, "on") == 0 || + strcmp(val, "true") == 0) + *b = B_TRUE; + else if (strcmp(val, "off") == 0 || + strcmp(val, "false") == 0) + *b = B_FALSE; + else + dev_err(nvme->n_dip, CE_WARN, + "!nvme-config-list: invalid value for %s '%s'" + " for model '%s', ignoring", name, val, model); +} + +static void +nvme_config_list(nvme_t *nvme) +{ + char **config_list; + uint_t nelem; + int rv, i; + + /* + * We're following the pattern of 'sd-config-list' here, but extend it. + * Instead of two we have three separate strings for "model", "fwrev", + * and "name-value-list". 
+ */ + rv = ddi_prop_lookup_string_array(DDI_DEV_T_ANY, nvme->n_dip, + DDI_PROP_DONTPASS, "nvme-config-list", &config_list, &nelem); + + if (rv != DDI_PROP_SUCCESS) { + if (rv == DDI_PROP_CANNOT_DECODE) { + dev_err(nvme->n_dip, CE_WARN, + "!nvme-config-list: cannot be decoded"); + } + + return; + } + + if ((nelem % 3) != 0) { + dev_err(nvme->n_dip, CE_WARN, "!nvme-config-list: must be " + "triplets of <model>/<fwrev>/<name-value-list> strings "); + goto out; + } + + for (i = 0; i < nelem; i += 3) { + char *model = config_list[i]; + char *fwrev = config_list[i + 1]; + char *nvp, *save_nv; + int id_model_len, id_fwrev_len; + + id_model_len = nvme_strlen(nvme->n_idctl->id_model, + sizeof (nvme->n_idctl->id_model)); + + if (strlen(model) != id_model_len) + continue; + + if (strncmp(model, nvme->n_idctl->id_model, id_model_len) != 0) + continue; + + id_fwrev_len = nvme_strlen(nvme->n_idctl->id_fwrev, + sizeof (nvme->n_idctl->id_fwrev)); + + if (strlen(fwrev) != 0) { + boolean_t match = B_FALSE; + char *fwr, *last_fw; + + for (fwr = strtok_r(fwrev, ",", &last_fw); + fwr != NULL; + fwr = strtok_r(NULL, ",", &last_fw)) { + if (strlen(fwr) != id_fwrev_len) + continue; + + if (strncmp(fwr, nvme->n_idctl->id_fwrev, + id_fwrev_len) == 0) + match = B_TRUE; + } + + if (!match) + continue; + } + + /* + * We should now have a comma-separated list of name:value + * pairs. + */ + for (nvp = strtok_r(config_list[i + 2], ",", &save_nv); + nvp != NULL; nvp = strtok_r(NULL, ",", &save_nv)) { + char *name = nvp; + char *val = strchr(nvp, ':'); + + if (val == NULL || name == val) { + dev_err(nvme->n_dip, CE_WARN, + "!nvme-config-list: <name-value-list> " + "for model '%s' is malformed", model); + goto out; + } + + /* + * Null-terminate 'name', move 'val' past ':' sep. + */ + *val++ = '\0'; + + /* + * Process the name:val pairs that we know about. 
+ */ + if (strcmp(name, "ignore-unknown-vendor-status") == 0) { + nvme_config_boolean(nvme, model, name, val, + &nvme->n_ignore_unknown_vendor_status); + } else if (strcmp(name, "min-phys-block-size") == 0) { + nvme_config_min_block_size(nvme, model, val); + } else if (strcmp(name, "volatile-write-cache") == 0) { + nvme_config_boolean(nvme, model, name, val, + &nvme->n_write_cache_enabled); + } else { + /* + * Unknown 'name'. + */ + dev_err(nvme->n_dip, CE_WARN, + "!nvme-config-list: unknown config '%s' " + "for model '%s', ignoring", name, model); + } + } + } + +out: + ddi_prop_free(config_list); +} static void nvme_prepare_devid(nvme_t *nvme, uint32_t nsid) @@ -3089,6 +3288,11 @@ nvme_init(nvme_t *nvme) } /* + * Process nvme-config-list (if present) in nvme.conf. + */ + nvme_config_list(nvme); + + /* * Get Vendor & Product ID */ bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model)); diff --git a/usr/src/uts/common/io/nvme/nvme.conf b/usr/src/uts/common/io/nvme/nvme.conf index 982be2d538..dd6592e1a5 100644 --- a/usr/src/uts/common/io/nvme/nvme.conf +++ b/usr/src/uts/common/io/nvme/nvme.conf @@ -8,8 +8,7 @@ # http://www.illumos.org/license/CDDL. # # -# Copyright 2016 Nexenta Systems, Inc. All rights reserved. -# Copyright 2019 Western Digital Corporation +# Copyright 2022 Tintri by DDN, Inc. All rights reserved. # # @@ -67,3 +66,26 @@ # be a power of 2 greater than or equal to 512. # #min-phys-block-size=512; + +# +# Permit configuration of drive- and firmware-specific tunables. See nvme(4D) +# for a detailed explanation. +# +# Supported tunables are: +# +# * ignore-unknown-vendor-status:{on,off,true,false} +# The driver currently does not support any vendor-specific extension to the +# specification. By default it will fault the device if it receives a vendor- +# specific command status. Setting this to "on" or "true" will disable this +# behaviour. +# +# * min-phys-block-size:<size> +# The minimum physical block size to be reported to blkdev.
This value +# must be a power of 2 greater than or equal to 512. +# +# * volatile-write-cache:{on,off,true,false} +# Enable or disable the Volatile Write Cache, if present. +# +#nvme-config-list= +# "SOME DRIVE MODEL", "FWVER1,FWVER2", +# "min-phys-block-size:8192"; |