diff options
author | Hans Rosenfeld <hans.rosenfeld@nexenta.com> | 2015-12-03 18:08:09 +0100 |
---|---|---|
committer | Hans Rosenfeld <hans.rosenfeld@nexenta.com> | 2016-09-29 18:06:56 +0200 |
commit | 24979ca36afa68d08e082148fcbf4c5dc73f7849 (patch) | |
tree | b2b5657e3e1105cc10316001db1cb590f7b698bc /usr/src | |
parent | df8c228748280860358d01a3d32ce256aeb51c38 (diff) | |
download | illumos-joyent-24979ca36afa68d08e082148fcbf4c5dc73f7849.tar.gz |
7382 basic NVMe 1.1 support
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/man/man7d/nvme.7d | 7 | ||||
-rw-r--r-- | usr/src/pkg/manifests/driver-storage-nvme.mf | 6 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme.c | 77 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme_reg.h | 83 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme_var.h | 12 |
5 files changed, 145 insertions, 40 deletions
diff --git a/usr/src/man/man7d/nvme.7d b/usr/src/man/man7d/nvme.7d index 70ae723058..03420fd19d 100644 --- a/usr/src/man/man7d/nvme.7d +++ b/usr/src/man/man7d/nvme.7d @@ -48,7 +48,7 @@ to attach to devices supporting newer version of the NVMe specification. The default value is 1, limiting .Nm -to work with devices up to specification version 1.0. +to work with devices up to specification version 1.1. .It Va ignore-unknown-vendor-status This can be set to 1 to allow .Nm @@ -95,8 +95,9 @@ A controller number, typically one for each device found. Controller numbers are dynamically assigned by the system. .It Va tn -The target number, this corresponds to the namespace ID used by the -hardware. +The target number. +This is the namespace EUI64 if available, or the namespace ID used by +the hardware. Namespace ID 0 is reserved, hence target numbers start with 1. .It Va sn This is the diff --git a/usr/src/pkg/manifests/driver-storage-nvme.mf b/usr/src/pkg/manifests/driver-storage-nvme.mf index 3296a3beef..87b0954b7f 100644 --- a/usr/src/pkg/manifests/driver-storage-nvme.mf +++ b/usr/src/pkg/manifests/driver-storage-nvme.mf @@ -14,7 +14,7 @@ # # -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
# # @@ -25,7 +25,7 @@ <include global_zone_only_component> set name=pkg.fmri value=pkg:/driver/storage/nvme@$(PKGVERS) set name=pkg.description \ - value="Driver for Intel NVMe 1.0e compliant storage devices" + value="Driver for Intel NVMe 1.1b compliant storage devices" set name=pkg.summary value="NVMe driver" set name=info.classification \ value=org.opensolaris.category.2008:System/Hardware @@ -37,7 +37,7 @@ dir path=usr group=sys dir path=usr/share dir path=usr/share/man dir path=usr/share/man/man7d -driver name=nvme alias=pciex8086,953 class=disk perms="* 0600 root sys" +driver name=nvme alias=pciexclass,010802 class=disk perms="* 0600 root sys" file path=kernel/drv/$(ARCH64)/nvme group=sys file path=kernel/drv/nvme group=sys file path=kernel/drv/nvme.conf group=sys diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c index aaa3b5fb73..3fad056534 100644 --- a/usr/src/uts/common/io/nvme/nvme.c +++ b/usr/src/uts/common/io/nvme/nvme.c @@ -18,7 +18,7 @@ /* * blkdev driver for NVMe compliant storage devices * - * This driver was written to conform to version 1.0e of the NVMe specification. + * This driver was written to conform to version 1.1b of the NVMe specification. * It may work with newer versions, but that is completely untested and disabled * by default. * @@ -78,6 +78,11 @@ * thin provisioning and protection information. This driver does not support * any of this and ignores namespaces that have these attributes. * + * As of NVMe 1.1 namespaces can have a 64-bit Extended Unique Identifier + * (EUI64). This driver uses the EUI64 if present to generate the devid and + * passes it to blkdev to use it in the device node names. As this is currently + * untested, namespaces with EUI64 are ignored by default. + * * * Blkdev Interface: * @@ -93,8 +98,9 @@ * Blkdev also supports querying device/media information and generating a * devid. 
The driver reports the best block size as determined by the namespace * format back to blkdev as physical block size to support partition and block - * alignment. The devid is composed using the device vendor ID, model number, - * serial number, and the namespace ID. + * alignment. The devid is either based on the namespace EUI64, if present, or + * composed using the device vendor ID, model number, serial number, and the + * namespace ID. * * * Error Handling: @@ -140,7 +146,7 @@ * The following driver properties can be changed to control some aspects of the * drivers operation: * - strict-version: can be set to 0 to allow devices conforming to newer - * versions to be used + * versions or namespaces with EUI64 to be used * - ignore-unknown-vendor-status: can be set to 1 to not handle any vendor * specific command status as a fatal error leading device faulting * - admin-queue-len: the maximum length of the admin queue (16-4096) @@ -163,6 +169,11 @@ * - support for media formatting and hard partitioning into namespaces * - support for big-endian systems * - support for fast reboot + * - support for firmware updates + * - support for NVMe Subsystem Reset (1.1) + * - support for Scatter/Gather lists (1.1) + * - support for Reservations (1.1) + * - support for power management */ #include <sys/byteorder.h> @@ -192,7 +203,7 @@ /* NVMe spec version supported */ static const int nvme_version_major = 1; -static const int nvme_version_minor = 0; +static const int nvme_version_minor = 1; /* tunable for admin command timeout in seconds, default is 1s */ static volatile int nvme_admin_cmd_timeout = 1; @@ -360,7 +371,7 @@ static struct dev_ops nvme_dev_ops = { static struct modldrv nvme_modldrv = { .drv_modops = &mod_driverops, - .drv_linkinfo = "NVMe v1.0e", + .drv_linkinfo = "NVMe v1.1b", .drv_dev_ops = &nvme_dev_ops }; @@ -1828,6 +1839,14 @@ nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce) static void nvme_prepare_devid(nvme_t *nvme, uint32_t nsid) { + /* + * 
Section 7.7 of the spec describes how to get a unique ID for + * the controller: the vendor ID, the model name and the serial + * number shall be unique when combined. + * + * If a namespace has no EUI64 we use the above and add the hex + * namespace ID to get a unique ID for the namespace. + */ char model[sizeof (nvme->n_idctl->id_model) + 1]; char serial[sizeof (nvme->n_idctl->id_serial) + 1]; @@ -1838,8 +1857,7 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid) model[sizeof (nvme->n_idctl->id_model)] = '\0'; serial[sizeof (nvme->n_idctl->id_serial)] = '\0'; - (void) snprintf(nvme->n_ns[nsid - 1].ns_devid, - sizeof (nvme->n_ns[0].ns_devid), "%4X-%s-%s-%X", + nvme->n_ns[nsid - 1].ns_devid = kmem_asprintf("%4X-%s-%s-%X", nvme->n_idctl->id_vid, model, serial, nsid); } @@ -1860,12 +1878,13 @@ nvme_init(nvme_t *nvme) /* Check controller version */ vs.r = nvme_get32(nvme, NVME_REG_VS); + nvme->n_version.v_major = vs.b.vs_mjr; + nvme->n_version.v_minor = vs.b.vs_mnr; dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d", - vs.b.vs_mjr, vs.b.vs_mnr); + nvme->n_version.v_major, nvme->n_version.v_minor); - if (nvme_version_major < vs.b.vs_mjr || - (nvme_version_major == vs.b.vs_mjr && - nvme_version_minor < vs.b.vs_mnr)) { + if (NVME_VERSION_HIGHER(&nvme->n_version, + nvme_version_major, nvme_version_minor)) { dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d", nvme_version_major, nvme_version_minor); if (nvme->n_strict_version) @@ -2164,7 +2183,26 @@ nvme_init(nvme_t *nvme) 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads; nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size; - nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id); + /* + * Get the EUI64 if present. If not present prepare the devid + * from other device data. 
+ */ + if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1)) + bcopy(idns->id_eui64, nvme->n_ns[i].ns_eui64, + sizeof (nvme->n_ns[i].ns_eui64)); + + /*LINTED: E_BAD_PTR_CAST_ALIGN*/ + if (*(uint64_t *)nvme->n_ns[i].ns_eui64 == 0) { + nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id); + } else { + /* + * Until EUI64 support is tested on real hardware we + * will ignore namespaces with an EUI64. This can + * be overridden by setting strict-version=0 in nvme.conf + */ + if (nvme->n_strict_version) + nvme->n_ns[i].ns_ignore = B_TRUE; + } /* * Find the LBA format with no metadata and the best relative @@ -2662,6 +2700,8 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) if (nvme->n_ns[i].ns_idns) kmem_free(nvme->n_ns[i].ns_idns, sizeof (nvme_identify_nsid_t)); + if (nvme->n_ns[i].ns_devid) + strfree(nvme->n_ns[i].ns_devid); } kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) * @@ -2889,6 +2929,7 @@ nvme_bd_driveinfo(void *arg, bd_drive_t *drive) drive->d_removable = B_FALSE; drive->d_hotpluggable = B_FALSE; + bcopy(ns->ns_eui64, drive->d_eui64, sizeof (drive->d_eui64)); drive->d_target = ns->ns_id; drive->d_lun = 0; @@ -2992,6 +3033,12 @@ nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid) { nvme_namespace_t *ns = arg; - return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid), - ns->ns_devid, devid)); + /*LINTED: E_BAD_PTR_CAST_ALIGN*/ + if (*(uint64_t *)ns->ns_eui64 != 0) { + return (ddi_devid_init(devinfo, DEVID_SCSI3_WWN, + sizeof (ns->ns_eui64), ns->ns_eui64, devid)); + } else { + return (ddi_devid_init(devinfo, DEVID_ENCAP, + strlen(ns->ns_devid), ns->ns_devid, devid)); + } } diff --git a/usr/src/uts/common/io/nvme/nvme_reg.h b/usr/src/uts/common/io/nvme/nvme_reg.h index 9c4259fa88..3e4b77079b 100644 --- a/usr/src/uts/common/io/nvme/nvme_reg.h +++ b/usr/src/uts/common/io/nvme/nvme_reg.h @@ -33,6 +33,22 @@ extern "C" { #define NVME_MAX_ADMIN_QUEUE_LEN 4096 /* + * NVMe version + */ +typedef struct { + uint16_t v_minor; + uint16_t v_major; +} 
nvme_version_t; + +#define NVME_VERSION_ATLEAST(v, maj, min) \ + (((v)->v_major) > (maj) || \ + ((v)->v_major == (maj) && (v)->v_minor >= (min))) + +#define NVME_VERSION_HIGHER(v, maj, min) \ + (((v)->v_major) > (maj) || \ + ((v)->v_major == (maj) && (v)->v_minor > (min))) + +/* * NVMe registers and register fields */ #define NVME_REG_CAP 0x0 /* Controller Capabilities */ @@ -403,18 +419,21 @@ typedef struct { /* NVMe Power State Descriptor */ typedef struct { uint16_t psd_mp; /* Maximum Power */ - uint16_t psd_rsvd1; + uint8_t psd_rsvd1; + uint8_t psd_mps:1; /* Max Power Scale (1.1) */ + uint8_t psd_nops:1; /* Non-Operational State (1.1) */ + uint8_t psd_rsvd2:6; uint32_t psd_enlat; /* Entry Latency */ uint32_t psd_exlat; /* Exit Latency */ uint8_t psd_rrt:5; /* Relative Read Throughput */ - uint8_t psd_rsvd2:3; - uint8_t psd_rrl:5; /* Relative Read Latency */ uint8_t psd_rsvd3:3; + uint8_t psd_rrl:5; /* Relative Read Latency */ + uint8_t psd_rsvd4:3; uint8_t psd_rwt:5; /* Relative Write Throughput */ - uint8_t psd_rsvd4:3; + uint8_t psd_rsvd5:3; uint8_t psd_rwl:5; /* Relative Write Latency */ - uint8_t psd_rsvd5:3; - uint8_t psd_rsvd6[16]; + uint8_t psd_rsvd6:3; + uint8_t psd_rsvd7[16]; } nvme_idctl_psd_t; /* NVMe Identify Controller Data Structure */ @@ -428,11 +447,14 @@ typedef struct { uint8_t id_rab; /* Recommended Arbitration Burst */ uint8_t id_oui[3]; /* vendor IEEE OUI */ struct { /* Multi-Interface Capabilities */ - uint8_t m_multi:1; /* HW has multiple PCIe interfaces */ - uint8_t m_rsvd:7; + uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */ + uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */ + uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */ + uint8_t m_rsvd:5; } id_mic; uint8_t id_mdts; /* Maximum Data Transfer Size */ - uint8_t id_rsvd_cc[256 - 78]; + uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */ + uint8_t id_rsvd_cc[256 - 80]; /* Admin Command Set Attributes */ struct { /* Optional Admin Command 
Support */ @@ -458,7 +480,11 @@ typedef struct { uint8_t av_spec:1; /* use format from spec */ uint8_t av_rsvd:7; } id_avscc; - uint8_t id_rsvd_ac[256 - 9]; + struct { /* Autonomous Power State Trans (1.1) */ + uint8_t ap_sup:1; /* APST supported (1.1) */ + uint8_t ap_rsvd:7; + } id_apsta; + uint8_t id_rsvd_ac[256 - 10]; /* NVM Command Set Attributes */ nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */ @@ -469,7 +495,10 @@ typedef struct { uint16_t on_compare:1; /* Compare */ uint16_t on_wr_unc:1; /* Write Uncorrectable */ uint16_t on_dset_mgmt:1; /* Dataset Management */ - uint16_t on_rsvd:13; + uint16_t on_wr_zero:1; /* Write Zeros (1.1) */ + uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */ + uint16_t on_reserve:1; /* Reservations (1.1) */ + uint16_t on_rsvd:10; } id_oncs; struct { /* Fused Operation Support */ uint16_t f_cmp_wr:1; /* Compare and Write */ @@ -491,7 +520,16 @@ typedef struct { uint8_t nv_spec:1; /* use format from spec */ uint8_t nv_rsvd:7; } id_nvscc; - uint8_t id_rsvd_nc_2[192 - 19]; + uint8_t id_rsvd_nc_2; + uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */ + uint16_t id_rsvd_nc_3; + struct { /* SGL Support (1.1) */ + uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */ + uint16_t sgl_rsvd1:15; + uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */ + uint16_t sgl_rsvd2:15; + } id_sgls; + uint8_t id_rsvd_nc_4[192 - 28]; /* I/O Command Set Attributes */ uint8_t id_rsvd_ioc[1344]; @@ -537,12 +575,29 @@ typedef struct { uint8_t dp_type3:1; /* Protection Information Type 3 */ uint8_t dp_first:1; /* first 8 bytes of metadata */ uint8_t dp_last:1; /* last 8 bytes of metadata */ + uint8_t dp_rsvd:3; } id_dpc; struct { /* Data Protection Settings */ uint8_t dp_pinfo:3; /* Protection Information enabled */ uint8_t dp_first:1; /* first 8 bytes of metadata */ + uint8_t dp_rsvd:4; } id_dps; - uint8_t id_rsvd1[128 - 30]; + struct { /* NS Multi-Path/Sharing Cap (1.1) */ + uint8_t nm_shared:1; /* NS is shared (1.1) */ + 
uint8_t nm_rsvd:7; + } id_nmic; + struct { /* Reservation Capabilities (1.1) */ + uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */ + uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */ + uint8_t rc_excl:1; /* Exclusive Access (1.1) */ + uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */ + uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */ + uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */ + uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */ + uint8_t rc_rsvd:1; + } id_rescap; + uint8_t id_rsvd1[120 - 32]; + uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */ nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */ uint8_t id_rsvd2[192]; @@ -577,6 +632,8 @@ typedef union { #define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */ #define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */ #define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */ +#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */ + /* (1.1) */ #define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */ diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h index 0ddb1649db..fd6f93af88 100644 --- a/usr/src/uts/common/io/nvme/nvme_var.h +++ b/usr/src/uts/common/io/nvme/nvme_var.h @@ -124,6 +124,7 @@ struct nvme { char *n_product; char *n_vendor; + nvme_version_t n_version; boolean_t n_dead; boolean_t n_strict_version; boolean_t n_ignore_unknown_vendor_status; @@ -215,6 +216,8 @@ struct nvme { struct nvme_namespace { nvme_t *ns_nvme; + uint8_t ns_eui64[8]; + bd_handle_t ns_bd_hdl; uint32_t ns_id; @@ -227,13 +230,10 @@ struct nvme_namespace { nvme_identify_nsid_t *ns_idns; /* - * Section 7.7 of the spec describes how to get a unique ID for - * the controller: the vendor ID, the model name and the serial - * number shall be unique when combined. - * - * We add the hex namespace ID to get a unique ID for the namespace. 
+ * If a namespace has no EUI64, we create a devid in + * nvme_prepare_devid(). */ - char ns_devid[4 + 1 + 20 + 1 + 40 + 1 + 8 + 1]; + char *ns_devid; }; struct nvme_task_arg { |