summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorHans Rosenfeld <hans.rosenfeld@nexenta.com>2015-12-03 18:08:09 +0100
committerHans Rosenfeld <hans.rosenfeld@nexenta.com>2016-09-29 18:06:56 +0200
commit24979ca36afa68d08e082148fcbf4c5dc73f7849 (patch)
treeb2b5657e3e1105cc10316001db1cb590f7b698bc /usr/src
parentdf8c228748280860358d01a3d32ce256aeb51c38 (diff)
downloadillumos-joyent-24979ca36afa68d08e082148fcbf4c5dc73f7849.tar.gz
7382 basic NVMe 1.1 support
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Reviewed by: Rick McNeal <rick.mcneal@nexenta.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/man/man7d/nvme.7d7
-rw-r--r--usr/src/pkg/manifests/driver-storage-nvme.mf6
-rw-r--r--usr/src/uts/common/io/nvme/nvme.c77
-rw-r--r--usr/src/uts/common/io/nvme/nvme_reg.h83
-rw-r--r--usr/src/uts/common/io/nvme/nvme_var.h12
5 files changed, 145 insertions, 40 deletions
diff --git a/usr/src/man/man7d/nvme.7d b/usr/src/man/man7d/nvme.7d
index 70ae723058..03420fd19d 100644
--- a/usr/src/man/man7d/nvme.7d
+++ b/usr/src/man/man7d/nvme.7d
@@ -48,7 +48,7 @@ to attach to devices supporting newer version of the NVMe
specification.
The default value is 1, limiting
.Nm
-to work with devices up to specification version 1.0.
+to work with devices up to specification version 1.1.
.It Va ignore-unknown-vendor-status
This can be set to 1 to allow
.Nm
@@ -95,8 +95,9 @@ A controller number, typically one for each
device found.
Controller numbers are dynamically assigned by the system.
.It Va tn
-The target number, this corresponds to the namespace ID used by the
-hardware.
+The target number.
+This is the namespace EUI64 if available, or the namespace ID used by
+the hardware.
Namespace ID 0 is reserved, hence target numbers start with 1.
.It Va sn
This is the
diff --git a/usr/src/pkg/manifests/driver-storage-nvme.mf b/usr/src/pkg/manifests/driver-storage-nvme.mf
index 3296a3beef..87b0954b7f 100644
--- a/usr/src/pkg/manifests/driver-storage-nvme.mf
+++ b/usr/src/pkg/manifests/driver-storage-nvme.mf
@@ -14,7 +14,7 @@
#
#
-# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2016 Nexenta Systems, Inc. All rights reserved.
#
#
@@ -25,7 +25,7 @@
<include global_zone_only_component>
set name=pkg.fmri value=pkg:/driver/storage/nvme@$(PKGVERS)
set name=pkg.description \
- value="Driver for Intel NVMe 1.0e compliant storage devices"
+ value="Driver for Intel NVMe 1.1b compliant storage devices"
set name=pkg.summary value="NVMe driver"
set name=info.classification \
value=org.opensolaris.category.2008:System/Hardware
@@ -37,7 +37,7 @@ dir path=usr group=sys
dir path=usr/share
dir path=usr/share/man
dir path=usr/share/man/man7d
-driver name=nvme alias=pciex8086,953 class=disk perms="* 0600 root sys"
+driver name=nvme alias=pciexclass,010802 class=disk perms="* 0600 root sys"
file path=kernel/drv/$(ARCH64)/nvme group=sys
file path=kernel/drv/nvme group=sys
file path=kernel/drv/nvme.conf group=sys
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index aaa3b5fb73..3fad056534 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -18,7 +18,7 @@
/*
* blkdev driver for NVMe compliant storage devices
*
- * This driver was written to conform to version 1.0e of the NVMe specification.
+ * This driver was written to conform to version 1.1b of the NVMe specification.
* It may work with newer versions, but that is completely untested and disabled
* by default.
*
@@ -78,6 +78,11 @@
* thin provisioning and protection information. This driver does not support
* any of this and ignores namespaces that have these attributes.
*
+ * As of NVMe 1.1, namespaces can have a 64-bit Extended Unique Identifier
+ * (EUI64). This driver uses the EUI64, if present, to generate the devid and
+ * passes it to blkdev for use in the device node names. As this is currently
+ * untested, namespaces with an EUI64 are ignored by default.
+ *
*
* Blkdev Interface:
*
@@ -93,8 +98,9 @@
* Blkdev also supports querying device/media information and generating a
* devid. The driver reports the best block size as determined by the namespace
* format back to blkdev as physical block size to support partition and block
- * alignment. The devid is composed using the device vendor ID, model number,
- * serial number, and the namespace ID.
+ * alignment. The devid is either based on the namespace EUI64, if present, or
+ * composed using the device vendor ID, model number, serial number, and the
+ * namespace ID.
*
*
* Error Handling:
@@ -140,7 +146,7 @@
* The following driver properties can be changed to control some aspects of the
* drivers operation:
* - strict-version: can be set to 0 to allow devices conforming to newer
- * versions to be used
+ * versions or namespaces with EUI64 to be used
* - ignore-unknown-vendor-status: can be set to 1 to not handle any vendor
* specific command status as a fatal error leading device faulting
* - admin-queue-len: the maximum length of the admin queue (16-4096)
@@ -163,6 +169,11 @@
* - support for media formatting and hard partitioning into namespaces
* - support for big-endian systems
* - support for fast reboot
+ * - support for firmware updates
+ * - support for NVMe Subsystem Reset (1.1)
+ * - support for Scatter/Gather lists (1.1)
+ * - support for Reservations (1.1)
+ * - support for power management
*/
#include <sys/byteorder.h>
@@ -192,7 +203,7 @@
/* NVMe spec version supported */
static const int nvme_version_major = 1;
-static const int nvme_version_minor = 0;
+static const int nvme_version_minor = 1;
/* tunable for admin command timeout in seconds, default is 1s */
static volatile int nvme_admin_cmd_timeout = 1;
@@ -360,7 +371,7 @@ static struct dev_ops nvme_dev_ops = {
static struct modldrv nvme_modldrv = {
.drv_modops = &mod_driverops,
- .drv_linkinfo = "NVMe v1.0e",
+ .drv_linkinfo = "NVMe v1.1b",
.drv_dev_ops = &nvme_dev_ops
};
@@ -1828,6 +1839,14 @@ nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce)
static void
nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
{
+ /*
+ * Section 7.7 of the spec describes how to get a unique ID for
+ * the controller: the vendor ID, the model name and the serial
+ * number shall be unique when combined.
+ *
+ * If a namespace has no EUI64 we use the above and add the hex
+ * namespace ID to get a unique ID for the namespace.
+ */
char model[sizeof (nvme->n_idctl->id_model) + 1];
char serial[sizeof (nvme->n_idctl->id_serial) + 1];
@@ -1838,8 +1857,7 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
model[sizeof (nvme->n_idctl->id_model)] = '\0';
serial[sizeof (nvme->n_idctl->id_serial)] = '\0';
- (void) snprintf(nvme->n_ns[nsid - 1].ns_devid,
- sizeof (nvme->n_ns[0].ns_devid), "%4X-%s-%s-%X",
+ nvme->n_ns[nsid - 1].ns_devid = kmem_asprintf("%4X-%s-%s-%X",
nvme->n_idctl->id_vid, model, serial, nsid);
}
@@ -1860,12 +1878,13 @@ nvme_init(nvme_t *nvme)
/* Check controller version */
vs.r = nvme_get32(nvme, NVME_REG_VS);
+ nvme->n_version.v_major = vs.b.vs_mjr;
+ nvme->n_version.v_minor = vs.b.vs_mnr;
dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d",
- vs.b.vs_mjr, vs.b.vs_mnr);
+ nvme->n_version.v_major, nvme->n_version.v_minor);
- if (nvme_version_major < vs.b.vs_mjr ||
- (nvme_version_major == vs.b.vs_mjr &&
- nvme_version_minor < vs.b.vs_mnr)) {
+ if (NVME_VERSION_HIGHER(&nvme->n_version,
+ nvme_version_major, nvme_version_minor)) {
dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d",
nvme_version_major, nvme_version_minor);
if (nvme->n_strict_version)
@@ -2164,7 +2183,26 @@ nvme_init(nvme_t *nvme)
1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size;
- nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);
+ /*
+ * Get the EUI64 if present. If not present prepare the devid
+ * from other device data.
+ */
+ if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
+ bcopy(idns->id_eui64, nvme->n_ns[i].ns_eui64,
+ sizeof (nvme->n_ns[i].ns_eui64));
+
+ /*LINTED: E_BAD_PTR_CAST_ALIGN*/
+ if (*(uint64_t *)nvme->n_ns[i].ns_eui64 == 0) {
+ nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);
+ } else {
+ /*
+ * Until EUI64 support is tested on real hardware we
+ * will ignore namespaces with an EUI64. This can
+ * be overridden by setting strict-version=0 in nvme.conf.
+ */
+ if (nvme->n_strict_version)
+ nvme->n_ns[i].ns_ignore = B_TRUE;
+ }
/*
* Find the LBA format with no metadata and the best relative
@@ -2662,6 +2700,8 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
if (nvme->n_ns[i].ns_idns)
kmem_free(nvme->n_ns[i].ns_idns,
sizeof (nvme_identify_nsid_t));
+ if (nvme->n_ns[i].ns_devid)
+ strfree(nvme->n_ns[i].ns_devid);
}
kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
@@ -2889,6 +2929,7 @@ nvme_bd_driveinfo(void *arg, bd_drive_t *drive)
drive->d_removable = B_FALSE;
drive->d_hotpluggable = B_FALSE;
+ bcopy(ns->ns_eui64, drive->d_eui64, sizeof (drive->d_eui64));
drive->d_target = ns->ns_id;
drive->d_lun = 0;
@@ -2992,6 +3033,12 @@ nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
{
nvme_namespace_t *ns = arg;
- return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
- ns->ns_devid, devid));
+ /*LINTED: E_BAD_PTR_CAST_ALIGN*/
+ if (*(uint64_t *)ns->ns_eui64 != 0) {
+ return (ddi_devid_init(devinfo, DEVID_SCSI3_WWN,
+ sizeof (ns->ns_eui64), ns->ns_eui64, devid));
+ } else {
+ return (ddi_devid_init(devinfo, DEVID_ENCAP,
+ strlen(ns->ns_devid), ns->ns_devid, devid));
+ }
}
diff --git a/usr/src/uts/common/io/nvme/nvme_reg.h b/usr/src/uts/common/io/nvme/nvme_reg.h
index 9c4259fa88..3e4b77079b 100644
--- a/usr/src/uts/common/io/nvme/nvme_reg.h
+++ b/usr/src/uts/common/io/nvme/nvme_reg.h
@@ -33,6 +33,22 @@ extern "C" {
#define NVME_MAX_ADMIN_QUEUE_LEN 4096
/*
+ * NVMe version
+ */
+typedef struct {
+ uint16_t v_minor;
+ uint16_t v_major;
+} nvme_version_t;
+
+#define NVME_VERSION_ATLEAST(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor >= (min)))
+
+#define NVME_VERSION_HIGHER(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor > (min)))
+
+/*
* NVMe registers and register fields
*/
#define NVME_REG_CAP 0x0 /* Controller Capabilities */
@@ -403,18 +419,21 @@ typedef struct {
/* NVMe Power State Descriptor */
typedef struct {
uint16_t psd_mp; /* Maximum Power */
- uint16_t psd_rsvd1;
+ uint8_t psd_rsvd1;
+ uint8_t psd_mps:1; /* Max Power Scale (1.1) */
+ uint8_t psd_nops:1; /* Non-Operational State (1.1) */
+ uint8_t psd_rsvd2:6;
uint32_t psd_enlat; /* Entry Latency */
uint32_t psd_exlat; /* Exit Latency */
uint8_t psd_rrt:5; /* Relative Read Throughput */
- uint8_t psd_rsvd2:3;
- uint8_t psd_rrl:5; /* Relative Read Latency */
uint8_t psd_rsvd3:3;
+ uint8_t psd_rrl:5; /* Relative Read Latency */
+ uint8_t psd_rsvd4:3;
uint8_t psd_rwt:5; /* Relative Write Throughput */
- uint8_t psd_rsvd4:3;
+ uint8_t psd_rsvd5:3;
uint8_t psd_rwl:5; /* Relative Write Latency */
- uint8_t psd_rsvd5:3;
- uint8_t psd_rsvd6[16];
+ uint8_t psd_rsvd6:3;
+ uint8_t psd_rsvd7[16];
} nvme_idctl_psd_t;
/* NVMe Identify Controller Data Structure */
@@ -428,11 +447,14 @@ typedef struct {
uint8_t id_rab; /* Recommended Arbitration Burst */
uint8_t id_oui[3]; /* vendor IEEE OUI */
struct { /* Multi-Interface Capabilities */
- uint8_t m_multi:1; /* HW has multiple PCIe interfaces */
- uint8_t m_rsvd:7;
+ uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */
+ uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
+ uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */
+ uint8_t m_rsvd:5;
} id_mic;
uint8_t id_mdts; /* Maximum Data Transfer Size */
- uint8_t id_rsvd_cc[256 - 78];
+ uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */
+ uint8_t id_rsvd_cc[256 - 80];
/* Admin Command Set Attributes */
struct { /* Optional Admin Command Support */
@@ -458,7 +480,11 @@ typedef struct {
uint8_t av_spec:1; /* use format from spec */
uint8_t av_rsvd:7;
} id_avscc;
- uint8_t id_rsvd_ac[256 - 9];
+ struct { /* Autonomous Power State Trans (1.1) */
+ uint8_t ap_sup:1; /* APST supported (1.1) */
+ uint8_t ap_rsvd:7;
+ } id_apsta;
+ uint8_t id_rsvd_ac[256 - 10];
/* NVM Command Set Attributes */
nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */
@@ -469,7 +495,10 @@ typedef struct {
uint16_t on_compare:1; /* Compare */
uint16_t on_wr_unc:1; /* Write Uncorrectable */
uint16_t on_dset_mgmt:1; /* Dataset Management */
- uint16_t on_rsvd:13;
+ uint16_t on_wr_zero:1; /* Write Zeros (1.1) */
+ uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */
+ uint16_t on_reserve:1; /* Reservations (1.1) */
+ uint16_t on_rsvd:10;
} id_oncs;
struct { /* Fused Operation Support */
uint16_t f_cmp_wr:1; /* Compare and Write */
@@ -491,7 +520,16 @@ typedef struct {
uint8_t nv_spec:1; /* use format from spec */
uint8_t nv_rsvd:7;
} id_nvscc;
- uint8_t id_rsvd_nc_2[192 - 19];
+ uint8_t id_rsvd_nc_2;
+ uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */
+ uint16_t id_rsvd_nc_3;
+ struct { /* SGL Support (1.1) */
+ uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */
+ uint16_t sgl_rsvd1:15;
+ uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */
+ uint16_t sgl_rsvd2:15;
+ } id_sgls;
+ uint8_t id_rsvd_nc_4[192 - 28];
/* I/O Command Set Attributes */
uint8_t id_rsvd_ioc[1344];
@@ -537,12 +575,29 @@ typedef struct {
uint8_t dp_type3:1; /* Protection Information Type 3 */
uint8_t dp_first:1; /* first 8 bytes of metadata */
uint8_t dp_last:1; /* last 8 bytes of metadata */
+ uint8_t dp_rsvd:3;
} id_dpc;
struct { /* Data Protection Settings */
uint8_t dp_pinfo:3; /* Protection Information enabled */
uint8_t dp_first:1; /* first 8 bytes of metadata */
+ uint8_t dp_rsvd:4;
} id_dps;
- uint8_t id_rsvd1[128 - 30];
+ struct { /* NS Multi-Path/Sharing Cap (1.1) */
+ uint8_t nm_shared:1; /* NS is shared (1.1) */
+ uint8_t nm_rsvd:7;
+ } id_nmic;
+ struct { /* Reservation Capabilities (1.1) */
+ uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */
+ uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */
+ uint8_t rc_excl:1; /* Exclusive Access (1.1) */
+ uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */
+ uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */
+ uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */
+ uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */
+ uint8_t rc_rsvd:1;
+ } id_rescap;
+ uint8_t id_rsvd1[120 - 32];
+ uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */
nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */
uint8_t id_rsvd2[192];
@@ -577,6 +632,8 @@ typedef union {
#define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */
#define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */
#define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */
+#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */
+ /* (1.1) */
#define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */
diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h
index 0ddb1649db..fd6f93af88 100644
--- a/usr/src/uts/common/io/nvme/nvme_var.h
+++ b/usr/src/uts/common/io/nvme/nvme_var.h
@@ -124,6 +124,7 @@ struct nvme {
char *n_product;
char *n_vendor;
+ nvme_version_t n_version;
boolean_t n_dead;
boolean_t n_strict_version;
boolean_t n_ignore_unknown_vendor_status;
@@ -215,6 +216,8 @@ struct nvme {
struct nvme_namespace {
nvme_t *ns_nvme;
+ uint8_t ns_eui64[8];
+
bd_handle_t ns_bd_hdl;
uint32_t ns_id;
@@ -227,13 +230,10 @@ struct nvme_namespace {
nvme_identify_nsid_t *ns_idns;
/*
- * Section 7.7 of the spec describes how to get a unique ID for
- * the controller: the vendor ID, the model name and the serial
- * number shall be unique when combined.
- *
- * We add the hex namespace ID to get a unique ID for the namespace.
+ * If a namespace has no EUI64, we create a devid in
+ * nvme_prepare_devid().
*/
- char ns_devid[4 + 1 + 20 + 1 + 40 + 1 + 8 + 1];
+ char *ns_devid;
};
struct nvme_task_arg {