Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/io/blkdev/blkdev.c    16
-rw-r--r--  usr/src/uts/common/io/nvme/nvme.c      1059
-rw-r--r--  usr/src/uts/common/io/nvme/nvme_reg.h   341
-rw-r--r--  usr/src/uts/common/io/nvme/nvme_var.h    24
-rw-r--r--  usr/src/uts/common/sys/Makefile           3
-rw-r--r--  usr/src/uts/common/sys/nvme.h           574
-rw-r--r--  usr/src/uts/common/sys/sunddi.h           5
7 files changed, 1561 insertions, 461 deletions
diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c
index 0c80d15cfe..d3b96c9f8a 100644
--- a/usr/src/uts/common/io/blkdev/blkdev.c
+++ b/usr/src/uts/common/io/blkdev/blkdev.c
@@ -1819,6 +1819,16 @@ bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
dev_info_t *child;
bd_drive_t drive = { 0 };
+ /*
+ * It's not an error if bd_attach_handle() is called on a handle that
+ * is already attached. We just ignore the request to attach and return.
+ * This way drivers using blkdev don't have to keep track of the blkdev
+ * state; they can just call this function to make sure it is attached.
+ */
+ if (hdl->h_child != NULL) {
+ return (DDI_SUCCESS);
+ }
+
/* if drivers don't override this, make it assume none */
drive.d_lun = -1;
hdl->h_ops.o_drive_info(hdl->h_private, &drive);
@@ -1882,6 +1892,12 @@ bd_detach_handle(bd_handle_t hdl)
int rv;
char *devnm;
+ /*
+ * It's not an error if bd_detach_handle() is called on a handle that
+ * is already detached. We just ignore the request to detach and return.
+ * This way drivers using blkdev don't have to keep track of the blkdev
+ * state; they can just call this function to make sure it is detached.
+ */
if (hdl->h_child == NULL) {
return (DDI_SUCCESS);
}
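As a hedged illustration of what these two guards buy a client driver: the
attach path can simply be re-run without tracking whether blkdev is already
attached. The "sc" soft state and "sc_bd_hdl" field below are hypothetical
names, not part of this patch.

        /*
         * Safe to call even if the handle is already attached; blkdev
         * now treats the redundant call as a no-op and returns
         * DDI_SUCCESS.
         */
        if (bd_attach_handle(dip, sc->sc_bd_hdl) != DDI_SUCCESS)
                return (DDI_FAILURE);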
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index cb2e9bdd22..c87be0d3f0 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -83,6 +83,19 @@
* passes it to blkdev to use it in the device node names. As this is currently
* untested namespaces with EUI64 are ignored by default.
*
+ * We currently support only (1 << NVME_MINOR_INST_SHIFT) - 2 namespaces in a
+ * single controller. This is an artificial limit imposed by the driver to be
+ * able to address a reasonable number of controllers and namespaces using a
+ * 32-bit minor node number.
+ *
+ *
+ * Minor nodes:
+ *
+ * For each NVMe device the driver exposes one minor node for the controller and
+ * one minor node for each namespace. The only operations supported by those
+ * minor nodes are open(9E), close(9E), and ioctl(9E). This serves as the
+ * interface for the nvmeadm(1M) utility.
+ *
*
* Blkdev Interface:
*
@@ -164,7 +177,6 @@
* - polled I/O support to support kernel core dumping
* - FMA handling of media errors
* - support for devices supporting very large I/O requests using chained PRPs
- * - support for querying log pages from user space
* - support for configuring hardware parameters like interrupt coalescing
* - support for media formatting and hard partitioning into namespaces
* - support for big-endian systems
@@ -186,6 +198,7 @@
#include <sys/devops.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
+#include <sys/sunndi.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
@@ -196,6 +209,10 @@
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/sata/sata_hba.h>
+#include <sys/stat.h>
+#include <sys/policy.h>
+
+#include <sys/nvme.h>
#ifdef __x86
#include <sys/x86_archext.h>
@@ -210,7 +227,10 @@ static const int nvme_version_major = 1;
static const int nvme_version_minor = 2;
/* tunable for admin command timeout in seconds, default is 1s */
-static volatile int nvme_admin_cmd_timeout = 1;
+int nvme_admin_cmd_timeout = 1;
+
+/* tunable for FORMAT NVM command timeout in seconds, default is 600s */
+int nvme_format_cmd_timeout = 600;
static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
@@ -243,10 +263,14 @@ static inline int nvme_check_cmd_status(nvme_cmd_t *);
static void nvme_abort_cmd(nvme_cmd_t *);
static int nvme_async_event(nvme_t *);
-static void *nvme_get_logpage(nvme_t *, uint8_t, ...);
+static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
+ boolean_t, uint8_t);
+static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
uint32_t *);
+static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
+ void **, size_t *);
static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
static int nvme_set_nqueues(nvme_t *, uint16_t);
@@ -283,6 +307,16 @@ static void nvme_prp_dma_destructor(void *, void *);
static void nvme_prepare_devid(nvme_t *, uint32_t);
+static int nvme_open(dev_t *, int, int, cred_t *);
+static int nvme_close(dev_t, int, int, cred_t *);
+static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+#define NVME_MINOR_INST_SHIFT 14
+#define NVME_MINOR(inst, nsid) (((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
+#define NVME_MINOR_INST(minor) ((minor) >> NVME_MINOR_INST_SHIFT)
+#define NVME_MINOR_NSID(minor) ((minor) & ((1 << NVME_MINOR_INST_SHIFT) - 1))
+#define NVME_MINOR_MAX (NVME_MINOR(1, 0) - 2)
+
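A standalone sketch (not part of the patch) of how the minor-number macros
above pack and unpack instance and namespace ID; the values are made up for
illustration, and the macros are copied verbatim from the patch so the
snippet compiles on its own.

        #include <stdio.h>

        #define NVME_MINOR_INST_SHIFT   14
        #define NVME_MINOR(inst, nsid)  (((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
        #define NVME_MINOR_INST(minor)  ((minor) >> NVME_MINOR_INST_SHIFT)
        #define NVME_MINOR_NSID(minor)  ((minor) & ((1 << NVME_MINOR_INST_SHIFT) - 1))

        int
        main(void)
        {
                int minor = NVME_MINOR(2, 5);   /* controller instance 2, namespace 5 */

                /* prints "minor 32773 -> instance 2, nsid 5" */
                (void) printf("minor %d -> instance %d, nsid %d\n",
                    minor, NVME_MINOR_INST(minor), NVME_MINOR_NSID(minor));
                return (0);
        }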
static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;
@@ -358,6 +392,27 @@ static ddi_device_acc_attr_t nvme_reg_acc_attr = {
.devacc_attr_dataorder = DDI_STRICTORDER_ACC
};
+static struct cb_ops nvme_cb_ops = {
+ .cb_open = nvme_open,
+ .cb_close = nvme_close,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = nvme_ioctl,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_str = 0,
+ .cb_flag = D_NEW | D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
static struct dev_ops nvme_dev_ops = {
.devo_rev = DEVO_REV,
.devo_refcnt = 0,
@@ -367,7 +422,7 @@ static struct dev_ops nvme_dev_ops = {
.devo_attach = nvme_attach,
.devo_detach = nvme_detach,
.devo_reset = nodev,
- .devo_cb_ops = NULL,
+ .devo_cb_ops = &nvme_cb_ops,
.devo_bus_ops = NULL,
.devo_power = NULL,
.devo_quiesce = nvme_quiesce,
@@ -844,7 +899,8 @@ nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
if (cmd->nc_nvme->n_strict_version) {
cmd->nc_nvme->n_dead = B_TRUE;
@@ -881,13 +937,15 @@ nvme_check_integrity_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_INT_NVM_WRITE:
/* write fail */
/* TODO: post ereport */
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
case NVME_CQE_SC_INT_NVM_READ:
/* read fail */
/* TODO: post ereport */
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
default:
@@ -915,9 +973,11 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_INV_FLD:
/* Invalid Field in Command */
- dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
- "invalid field in cmd %p", (void *)cmd);
- return (0);
+ if (!cmd->nc_dontpanic)
+ dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
+ "programming error: invalid field in cmd %p",
+ (void *)cmd);
+ return (EIO);
case NVME_CQE_SC_GEN_ID_CNFL:
/* Command ID Conflict */
@@ -927,9 +987,11 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_INV_NS:
/* Invalid Namespace or Format */
- dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
- "invalid NS/format in cmd %p", (void *)cmd);
- return (0);
+ if (!cmd->nc_dontpanic)
+ dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
+ "programming error: " "invalid NS/format in cmd %p",
+ (void *)cmd);
+ return (EINVAL);
case NVME_CQE_SC_GEN_NVM_LBA_RANGE:
/* LBA Out Of Range */
@@ -944,7 +1006,8 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
/* Data Transfer Error (DMA) */
/* TODO: post ereport */
atomic_inc_32(&cmd->nc_nvme->n_data_xfr_err);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
case NVME_CQE_SC_GEN_INTERNAL_ERR:
@@ -955,7 +1018,8 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
* in the async event handler.
*/
atomic_inc_32(&cmd->nc_nvme->n_internal_err);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
case NVME_CQE_SC_GEN_ABORT_REQUEST:
@@ -981,13 +1045,15 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_NVM_CAP_EXC:
/* Capacity Exceeded */
atomic_inc_32(&cmd->nc_nvme->n_nvm_cap_exc);
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
case NVME_CQE_SC_GEN_NVM_NS_NOTRDY:
/* Namespace Not Ready */
atomic_inc_32(&cmd->nc_nvme->n_nvm_ns_notrdy);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
default:
@@ -1048,14 +1114,14 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
/* Invalid Log Page */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_GET_LOG_PAGE);
atomic_inc_32(&cmd->nc_nvme->n_inv_log_page);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_INV_FORMAT:
/* Invalid Format */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_FORMAT);
atomic_inc_32(&cmd->nc_nvme->n_inv_format);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_INV_Q_DEL:
@@ -1070,7 +1136,8 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_cnfl_attr);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_NVM_INV_PROT:
@@ -1079,14 +1146,16 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_inv_prot);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_NVM_READONLY:
/* Write to Read Only Range */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_readonly);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EROFS);
default:
@@ -1281,6 +1350,7 @@ nvme_async_event_task(void *arg)
nvme_t *nvme = cmd->nc_nvme;
nvme_error_log_entry_t *error_log = NULL;
nvme_health_log_t *health_log = NULL;
+ size_t logsize = 0;
nvme_async_event_t event;
int ret;
@@ -1328,8 +1398,8 @@ nvme_async_event_task(void *arg)
switch (event.b.ae_type) {
case NVME_ASYNC_TYPE_ERROR:
if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
- error_log = (nvme_error_log_entry_t *)
- nvme_get_logpage(nvme, event.b.ae_logpage);
+ (void) nvme_get_logpage(nvme, (void **)&error_log,
+ &logsize, event.b.ae_logpage);
} else {
dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
"async event reply: %d", event.b.ae_logpage);
@@ -1379,8 +1449,8 @@ nvme_async_event_task(void *arg)
case NVME_ASYNC_TYPE_HEALTH:
if (event.b.ae_logpage == NVME_LOGPAGE_HEALTH) {
- health_log = (nvme_health_log_t *)
- nvme_get_logpage(nvme, event.b.ae_logpage, -1);
+ (void) nvme_get_logpage(nvme, (void **)&health_log,
+ &logsize, event.b.ae_logpage, -1);
} else {
dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
"async event reply: %d", event.b.ae_logpage);
@@ -1427,11 +1497,10 @@ nvme_async_event_task(void *arg)
}
if (error_log)
- kmem_free(error_log, sizeof (nvme_error_log_entry_t) *
- nvme->n_error_log_len);
+ kmem_free(error_log, logsize);
if (health_log)
- kmem_free(health_log, sizeof (nvme_health_log_t));
+ kmem_free(health_log, logsize);
}
static int
@@ -1485,14 +1554,58 @@ nvme_async_event(nvme_t *nvme)
return (DDI_SUCCESS);
}
-static void *
-nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
+static int
+nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
+ uint8_t pi, boolean_t pil, uint8_t ses)
+{
+ nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
+ nvme_format_nvm_t format_nvm = { 0 };
+ int ret;
+
+ format_nvm.b.fm_lbaf = lbaf & 0xf;
+ format_nvm.b.fm_ms = ms ? 1 : 0;
+ format_nvm.b.fm_pi = pi & 0x7;
+ format_nvm.b.fm_pil = pil ? 1 : 0;
+ format_nvm.b.fm_ses = ses & 0x7;
+
+ cmd->nc_sqid = 0;
+ cmd->nc_callback = nvme_wakeup_cmd;
+ cmd->nc_sqe.sqe_nsid = nsid;
+ cmd->nc_sqe.sqe_opc = NVME_OPC_NVM_FORMAT;
+ cmd->nc_sqe.sqe_cdw10 = format_nvm.r;
+
+ /*
+ * Some devices like Samsung SM951 don't allow formatting of all
+ * namespaces in one command. Handle that gracefully.
+ */
+ if (nsid == (uint32_t)-1)
+ cmd->nc_dontpanic = B_TRUE;
+
+ if ((ret = nvme_admin_cmd(cmd, nvme_format_cmd_timeout))
+ != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_admin_cmd failed for FORMAT NVM");
+ return (EIO);
+ }
+
+ if ((ret = nvme_check_cmd_status(cmd)) != 0) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!FORMAT failed with sct = %x, sc = %x",
+ cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
+ }
+
+ nvme_free_cmd(cmd);
+ return (ret);
+}
+
+static int
+nvme_get_logpage(nvme_t *nvme, void **buf, size_t *bufsize, uint8_t logpage,
+ ...)
{
nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
- void *buf = NULL;
nvme_getlogpage_t getlogpage = { 0 };
- size_t bufsize;
va_list ap;
+ int ret = DDI_FAILURE;
va_start(ap, logpage);
@@ -1505,18 +1618,22 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
switch (logpage) {
case NVME_LOGPAGE_ERROR:
cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
- bufsize = nvme->n_error_log_len *
- sizeof (nvme_error_log_entry_t);
+ /*
+ * The GET LOG PAGE command can use at most 2 pages to return
+ * data; PRP lists are not supported.
+ */
+ *bufsize = MIN(2 * nvme->n_pagesize,
+ nvme->n_error_log_len * sizeof (nvme_error_log_entry_t));
break;
case NVME_LOGPAGE_HEALTH:
cmd->nc_sqe.sqe_nsid = va_arg(ap, uint32_t);
- bufsize = sizeof (nvme_health_log_t);
+ *bufsize = sizeof (nvme_health_log_t);
break;
case NVME_LOGPAGE_FWSLOT:
cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
- bufsize = sizeof (nvme_fwslot_log_t);
+ *bufsize = sizeof (nvme_fwslot_log_t);
break;
default:
@@ -1528,7 +1645,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
va_end(ap);
- getlogpage.b.lp_numd = bufsize / sizeof (uint32_t) - 1;
+ getlogpage.b.lp_numd = *bufsize / sizeof (uint32_t) - 1;
cmd->nc_sqe.sqe_cdw10 = getlogpage.r;
@@ -1557,7 +1674,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
dev_err(nvme->n_dip, CE_WARN,
"!nvme_admin_cmd failed for GET LOG PAGE");
- return (NULL);
+ return (ret);
}
if (nvme_check_cmd_status(cmd)) {
@@ -1567,13 +1684,15 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
goto fail;
}
- buf = kmem_alloc(bufsize, KM_SLEEP);
- bcopy(cmd->nc_dma->nd_memp, buf, bufsize);
+ *buf = kmem_alloc(*bufsize, KM_SLEEP);
+ bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
+
+ ret = DDI_SUCCESS;
fail:
nvme_free_cmd(cmd);
- return (buf);
+ return (ret);
}
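For reference, a sketch of the caller-side pattern for the reworked
interface, mirroring the updated nvme_async_event_task() above; a driver
context with a valid nvme_t *nvme is assumed.

        nvme_health_log_t *health_log = NULL;
        size_t logsize = 0;

        /* on success the callee allocates *buf; the caller must free it */
        if (nvme_get_logpage(nvme, (void **)&health_log, &logsize,
            NVME_LOGPAGE_HEALTH, -1) == DDI_SUCCESS) {
                /* ... inspect health_log->hl_temp, hl_avail_spare, ... */
                kmem_free(health_log, logsize);
        }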
static void *
@@ -1684,6 +1803,130 @@ fail:
}
static boolean_t
+nvme_get_features(nvme_t *nvme, uint32_t nsid, uint8_t feature, uint32_t *res,
+ void **buf, size_t *bufsize)
+{
+ nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
+ boolean_t ret = B_FALSE;
+
+ ASSERT(res != NULL);
+
+ if (bufsize != NULL)
+ *bufsize = 0;
+
+ cmd->nc_sqid = 0;
+ cmd->nc_callback = nvme_wakeup_cmd;
+ cmd->nc_sqe.sqe_opc = NVME_OPC_GET_FEATURES;
+ cmd->nc_sqe.sqe_cdw10 = feature;
+ cmd->nc_sqe.sqe_cdw11 = *res;
+
+ switch (feature) {
+ case NVME_FEAT_ARBITRATION:
+ case NVME_FEAT_POWER_MGMT:
+ case NVME_FEAT_TEMPERATURE:
+ case NVME_FEAT_ERROR:
+ case NVME_FEAT_NQUEUES:
+ case NVME_FEAT_INTR_COAL:
+ case NVME_FEAT_INTR_VECT:
+ case NVME_FEAT_WRITE_ATOM:
+ case NVME_FEAT_ASYNC_EVENT:
+ case NVME_FEAT_PROGRESS:
+ break;
+
+ case NVME_FEAT_WRITE_CACHE:
+ if (!nvme->n_write_cache_present)
+ goto fail;
+ break;
+
+ case NVME_FEAT_LBA_RANGE:
+ if (!nvme->n_lba_range_supported)
+ goto fail;
+
+ /*
+ * The LBA Range Type feature is optional. There doesn't seem to be
+ * a way of detecting whether it is supported other than using it.
+ * This will cause an "invalid field in command" error, which is
+ * normally considered a programming error and causes a panic in
+ * nvme_check_generic_cmd_status().
+ */
+ cmd->nc_dontpanic = B_TRUE;
+ cmd->nc_sqe.sqe_nsid = nsid;
+ ASSERT(bufsize != NULL);
+ *bufsize = NVME_LBA_RANGE_BUFSIZE;
+
+ break;
+
+ case NVME_FEAT_AUTO_PST:
+ if (!nvme->n_auto_pst_supported)
+ goto fail;
+
+ ASSERT(bufsize != NULL);
+ *bufsize = NVME_AUTO_PST_BUFSIZE;
+ break;
+
+ default:
+ goto fail;
+ }
+
+ if (bufsize != NULL && *bufsize != 0) {
+ if (nvme_zalloc_dma(nvme, *bufsize, DDI_DMA_READ,
+ &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_zalloc_dma failed for GET FEATURES");
+ goto fail;
+ }
+
+ if (cmd->nc_dma->nd_ncookie > 2) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!too many DMA cookies for GET FEATURES");
+ atomic_inc_32(&nvme->n_too_many_cookies);
+ goto fail;
+ }
+
+ cmd->nc_sqe.sqe_dptr.d_prp[0] =
+ cmd->nc_dma->nd_cookie.dmac_laddress;
+ if (cmd->nc_dma->nd_ncookie > 1) {
+ ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
+ &cmd->nc_dma->nd_cookie);
+ cmd->nc_sqe.sqe_dptr.d_prp[1] =
+ cmd->nc_dma->nd_cookie.dmac_laddress;
+ }
+ }
+
+ if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_admin_cmd failed for GET FEATURES");
+ return (ret);
+ }
+
+ if (nvme_check_cmd_status(cmd)) {
+ if (feature == NVME_FEAT_LBA_RANGE &&
+ cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
+ cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INV_FLD)
+ nvme->n_lba_range_supported = B_FALSE;
+ else
+ dev_err(nvme->n_dip, CE_WARN,
+ "!GET FEATURES %d failed with sct = %x, sc = %x",
+ feature, cmd->nc_cqe.cqe_sf.sf_sct,
+ cmd->nc_cqe.cqe_sf.sf_sc);
+ goto fail;
+ }
+
+ if (bufsize != NULL && *bufsize != 0) {
+ ASSERT(buf != NULL);
+ *buf = kmem_alloc(*bufsize, KM_SLEEP);
+ bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
+ }
+
+ *res = cmd->nc_cqe.cqe_dw0;
+ ret = B_TRUE;
+
+fail:
+ nvme_free_cmd(cmd);
+ return (ret);
+}
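Similarly, a hedged sketch of how a caller consumes nvme_get_features(),
patterned on nvme_ioctl_get_features() later in this patch; simple features
return their result in the completion dword only, so no data buffer is
transferred, and the surrounding int-returning function is assumed.

        uint32_t res = 0;
        void *buf = NULL;
        size_t bufsize = 0;

        if (nvme_get_features(nvme, 0, NVME_FEAT_WRITE_CACHE, &res,
            &buf, &bufsize) == B_FALSE)
                return (EIO);

        /* bit 0 of res is the Volatile Write Cache Enable bit */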
+
+static boolean_t
nvme_write_cache_set(nvme_t *nvme, boolean_t enable)
{
nvme_write_cache_t nwc = { 0 };
@@ -1700,7 +1943,7 @@ nvme_write_cache_set(nvme_t *nvme, boolean_t enable)
static int
nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues)
{
- nvme_nqueue_t nq = { 0 };
+ nvme_nqueues_t nq = { 0 };
nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1;
@@ -1866,6 +2109,89 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
}
static int
+nvme_init_ns(nvme_t *nvme, int nsid)
+{
+ nvme_namespace_t *ns = &nvme->n_ns[nsid - 1];
+ nvme_identify_nsid_t *idns;
+ int last_rp;
+
+ ns->ns_nvme = nvme;
+ idns = nvme_identify(nvme, nsid);
+
+ if (idns == NULL) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!failed to identify namespace %d", nsid);
+ return (DDI_FAILURE);
+ }
+
+ ns->ns_idns = idns;
+ ns->ns_id = nsid;
+ ns->ns_block_count = idns->id_nsize;
+ ns->ns_block_size =
+ 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
+ ns->ns_best_block_size = ns->ns_block_size;
+
+ /*
+ * Get the EUI64 if present. Use it for devid and device node names.
+ */
+ if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
+ bcopy(idns->id_eui64, ns->ns_eui64, sizeof (ns->ns_eui64));
+
+ /*LINTED: E_BAD_PTR_CAST_ALIGN*/
+ if (*(uint64_t *)ns->ns_eui64 != 0) {
+ uint8_t *eui64 = ns->ns_eui64;
+
+ (void) snprintf(ns->ns_name, sizeof (ns->ns_name),
+ "%02x%02x%02x%02x%02x%02x%02x%02x",
+ eui64[0], eui64[1], eui64[2], eui64[3],
+ eui64[4], eui64[5], eui64[6], eui64[7]);
+ } else {
+ (void) snprintf(ns->ns_name, sizeof (ns->ns_name), "%d",
+ ns->ns_id);
+
+ nvme_prepare_devid(nvme, ns->ns_id);
+ }
+
+ /*
+ * Find the LBA format with no metadata and the best relative
+ * performance. A value of 3 means "degraded", 0 is best.
+ */
+ last_rp = 3;
+ for (int j = 0; j <= idns->id_nlbaf; j++) {
+ if (idns->id_lbaf[j].lbaf_lbads == 0)
+ break;
+ if (idns->id_lbaf[j].lbaf_ms != 0)
+ continue;
+ if (idns->id_lbaf[j].lbaf_rp >= last_rp)
+ continue;
+ last_rp = idns->id_lbaf[j].lbaf_rp;
+ ns->ns_best_block_size =
+ 1 << idns->id_lbaf[j].lbaf_lbads;
+ }
+
+ if (ns->ns_best_block_size < nvme->n_min_block_size)
+ ns->ns_best_block_size = nvme->n_min_block_size;
+
+ /*
+ * We currently don't support namespaces that use either:
+ * - thin provisioning
+ * - protection information
+ */
+ if (idns->id_nsfeat.f_thin ||
+ idns->id_dps.dp_pinfo) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!ignoring namespace %d, unsupported features: "
+ "thin = %d, pinfo = %d", nsid,
+ idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
+ ns->ns_ignore = B_TRUE;
+ } else {
+ ns->ns_ignore = B_FALSE;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static int
nvme_init(nvme_t *nvme)
{
nvme_reg_cc_t cc = { 0 };
@@ -2150,90 +2476,37 @@ nvme_init(nvme_t *nvme)
nvme->n_write_cache_enabled ? 1 : 0);
/*
- * Grab a copy of all mandatory log pages.
- *
- * TODO: should go away once user space tool exists to print logs
+ * Assume the LBA Range Type feature is supported. If it isn't, this
+ * will be set to B_FALSE by nvme_get_features().
*/
- nvme->n_error_log = (nvme_error_log_entry_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_ERROR);
- nvme->n_health_log = (nvme_health_log_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_HEALTH, -1);
- nvme->n_fwslot_log = (nvme_fwslot_log_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_FWSLOT);
+ nvme->n_lba_range_supported = B_TRUE;
+
+ /*
+ * Check support for Autonomous Power State Transition.
+ */
+ if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
+ nvme->n_auto_pst_supported =
+ nvme->n_idctl->id_apsta.ap_sup == 0 ? B_FALSE : B_TRUE;
/*
* Identify Namespaces
*/
nvme->n_namespace_count = nvme->n_idctl->id_nn;
+ if (nvme->n_namespace_count > NVME_MINOR_MAX) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!too many namespaces: %d, limiting to %d\n",
+ nvme->n_namespace_count, NVME_MINOR_MAX);
+ nvme->n_namespace_count = NVME_MINOR_MAX;
+ }
+
nvme->n_ns = kmem_zalloc(sizeof (nvme_namespace_t) *
nvme->n_namespace_count, KM_SLEEP);
for (i = 0; i != nvme->n_namespace_count; i++) {
- nvme_identify_nsid_t *idns;
- int last_rp;
-
- nvme->n_ns[i].ns_nvme = nvme;
- nvme->n_ns[i].ns_idns = idns = nvme_identify(nvme, i + 1);
-
- if (idns == NULL) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to identify namespace %d", i + 1);
+ mutex_init(&nvme->n_ns[i].ns_minor.nm_mutex, NULL, MUTEX_DRIVER,
+ NULL);
+ if (nvme_init_ns(nvme, i + 1) != DDI_SUCCESS)
goto fail;
- }
-
- nvme->n_ns[i].ns_id = i + 1;
- nvme->n_ns[i].ns_block_count = idns->id_nsize;
- nvme->n_ns[i].ns_block_size =
- 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
- nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size;
-
- /*
- * Get the EUI64 if present. If not present prepare the devid
- * from other device data.
- */
- if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
- bcopy(idns->id_eui64, nvme->n_ns[i].ns_eui64,
- sizeof (nvme->n_ns[i].ns_eui64));
-
- /*LINTED: E_BAD_PTR_CAST_ALIGN*/
- if (*(uint64_t *)nvme->n_ns[i].ns_eui64 == 0) {
- nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);
- }
-
- /*
- * Find the LBA format with no metadata and the best relative
- * performance. A value of 3 means "degraded", 0 is best.
- */
- last_rp = 3;
- for (int j = 0; j <= idns->id_nlbaf; j++) {
- if (idns->id_lbaf[j].lbaf_lbads == 0)
- break;
- if (idns->id_lbaf[j].lbaf_ms != 0)
- continue;
- if (idns->id_lbaf[j].lbaf_rp >= last_rp)
- continue;
- last_rp = idns->id_lbaf[j].lbaf_rp;
- nvme->n_ns[i].ns_best_block_size =
- 1 << idns->id_lbaf[j].lbaf_lbads;
- }
-
- if (nvme->n_ns[i].ns_best_block_size < nvme->n_min_block_size)
- nvme->n_ns[i].ns_best_block_size =
- nvme->n_min_block_size;
-
- /*
- * We currently don't support namespaces that use either:
- * - thin provisioning
- * - protection information
- */
- if (idns->id_nsfeat.f_thin ||
- idns->id_dps.dp_pinfo) {
- dev_err(nvme->n_dip, CE_WARN,
- "!ignoring namespace %d, unsupported features: "
- "thin = %d, pinfo = %d", i + 1,
- idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
- nvme->n_ns[i].ns_ignore = B_TRUE;
- }
}
/*
@@ -2520,6 +2793,8 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_set_driver_private(dip, nvme);
nvme->n_dip = dip;
+ mutex_init(&nvme->n_minor.nm_mutex, NULL, MUTEX_DRIVER, NULL);
+
nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
nvme->n_ignore_unknown_vendor_status = ddi_prop_get_int(DDI_DEV_T_ANY,
@@ -2640,6 +2915,14 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
* Attach the blkdev driver for each namespace.
*/
for (i = 0; i != nvme->n_namespace_count; i++) {
+ if (ddi_create_minor_node(nvme->n_dip, nvme->n_ns[i].ns_name,
+ S_IFCHR, NVME_MINOR(ddi_get_instance(nvme->n_dip), i + 1),
+ DDI_NT_NVME_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN,
+ "!failed to create minor node for namespace %d", i);
+ goto fail;
+ }
+
if (nvme->n_ns[i].ns_ignore)
continue;
@@ -2661,6 +2944,14 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
}
+ if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
+ NVME_MINOR(ddi_get_instance(dip), 0), DDI_NT_NVME_NEXUS, 0)
+ != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "nvme_attach: "
+ "cannot create devctl minor node");
+ goto fail;
+ }
+
return (DDI_SUCCESS);
fail:
@@ -2689,8 +2980,14 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
if (nvme == NULL)
return (DDI_FAILURE);
+ ddi_remove_minor_node(dip, "devctl");
+ mutex_destroy(&nvme->n_minor.nm_mutex);
+
if (nvme->n_ns) {
for (i = 0; i != nvme->n_namespace_count; i++) {
+ ddi_remove_minor_node(dip, nvme->n_ns[i].ns_name);
+ mutex_destroy(&nvme->n_ns[i].ns_minor.nm_mutex);
+
if (nvme->n_ns[i].ns_bd_hdl) {
(void) bd_detach_handle(
nvme->n_ns[i].ns_bd_hdl);
@@ -2745,7 +3042,7 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
nvme_free_qpair(nvme->n_adminq);
if (nvme->n_idctl)
- kmem_free(nvme->n_idctl, sizeof (nvme_identify_ctrl_t));
+ kmem_free(nvme->n_idctl, NVME_IDENTIFY_BUFSIZE);
if (nvme->n_progress & NVME_REGS_MAPPED)
ddi_regs_map_free(&nvme->n_regh);
@@ -3042,3 +3339,531 @@ nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
strlen(ns->ns_devid), ns->ns_devid, devid));
}
}
+
+static int
+nvme_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+#ifndef __lock_lint
+ _NOTE(ARGUNUSED(cred_p));
+#endif
+ minor_t minor = getminor(*devp);
+ nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
+ int nsid = NVME_MINOR_NSID(minor);
+ nvme_minor_state_t *nm;
+ int rv = 0;
+
+ if (otyp != OTYP_CHR)
+ return (EINVAL);
+
+ if (nvme == NULL)
+ return (ENXIO);
+
+ if (nsid > nvme->n_namespace_count)
+ return (ENXIO);
+
+ nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
+
+ mutex_enter(&nm->nm_mutex);
+ if (nm->nm_oexcl) {
+ rv = EBUSY;
+ goto out;
+ }
+
+ if (flag & FEXCL) {
+ if (nm->nm_ocnt != 0) {
+ rv = EBUSY;
+ goto out;
+ }
+ nm->nm_oexcl = B_TRUE;
+ }
+
+ nm->nm_ocnt++;
+
+out:
+ mutex_exit(&nm->nm_mutex);
+ return (rv);
+
+}
+
+static int
+nvme_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+#ifndef __lock_lint
+ _NOTE(ARGUNUSED(cred_p));
+ _NOTE(ARGUNUSED(flag));
+#endif
+ minor_t minor = getminor(dev);
+ nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
+ int nsid = NVME_MINOR_NSID(minor);
+ nvme_minor_state_t *nm;
+
+ if (otyp != OTYP_CHR)
+ return (ENXIO);
+
+ if (nvme == NULL)
+ return (ENXIO);
+
+ if (nsid > nvme->n_namespace_count)
+ return (ENXIO);
+
+ nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
+
+ mutex_enter(&nm->nm_mutex);
+ if (nm->nm_oexcl)
+ nm->nm_oexcl = B_FALSE;
+
+ ASSERT(nm->nm_ocnt > 0);
+ nm->nm_ocnt--;
+ mutex_exit(&nm->nm_mutex);
+
+ return (0);
+}
+
+static int
+nvme_ioctl_identify(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(cred_p));
+ int rv = 0;
+ void *idctl;
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ if (nioc->n_len < NVME_IDENTIFY_BUFSIZE)
+ return (EINVAL);
+
+ idctl = nvme_identify(nvme, nsid);
+ if (idctl == NULL)
+ return (EIO);
+
+ if (ddi_copyout(idctl, (void *)nioc->n_buf, NVME_IDENTIFY_BUFSIZE, mode)
+ != 0)
+ rv = EFAULT;
+
+ kmem_free(idctl, NVME_IDENTIFY_BUFSIZE);
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_capabilities(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+ int mode, cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(nsid, cred_p));
+ int rv = 0;
+ nvme_reg_cap_t cap = { 0 };
+ nvme_capabilities_t nc;
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ if (nioc->n_len < sizeof (nc))
+ return (EINVAL);
+
+ cap.r = nvme_get64(nvme, NVME_REG_CAP);
+
+ /*
+ * The MPSMIN and MPSMAX fields in the CAP register use 0 to
+ * specify the base page size of 4k (1<<12), so add 12 here to
+ * get the real page size value.
+ */
+ nc.mpsmax = 1 << (12 + cap.b.cap_mpsmax);
+ nc.mpsmin = 1 << (12 + cap.b.cap_mpsmin);
+
+ if (ddi_copyout(&nc, (void *)nioc->n_buf, sizeof (nc), mode) != 0)
+ rv = EFAULT;
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_get_logpage(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+ int mode, cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(cred_p));
+ void *log = NULL;
+ size_t bufsize = 0;
+ int rv = 0;
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ switch (nioc->n_arg) {
+ case NVME_LOGPAGE_ERROR:
+ if (nsid != 0)
+ return (EINVAL);
+ break;
+ case NVME_LOGPAGE_HEALTH:
+ if (nsid != 0 && nvme->n_idctl->id_lpa.lp_smart == 0)
+ return (EINVAL);
+
+ if (nsid == 0)
+ nsid = (uint32_t)-1;
+
+ break;
+ case NVME_LOGPAGE_FWSLOT:
+ if (nsid != 0)
+ return (EINVAL);
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if (nvme_get_logpage(nvme, &log, &bufsize, nioc->n_arg, nsid)
+ != DDI_SUCCESS)
+ return (EIO);
+
+ if (nioc->n_len < bufsize) {
+ kmem_free(log, bufsize);
+ return (EINVAL);
+ }
+
+ if (ddi_copyout(log, (void *)nioc->n_buf, bufsize, mode) != 0)
+ rv = EFAULT;
+
+ nioc->n_len = bufsize;
+ kmem_free(log, bufsize);
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_get_features(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+ int mode, cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(cred_p));
+ void *buf = NULL;
+ size_t bufsize = 0;
+ uint32_t res = 0;
+ uint8_t feature;
+ int rv = 0;
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ if ((nioc->n_arg >> 32) > 0xff)
+ return (EINVAL);
+
+ feature = (uint8_t)(nioc->n_arg >> 32);
+
+ switch (feature) {
+ case NVME_FEAT_ARBITRATION:
+ case NVME_FEAT_POWER_MGMT:
+ case NVME_FEAT_TEMPERATURE:
+ case NVME_FEAT_ERROR:
+ case NVME_FEAT_NQUEUES:
+ case NVME_FEAT_INTR_COAL:
+ case NVME_FEAT_WRITE_ATOM:
+ case NVME_FEAT_ASYNC_EVENT:
+ case NVME_FEAT_PROGRESS:
+ if (nsid != 0)
+ return (EINVAL);
+ break;
+
+ case NVME_FEAT_INTR_VECT:
+ if (nsid != 0)
+ return (EINVAL);
+
+ res = nioc->n_arg & 0xffffffffUL;
+ if (res >= nvme->n_intr_cnt)
+ return (EINVAL);
+ break;
+
+ case NVME_FEAT_LBA_RANGE:
+ if (nvme->n_lba_range_supported == B_FALSE)
+ return (EINVAL);
+
+ if (nsid == 0 ||
+ nsid > nvme->n_namespace_count)
+ return (EINVAL);
+
+ break;
+
+ case NVME_FEAT_WRITE_CACHE:
+ if (nsid != 0)
+ return (EINVAL);
+
+ if (!nvme->n_write_cache_present)
+ return (EINVAL);
+
+ break;
+
+ case NVME_FEAT_AUTO_PST:
+ if (nsid != 0)
+ return (EINVAL);
+
+ if (!nvme->n_auto_pst_supported)
+ return (EINVAL);
+
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ if (nvme_get_features(nvme, nsid, feature, &res, &buf, &bufsize) ==
+ B_FALSE)
+ return (EIO);
+
+ if (nioc->n_len < bufsize) {
+ kmem_free(buf, bufsize);
+ return (EINVAL);
+ }
+
+ if (buf && ddi_copyout(buf, (void*)nioc->n_buf, bufsize, mode) != 0)
+ rv = EFAULT;
+
+ kmem_free(buf, bufsize);
+ nioc->n_arg = res;
+ nioc->n_len = bufsize;
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_intr_cnt(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(nsid, mode, cred_p));
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ nioc->n_arg = nvme->n_intr_cnt;
+ return (0);
+}
+
+static int
+nvme_ioctl_version(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(nsid, cred_p));
+ int rv = 0;
+
+ if ((mode & FREAD) == 0)
+ return (EPERM);
+
+ if (nioc->n_len < sizeof (nvme->n_version))
+ return (ENOMEM);
+
+ if (ddi_copyout(&nvme->n_version, (void *)nioc->n_buf,
+ sizeof (nvme->n_version), mode) != 0)
+ rv = EFAULT;
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_format(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(mode));
+ nvme_format_nvm_t frmt = { 0 };
+ int c_nsid = nsid != 0 ? nsid - 1 : 0;
+
+ if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+ return (EPERM);
+
+ frmt.r = nioc->n_arg & 0xffffffff;
+
+ /*
+ * Check whether the FORMAT NVM command is supported.
+ */
+ if (nvme->n_idctl->id_oacs.oa_format == 0)
+ return (EINVAL);
+
+ /*
+ * Don't allow format or secure erase of an individual namespace if that
+ * would cause a format or secure erase of all namespaces.
+ */
+ if (nsid != 0 && nvme->n_idctl->id_fna.fn_format != 0)
+ return (EINVAL);
+
+ if (nsid != 0 && frmt.b.fm_ses != NVME_FRMT_SES_NONE &&
+ nvme->n_idctl->id_fna.fn_sec_erase != 0)
+ return (EINVAL);
+
+ /*
+ * Don't allow formatting with Protection Information.
+ */
+ if (frmt.b.fm_pi != 0 || frmt.b.fm_pil != 0 || frmt.b.fm_ms != 0)
+ return (EINVAL);
+
+ /*
+ * Don't allow formatting using an illegal LBA format, or any LBA format
+ * that uses metadata.
+ */
+ if (frmt.b.fm_lbaf > nvme->n_ns[c_nsid].ns_idns->id_nlbaf ||
+ nvme->n_ns[c_nsid].ns_idns->id_lbaf[frmt.b.fm_lbaf].lbaf_ms != 0)
+ return (EINVAL);
+
+ /*
+ * Don't allow formatting using an illegal Secure Erase setting.
+ */
+ if (frmt.b.fm_ses > NVME_FRMT_MAX_SES ||
+ (frmt.b.fm_ses == NVME_FRMT_SES_CRYPTO &&
+ nvme->n_idctl->id_fna.fn_crypt_erase == 0))
+ return (EINVAL);
+
+ if (nsid == 0)
+ nsid = (uint32_t)-1;
+
+ return (nvme_format_nvm(nvme, nsid, frmt.b.fm_lbaf, B_FALSE, 0, B_FALSE,
+ frmt.b.fm_ses));
+}
+
+static int
+nvme_ioctl_detach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(nioc, mode));
+ int rv = 0;
+
+ if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+ return (EPERM);
+
+ if (nsid == 0)
+ return (EINVAL);
+
+ rv = bd_detach_handle(nvme->n_ns[nsid - 1].ns_bd_hdl);
+ if (rv != DDI_SUCCESS)
+ rv = EBUSY;
+
+ return (rv);
+}
+
+static int
+nvme_ioctl_attach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+ cred_t *cred_p)
+{
+ _NOTE(ARGUNUSED(nioc, mode));
+ nvme_identify_nsid_t *idns;
+ int rv = 0;
+
+ if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+ return (EPERM);
+
+ if (nsid == 0)
+ return (EINVAL);
+
+ /*
+ * Identify namespace again, free old identify data.
+ */
+ idns = nvme->n_ns[nsid - 1].ns_idns;
+ if (nvme_init_ns(nvme, nsid) != DDI_SUCCESS)
+ return (EIO);
+
+ kmem_free(idns, sizeof (nvme_identify_nsid_t));
+
+ rv = bd_attach_handle(nvme->n_dip, nvme->n_ns[nsid - 1].ns_bd_hdl);
+ if (rv != DDI_SUCCESS)
+ rv = EBUSY;
+
+ return (rv);
+}
+
+static int
+nvme_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p,
+ int *rval_p)
+{
+#ifndef __lock_lint
+ _NOTE(ARGUNUSED(rval_p));
+#endif
+ minor_t minor = getminor(dev);
+ nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
+ int nsid = NVME_MINOR_NSID(minor);
+ int rv = 0;
+ nvme_ioctl_t nioc;
+
+ int (*nvme_ioctl[])(nvme_t *, int, nvme_ioctl_t *, int, cred_t *) = {
+ NULL,
+ nvme_ioctl_identify,
+ nvme_ioctl_identify,
+ nvme_ioctl_capabilities,
+ nvme_ioctl_get_logpage,
+ nvme_ioctl_get_features,
+ nvme_ioctl_intr_cnt,
+ nvme_ioctl_version,
+ nvme_ioctl_format,
+ nvme_ioctl_detach,
+ nvme_ioctl_attach
+ };
+
+ if (nvme == NULL)
+ return (ENXIO);
+
+ if (nsid > nvme->n_namespace_count)
+ return (ENXIO);
+
+ if (IS_DEVCTL(cmd))
+ return (ndi_devctl_ioctl(nvme->n_dip, cmd, arg, mode, 0));
+
+#ifdef _MULTI_DATAMODEL
+ switch (ddi_model_convert_from(mode & FMODELS)) {
+ case DDI_MODEL_ILP32: {
+ nvme_ioctl32_t nioc32;
+ if (ddi_copyin((void*)arg, &nioc32, sizeof (nvme_ioctl32_t),
+ mode) != 0)
+ return (EFAULT);
+ nioc.n_len = nioc32.n_len;
+ nioc.n_buf = nioc32.n_buf;
+ nioc.n_arg = nioc32.n_arg;
+ break;
+ }
+ case DDI_MODEL_NONE:
+#endif
+ if (ddi_copyin((void*)arg, &nioc, sizeof (nvme_ioctl_t), mode)
+ != 0)
+ return (EFAULT);
+#ifdef _MULTI_DATAMODEL
+ break;
+ }
+#endif
+
+ if (cmd == NVME_IOC_IDENTIFY_CTRL) {
+ /*
+ * This makes NVME_IOC_IDENTIFY_CTRL work the same on devctl and
+ * attachment point nodes.
+ */
+ nsid = 0;
+ } else if (cmd == NVME_IOC_IDENTIFY_NSID && nsid == 0) {
+ /*
+ * This makes NVME_IOC_IDENTIFY_NSID work on a devctl node; it
+ * will always return identify data for namespace 1.
+ */
+ nsid = 1;
+ }
+
+ if (IS_NVME_IOC(cmd) && nvme_ioctl[NVME_IOC_CMD(cmd)] != NULL)
+ rv = nvme_ioctl[NVME_IOC_CMD(cmd)](nvme, nsid, &nioc, mode,
+ cred_p);
+ else
+ rv = EINVAL;
+
+#ifdef _MULTI_DATAMODEL
+ switch (ddi_model_convert_from(mode & FMODELS)) {
+ case DDI_MODEL_ILP32: {
+ nvme_ioctl32_t nioc32;
+
+ nioc32.n_len = (size32_t)nioc.n_len;
+ nioc32.n_buf = (uintptr32_t)nioc.n_buf;
+ nioc32.n_arg = nioc.n_arg;
+
+ if (ddi_copyout(&nioc32, (void *)arg, sizeof (nvme_ioctl32_t),
+ mode) != 0)
+ return (EFAULT);
+ break;
+ }
+ case DDI_MODEL_NONE:
+#endif
+ if (ddi_copyout(&nioc, (void *)arg, sizeof (nvme_ioctl_t), mode)
+ != 0)
+ return (EFAULT);
+#ifdef _MULTI_DATAMODEL
+ break;
+ }
+#endif
+
+ return (rv);
+}
diff --git a/usr/src/uts/common/io/nvme/nvme_reg.h b/usr/src/uts/common/io/nvme/nvme_reg.h
index 3e4b77079b..acff0e2362 100644
--- a/usr/src/uts/common/io/nvme/nvme_reg.h
+++ b/usr/src/uts/common/io/nvme/nvme_reg.h
@@ -20,6 +20,8 @@
#ifndef _NVME_REG_H
#define _NVME_REG_H
+#include <sys/nvme.h>
+
#pragma pack(1)
#ifdef __cplusplus
@@ -33,22 +35,6 @@ extern "C" {
#define NVME_MAX_ADMIN_QUEUE_LEN 4096
/*
- * NVMe version
- */
-typedef struct {
- uint16_t v_minor;
- uint16_t v_major;
-} nvme_version_t;
-
-#define NVME_VERSION_ATLEAST(v, maj, min) \
- (((v)->v_major) > (maj) || \
- ((v)->v_major == (maj) && (v)->v_minor >= (min)))
-
-#define NVME_VERSION_HIGHER(v, maj, min) \
- (((v)->v_major) > (maj) || \
- ((v)->v_major == (maj) && (v)->v_minor > (min)))
-
-/*
* NVMe registers and register fields
*/
#define NVME_REG_CAP 0x0 /* Controller Capabilities */
@@ -258,15 +244,6 @@ typedef struct {
* NVMe completion queue entry
*/
typedef struct {
- uint16_t sf_p:1; /* Phase Tag */
- uint16_t sf_sc:8; /* Status Code */
- uint16_t sf_sct:3; /* Status Code Type */
- uint16_t sf_rsvd2:2;
- uint16_t sf_m:1; /* More */
- uint16_t sf_dnr:1; /* Do Not Retry */
-} nvme_cqe_sf_t;
-
-typedef struct {
uint32_t cqe_dw0; /* Command Specific */
uint32_t cqe_rsvd1;
uint16_t cqe_sqhd; /* SQ Head Pointer */
@@ -408,203 +385,6 @@ typedef union {
#define NVME_IDENTIFY_CTRL 0x1 /* Identify Controller */
#define NVME_IDENTIFY_LIST 0x2 /* Identify List Namespaces */
-#define NVME_IDENTIFY_BUFSIZE 4096 /* buffer size for Identify */
-
-/* NVMe Queue Entry Size bitfield */
-typedef struct {
- uint8_t qes_min:4; /* minimum entry size */
- uint8_t qes_max:4; /* maximum entry size */
-} nvme_idctl_qes_t;
-
-/* NVMe Power State Descriptor */
-typedef struct {
- uint16_t psd_mp; /* Maximum Power */
- uint8_t psd_rsvd1;
- uint8_t psd_mps:1; /* Max Power Scale (1.1) */
- uint8_t psd_nops:1; /* Non-Operational State (1.1) */
- uint8_t psd_rsvd2:6;
- uint32_t psd_enlat; /* Entry Latency */
- uint32_t psd_exlat; /* Exit Latency */
- uint8_t psd_rrt:5; /* Relative Read Throughput */
- uint8_t psd_rsvd3:3;
- uint8_t psd_rrl:5; /* Relative Read Latency */
- uint8_t psd_rsvd4:3;
- uint8_t psd_rwt:5; /* Relative Write Throughput */
- uint8_t psd_rsvd5:3;
- uint8_t psd_rwl:5; /* Relative Write Latency */
- uint8_t psd_rsvd6:3;
- uint8_t psd_rsvd7[16];
-} nvme_idctl_psd_t;
-
-/* NVMe Identify Controller Data Structure */
-typedef struct {
- /* Controller Capabilities & Features */
- uint16_t id_vid; /* PCI vendor ID */
- uint16_t id_ssvid; /* PCI subsystem vendor ID */
- char id_serial[20]; /* Serial Number */
- char id_model[40]; /* Model Number */
- char id_fwrev[8]; /* Firmware Revision */
- uint8_t id_rab; /* Recommended Arbitration Burst */
- uint8_t id_oui[3]; /* vendor IEEE OUI */
- struct { /* Multi-Interface Capabilities */
- uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */
- uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
- uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */
- uint8_t m_rsvd:5;
- } id_mic;
- uint8_t id_mdts; /* Maximum Data Transfer Size */
- uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */
- uint8_t id_rsvd_cc[256 - 80];
-
- /* Admin Command Set Attributes */
- struct { /* Optional Admin Command Support */
- uint16_t oa_security:1; /* Security Send & Receive */
- uint16_t oa_format:1; /* Format NVM */
- uint16_t oa_firmare:1; /* Firmware Activate & Download */
- uint16_t oa_rsvd:13;
- } id_oacs;
- uint8_t id_acl; /* Abort Command Limit */
- uint8_t id_aerl; /* Asynchronous Event Request Limit */
- struct { /* Firmware Updates */
- uint8_t fw_readonly:1; /* Slot 1 is Read-Only */
- uint8_t fw_nslot:3; /* number of firmware slots */
- uint8_t fw_rsvd:4;
- } id_frmw;
- struct { /* Log Page Attributes */
- uint8_t lp_smart:1; /* SMART/Health information per NS */
- uint8_t lp_rsvd:7;
- } id_lpa;
- uint8_t id_elpe; /* Error Log Page Entries */
- uint8_t id_npss; /* Number of Power States */
- struct { /* Admin Vendor Specific Command Conf */
- uint8_t av_spec:1; /* use format from spec */
- uint8_t av_rsvd:7;
- } id_avscc;
- struct { /* Autonomous Power State Trans (1.1) */
- uint8_t ap_sup:1; /* APST supported (1.1) */
- uint8_t ap_rsvd:7;
- } id_apsta;
- uint8_t id_rsvd_ac[256 - 10];
-
- /* NVM Command Set Attributes */
- nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */
- nvme_idctl_qes_t id_cqes; /* Completion Queue Entry Size */
- uint16_t id_rsvd_nc_1;
- uint32_t id_nn; /* Number of Namespaces */
- struct { /* Optional NVM Command Support */
- uint16_t on_compare:1; /* Compare */
- uint16_t on_wr_unc:1; /* Write Uncorrectable */
- uint16_t on_dset_mgmt:1; /* Dataset Management */
- uint16_t on_wr_zero:1; /* Write Zeros (1.1) */
- uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */
- uint16_t on_reserve:1; /* Reservations (1.1) */
- uint16_t on_rsvd:10;
- } id_oncs;
- struct { /* Fused Operation Support */
- uint16_t f_cmp_wr:1; /* Compare and Write */
- uint16_t f_rsvd:15;
- } id_fuses;
- struct { /* Format NVM Attributes */
- uint8_t fn_format:1; /* Format applies to all NS */
- uint8_t fn_sec_erase:1; /* Secure Erase applies to all NS */
- uint8_t fn_crypt_erase:1; /* Cryptographic Erase supported */
- uint8_t fn_rsvd:5;
- } id_fna;
- struct { /* Volatile Write Cache */
- uint8_t vwc_present:1; /* Volatile Write Cache present */
- uint8_t rsvd:7;
- } id_vwc;
- uint16_t id_awun; /* Atomic Write Unit Normal */
- uint16_t id_awupf; /* Atomic Write Unit Power Fail */
- struct { /* NVM Vendor Specific Command Conf */
- uint8_t nv_spec:1; /* use format from spec */
- uint8_t nv_rsvd:7;
- } id_nvscc;
- uint8_t id_rsvd_nc_2;
- uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */
- uint16_t id_rsvd_nc_3;
- struct { /* SGL Support (1.1) */
- uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */
- uint16_t sgl_rsvd1:15;
- uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */
- uint16_t sgl_rsvd2:15;
- } id_sgls;
- uint8_t id_rsvd_nc_4[192 - 28];
-
- /* I/O Command Set Attributes */
- uint8_t id_rsvd_ioc[1344];
-
- /* Power State Descriptors */
- nvme_idctl_psd_t id_psd[32];
-
- /* Vendor Specific */
- uint8_t id_vs[1024];
-} nvme_identify_ctrl_t;
-
-/* NVMe Identify Namespace LBA Format */
-typedef struct {
- uint16_t lbaf_ms; /* Metadata Size */
- uint8_t lbaf_lbads; /* LBA Data Size */
- uint8_t lbaf_rp:2; /* Relative Performance */
- uint8_t lbaf_rsvd1:6;
-} nvme_idns_lbaf_t;
-
-/* NVMe Identify Namespace Data Structure */
-typedef struct {
- uint64_t id_nsize; /* Namespace Size */
- uint64_t id_ncap; /* Namespace Capacity */
- uint64_t id_nuse; /* Namespace Utilization */
- struct { /* Namespace Features */
- uint8_t f_thin:1; /* Thin Provisioning */
- uint8_t f_rsvd:7;
- } id_nsfeat;
- uint8_t id_nlbaf; /* Number of LBA formats */
- struct { /* Formatted LBA size */
- uint8_t lba_format:4; /* LBA format */
- uint8_t lba_extlba:1; /* extended LBA (includes metadata) */
- uint8_t lba_rsvd:3;
- } id_flbas;
- struct { /* Metadata Capabilities */
- uint8_t mc_extlba:1; /* extended LBA transfers */
- uint8_t mc_separate:1; /* separate metadata transfers */
- uint8_t mc_rsvd:6;
- } id_mc;
- struct { /* Data Protection Capabilities */
- uint8_t dp_type1:1; /* Protection Information Type 1 */
- uint8_t dp_type2:1; /* Protection Information Type 2 */
- uint8_t dp_type3:1; /* Protection Information Type 3 */
- uint8_t dp_first:1; /* first 8 bytes of metadata */
- uint8_t dp_last:1; /* last 8 bytes of metadata */
- uint8_t dp_rsvd:3;
- } id_dpc;
- struct { /* Data Protection Settings */
- uint8_t dp_pinfo:3; /* Protection Information enabled */
- uint8_t dp_first:1; /* first 8 bytes of metadata */
- uint8_t dp_rsvd:4;
- } id_dps;
- struct { /* NS Multi-Path/Sharing Cap (1.1) */
- uint8_t nm_shared:1; /* NS is shared (1.1) */
- uint8_t nm_rsvd:7;
- } id_nmic;
- struct { /* Reservation Capabilities (1.1) */
- uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */
- uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */
- uint8_t rc_excl:1; /* Exclusive Access (1.1) */
- uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */
- uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */
- uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */
- uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */
- uint8_t rc_rsvd:1;
- } id_rescap;
- uint8_t id_rsvd1[120 - 32];
- uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */
- nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */
-
- uint8_t id_rsvd2[192];
-
- uint8_t id_vs[3712]; /* Vendor Specific */
-} nvme_identify_nsid_t;
-
/*
* NVMe Abort Command
@@ -619,79 +399,8 @@ typedef union {
/*
- * NVMe Get / Set Features
- */
-#define NVME_FEAT_ARBITRATION 0x1 /* Command Arbitration */
-#define NVME_FEAT_POWER_MGMT 0x2 /* Power Management */
-#define NVME_FEAT_LBA_RANGE 0x3 /* LBA Range Type */
-#define NVME_FEAT_TEMPERATURE 0x4 /* Temperature Threshold */
-#define NVME_FEAT_ERROR 0x5 /* Error Recovery */
-#define NVME_FEAT_WRITE_CACHE 0x6 /* Volatile Write Cache */
-#define NVME_FEAT_NQUEUES 0x7 /* Number of Queues */
-#define NVME_FEAT_INTR_COAL 0x8 /* Interrupt Coalescing */
-#define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */
-#define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */
-#define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */
-#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */
- /* (1.1) */
-
-#define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */
-
-/* Arbitration Feature */
-typedef struct {
- uint8_t arb_ab:3; /* Arbitration Burst */
- uint8_t arb_rsvd:5;
- uint8_t arb_lpw; /* Low Priority Weight */
- uint8_t arb_mpw; /* Medium Priority Weight */
- uint8_t arb_hpw; /* High Priority Weight */
-} nvme_arbitration_dw11_t;
-
-/* LBA Range Type Feature */
-typedef struct {
- uint32_t lr_num:6; /* Number of LBA ranges */
- uint32_t lr_rsvd:26;
-} nvme_lba_range_type_dw11_t;
-
-typedef struct {
- uint8_t lr_type; /* Type */
- struct { /* Attributes */
- uint8_t lr_write:1; /* may be overwritten */
- uint8_t lr_hidden:1; /* hidden from OS/EFI/BIOS */
- uint8_t lr_rsvd1:6;
- } lr_attr;
- uint8_t lr_rsvd2[14];
- uint64_t lr_slba; /* Starting LBA */
- uint64_t lr_nlb; /* Number of Logical Blocks */
- uint8_t lr_guid[16]; /* Unique Identifier */
- uint8_t lr_rsvd3[16];
-} nvme_lba_range_type_t;
-
-/* Volatile Write Cache Feature */
-typedef union {
- struct {
- uint32_t wc_wce:1; /* Volatile Write Cache Enable */
- uint32_t wc_rsvd:31;
- } b;
- uint32_t r;
-} nvme_write_cache_t;
-
-/* Number of Queues */
-typedef union {
- struct {
- uint16_t nq_nsq; /* Number of Submission Queues */
- uint16_t nq_ncq; /* Number of Completion Queues */
- } b;
- uint32_t r;
-} nvme_nqueue_t;
-
-
-/*
* NVMe Get Log Page
*/
-#define NVME_LOGPAGE_ERROR 0x1 /* Error Information */
-#define NVME_LOGPAGE_HEALTH 0x2 /* SMART/Health Information */
-#define NVME_LOGPAGE_FWSLOT 0x3 /* Firmware Slot Information */
-
typedef union {
struct {
uint8_t lp_lid; /* Log Page Identifier */
@@ -702,52 +411,6 @@ typedef union {
uint32_t r;
} nvme_getlogpage_t;
-typedef struct {
- uint64_t el_count; /* Error Count */
- uint16_t el_sqid; /* Submission Queue ID */
- uint16_t el_cid; /* Command ID */
- nvme_cqe_sf_t el_sf; /* Status Field */
- uint8_t el_byte; /* Parameter Error Location byte */
- uint8_t el_bit:3; /* Parameter Error Location bit */
- uint8_t el_rsvd1:5;
- uint64_t el_lba; /* Logical Block Address */
- uint32_t el_nsid; /* Namespace ID */
- uint8_t el_vendor; /* Vendor Specific Information avail */
- uint8_t el_rsvd2[64 - 29];
-} nvme_error_log_entry_t;
-
-typedef struct {
- uint64_t lo;
- uint64_t hi;
-} nvme_uint128_t;
-
-typedef struct {
- uint8_t hl_crit_warn; /* Critical Warning */
- uint16_t hl_temp; /* Temperature */
- uint8_t hl_avail_spare; /* Available Spare */
- uint8_t hl_avail_spare_thr; /* Available Spare Threshold */
- uint8_t hl_used; /* Percentage Used */
- uint8_t hl_rsvd1[32 - 6];
- nvme_uint128_t hl_data_read; /* Data Units Read */
- nvme_uint128_t hl_data_write; /* Data Units Written */
- nvme_uint128_t hl_host_read; /* Host Read Commands */
- nvme_uint128_t hl_host_write; /* Host Write Commands */
- nvme_uint128_t hl_ctrl_busy; /* Controller Busy Time */
- nvme_uint128_t hl_power_cycles; /* Power Cycles */
- nvme_uint128_t hl_power_on_hours; /* Power On Hours */
- nvme_uint128_t hl_unsafe_shutdn; /* Unsafe Shutdowns */
- nvme_uint128_t hl_media_errors; /* Media Errors */
- nvme_uint128_t hl_errors_logged; /* Number of errors logged */
- uint8_t hl_rsvd2[512 - 192];
-} nvme_health_log_t;
-
-typedef struct {
- uint8_t fw_afi:3; /* Active Firmware Slot */
- uint8_t fw_rsvd1:5;
- uint8_t fw_rsvd2[7];
- char fw_frs[7][8]; /* Firmware Revision / Slot */
- uint8_t fw_rsvd3[512 - 64];
-} nvme_fwslot_log_t;
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h
index fd6f93af88..651adaec8c 100644
--- a/usr/src/uts/common/io/nvme/nvme_var.h
+++ b/usr/src/uts/common/io/nvme/nvme_var.h
@@ -27,7 +27,7 @@
*/
#ifdef __cplusplus
-/* extern "C" { */
+extern "C" {
#endif
#define NVME_FMA_INIT 0x1
@@ -47,11 +47,18 @@
typedef struct nvme nvme_t;
typedef struct nvme_namespace nvme_namespace_t;
+typedef struct nvme_minor_state nvme_minor_state_t;
typedef struct nvme_dma nvme_dma_t;
typedef struct nvme_cmd nvme_cmd_t;
typedef struct nvme_qpair nvme_qpair_t;
typedef struct nvme_task_arg nvme_task_arg_t;
+struct nvme_minor_state {
+ kmutex_t nm_mutex;
+ boolean_t nm_oexcl;
+ uint_t nm_ocnt;
+};
+
struct nvme_dma {
ddi_dma_handle_t nd_dmah;
ddi_acc_handle_t nd_acch;
@@ -69,6 +76,7 @@ struct nvme_cmd {
void (*nc_callback)(void *);
bd_xfer_t *nc_xfer;
boolean_t nc_completed;
+ boolean_t nc_dontpanic;
uint16_t nc_sqid;
nvme_dma_t *nc_dma;
@@ -137,6 +145,8 @@ struct nvme {
boolean_t n_write_cache_present;
boolean_t n_write_cache_enabled;
int n_error_log_len;
+ boolean_t n_lba_range_supported;
+ boolean_t n_auto_pst_supported;
int n_nssr_supported;
int n_doorbell_stride;
@@ -168,9 +178,8 @@ struct nvme {
ddi_taskq_t *n_cmd_taskq;
- nvme_error_log_entry_t *n_error_log;
- nvme_health_log_t *n_health_log;
- nvme_fwslot_log_t *n_fwslot_log;
+ /* state for devctl minor node */
+ nvme_minor_state_t n_minor;
/* errors detected by driver */
uint32_t n_dma_bind_err;
@@ -217,6 +226,7 @@ struct nvme {
struct nvme_namespace {
nvme_t *ns_nvme;
uint8_t ns_eui64[8];
+ char ns_name[17];
bd_handle_t ns_bd_hdl;
@@ -229,6 +239,9 @@ struct nvme_namespace {
nvme_identify_nsid_t *ns_idns;
+ /* state for attachment point minor node */
+ nvme_minor_state_t ns_minor;
+
/*
* If a namespace has no EUI64, we create a devid in
* nvme_prepare_devid().
@@ -241,8 +254,9 @@ struct nvme_task_arg {
nvme_cmd_t *nt_cmd;
};
+
#ifdef __cplusplus
-/* } */
+}
#endif
#endif /* _NVME_VAR_H */
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 1c7662c28a..7ce40a658a 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -38,6 +38,7 @@ FILEMODE=644
# neither installed or shipped as part of the product:
# cpuid_drv.h: Private interface for cpuid consumers
# unix_bb_info.h: Private interface to kcov
+# nvme.h: Private interface to nvme
#
i386_HDRS= \
@@ -54,6 +55,7 @@ i386_HDRS= \
firmload.h \
gfx_private.h \
mouse.h \
+ nvme.h \
ucode.h
sparc_HDRS= \
@@ -422,6 +424,7 @@ CHKHDRS= \
nexusdefs.h \
note.h \
null.h \
+ nvme.h \
nvpair.h \
nvpair_impl.h \
objfs.h \
diff --git a/usr/src/uts/common/sys/nvme.h b/usr/src/uts/common/sys/nvme.h
new file mode 100644
index 0000000000..916b439f3f
--- /dev/null
+++ b/usr/src/uts/common/sys/nvme.h
@@ -0,0 +1,574 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#ifndef _SYS_NVME_H
+#define _SYS_NVME_H
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/types32.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Declarations used for communication between nvmeadm(1M) and nvme(7D)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * NVMe ioctl definitions
+ */
+
+#define NVME_IOC (('N' << 24) | ('V' << 16) | ('M' << 8))
+#define NVME_IOC_IDENTIFY_CTRL (NVME_IOC | 1)
+#define NVME_IOC_IDENTIFY_NSID (NVME_IOC | 2)
+#define NVME_IOC_CAPABILITIES (NVME_IOC | 3)
+#define NVME_IOC_GET_LOGPAGE (NVME_IOC | 4)
+#define NVME_IOC_GET_FEATURES (NVME_IOC | 5)
+#define NVME_IOC_INTR_CNT (NVME_IOC | 6)
+#define NVME_IOC_VERSION (NVME_IOC | 7)
+#define NVME_IOC_FORMAT (NVME_IOC | 8)
+#define NVME_IOC_DETACH (NVME_IOC | 9)
+#define NVME_IOC_ATTACH (NVME_IOC | 10)
+#define NVME_IOC_MAX NVME_IOC_ATTACH
+
+#define IS_NVME_IOC(x) ((x) > NVME_IOC && (x) <= NVME_IOC_MAX)
+#define NVME_IOC_CMD(x) ((x) & 0xff)
+
+typedef struct {
+ size_t n_len;
+ uintptr_t n_buf;
+ uint64_t n_arg;
+} nvme_ioctl_t;
+
+#ifdef _KERNEL
+typedef struct {
+ size32_t n_len;
+ uintptr32_t n_buf;
+ uint64_t n_arg;
+} nvme_ioctl32_t;
+#endif
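To make the calling convention concrete, here is a hypothetical userspace
sketch (not part of the patch) that fetches the Identify Controller data
through one of the minor nodes created by the driver. The node path is taken
from the command line because the real path is resolved at runtime by
nvmeadm(1M); the header choices for ioctl() follow the ioctl(2) synopsis.

        #include <sys/types.h>
        #include <sys/nvme.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <stropts.h>
        #include <unistd.h>

        int
        main(int argc, char **argv)
        {
                nvme_identify_ctrl_t idctl;
                nvme_ioctl_t nioc;
                int fd;

                if (argc != 2) {
                        (void) fprintf(stderr, "usage: %s <nvme minor node>\n",
                            argv[0]);
                        return (1);
                }

                if ((fd = open(argv[1], O_RDONLY)) < 0) {
                        perror("open");
                        return (1);
                }

                (void) memset(&nioc, 0, sizeof (nioc));
                nioc.n_len = NVME_IDENTIFY_BUFSIZE;     /* must be >= 4096 */
                nioc.n_buf = (uintptr_t)&idctl;

                if (ioctl(fd, NVME_IOC_IDENTIFY_CTRL, &nioc) != 0) {
                        perror("NVME_IOC_IDENTIFY_CTRL");
                        (void) close(fd);
                        return (1);
                }

                /* id_model and id_serial are space-padded, not NUL-terminated */
                (void) printf("model:  %.40s\nserial: %.20s\n",
                    idctl.id_model, idctl.id_serial);

                (void) close(fd);
                return (0);
        }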
+
+/*
+ * NVMe capabilities
+ */
+typedef struct {
+ uint32_t mpsmax; /* Memory Page Size Maximum */
+ uint32_t mpsmin; /* Memory Page Size Minimum */
+} nvme_capabilities_t;
+
+/*
+ * NVMe version
+ */
+typedef struct {
+ uint16_t v_minor;
+ uint16_t v_major;
+} nvme_version_t;
+
+#define NVME_VERSION_ATLEAST(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor >= (min)))
+
+#define NVME_VERSION_HIGHER(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor > (min)))
+
+
+#pragma pack(1)
+
+/*
+ * NVMe Identify data structures
+ */
+
+#define NVME_IDENTIFY_BUFSIZE 4096 /* buffer size for Identify */
+
+/* NVMe Queue Entry Size bitfield */
+typedef struct {
+ uint8_t qes_min:4; /* minimum entry size */
+ uint8_t qes_max:4; /* maximum entry size */
+} nvme_idctl_qes_t;
+
+/* NVMe Power State Descriptor */
+typedef struct {
+ uint16_t psd_mp; /* Maximum Power */
+ uint8_t psd_rsvd1;
+ uint8_t psd_mps:1; /* Max Power Scale (1.1) */
+ uint8_t psd_nops:1; /* Non-Operational State (1.1) */
+ uint8_t psd_rsvd2:6;
+ uint32_t psd_enlat; /* Entry Latency */
+ uint32_t psd_exlat; /* Exit Latency */
+ uint8_t psd_rrt:5; /* Relative Read Throughput */
+ uint8_t psd_rsvd3:3;
+ uint8_t psd_rrl:5; /* Relative Read Latency */
+ uint8_t psd_rsvd4:3;
+ uint8_t psd_rwt:5; /* Relative Write Throughput */
+ uint8_t psd_rsvd5:3;
+ uint8_t psd_rwl:5; /* Relative Write Latency */
+ uint8_t psd_rsvd6:3;
+ uint8_t psd_rsvd7[16];
+} nvme_idctl_psd_t;
+
+/* NVMe Identify Controller Data Structure */
+typedef struct {
+ /* Controller Capabilities & Features */
+ uint16_t id_vid; /* PCI vendor ID */
+ uint16_t id_ssvid; /* PCI subsystem vendor ID */
+ char id_serial[20]; /* Serial Number */
+ char id_model[40]; /* Model Number */
+ char id_fwrev[8]; /* Firmware Revision */
+ uint8_t id_rab; /* Recommended Arbitration Burst */
+ uint8_t id_oui[3]; /* vendor IEEE OUI */
+ struct { /* Multi-Interface Capabilities */
+ uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */
+ uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
+ uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */
+ uint8_t m_rsvd:5;
+ } id_mic;
+ uint8_t id_mdts; /* Maximum Data Transfer Size */
+ uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */
+ uint8_t id_rsvd_cc[256 - 80];
+
+ /* Admin Command Set Attributes */
+ struct { /* Optional Admin Command Support */
+ uint16_t oa_security:1; /* Security Send & Receive */
+ uint16_t oa_format:1; /* Format NVM */
+ uint16_t oa_firmware:1; /* Firmware Activate & Download */
+ uint16_t oa_rsvd:13;
+ } id_oacs;
+ uint8_t id_acl; /* Abort Command Limit */
+ uint8_t id_aerl; /* Asynchronous Event Request Limit */
+ struct { /* Firmware Updates */
+ uint8_t fw_readonly:1; /* Slot 1 is Read-Only */
+ uint8_t fw_nslot:3; /* number of firmware slots */
+ uint8_t fw_rsvd:4;
+ } id_frmw;
+ struct { /* Log Page Attributes */
+ uint8_t lp_smart:1; /* SMART/Health information per NS */
+ uint8_t lp_rsvd:7;
+ } id_lpa;
+ uint8_t id_elpe; /* Error Log Page Entries */
+ uint8_t id_npss; /* Number of Power States */
+ struct { /* Admin Vendor Specific Command Conf */
+ uint8_t av_spec:1; /* use format from spec */
+ uint8_t av_rsvd:7;
+ } id_avscc;
+ struct { /* Autonomous Power State Trans (1.1) */
+ uint8_t ap_sup:1; /* APST supported (1.1) */
+ uint8_t ap_rsvd:7;
+ } id_apsta;
+ uint8_t id_rsvd_ac[256 - 10];
+
+ /* NVM Command Set Attributes */
+ nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */
+ nvme_idctl_qes_t id_cqes; /* Completion Queue Entry Size */
+ uint16_t id_rsvd_nc_1;
+ uint32_t id_nn; /* Number of Namespaces */
+ struct { /* Optional NVM Command Support */
+ uint16_t on_compare:1; /* Compare */
+ uint16_t on_wr_unc:1; /* Write Uncorrectable */
+ uint16_t on_dset_mgmt:1; /* Dataset Management */
+		uint16_t on_wr_zero:1;	/* Write Zeroes (1.1) */
+ uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */
+ uint16_t on_reserve:1; /* Reservations (1.1) */
+ uint16_t on_rsvd:10;
+ } id_oncs;
+ struct { /* Fused Operation Support */
+ uint16_t f_cmp_wr:1; /* Compare and Write */
+ uint16_t f_rsvd:15;
+ } id_fuses;
+ struct { /* Format NVM Attributes */
+ uint8_t fn_format:1; /* Format applies to all NS */
+ uint8_t fn_sec_erase:1; /* Secure Erase applies to all NS */
+ uint8_t fn_crypt_erase:1; /* Cryptographic Erase supported */
+ uint8_t fn_rsvd:5;
+ } id_fna;
+ struct { /* Volatile Write Cache */
+ uint8_t vwc_present:1; /* Volatile Write Cache present */
+		uint8_t vwc_rsvd:7;
+ } id_vwc;
+ uint16_t id_awun; /* Atomic Write Unit Normal */
+ uint16_t id_awupf; /* Atomic Write Unit Power Fail */
+ struct { /* NVM Vendor Specific Command Conf */
+ uint8_t nv_spec:1; /* use format from spec */
+ uint8_t nv_rsvd:7;
+ } id_nvscc;
+ uint8_t id_rsvd_nc_2;
+ uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */
+ uint16_t id_rsvd_nc_3;
+ struct { /* SGL Support (1.1) */
+ uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */
+ uint16_t sgl_rsvd1:15;
+ uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */
+ uint16_t sgl_rsvd2:15;
+ } id_sgls;
+ uint8_t id_rsvd_nc_4[192 - 28];
+
+ /* I/O Command Set Attributes */
+ uint8_t id_rsvd_ioc[1344];
+
+ /* Power State Descriptors */
+ nvme_idctl_psd_t id_psd[32];
+
+ /* Vendor Specific */
+ uint8_t id_vs[1024];
+} nvme_identify_ctrl_t;
+
+/* NVMe Identify Namespace LBA Format */
+typedef struct {
+ uint16_t lbaf_ms; /* Metadata Size */
+ uint8_t lbaf_lbads; /* LBA Data Size */
+ uint8_t lbaf_rp:2; /* Relative Performance */
+ uint8_t lbaf_rsvd1:6;
+} nvme_idns_lbaf_t;
+
+/* NVMe Identify Namespace Data Structure */
+typedef struct {
+ uint64_t id_nsize; /* Namespace Size */
+ uint64_t id_ncap; /* Namespace Capacity */
+ uint64_t id_nuse; /* Namespace Utilization */
+ struct { /* Namespace Features */
+ uint8_t f_thin:1; /* Thin Provisioning */
+ uint8_t f_rsvd:7;
+ } id_nsfeat;
+ uint8_t id_nlbaf; /* Number of LBA formats */
+ struct { /* Formatted LBA size */
+ uint8_t lba_format:4; /* LBA format */
+ uint8_t lba_extlba:1; /* extended LBA (includes metadata) */
+ uint8_t lba_rsvd:3;
+ } id_flbas;
+ struct { /* Metadata Capabilities */
+ uint8_t mc_extlba:1; /* extended LBA transfers */
+ uint8_t mc_separate:1; /* separate metadata transfers */
+ uint8_t mc_rsvd:6;
+ } id_mc;
+ struct { /* Data Protection Capabilities */
+ uint8_t dp_type1:1; /* Protection Information Type 1 */
+ uint8_t dp_type2:1; /* Protection Information Type 2 */
+ uint8_t dp_type3:1; /* Protection Information Type 3 */
+ uint8_t dp_first:1; /* first 8 bytes of metadata */
+ uint8_t dp_last:1; /* last 8 bytes of metadata */
+ uint8_t dp_rsvd:3;
+ } id_dpc;
+ struct { /* Data Protection Settings */
+ uint8_t dp_pinfo:3; /* Protection Information enabled */
+ uint8_t dp_first:1; /* first 8 bytes of metadata */
+ uint8_t dp_rsvd:4;
+ } id_dps;
+ struct { /* NS Multi-Path/Sharing Cap (1.1) */
+ uint8_t nm_shared:1; /* NS is shared (1.1) */
+ uint8_t nm_rsvd:7;
+ } id_nmic;
+ struct { /* Reservation Capabilities (1.1) */
+ uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */
+ uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */
+ uint8_t rc_excl:1; /* Exclusive Access (1.1) */
+ uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */
+ uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */
+ uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */
+ uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */
+ uint8_t rc_rsvd:1;
+ } id_rescap;
+ uint8_t id_rsvd1[120 - 32];
+ uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */
+ nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */
+
+ uint8_t id_rsvd2[192];
+
+ uint8_t id_vs[3712]; /* Vendor Specific */
+} nvme_identify_nsid_t;
+
+
+/*
+ * NVMe completion queue entry status field
+ */
+typedef struct {
+ uint16_t sf_p:1; /* Phase Tag */
+ uint16_t sf_sc:8; /* Status Code */
+ uint16_t sf_sct:3; /* Status Code Type */
+ uint16_t sf_rsvd2:2;
+ uint16_t sf_m:1; /* More */
+ uint16_t sf_dnr:1; /* Do Not Retry */
+} nvme_cqe_sf_t;
+
+
+/*
+ * NVMe Get Log Page
+ */
+#define NVME_LOGPAGE_ERROR 0x1 /* Error Information */
+#define NVME_LOGPAGE_HEALTH 0x2 /* SMART/Health Information */
+#define NVME_LOGPAGE_FWSLOT 0x3 /* Firmware Slot Information */
+
+typedef struct {
+ uint64_t el_count; /* Error Count */
+ uint16_t el_sqid; /* Submission Queue ID */
+ uint16_t el_cid; /* Command ID */
+ nvme_cqe_sf_t el_sf; /* Status Field */
+ uint8_t el_byte; /* Parameter Error Location byte */
+ uint8_t el_bit:3; /* Parameter Error Location bit */
+ uint8_t el_rsvd1:5;
+ uint64_t el_lba; /* Logical Block Address */
+ uint32_t el_nsid; /* Namespace ID */
+ uint8_t el_vendor; /* Vendor Specific Information avail */
+ uint8_t el_rsvd2[64 - 29];
+} nvme_error_log_entry_t;
+
+typedef struct {
+ uint64_t lo;
+ uint64_t hi;
+} nvme_uint128_t;
+
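+/*
+ * The counters in the SMART/Health log below are 128-bit little-endian
+ * integers. An illustrative way for a consumer to approximate one as a
+ * long double is:
+ *
+ *	(long double)u.hi * 18446744073709551616.0L + (long double)u.lo
+ *
+ * i.e. hi * 2^64 + lo.
+ */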
+typedef struct {
+ struct { /* Critical Warning */
+ uint8_t cw_avail:1; /* available space too low */
+ uint8_t cw_temp:1; /* temperature too high */
+ uint8_t cw_reliab:1; /* degraded reliability */
+ uint8_t cw_readonly:1; /* media is read-only */
+ uint8_t cw_volatile:1; /* volatile memory backup failed */
+ uint8_t cw_rsvd:3;
+ } hl_crit_warn;
+ uint16_t hl_temp; /* Temperature */
+ uint8_t hl_avail_spare; /* Available Spare */
+ uint8_t hl_avail_spare_thr; /* Available Spare Threshold */
+ uint8_t hl_used; /* Percentage Used */
+ uint8_t hl_rsvd1[32 - 6];
+ nvme_uint128_t hl_data_read; /* Data Units Read */
+ nvme_uint128_t hl_data_write; /* Data Units Written */
+ nvme_uint128_t hl_host_read; /* Host Read Commands */
+ nvme_uint128_t hl_host_write; /* Host Write Commands */
+ nvme_uint128_t hl_ctrl_busy; /* Controller Busy Time */
+ nvme_uint128_t hl_power_cycles; /* Power Cycles */
+ nvme_uint128_t hl_power_on_hours; /* Power On Hours */
+ nvme_uint128_t hl_unsafe_shutdn; /* Unsafe Shutdowns */
+ nvme_uint128_t hl_media_errors; /* Media Errors */
+ nvme_uint128_t hl_errors_logged; /* Number of errors logged */
+ uint8_t hl_rsvd2[512 - 192];
+} nvme_health_log_t;
+
+typedef struct {
+ uint8_t fw_afi:3; /* Active Firmware Slot */
+ uint8_t fw_rsvd1:5;
+ uint8_t fw_rsvd2[7];
+ char fw_frs[7][8]; /* Firmware Revision / Slot */
+ uint8_t fw_rsvd3[512 - 64];
+} nvme_fwslot_log_t;
+
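+/*
+ * Illustrative sketch only: fetching the SMART/Health log page through
+ * NVME_IOC_GET_LOGPAGE. Passing the log page ID in n_arg is an assumption
+ * about the driver's convention, not something this header guarantees.
+ *
+ *	nvme_health_log_t hlog;
+ *	nvme_ioctl_t nioc = { 0 };
+ *
+ *	nioc.n_len = sizeof (hlog);
+ *	nioc.n_buf = (uintptr_t)&hlog;
+ *	nioc.n_arg = NVME_LOGPAGE_HEALTH;	(assumed convention)
+ *
+ *	if (ioctl(fd, NVME_IOC_GET_LOGPAGE, &nioc) != 0)
+ *		...handle failure...
+ */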
+
+/*
+ * NVMe Format NVM
+ */
+#define NVME_FRMT_SES_NONE 0
+#define NVME_FRMT_SES_USER 1
+#define NVME_FRMT_SES_CRYPTO 2
+#define NVME_FRMT_MAX_SES 2
+
+#define NVME_FRMT_MAX_LBAF 15
+
+typedef union {
+ struct {
+ uint32_t fm_lbaf:4; /* LBA Format */
+ uint32_t fm_ms:1; /* Metadata Settings */
+ uint32_t fm_pi:3; /* Protection Information */
+ uint32_t fm_pil:1; /* Prot. Information Location */
+ uint32_t fm_ses:3; /* Secure Erase Settings */
+ uint32_t fm_resvd:20;
+ } b;
+ uint32_t r;
+} nvme_format_nvm_t;
+
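+/*
+ * Illustrative sketch only: composing a Format NVM argument. How the
+ * resulting 32-bit value reaches the driver (e.g. in the n_arg field of an
+ * NVME_IOC_FORMAT ioctl) is an assumption, not defined by this header.
+ *
+ *	nvme_format_nvm_t frmt = { 0 };
+ *
+ *	frmt.b.fm_lbaf = 0;			(keep LBA format 0)
+ *	frmt.b.fm_ses = NVME_FRMT_SES_USER;	(user data erase)
+ *
+ *	...pass frmt.r to the driver...
+ */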
+
+/*
+ * NVMe Get / Set Features
+ */
+#define NVME_FEAT_ARBITRATION 0x1 /* Command Arbitration */
+#define NVME_FEAT_POWER_MGMT 0x2 /* Power Management */
+#define NVME_FEAT_LBA_RANGE 0x3 /* LBA Range Type */
+#define NVME_FEAT_TEMPERATURE 0x4 /* Temperature Threshold */
+#define NVME_FEAT_ERROR 0x5 /* Error Recovery */
+#define NVME_FEAT_WRITE_CACHE 0x6 /* Volatile Write Cache */
+#define NVME_FEAT_NQUEUES 0x7 /* Number of Queues */
+#define NVME_FEAT_INTR_COAL 0x8 /* Interrupt Coalescing */
+#define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */
+#define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */
+#define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */
+#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */
+ /* (1.1) */
+
+#define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */
+
+/* Arbitration Feature */
+typedef union {
+ struct {
+ uint8_t arb_ab:3; /* Arbitration Burst */
+ uint8_t arb_rsvd:5;
+ uint8_t arb_lpw; /* Low Priority Weight */
+ uint8_t arb_mpw; /* Medium Priority Weight */
+ uint8_t arb_hpw; /* High Priority Weight */
+ } b;
+ uint32_t r;
+} nvme_arbitration_t;
+
+/* Power Management Feature */
+typedef union {
+ struct {
+ uint32_t pm_ps:5; /* Power State */
+ uint32_t pm_rsvd:27;
+ } b;
+ uint32_t r;
+} nvme_power_mgmt_t;
+
+/* LBA Range Type Feature */
+typedef union {
+ struct {
+ uint32_t lr_num:6; /* Number of LBA ranges */
+ uint32_t lr_rsvd:26;
+ } b;
+ uint32_t r;
+} nvme_lba_range_type_t;
+
+typedef struct {
+ uint8_t lr_type; /* Type */
+ struct { /* Attributes */
+ uint8_t lr_write:1; /* may be overwritten */
+ uint8_t lr_hidden:1; /* hidden from OS/EFI/BIOS */
+ uint8_t lr_rsvd1:6;
+ } lr_attr;
+ uint8_t lr_rsvd2[14];
+ uint64_t lr_slba; /* Starting LBA */
+ uint64_t lr_nlb; /* Number of Logical Blocks */
+ uint8_t lr_guid[16]; /* Unique Identifier */
+ uint8_t lr_rsvd3[16];
+} nvme_lba_range_t;
+
+#define NVME_LBA_RANGE_BUFSIZE 4096
+
+/* Temperature Threshold Feature */
+typedef union {
+ struct {
+ uint16_t tt_tmpth; /* Temperature Threshold */
+ uint16_t tt_rsvd;
+ } b;
+ uint32_t r;
+} nvme_temp_threshold_t;
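+
+/*
+ * Note: like the hl_temp field in the SMART/Health log, the temperature
+ * threshold is expressed in kelvin; a consumer starting from a Celsius
+ * value would compute roughly (celsius + 273).
+ */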
+
+/* Error Recovery Feature */
+typedef union {
+ struct {
+ uint16_t er_tler; /* Time-Limited Error Recovery */
+ uint16_t er_rsvd;
+ } b;
+ uint32_t r;
+} nvme_error_recovery_t;
+
+/* Volatile Write Cache Feature */
+typedef union {
+ struct {
+ uint32_t wc_wce:1; /* Volatile Write Cache Enable */
+ uint32_t wc_rsvd:31;
+ } b;
+ uint32_t r;
+} nvme_write_cache_t;
+
+/* Number of Queues Feature */
+typedef union {
+ struct {
+ uint16_t nq_nsq; /* Number of Submission Queues */
+ uint16_t nq_ncq; /* Number of Completion Queues */
+ } b;
+ uint32_t r;
+} nvme_nqueues_t;
+
+/* Interrupt Coalescing Feature */
+typedef union {
+ struct {
+ uint8_t ic_thr; /* Aggregation Threshold */
+ uint8_t ic_time; /* Aggregation Time */
+ uint16_t ic_rsvd;
+ } b;
+ uint32_t r;
+} nvme_intr_coal_t;
+
+/* Interrupt Vector Configuration Feature */
+typedef union {
+ struct {
+ uint16_t iv_iv; /* Interrupt Vector */
+ uint16_t iv_cd:1; /* Coalescing Disable */
+ uint16_t iv_rsvd:15;
+ } b;
+ uint32_t r;
+} nvme_intr_vect_t;
+
+/* Write Atomicity Feature */
+typedef union {
+ struct {
+ uint32_t wa_dn:1; /* Disable Normal */
+ uint32_t wa_rsvd:31;
+ } b;
+ uint32_t r;
+} nvme_write_atomicity_t;
+
+/* Asynchronous Event Configuration Feature */
+typedef union {
+ struct {
+ uint8_t aec_avail:1; /* available space too low */
+ uint8_t aec_temp:1; /* temperature too high */
+ uint8_t aec_reliab:1; /* degraded reliability */
+ uint8_t aec_readonly:1; /* media is read-only */
+ uint8_t aec_volatile:1; /* volatile memory backup failed */
+ uint8_t aec_rsvd1:3;
+ uint8_t aec_rsvd2[3];
+ } b;
+ uint32_t r;
+} nvme_async_event_conf_t;
+
+/* Autonomous Power State Transition Feature (1.1) */
+typedef union {
+ struct {
+ uint8_t apst_apste:1; /* APST enabled */
+ uint8_t apst_rsvd:7;
+ } b;
+ uint8_t r;
+} nvme_auto_power_state_trans_t;
+
+typedef struct {
+ uint32_t apst_rsvd1:3;
+ uint32_t apst_itps:5; /* Idle Transition Power State */
+ uint32_t apst_itpt:24; /* Idle Time Prior to Transition */
+ uint32_t apst_rsvd2;
+} nvme_auto_power_state_t;
+
+#define NVME_AUTO_PST_BUFSIZE 256
+
+/* Software Progress Marker Feature */
+typedef union {
+ struct {
+ uint8_t spm_pbslc; /* Pre-Boot Software Load Count */
+ uint8_t spm_rsvd[3];
+ } b;
+ uint32_t r;
+} nvme_software_progress_marker_t;
+
+#pragma pack() /* pack(1) */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_NVME_H */
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index d5e52dbbfc..1d94c8fd2c 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -212,6 +212,8 @@ extern "C" {
#define DDI_NT_NEXUS "ddi_ctl:devctl" /* nexus drivers */
+#define DDI_NT_NVME_NEXUS "ddi_ctl:devctl:nvme" /* nexus drivers */
+
#define DDI_NT_SCSI_NEXUS "ddi_ctl:devctl:scsi" /* nexus drivers */
#define DDI_NT_SATA_NEXUS "ddi_ctl:devctl:sata" /* nexus drivers */
@@ -220,6 +222,9 @@ extern "C" {
#define DDI_NT_ATTACHMENT_POINT "ddi_ctl:attachment_point" /* attachment pt */
+#define DDI_NT_NVME_ATTACHMENT_POINT "ddi_ctl:attachment_point:nvme"
+ /* nvme attachment pt */
+
#define DDI_NT_SCSI_ATTACHMENT_POINT "ddi_ctl:attachment_point:scsi"
/* scsi attachment pt */