summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/Makefile 1
-rw-r--r-- usr/src/cmd/nvmeadm/Makefile 43
-rw-r--r-- usr/src/cmd/nvmeadm/nvmeadm.c 1005
-rw-r--r-- usr/src/cmd/nvmeadm/nvmeadm.h 87
-rw-r--r-- usr/src/cmd/nvmeadm/nvmeadm_dev.c 201
-rw-r--r-- usr/src/cmd/nvmeadm/nvmeadm_print.c 1138
-rw-r--r-- usr/src/man/man1m/Makefile 3
-rw-r--r-- usr/src/man/man1m/nvmeadm.1m 410
-rw-r--r-- usr/src/pkg/manifests/driver-storage-nvme.mf 3
-rw-r--r-- usr/src/uts/common/io/blkdev/blkdev.c 16
-rw-r--r-- usr/src/uts/common/io/nvme/nvme.c 1059
-rw-r--r-- usr/src/uts/common/io/nvme/nvme_reg.h 341
-rw-r--r-- usr/src/uts/common/io/nvme/nvme_var.h 24
-rw-r--r-- usr/src/uts/common/sys/Makefile 3
-rw-r--r-- usr/src/uts/common/sys/nvme.h 574
-rw-r--r-- usr/src/uts/common/sys/sunddi.h 5
16 files changed, 4451 insertions, 462 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 08841eb06d..e33e3643f9 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -480,6 +480,7 @@ i386_SUBDIRS= \
addbadsec \
biosdev \
diskscan \
+ nvmeadm \
rtc \
ucodeadm \
xvm
diff --git a/usr/src/cmd/nvmeadm/Makefile b/usr/src/cmd/nvmeadm/Makefile
new file mode 100644
index 0000000000..c042d4075f
--- /dev/null
+++ b/usr/src/cmd/nvmeadm/Makefile
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Nexenta Systems, Inc.
+#
+
+
+# Name of the command built by this Makefile.
+PROG= nvmeadm
+
+# One object per source file; SRCS is derived from OBJS by suffix substitution.
+OBJS= nvmeadm.o nvmeadm_dev.o nvmeadm_print.o
+SRCS= $(OBJS:%.o=%.c)
+
+include ../Makefile.cmd
+
+.KEEP_STATE:
+
+CFLAGS += $(CCVERBOSE)
+# nvmeadm.c and nvmeadm_dev.c include <libdevinfo.h>, hence -ldevinfo.
+LDLIBS += -ldevinfo
+C99MODE= $(C99_ENABLE)
+
+all: $(PROG)
+
+# Link all objects into the final program.
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all $(ROOTUSRSBINPROG)
+
+# Remove the objects and the linked program.
+clean:
+ $(RM) $(OBJS) $(PROG)
+
+lint: lint_SRCS
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/nvmeadm/nvmeadm.c b/usr/src/cmd/nvmeadm/nvmeadm.c
new file mode 100644
index 0000000000..13cace3ead
--- /dev/null
+++ b/usr/src/cmd/nvmeadm/nvmeadm.c
@@ -0,0 +1,1005 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+/*
+ * nvmeadm -- NVMe administration utility
+ *
+ * nvmeadm [-v] [-d] [-h] <command> [<ctl>[/<ns>][,...]] [args]
+ * commands: list
+ * identify
+ * get-logpage <logpage name>
+ * get-features <feature>[,...]
+ * format ...
+ * secure-erase ...
+ * detach ...
+ * attach ...
+ * get-param ...
+ * set-param ...
+ * load-firmware ...
+ * activate-firmware ...
+ * write-uncorrectable ...
+ * compare ...
+ * compare-and-write ...
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <ctype.h>
+#include <err.h>
+#include <sys/sunddi.h>
+#include <libdevinfo.h>
+
+#include <sys/nvme.h>
+
+#include "nvmeadm.h"
+
+typedef struct nvme_process_arg nvme_process_arg_t;
+typedef struct nvme_feature nvme_feature_t;
+typedef struct nvmeadm_cmd nvmeadm_cmd_t;
+
+/*
+ * Per-invocation state handed to the device walk callback and on to the
+ * command handlers.
+ */
+struct nvme_process_arg {
+ /* arguments following the <ctl>[/<ns>] argument, as set up by main() */
+ int npa_argc;
+ char **npa_argv;
+ /* controller name to match (e.g. "nvme0"), or NULL to match all */
+ char *npa_name;
+ /* namespace id parsed from the name, 0 if none was given */
+ uint32_t npa_nsid;
+ /* B_TRUE if namespace minor nodes are being processed */
+ boolean_t npa_isns;
+ /* command table entry of the command being run */
+ const nvmeadm_cmd_t *npa_cmd;
+ /* devinfo node and minor currently being processed by nvme_process() */
+ di_node_t npa_node;
+ di_minor_t npa_minor;
+ /* devfs path of npa_node, valid only during nvme_process() */
+ char *npa_path;
+ /* result of nvme_dskname() for a namespace, NULL if none was found */
+ char *npa_dsk;
+ /* identify and version data fetched by nvme_process() */
+ nvme_identify_ctrl_t *npa_idctl;
+ nvme_identify_nsid_t *npa_idns;
+ nvme_version_t *npa_version;
+};
+
+/*
+ * Description of one feature known to the get-features command.
+ */
+struct nvme_feature {
+ /* full feature name, printed and matched case-insensitively by prefix */
+ char *f_name;
+ /* short name, also matched by prefix; "" if the feature has none */
+ char *f_short;
+ /* feature identifier passed to nvme_get_feature(); 0 ends the table */
+ uint8_t f_feature;
+ /* initial data buffer size passed to nvme_get_feature(), 0 for none */
+ size_t f_bufsize;
+ /* NVMEADM_CTRL and/or NVMEADM_NS: where the feature may be queried */
+ uint_t f_getflags;
+ /* retrieves and prints the feature; returns 0 on success */
+ int (*f_get)(int, const nvme_feature_t *, nvme_identify_ctrl_t *);
+ /* prints the result value and optional data buffer of the feature */
+ void (*f_print)(uint64_t, void *, size_t, nvme_identify_ctrl_t *);
+};
+
+#define NVMEADM_CTRL 1
+#define NVMEADM_NS 2
+#define NVMEADM_BOTH (NVMEADM_CTRL | NVMEADM_NS)
+
+/*
+ * Description of one nvmeadm sub-command.
+ */
+struct nvmeadm_cmd {
+ /* command name as given on the command line */
+ char *c_name;
+ /* one-line description printed in the general usage message */
+ char *c_desc;
+ /* extra text appended to the "flags:" section of usage(), or NULL */
+ char *c_flagdesc;
+ /* handler called with an open fd for each matching device */
+ int (*c_func)(int, const nvme_process_arg_t *);
+ /* prints the command-specific synopsis; gets the command name */
+ void (*c_usage)(const char *);
+ /* B_TRUE if a comma-separated list of controllers is allowed */
+ boolean_t c_multi;
+};
+
+
+static void usage(const nvmeadm_cmd_t *);
+static void nvme_walk(nvme_process_arg_t *, di_node_t);
+static boolean_t nvme_match(nvme_process_arg_t *);
+
+static int nvme_process(di_node_t, di_minor_t, void *);
+
+static int do_list(int, const nvme_process_arg_t *);
+static int do_identify(int, const nvme_process_arg_t *);
+static int do_get_logpage_error(int, const nvme_process_arg_t *);
+static int do_get_logpage_health(int, const nvme_process_arg_t *);
+static int do_get_logpage_fwslot(int, const nvme_process_arg_t *);
+static int do_get_logpage(int, const nvme_process_arg_t *);
+static int do_get_feat_common(int, const nvme_feature_t *,
+ nvme_identify_ctrl_t *);
+static int do_get_feat_intr_vect(int, const nvme_feature_t *,
+ nvme_identify_ctrl_t *);
+static int do_get_features(int, const nvme_process_arg_t *);
+static int do_format(int, const nvme_process_arg_t *);
+static int do_secure_erase(int, const nvme_process_arg_t *);
+static int do_attach_detach(int, const nvme_process_arg_t *);
+
+static void usage_list(const char *);
+static void usage_identify(const char *);
+static void usage_get_logpage(const char *);
+static void usage_get_features(const char *);
+static void usage_format(const char *);
+static void usage_secure_erase(const char *);
+static void usage_attach_detach(const char *);
+
+int verbose;
+int debug;
+int found;
+static int exitcode;
+
+/*
+ * Command table searched by main(). Each entry gives the command name, a
+ * one-line description, optional extra flag text printed by usage(), the
+ * handler and usage functions, and whether the command accepts a
+ * comma-separated list of controllers. The all-NULL entry terminates the
+ * table.
+ */
+static const nvmeadm_cmd_t nvmeadm_cmds[] = {
+ {
+ "list",
+ "list controllers and namespaces",
+ NULL,
+ do_list, usage_list, B_TRUE
+ },
+ {
+ "identify",
+ "identify controllers and/or namespaces",
+ NULL,
+ do_identify, usage_identify, B_TRUE
+ },
+ {
+ "get-logpage",
+ "get a log page from controllers and/or namespaces",
+ NULL,
+ do_get_logpage, usage_get_logpage, B_TRUE
+ },
+ {
+ "get-features",
+ "get features from controllers and/or namespaces",
+ NULL,
+ do_get_features, usage_get_features, B_TRUE
+ },
+ {
+ "format",
+ "format namespace(s) of a controller",
+ NULL,
+ do_format, usage_format, B_FALSE
+ },
+ {
+ "secure-erase",
+ "secure erase namespace(s) of a controller",
+ " -c Do a cryptographic erase.",
+ do_secure_erase, usage_secure_erase, B_FALSE
+ },
+ /* "detach" and "attach" share one handler, which keys off c_name[0] */
+ {
+ "detach",
+ "detach blkdev(7d) from namespace(s) of a controller",
+ NULL,
+ do_attach_detach, usage_attach_detach, B_FALSE
+ },
+ {
+ "attach",
+ "attach blkdev(7d) to namespace(s) of a controller",
+ NULL,
+ do_attach_detach, usage_attach_detach, B_FALSE
+ },
+ {
+ NULL, NULL, NULL,
+ NULL, NULL, B_FALSE
+ }
+};
+
+/*
+ * Feature table used by the get-features command. Each entry holds the full
+ * and short feature name, the feature identifier, the data buffer size (0
+ * where no buffer is requested), the NVMEADM_CTRL/NVMEADM_NS applicability
+ * flags, and the get and print functions. Loops over this table stop at the
+ * entry whose f_feature is 0.
+ */
+static const nvme_feature_t features[] = {
+ { "Arbitration", "",
+ NVME_FEAT_ARBITRATION, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_arbitration },
+ { "Power Management", "",
+ NVME_FEAT_POWER_MGMT, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_power_mgmt },
+ { "LBA Range Type", "range",
+ NVME_FEAT_LBA_RANGE, NVME_LBA_RANGE_BUFSIZE, NVMEADM_NS,
+ do_get_feat_common, nvme_print_feat_lba_range },
+ { "Temperature Threshold", "",
+ NVME_FEAT_TEMPERATURE, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_temperature },
+ { "Error Recovery", "",
+ NVME_FEAT_ERROR, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_error },
+ { "Volatile Write Cache", "cache",
+ NVME_FEAT_WRITE_CACHE, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_write_cache },
+ { "Number of Queues", "queues",
+ NVME_FEAT_NQUEUES, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_nqueues },
+ { "Interrupt Coalescing", "coalescing",
+ NVME_FEAT_INTR_COAL, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_intr_coal },
+ /* queried once per interrupt vector, hence the dedicated getter */
+ { "Interrupt Vector Configuration", "vector",
+ NVME_FEAT_INTR_VECT, 0, NVMEADM_CTRL,
+ do_get_feat_intr_vect, nvme_print_feat_intr_vect },
+ { "Write Atomicity", "atomicity",
+ NVME_FEAT_WRITE_ATOM, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_write_atom },
+ { "Asynchronous Event Configuration", "event",
+ NVME_FEAT_ASYNC_EVENT, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_async_event },
+ { "Autonomous Power State Transition", "",
+ NVME_FEAT_AUTO_PST, NVME_AUTO_PST_BUFSIZE, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_auto_pst },
+ { "Software Progress Marker", "progress",
+ NVME_FEAT_PROGRESS, 0, NVMEADM_CTRL,
+ do_get_feat_common, nvme_print_feat_progress },
+ { NULL, NULL, 0, 0, B_FALSE, NULL }
+};
+
+
+/*
+ * nvmeadm entry point.
+ *
+ * Parses the global flags (-d, -h, -v), looks up the command in
+ * nvmeadm_cmds[], and then runs the command once for every comma-separated
+ * <ctl>[/<ns>] target. Each target gets its own libdevinfo snapshot and
+ * device walk. The process exits with the accumulated exitcode, which is 0
+ * only if every target was found and processed without error.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+	extern int optind;
+	const nvmeadm_cmd_t *cmd;
+	di_node_t node;
+	nvme_process_arg_t npa = { 0 };
+	int help = 0;
+	char *tmp, *lasts = NULL;
+
+	while ((c = getopt(argc, argv, "dhv")) != -1) {
+		switch (c) {
+		case 'd':
+			debug++;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'h':
+			help++;
+			break;
+		case '?':
+			usage(NULL);
+			exit(-1);
+		}
+	}
+
+	if (optind == argc) {
+		usage(NULL);
+		if (help)
+			exit(0);
+		else
+			exit(-1);
+	}
+
+	/* Look up the specified command in the command table. */
+	for (cmd = &nvmeadm_cmds[0]; cmd->c_name != NULL; cmd++)
+		if (strcmp(cmd->c_name, argv[optind]) == 0)
+			break;
+
+	if (cmd->c_name == NULL) {
+		usage(NULL);
+		exit(-1);
+	}
+
+	if (help) {
+		usage(cmd);
+		exit(0);
+	}
+
+	npa.npa_cmd = cmd;
+
+	optind++;
+
+	/*
+	 * All commands but "list" require a ctl/ns argument.
+	 */
+	if ((optind == argc || (strncmp(argv[optind], "nvme", 4) != 0)) &&
+	    cmd->c_func != do_list) {
+		warnx("missing controller/namespace name");
+		usage(cmd);
+		exit(-1);
+	}
+
+	/*
+	 * Store the remaining arguments for use by the command. Without a
+	 * ctl/ns argument ("list" only) there is nothing past it, so leave
+	 * the zero-initialized npa_argc/npa_argv alone rather than pointing
+	 * beyond the end of argv.
+	 */
+	if (optind < argc) {
+		npa.npa_argc = argc - optind - 1;
+		npa.npa_argv = &argv[optind + 1];
+	}
+
+	/*
+	 * Make sure we're not running commands on multiple controllers that
+	 * aren't allowed to do that.
+	 */
+	if (argv[optind] != NULL && strchr(argv[optind], ',') != NULL &&
+	    cmd->c_multi == B_FALSE) {
+		warnx("%s not allowed on multiple controllers",
+		    cmd->c_name);
+		usage(cmd);
+		exit(-1);
+	}
+
+	/*
+	 * Get controller/namespace arguments and run command. With no target
+	 * given npa_name stays NULL and the loop body runs exactly once,
+	 * matching every controller.
+	 */
+	if (argv[optind] != NULL)
+		npa.npa_name = strtok_r(argv[optind], ",", &lasts);
+
+	do {
+		boolean_t valid = B_TRUE;
+
+		/* Namespace selection must not leak from a previous target. */
+		npa.npa_nsid = 0;
+		npa.npa_isns = B_FALSE;
+
+		if (npa.npa_name != NULL) {
+			tmp = strchr(npa.npa_name, '/');
+			if (tmp != NULL) {
+				unsigned long nsid;
+				char *end;
+
+				*tmp++ = '\0';
+				errno = 0;
+				nsid = strtoul(tmp, &end, 10);
+				if (errno != 0) {
+					warn("invalid namespace %s", tmp);
+					valid = B_FALSE;
+				} else if (end == tmp || *end != '\0' ||
+				    nsid == 0 || nsid >= UINT32_MAX) {
+					warnx("invalid namespace %s", tmp);
+					valid = B_FALSE;
+				} else {
+					npa.npa_nsid = nsid;
+					npa.npa_isns = B_TRUE;
+				}
+			}
+		}
+
+		if (!valid) {
+			/*
+			 * Skip this target entirely. The name has already
+			 * been cut at the '/', so processing it again would
+			 * wrongly operate on the whole controller.
+			 */
+			exitcode--;
+		} else {
+			node = di_init("/", DINFOSUBTREE | DINFOMINOR);
+			if (node == NULL)
+				err(-1, "failed to initialize libdevinfo");
+			nvme_walk(&npa, node);
+			di_fini(node);
+
+			if (found == 0) {
+				if (npa.npa_name != NULL) {
+					/*
+					 * A precision of 0 suppresses the
+					 * "/<nsid>" suffix when no namespace
+					 * was given; a negative precision is
+					 * treated as if it were omitted.
+					 */
+					warnx("%s%.*s%.*d: no such controller "
+					    "or namespace", npa.npa_name,
+					    npa.npa_nsid > 0 ? -1 : 0, "/",
+					    npa.npa_nsid > 0 ? -1 : 0,
+					    npa.npa_nsid);
+				} else {
+					warnx("no controllers found");
+				}
+				exitcode--;
+			}
+		}
+
+		found = 0;
+		if (npa.npa_name != NULL)
+			npa.npa_name = strtok_r(NULL, ",", &lasts);
+	} while (npa.npa_name != NULL);
+
+	exit(exitcode);
+}
+
+/*
+ * Print a usage message to stderr. With a command, the command's own
+ * synopsis and optional flag description are printed; with NULL, the generic
+ * synopsis and the list of all commands are printed instead.
+ */
+static void
+usage(const nvmeadm_cmd_t *cmd)
+{
+	const char *prog = getprogname();
+	const nvmeadm_cmd_t *entry;
+
+	(void) fprintf(stderr, "usage:\n");
+	(void) fprintf(stderr, " %s -h %s\n", prog,
+	    cmd != NULL ? cmd->c_name : "[<command>]");
+	(void) fprintf(stderr, " %s [-dv] ", prog);
+
+	if (cmd == NULL) {
+		(void) fprintf(stderr,
+		    "<command> <ctl>[/<ns>][,...] [<args>]\n");
+		(void) fprintf(stderr,
+		    "\n Manage NVMe controllers and namespaces.\n");
+		(void) fprintf(stderr, "\ncommands:\n");
+
+		entry = &nvmeadm_cmds[0];
+		while (entry->c_name != NULL) {
+			(void) fprintf(stderr, " %-15s - %s\n",
+			    entry->c_name, entry->c_desc);
+			entry++;
+		}
+	} else {
+		cmd->c_usage(cmd->c_name);
+	}
+
+	(void) fprintf(stderr, "\nflags:\n"
+	    " -h print usage information\n"
+	    " -d print information useful for debugging %s\n"
+	    " -v print verbose information\n", prog);
+
+	/* Only a specific command can contribute extra flag text. */
+	if (cmd != NULL && cmd->c_flagdesc != NULL)
+		(void) fprintf(stderr, "%s\n", cmd->c_flagdesc);
+}
+
+/*
+ * Decide whether the node/minor currently stored in npa matches the
+ * controller name and namespace id the user asked for. With no name given
+ * everything matches. A namespace request with nsid 0 matches every
+ * namespace minor of the named controller.
+ */
+static boolean_t
+nvme_match(nvme_process_arg_t *npa)
+{
+	char *ctl;
+	int differs;
+	uint32_t minor_nsid;
+
+	if (npa->npa_name == NULL)
+		return (B_TRUE);
+
+	/* Build "<driver><instance>" and compare it to the requested name. */
+	if (asprintf(&ctl, "%s%d", di_driver_name(npa->npa_node),
+	    di_instance(npa->npa_node)) < 0)
+		err(-1, "nvme_match()");
+
+	differs = strcmp(ctl, npa->npa_name);
+	free(ctl);
+
+	if (differs != 0)
+		return (B_FALSE);
+
+	if (!npa->npa_isns || npa->npa_nsid == 0)
+		return (B_TRUE);
+
+	/* The minor name of a namespace node is parsed as its decimal id. */
+	minor_nsid = strtoul(di_minor_name(npa->npa_minor), NULL, 10);
+
+	return (npa->npa_nsid == minor_nsid ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Find the disk name (e.g. "c1t...d0") of the blkdev child attached to the
+ * namespace described by npa->npa_minor.
+ *
+ * The children of the controller node are searched for one whose bus address
+ * (minus an optional leading 'w' and anything from the first ',' on) is a
+ * case-insensitive prefix match of the namespace minor name. The /dev path
+ * of that child's "c" minor is then reduced to its last component with the
+ * two-character slice suffix removed.
+ *
+ * Returns a string allocated with strdup() that the caller must free, or
+ * NULL if no matching child or usable path was found.
+ */
+char *
+nvme_dskname(const nvme_process_arg_t *npa)
+{
+	char *dsk = NULL;
+	di_node_t child;
+	di_dim_t dim;
+	char *addr;
+
+	dim = di_dim_init();
+
+	for (child = di_child_node(npa->npa_node);
+	    child != DI_NODE_NIL;
+	    child = di_sibling_node(child)) {
+		char *devpath, *base;
+		size_t len;
+
+		addr = di_bus_addr(child);
+		if (addr == NULL)
+			continue;
+
+		if (addr[0] == 'w')
+			addr++;
+
+		if (strncasecmp(addr, di_minor_name(npa->npa_minor),
+		    strchrnul(addr, ',') - addr) != 0)
+			continue;
+
+		devpath = di_dim_path_dev(dim, di_driver_name(child),
+		    di_instance(child), "c");
+
+		if (devpath != NULL) {
+			len = strlen(devpath);
+			base = NULL;
+
+			/* Chop off the slice suffix, keep the last component */
+			if (len >= 2) {
+				devpath[len - 2] = '\0';
+				base = strrchr(devpath, '/');
+			}
+
+			if (base != NULL) {
+				dsk = strdup(base + 1);
+				if (dsk == NULL)
+					err(-1, "nvme_dskname");
+			}
+
+			/*
+			 * The path returned by di_dim_path_dev() is allocated
+			 * for the caller; release it now that the name has
+			 * been copied.
+			 */
+			free(devpath);
+		}
+
+		break;
+	}
+
+	di_dim_fini(dim);
+	return (dsk);
+}
+
+/*
+ * di_walk_minor() callback: run the current command on one minor node.
+ *
+ * The node and minor are recorded in the nvme_process_arg_t passed as arg.
+ * If they match the user's selection and the device can be opened, the
+ * version and identify data are fetched and the command handler is called;
+ * its return value is added to the global exitcode. All per-device data is
+ * released and its pointers reset before returning, so a later invocation
+ * that bails out early cannot free stale pointers a second time.
+ *
+ * Always returns DI_WALK_CONTINUE so the walk visits every minor node.
+ */
+static int
+nvme_process(di_node_t node, di_minor_t minor, void *arg)
+{
+	nvme_process_arg_t *npa = arg;
+	int fd;
+
+	npa->npa_node = node;
+	npa->npa_minor = minor;
+
+	if (!nvme_match(npa))
+		return (DI_WALK_CONTINUE);
+
+	if ((fd = nvme_open(minor)) < 0)
+		return (DI_WALK_CONTINUE);
+
+	found++;
+
+	npa->npa_path = di_devfs_path(node);
+	if (npa->npa_path == NULL)
+		goto out;
+
+	npa->npa_version = nvme_version(fd);
+	if (npa->npa_version == NULL)
+		goto out;
+
+	npa->npa_idctl = nvme_identify_ctrl(fd);
+	if (npa->npa_idctl == NULL)
+		goto out;
+
+	npa->npa_idns = nvme_identify_nsid(fd);
+	if (npa->npa_idns == NULL)
+		goto out;
+
+	if (npa->npa_isns)
+		npa->npa_dsk = nvme_dskname(npa);
+
+	exitcode += npa->npa_cmd->c_func(fd, npa);
+
+out:
+	di_devfs_path_free(npa->npa_path);
+	free(npa->npa_dsk);
+	free(npa->npa_version);
+	free(npa->npa_idctl);
+	free(npa->npa_idns);
+
+	/* Reset everything that was released above. */
+	npa->npa_path = NULL;
+	npa->npa_dsk = NULL;
+	npa->npa_version = NULL;
+	npa->npa_idctl = NULL;
+	npa->npa_idns = NULL;
+
+	nvme_close(fd);
+
+	return (DI_WALK_CONTINUE);
+}
+
+/*
+ * Walk the minor nodes below node and call nvme_process() on each one.
+ * Namespace requests walk the attachment point minors, everything else walks
+ * the nexus (controller) minors.
+ */
+static void
+nvme_walk(nvme_process_arg_t *npa, di_node_t node)
+{
+	char *nodetype = npa->npa_isns ?
+	    DDI_NT_NVME_ATTACHMENT_POINT : DDI_NT_NVME_NEXUS;
+
+	(void) di_walk_minor(node, nodetype, 0, npa, nvme_process);
+}
+
+/*
+ * Print the synopsis and description of the "list" command to stderr.
+ * c_name is the command name to show in the synopsis. The whole target
+ * argument is optional for this command, hence the outer brackets.
+ */
+static void
+usage_list(const char *c_name)
+{
+	(void) fprintf(stderr, "%s [<ctl>[/<ns>][,...]]\n\n"
+	    " List NVMe controllers and their namespaces. If no "
+	    "controllers and/or name-\n spaces are specified, all "
+	    "controllers and namespaces in the system will be\n "
+	    "listed.\n", c_name);
+}
+
+/*
+ * Print the one-line summary of a single namespace for the "list" command:
+ * controller/namespace name, the attached disk name or "unattached", and
+ * the namespace summary. The fd argument is unused. Always returns 0.
+ */
+static int
+do_list_nsid(int fd, const nvme_process_arg_t *npa)
+{
+	const char *dsk = npa->npa_dsk;
+
+	_NOTE(ARGUNUSED(fd));
+
+	if (dsk == NULL)
+		dsk = "unattached";
+
+	(void) printf(" %s/%s (%s): ", npa->npa_name,
+	    di_minor_name(npa->npa_minor), dsk);
+	nvme_print_nsid_summary(npa->npa_idns);
+
+	return (0);
+}
+
+/*
+ * Handler of the "list" command for one controller: print the controller
+ * summary, then walk the controller's namespaces with a private copy of the
+ * command whose handler is do_list_nsid(). The fd argument is unused.
+ * Returns the current value of the global exitcode.
+ */
+static int
+do_list(int fd, const nvme_process_arg_t *npa)
+{
+	nvme_process_arg_t ns_npa = { 0 };
+	nvmeadm_cmd_t ns_cmd = *(npa->npa_cmd);
+	char *ctl;
+
+	_NOTE(ARGUNUSED(fd));
+
+	if (asprintf(&ctl, "%s%d", di_driver_name(npa->npa_node),
+	    di_instance(npa->npa_node)) < 0)
+		err(-1, "do_list()");
+
+	(void) printf("%s: ", ctl);
+	nvme_print_ctrl_summary(npa->npa_idctl, npa->npa_version);
+
+	/* Same command, but every namespace goes to do_list_nsid(). */
+	ns_cmd.c_func = do_list_nsid;
+
+	ns_npa.npa_cmd = &ns_cmd;
+	ns_npa.npa_name = ctl;
+	ns_npa.npa_nsid = npa->npa_nsid;
+	ns_npa.npa_isns = B_TRUE;
+
+	nvme_walk(&ns_npa, npa->npa_node);
+
+	free(ctl);
+
+	return (exitcode);
+}
+
+/*
+ * Print the synopsis and description of the "identify" command to stderr.
+ * c_name is the command name to show in the synopsis.
+ */
+static void
+usage_identify(const char *c_name)
+{
+ (void) fprintf(stderr, "%s <ctl>[/<ns>][,...]\n\n"
+ " Print detailed information about the specified NVMe "
+ "controllers and/or name-\n spaces.\n", c_name);
+}
+
+/*
+ * Handler of the "identify" command. For a namespace (non-zero nsid) the
+ * namespace identify data is printed. For a controller the capabilities are
+ * fetched as well and printed together with the controller identify data.
+ * Returns 0 on success, -1 if the capabilities could not be retrieved.
+ */
+static int
+do_identify(int fd, const nvme_process_arg_t *npa)
+{
+	nvme_capabilities_t *cap;
+
+	if (npa->npa_nsid != 0) {
+		(void) printf("%s/%s: ", npa->npa_name,
+		    di_minor_name(npa->npa_minor));
+		nvme_print_identify_nsid(npa->npa_idns,
+		    npa->npa_version);
+		return (0);
+	}
+
+	if ((cap = nvme_capabilities(fd)) == NULL)
+		return (-1);
+
+	(void) printf("%s: ", npa->npa_name);
+	nvme_print_identify_ctrl(npa->npa_idctl, cap, npa->npa_version);
+	free(cap);
+
+	return (0);
+}
+
+/*
+ * Print the synopsis and description of the "get-logpage" command to
+ * stderr. c_name is the command name to show in the synopsis.
+ */
+static void
+usage_get_logpage(const char *c_name)
+{
+ (void) fprintf(stderr, "%s <ctl>[/<ns>][,...] <logpage>\n\n"
+ " Print the specified log page of the specified NVMe "
+ "controllers and/or name-\n spaces. Supported log pages "
+ "are error, health, and firmware.\n", c_name);
+}
+
+/*
+ * Retrieve and print the error log page of a controller. The buffer is
+ * sized for id_elpe + 1 entries; the number of entries printed is derived
+ * from the size reported back by nvme_get_logpage(). Exits if called for a
+ * namespace. Returns 0 on success, -1 if the log page could not be read.
+ */
+static int
+do_get_logpage_error(int fd, const nvme_process_arg_t *npa)
+{
+	nvme_error_log_entry_t *entries;
+	size_t len;
+	int count;
+
+	if (npa->npa_nsid != 0)
+		errx(-1, "Error Log not available on a per-namespace basis");
+
+	count = npa->npa_idctl->id_elpe + 1;
+	len = sizeof (nvme_error_log_entry_t) * count;
+
+	entries = nvme_get_logpage(fd, NVME_LOGPAGE_ERROR, &len);
+	if (entries == NULL)
+		return (-1);
+
+	/* len now holds the size returned with the log page. */
+	count = len / sizeof (nvme_error_log_entry_t);
+
+	(void) printf("%s: ", npa->npa_name);
+	nvme_print_error_log(count, entries);
+	free(entries);
+
+	return (0);
+}
+
+/*
+ * Retrieve and print the SMART/health log page. Exits if called for a
+ * namespace on a controller whose identify data does not advertise
+ * per-namespace SMART data. Returns 0 on success, -1 if the log page could
+ * not be read.
+ */
+static int
+do_get_logpage_health(int fd, const nvme_process_arg_t *npa)
+{
+	nvme_health_log_t *health;
+	size_t len = sizeof (nvme_health_log_t);
+
+	if (npa->npa_nsid != 0 && npa->npa_idctl->id_lpa.lp_smart == 0)
+		errx(-1, "SMART/Health information not available "
+		    "on a per-namespace basis on this controller");
+
+	health = nvme_get_logpage(fd, NVME_LOGPAGE_HEALTH, &len);
+	if (health == NULL)
+		return (-1);
+
+	(void) printf("%s: ", npa->npa_name);
+	nvme_print_health_log(health, npa->npa_idctl);
+	free(health);
+
+	return (0);
+}
+
+/*
+ * Retrieve and print the firmware slot log page of a controller. Exits if
+ * called for a namespace. Returns 0 on success, -1 if the log page could
+ * not be read.
+ */
+static int
+do_get_logpage_fwslot(int fd, const nvme_process_arg_t *npa)
+{
+	nvme_fwslot_log_t *slots;
+	size_t len = sizeof (nvme_fwslot_log_t);
+
+	if (npa->npa_nsid != 0)
+		errx(-1, "Firmware Slot information not available on a "
+		    "per-namespace basis");
+
+	slots = nvme_get_logpage(fd, NVME_LOGPAGE_FWSLOT, &len);
+	if (slots == NULL)
+		return (-1);
+
+	(void) printf("%s: ", npa->npa_name);
+	nvme_print_fwslot_log(slots);
+	free(slots);
+
+	return (0);
+}
+
+/*
+ * Handler of the "get-logpage" command: dispatch on the log page name given
+ * as first command argument ("error", "health" or "firmware"). Prints usage
+ * and exits if the name is missing, exits if it is unknown. Returns the
+ * result of the selected log page function.
+ */
+static int
+do_get_logpage(int fd, const nvme_process_arg_t *npa)
+{
+	static const struct {
+		const char *lp_name;
+		int (*lp_func)(int, const nvme_process_arg_t *);
+	} pages[] = {
+		{ "error", do_get_logpage_error },
+		{ "health", do_get_logpage_health },
+		{ "firmware", do_get_logpage_fwslot }
+	};
+	size_t i;
+
+	if (npa->npa_argc < 1) {
+		warnx("missing logpage name");
+		usage(npa->npa_cmd);
+		exit(-1);
+	}
+
+	for (i = 0; i < sizeof (pages) / sizeof (pages[0]); i++) {
+		if (strcmp(npa->npa_argv[0], pages[i].lp_name) == 0)
+			return (pages[i].lp_func(fd, npa));
+	}
+
+	errx(-1, "invalid log page: %s", npa->npa_argv[0]);
+}
+
+/*
+ * Print the synopsis of the "get-features" command to stderr, followed by a
+ * table of all known features with their full name, short name, and whether
+ * they apply to controllers, namespaces, or both.
+ */
+static void
+usage_get_features(const char *c_name)
+{
+	const nvme_feature_t *feat = &features[0];
+
+	(void) fprintf(stderr, "%s <ctl>[/<ns>][,...] [<feature>[,...]]\n\n"
+	    " Print the specified features of the specified NVMe controllers "
+	    "and/or\n namespaces. Supported features are:\n\n", c_name);
+	(void) fprintf(stderr, " %-35s %-14s %s\n",
+	    "FEATURE NAME", "SHORT NAME", "CONTROLLER/NAMESPACE");
+
+	while (feat->f_feature != 0) {
+		uint_t flags = feat->f_getflags;
+		char *scope;
+
+		scope = ((flags & NVMEADM_BOTH) == NVMEADM_BOTH) ? "both" :
+		    ((flags & NVMEADM_CTRL) != 0) ? "controller only" :
+		    "namespace only";
+
+		(void) fprintf(stderr, " %-35s %-14s %s\n",
+		    feat->f_name, feat->f_short, scope);
+		feat++;
+	}
+
+}
+
+/*
+ * Generic feature getter: fetch the feature described by feat with argument
+ * 0, print its name as a heading and hand the result value and data buffer
+ * to the feature's print function. Returns 0 on success, EINVAL if the
+ * feature could not be retrieved.
+ */
+static int
+do_get_feat_common(int fd, const nvme_feature_t *feat,
+    nvme_identify_ctrl_t *idctl)
+{
+	uint64_t value;
+	size_t len = feat->f_bufsize;
+	void *data = NULL;
+
+	if (!nvme_get_feature(fd, feat->f_feature, 0, &value, &len, &data))
+		return (EINVAL);
+
+	nvme_print(2, feat->f_name, -1, NULL);
+	feat->f_print(value, data, len, idctl);
+
+	free(data);
+	return (0);
+}
+
+/*
+ * Getter for the Interrupt Vector Configuration feature: the feature is
+ * queried and printed once for each interrupt vector, using the vector
+ * number as argument. Returns 0 on success, EINVAL if the interrupt count
+ * or any of the per-vector queries failed.
+ */
+static int
+do_get_feat_intr_vect(int fd, const nvme_feature_t *feat,
+    nvme_identify_ctrl_t *idctl)
+{
+	uint64_t value;
+	uint64_t vec = 0;
+	int nvec = nvme_intr_cnt(fd);
+
+	if (nvec == -1)
+		return (EINVAL);
+
+	nvme_print(2, feat->f_name, -1, NULL);
+
+	while (vec < nvec) {
+		if (!nvme_get_feature(fd, feat->f_feature, vec, &value, NULL,
+		    NULL))
+			return (EINVAL);
+
+		feat->f_print(value, NULL, 0, idctl);
+		vec++;
+	}
+
+	return (0);
+}
+
+/*
+ * Handler of the "get-features" command.
+ *
+ * Without a feature argument, all features applicable to the target
+ * (controller or namespace) are queried and printed; failures of individual
+ * features are ignored. With a comma-separated feature list, each entry is
+ * matched case-insensitively as a prefix of a feature's full or short name;
+ * unknown, inapplicable or unsupported features produce a warning and are
+ * skipped. Exits if more than one argument is given. Always returns 0.
+ */
+static int
+do_get_features(int fd, const nvme_process_arg_t *npa)
+{
+	const nvme_feature_t *feat;
+	char *f, *flist, *lasts;
+	boolean_t header_printed = B_FALSE;
+
+	if (npa->npa_argc > 1)
+		errx(-1, "unexpected arguments");
+
+	/*
+	 * No feature list given, print all supported features.
+	 */
+	if (npa->npa_argc == 0) {
+		(void) printf("%s: Get Features\n", npa->npa_name);
+		for (feat = &features[0]; feat->f_feature != 0; feat++) {
+			if ((npa->npa_nsid != 0 &&
+			    (feat->f_getflags & NVMEADM_NS) == 0) ||
+			    (npa->npa_nsid == 0 &&
+			    (feat->f_getflags & NVMEADM_CTRL) == 0))
+				continue;
+
+			(void) feat->f_get(fd, feat, npa->npa_idctl);
+		}
+
+		return (0);
+	}
+
+	/*
+	 * Process feature list. strtok_r() modifies its input, so work on a
+	 * copy: the same argument is used again for the next controller.
+	 */
+	flist = strdup(npa->npa_argv[0]);
+	if (flist == NULL)
+		err(-1, "do_get_features");
+
+	for (f = strtok_r(flist, ",", &lasts);
+	    f != NULL;
+	    f = strtok_r(NULL, ",", &lasts)) {
+		size_t flen;
+
+		/* <ctype.h> functions require an unsigned char value. */
+		while (isspace((unsigned char)*f))
+			f++;
+
+		/*
+		 * An empty name would be a prefix of every feature and
+		 * silently select the first table entry.
+		 */
+		flen = strlen(f);
+		if (flen == 0) {
+			warnx("unknown feature %s", f);
+			continue;
+		}
+
+		for (feat = &features[0]; feat->f_feature != 0; feat++) {
+			if (strncasecmp(feat->f_name, f, flen) == 0 ||
+			    strncasecmp(feat->f_short, f, flen) == 0)
+				break;
+		}
+
+		if (feat->f_feature == 0) {
+			warnx("unknown feature %s", f);
+			continue;
+		}
+
+		if ((npa->npa_nsid != 0 &&
+		    (feat->f_getflags & NVMEADM_NS) == 0) ||
+		    (npa->npa_nsid == 0 &&
+		    (feat->f_getflags & NVMEADM_CTRL) == 0)) {
+			warnx("feature %s %s supported for namespaces",
+			    feat->f_name, (feat->f_getflags & NVMEADM_NS) != 0 ?
+			    "only" : "not");
+			continue;
+		}
+
+		if (!header_printed) {
+			(void) printf("%s: Get Features\n", npa->npa_name);
+			header_printed = B_TRUE;
+		}
+
+		if (feat->f_get(fd, feat, npa->npa_idctl) != 0) {
+			warnx("unsupported feature: %s", feat->f_name);
+			continue;
+		}
+	}
+
+	free(flist);
+	return (0);
+}
+
+/*
+ * Common part of the "format" and "secure-erase" commands: detach blkdev
+ * from the target, issue a Format NVM with the given LBA format and secure
+ * erase setting, and attach blkdev again.
+ *
+ * The detach/attach steps reuse do_attach_detach() with a private copy of
+ * the command whose name selects the operation. If the detach fails the
+ * format is not attempted and the detach result is returned, so the failure
+ * is reflected in the exit status. Otherwise format and attach failures are
+ * added to the global exitcode, which is returned.
+ */
+static int
+do_format_common(int fd, const nvme_process_arg_t *npa, unsigned long lbaf,
+    unsigned long ses)
+{
+	nvme_process_arg_t ns_npa = { 0 };
+	nvmeadm_cmd_t cmd = { 0 };
+	int rc;
+
+	cmd = *(npa->npa_cmd);
+	cmd.c_func = do_attach_detach;
+	cmd.c_name = "detach";
+	ns_npa = *npa;
+	ns_npa.npa_cmd = &cmd;
+
+	rc = do_attach_detach(fd, &ns_npa);
+	if (rc != 0)
+		return (rc);
+
+	if (nvme_format_nvm(fd, lbaf, ses) == B_FALSE) {
+		warn("%s failed", npa->npa_cmd->c_name);
+		exitcode += -1;
+	}
+
+	/* Re-attach even if the format failed. */
+	cmd.c_name = "attach";
+	exitcode += do_attach_detach(fd, &ns_npa);
+
+	return (exitcode);
+}
+
+/*
+ * Print the synopsis and description of the "format" command to stderr.
+ * c_name is the command name to show in the synopsis.
+ */
+static void
+usage_format(const char *c_name)
+{
+ (void) fprintf(stderr, "%s <ctl>[/<ns>] [<lba-format>]\n\n"
+ " Format one or all namespaces of the specified NVMe "
+ "controller. Supported LBA\n formats can be queried with "
+ "the \"%s identify\" command on the namespace\n to be "
+ "formatted.\n", c_name, getprogname());
+}
+
+/*
+ * Handler of the "format" command.
+ *
+ * Exits if the controller does not support Format NVM, or if a single
+ * namespace was selected but the controller only formats all namespaces at
+ * once. The optional first argument is the decimal LBA format number; it
+ * must be a complete number no larger than NVME_FRMT_MAX_LBAF and must
+ * refer to a format without metadata. Without an argument the namespace's
+ * current LBA format is used. Returns the result of do_format_common().
+ */
+static int
+do_format(int fd, const nvme_process_arg_t *npa)
+{
+	unsigned long lbaf;
+
+	if (npa->npa_idctl->id_oacs.oa_format == 0)
+		errx(-1, "%s not supported", npa->npa_cmd->c_name);
+
+	if (npa->npa_isns && npa->npa_idctl->id_fna.fn_format != 0)
+		errx(-1, "%s not supported on individual namespace",
+		    npa->npa_cmd->c_name);
+
+	if (npa->npa_argc > 0) {
+		char *end;
+
+		errno = 0;
+		lbaf = strtoul(npa->npa_argv[0], &end, 10);
+
+		/*
+		 * Reject anything that is not entirely a number: a typo must
+		 * not silently select LBA format 0 for a destructive command.
+		 */
+		if (errno != 0 || end == npa->npa_argv[0] || *end != '\0' ||
+		    lbaf > NVME_FRMT_MAX_LBAF)
+			errx(-1, "invalid LBA format %s", npa->npa_argv[0]);
+
+		if (npa->npa_idns->id_lbaf[lbaf].lbaf_ms != 0)
+			errx(-1, "LBA formats with metadata not supported");
+	} else {
+		lbaf = npa->npa_idns->id_flbas.lba_format;
+	}
+
+	return (do_format_common(fd, npa, lbaf, 0));
+}
+
+/*
+ * Print the synopsis and description of the "secure-erase" command to
+ * stderr. c_name is the command name to show in the synopsis.
+ */
+static void
+usage_secure_erase(const char *c_name)
+{
+ (void) fprintf(stderr, "%s <ctl>[/<ns>] [-c]\n\n"
+ " Secure-Erase one or all namespaces of the specified "
+ "NVMe controller.\n", c_name);
+}
+
+/*
+ * Handler of the "secure-erase" command.
+ *
+ * Exits if the controller does not support Format NVM, or if a single
+ * namespace was selected but the controller only erases all namespaces at
+ * once. The only accepted argument is "-c", which requests a cryptographic
+ * erase and requires controller support for it. Any other argument prints
+ * the usage message and exits without erasing anything. The erase keeps the
+ * namespace's current LBA format. Returns the result of do_format_common().
+ */
+static int
+do_secure_erase(int fd, const nvme_process_arg_t *npa)
+{
+	unsigned long lbaf;
+	uint8_t ses = NVME_FRMT_SES_USER;
+
+	if (npa->npa_idctl->id_oacs.oa_format == 0)
+		errx(-1, "%s not supported", npa->npa_cmd->c_name);
+
+	if (npa->npa_isns && npa->npa_idctl->id_fna.fn_sec_erase != 0)
+		errx(-1, "%s not supported on individual namespace",
+		    npa->npa_cmd->c_name);
+
+	if (npa->npa_argc > 0) {
+		if (strcmp(npa->npa_argv[0], "-c") == 0) {
+			ses = NVME_FRMT_SES_CRYPTO;
+		} else {
+			/*
+			 * Never go on to a destructive operation after an
+			 * argument that was not understood.
+			 */
+			usage(npa->npa_cmd);
+			exit(-1);
+		}
+	}
+
+	if (ses == NVME_FRMT_SES_CRYPTO &&
+	    npa->npa_idctl->id_fna.fn_crypt_erase == 0)
+		errx(-1, "cryptographic %s not supported",
+		    npa->npa_cmd->c_name);
+
+	lbaf = npa->npa_idns->id_flbas.lba_format;
+
+	return (do_format_common(fd, npa, lbaf, ses));
+}
+
+/*
+ * Print the synopsis and description of the "attach" or "detach" command to
+ * stderr. c_name is the command name; its capitalized form starts the
+ * description and its first letter selects "from" (detach) or "to" (attach).
+ */
+static void
+usage_attach_detach(const char *c_name)
+{
+ (void) fprintf(stderr, "%s <ctl>[/<ns>]\n\n"
+ " %c%s blkdev(7d) %s one or all namespaces of the "
+ "specified NVMe controller.\n",
+ c_name, toupper(c_name[0]), &c_name[1],
+ c_name[0] == 'd' ? "from" : "to");
+}
+
+/*
+ * Handler of the "attach" and "detach" commands; the first letter of the
+ * command name selects the operation. For a controller target the
+ * controller's namespaces are walked with the same command and the global
+ * exitcode is returned. For a namespace target the operation is performed
+ * on fd; returns 0 on success and -1 on failure.
+ */
+static int
+do_attach_detach(int fd, const nvme_process_arg_t *npa)
+{
+	char *c_name = npa->npa_cmd->c_name;
+	boolean_t (*op)(int);
+
+	if (!npa->npa_isns) {
+		nvme_process_arg_t ns_npa = { 0 };
+
+		ns_npa.npa_cmd = npa->npa_cmd;
+		ns_npa.npa_isns = B_TRUE;
+		ns_npa.npa_name = npa->npa_name;
+
+		nvme_walk(&ns_npa, npa->npa_node);
+
+		return (exitcode);
+	}
+
+	op = (c_name[0] == 'd') ? nvme_detach : nvme_attach;
+
+	if (op(fd) == B_FALSE) {
+		warn("%s failed", c_name);
+		return (-1);
+	}
+
+	return (0);
+}
diff --git a/usr/src/cmd/nvmeadm/nvmeadm.h b/usr/src/cmd/nvmeadm/nvmeadm.h
new file mode 100644
index 0000000000..4464350ace
--- /dev/null
+++ b/usr/src/cmd/nvmeadm/nvmeadm.h
@@ -0,0 +1,87 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#ifndef _NVMEADM_H
+#define _NVMEADM_H
+
+#include <stdio.h>
+#include <libdevinfo.h>
+#include <sys/nvme.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int verbose;
+extern int debug;
+
+/* printing functions */
+extern void nvme_print(int, char *, int, const char *, ...);
+extern void nvme_print_ctrl_summary(nvme_identify_ctrl_t *, nvme_version_t *);
+extern void nvme_print_nsid_summary(nvme_identify_nsid_t *);
+extern void nvme_print_identify_ctrl(nvme_identify_ctrl_t *,
+ nvme_capabilities_t *, nvme_version_t *);
+extern void nvme_print_identify_nsid(nvme_identify_nsid_t *, nvme_version_t *);
+extern void nvme_print_error_log(int, nvme_error_log_entry_t *);
+extern void nvme_print_health_log(nvme_health_log_t *, nvme_identify_ctrl_t *);
+extern void nvme_print_fwslot_log(nvme_fwslot_log_t *);
+
+extern void nvme_print_feat_arbitration(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_power_mgmt(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_lba_range(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_temperature(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_error(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_write_cache(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_nqueues(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_intr_coal(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_intr_vect(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_write_atom(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_async_event(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_auto_pst(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+extern void nvme_print_feat_progress(uint64_t, void *, size_t,
+ nvme_identify_ctrl_t *);
+
+/* device node functions */
+extern int nvme_open(di_minor_t);
+extern void nvme_close(int);
+extern nvme_version_t *nvme_version(int);
+extern nvme_capabilities_t *nvme_capabilities(int);
+extern nvme_identify_ctrl_t *nvme_identify_ctrl(int);
+extern nvme_identify_nsid_t *nvme_identify_nsid(int);
+extern void *nvme_get_logpage(int, uint8_t, size_t *);
+extern boolean_t nvme_get_feature(int, uint8_t, uint32_t, uint64_t *, size_t *,
+ void **);
+extern int nvme_intr_cnt(int);
+extern boolean_t nvme_format_nvm(int, uint8_t, uint8_t);
+extern boolean_t nvme_detach(int);
+extern boolean_t nvme_attach(int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NVMEADM_H */
diff --git a/usr/src/cmd/nvmeadm/nvmeadm_dev.c b/usr/src/cmd/nvmeadm/nvmeadm_dev.c
new file mode 100644
index 0000000000..2ac3946a5d
--- /dev/null
+++ b/usr/src/cmd/nvmeadm/nvmeadm_dev.c
@@ -0,0 +1,201 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <err.h>
+#include <libdevinfo.h>
+#include <sys/nvme.h>
+#include <assert.h>
+
+#include "nvmeadm.h"
+
+
+/*
+ * Common wrapper around the nvme driver ioctls.
+ *
+ * fd      open nvme minor node
+ * ioc     ioctl command
+ * bufsize in: size of the data buffer to allocate, may be NULL or point to 0
+ *         for no buffer; out: n_len as returned by the ioctl
+ * buf     out: the allocated data buffer, to be freed by the caller; must
+ *         not be NULL if a buffer is requested
+ * arg     value passed to the ioctl in n_arg
+ * res     out: n_arg as returned by the ioctl, may be NULL
+ *
+ * *buf is preset to NULL and *res to all-ones, so on failure the caller
+ * sees those values. Returns B_TRUE on success; on failure the buffer is
+ * freed and B_FALSE is returned. Exits if the buffer cannot be allocated.
+ */
+static boolean_t
+nvme_ioctl(int fd, int ioc, size_t *bufsize, void **buf, uint64_t arg,
+    uint64_t *res)
+{
+	nvme_ioctl_t nioc = { 0 };
+
+	if (buf != NULL)
+		*buf = NULL;
+
+	if (res != NULL)
+		*res = ~0ULL;
+
+	if (bufsize != NULL && *bufsize != 0) {
+		void *mem;
+
+		assert(buf != NULL);
+
+		/* Check the pointer itself, then store it as an integer. */
+		if ((mem = calloc(*bufsize, 1)) == NULL)
+			err(-1, "nvme_ioctl()");
+
+		nioc.n_buf = (uintptr_t)mem;
+		nioc.n_len = *bufsize;
+	}
+
+	nioc.n_arg = arg;
+
+	if (ioctl(fd, ioc, &nioc) != 0) {
+		if (debug)
+			warn("nvme_ioctl()");
+		if (nioc.n_buf != 0)
+			free((void *)nioc.n_buf);
+
+		return (B_FALSE);
+	}
+
+	if (res != NULL)
+		*res = nioc.n_arg;
+
+	if (bufsize != NULL)
+		*bufsize = nioc.n_len;
+
+	if (buf != NULL)
+		*buf = (void *)nioc.n_buf;
+
+	return (B_TRUE);
+}
+
+/*
+ * Fetch the controller capabilities. Returns a buffer the caller must free,
+ * or NULL on failure (nvme_ioctl() leaves the buffer pointer NULL then).
+ */
+nvme_capabilities_t *
+nvme_capabilities(int fd)
+{
+	size_t len = sizeof (nvme_capabilities_t);
+	void *data = NULL;
+
+	(void) nvme_ioctl(fd, NVME_IOC_CAPABILITIES, &len, &data, 0, NULL);
+
+	return (data);
+}
+
+/*
+ * Fetch the controller version. Returns a buffer the caller must free, or
+ * NULL on failure (nvme_ioctl() leaves the buffer pointer NULL then).
+ */
+nvme_version_t *
+nvme_version(int fd)
+{
+	size_t len = sizeof (nvme_version_t);
+	void *data = NULL;
+
+	(void) nvme_ioctl(fd, NVME_IOC_VERSION, &len, &data, 0, NULL);
+
+	return (data);
+}
+
+/*
+ * Fetch the controller identify data. Returns a buffer of
+ * NVME_IDENTIFY_BUFSIZE bytes the caller must free, or NULL on failure.
+ */
+nvme_identify_ctrl_t *
+nvme_identify_ctrl(int fd)
+{
+	size_t len = NVME_IDENTIFY_BUFSIZE;
+	void *data = NULL;
+
+	(void) nvme_ioctl(fd, NVME_IOC_IDENTIFY_CTRL, &len, &data, 0, NULL);
+
+	return (data);
+}
+
+/*
+ * Fetch the namespace identify data. Returns a buffer of
+ * NVME_IDENTIFY_BUFSIZE bytes the caller must free, or NULL on failure.
+ */
+nvme_identify_nsid_t *
+nvme_identify_nsid(int fd)
+{
+	size_t len = NVME_IDENTIFY_BUFSIZE;
+	void *data = NULL;
+
+	(void) nvme_ioctl(fd, NVME_IOC_IDENTIFY_NSID, &len, &data, 0, NULL);
+
+	return (data);
+}
+
+/*
+ * Fetch the log page with the given identifier. *bufsize gives the buffer
+ * size to allocate and is updated by nvme_ioctl() on success. Returns a
+ * buffer the caller must free, or NULL on failure.
+ */
+void *
+nvme_get_logpage(int fd, uint8_t logpage, size_t *bufsize)
+{
+	void *page = NULL;
+
+	(void) nvme_ioctl(fd, NVME_IOC_GET_LOGPAGE, bufsize, &page, logpage,
+	    NULL);
+
+	return (page);
+}
+
+/*
+ * Fetch a feature. The ioctl argument carries the feature identifier in its
+ * upper 32 bits and the feature-specific argument in its lower 32 bits. The
+ * result value is stored in *res; bufsize and buf are passed through to
+ * nvme_ioctl(). Returns B_TRUE on success, B_FALSE on failure.
+ */
+boolean_t
+nvme_get_feature(int fd, uint8_t feature, uint32_t arg, uint64_t *res,
+    size_t *bufsize, void **buf)
+{
+	uint64_t ioc_arg = ((uint64_t)feature << 32) | arg;
+
+	return (nvme_ioctl(fd, NVME_IOC_GET_FEATURES, bufsize, buf, ioc_arg,
+	    res));
+}
+
+/*
+ * Return the interrupt count reported by the driver. On failure
+ * nvme_ioctl() leaves the result at all-ones, which converts to -1 here.
+ */
+int
+nvme_intr_cnt(int fd)
+{
+	uint64_t count = 0;
+
+	(void) nvme_ioctl(fd, NVME_IOC_INTR_CNT, NULL, NULL, 0, &count);
+
+	return ((int)count);
+}
+
+/*
+ * Issue a Format NVM request. Only the low 4 bits of lbaf and the low 3
+ * bits of ses are used; they are packed into an nvme_format_nvm_t whose raw
+ * value is passed as the ioctl argument. Returns B_TRUE on success.
+ */
+boolean_t
+nvme_format_nvm(int fd, uint8_t lbaf, uint8_t ses)
+{
+	nvme_format_nvm_t request = { 0 };
+
+	request.b.fm_ses = ses & 0x7;
+	request.b.fm_lbaf = lbaf & 0xf;
+
+	return (nvme_ioctl(fd, NVME_IOC_FORMAT, NULL, NULL, request.r, NULL));
+}
+
+/*
+ * Issue the NVME_IOC_DETACH ioctl on fd, without buffer or argument.
+ * Returns B_TRUE on success, B_FALSE on failure.
+ */
+boolean_t
+nvme_detach(int fd)
+{
+ return (nvme_ioctl(fd, NVME_IOC_DETACH, NULL, NULL, 0, NULL));
+}
+
+/*
+ * Issue the NVME_IOC_ATTACH ioctl on fd, without buffer or argument.
+ * Returns B_TRUE on success, B_FALSE on failure.
+ */
+boolean_t
+nvme_attach(int fd)
+{
+ return (nvme_ioctl(fd, NVME_IOC_ATTACH, NULL, NULL, 0, NULL));
+}
+
+/*
+ * Open the device node of the given minor for reading and writing. The
+ * path is built by prefixing the minor's devfs path with "/devices".
+ *
+ * Returns the file descriptor, or -1 if the open failed; in debug mode a
+ * warning naming the path is printed in that case. Exits if the path cannot
+ * be determined or allocated.
+ */
+int
+nvme_open(di_minor_t minor)
+{
+	char *devpath, *path;
+	int fd;
+
+	if ((devpath = di_devfs_minor_path(minor)) == NULL)
+		err(-1, "nvme_open()");
+
+	if (asprintf(&path, "/devices%s", devpath) < 0) {
+		di_devfs_path_free(devpath);
+		err(-1, "nvme_open()");
+	}
+
+	di_devfs_path_free(devpath);
+
+	fd = open(path, O_RDWR);
+
+	/*
+	 * Report the failure before releasing the path: warn() needs both
+	 * the string and the errno value left by open().
+	 */
+	if (fd < 0 && debug)
+		warn("nvme_open(%s)", path);
+
+	free(path);
+
+	if (fd < 0)
+		return (-1);
+
+	return (fd);
+}
+
+/*
+ * Close a file descriptor obtained from nvme_open(). The result of close()
+ * is deliberately ignored.
+ */
+void
+nvme_close(int fd)
+{
+ (void) close(fd);
+}
diff --git a/usr/src/cmd/nvmeadm/nvmeadm_print.c b/usr/src/cmd/nvmeadm/nvmeadm_print.c
new file mode 100644
index 0000000000..582a849a3e
--- /dev/null
+++ b/usr/src/cmd/nvmeadm/nvmeadm_print.c
@@ -0,0 +1,1138 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+/*
+ * functions for printing of NVMe data structures and their members
+ */
+
+#include <sys/byteorder.h>
+#include <sys/types.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stdarg.h>
+#include <err.h>
+#include <assert.h>
+
+#include "nvmeadm.h"
+
+/* Forward declarations of the file-local helpers defined below. */
+static int nvme_strlen(const char *, int);
+
+static void nvme_print_str(int, char *, int, const char *, int);
+static void nvme_print_double(int, char *, double, int, char *);
+static void nvme_print_uint64(int, char *, uint64_t, const char *, char *);
+static void nvme_print_uint128(int, char *, nvme_uint128_t, char *, int, int);
+static void nvme_print_bit(int, char *, int, char *, char *);
+
+/* Number of elements of an array; x must be an array, not a pointer. */
+#define ARRAYSIZE(x) (sizeof (x) / sizeof (*(x)))
+
+/*
+ * Names of the status codes of status code type 0 (Generic Command Status),
+ * indexed directly by status code.
+ */
+static const char *generic_status_codes[] = {
+ "Successful Completion",
+ "Invalid Command Opcode",
+ "Invalid Field in Command",
+ "Command ID Conflict",
+ "Data Transfer Error",
+ "Commands Aborted due to Power Loss Notification",
+ "Internal Error",
+ "Command Abort Requested",
+ "Command Aborted due to SQ Deletion",
+ "Command Aborted due to Failed Fused Command",
+ "Command Aborted due to Missing Fused Command",
+ "Invalid Namespace or Format",
+ "Command Sequence Error",
+ /* NVMe 1.1 */
+ "Invalid SGL Segment Descriptor",
+ "Invalid Number of SGL Descriptors",
+ "Data SGL Length Invalid",
+ "Metadata SGL Length Invalid",
+ "SGL Descriptor Type Invalid",
+ /* NVMe 1.2 */
+ "Invalid Use of Controller Memory Buffer",
+ "PRP Offset Invalid",
+ "Atomic Write Unit Exceeded"
+};
+
+/*
+ * Names of the status codes of status code type 1 (Command Specific Status),
+ * indexed directly by status code.
+ */
+static const char *specific_status_codes[] = {
+ "Completion Queue Invalid",
+ "Invalid Queue Identifier",
+ "Invalid Queue Size",
+ "Abort Command Limit Exceeded",
+ "Reserved",
+ "Asynchronous Event Request Limit Exceeded",
+ "Invalid Firmware Slot",
+ "Invalid Firmware Image",
+ "Invalid Interrupt Vector",
+ "Invalid Log Page",
+ "Invalid Format",
+ "Firmware Activation Requires Conventional Reset",
+ "Invalid Queue Deletion",
+ /* NVMe 1.1 */
+ "Feature Identifier Not Saveable",
+ "Feature Not Changeable",
+ "Feature Not Namespace Specific",
+ "Firmware Activation Requires NVM Subsystem Reset",
+ /* NVMe 1.2 */
+ "Firmware Activation Requires Reset",
+ "Firmware Activation Requires Maximum Time Violation",
+ "Firmware Activation Prohibited",
+ "Overlapping Range",
+ "Namespace Insufficient Capacity",
+ "Namespace Identifier Unavailable",
+ "Reserved",
+ "Namespace Already Attached",
+ "Namespace Is Private",
+ "Namespace Not Attached",
+ "Thin Provisioning Not Supported",
+ "Controller List Invalid"
+};
+
+/*
+ * Names of the status codes of status code type 0 (Generic Command Status)
+ * that start at 0x80, indexed by (status code - 0x80).
+ */
+static const char *generic_nvm_status_codes[] = {
+ "LBA Out Of Range",
+ "Capacity Exceeded",
+ "Namespace Not Ready",
+ /* NVMe 1.1 */
+ "Reservation Conflict",
+ /* NVMe 1.2 */
+ "Format In Progress",
+};
+
+/*
+ * Names of the status codes of status code type 1 (Command Specific Status)
+ * that start at 0x80, indexed by (status code - 0x80).
+ */
+static const char *specific_nvm_status_codes[] = {
+ "Conflicting Attributes",
+ "Invalid Protection Information",
+ "Attempted Write to Read Only Range"
+};
+
+/*
+ * Names of the status codes of status code type 2 (Media Errors), which
+ * start at 0x80; indexed by (status code - 0x80).
+ */
+static const char *media_nvm_status_codes[] = {
+ "Write Fault",
+ "Unrecovered Read Error",
+ "End-to-End Guard Check Error",
+ "End-to-End Application Tag Check Error",
+ "End-to-End Reference Tag Check Error",
+ "Compare Failure",
+ "Access Denied",
+ /* NVMe 1.2 */
+ "Deallocated or Unwritten Logical Block"
+};
+
+/*
+ * Names of the status code types, indexed by the status code type value of a
+ * status field. The table has eight entries, one per value 0 through 7.
+ */
+static const char *status_code_types[] = {
+ "Generic Command Status",
+ "Command Specific Status",
+ "Media Errors",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "Vendor Specific"
+};
+
+/*
+ * Names for the relative performance value of an LBA format, indexed by that
+ * value (four entries, 0 through 3).
+ */
+static const char *lbaf_relative_performance[] = {
+ "Best", "Better", "Good", "Degraded"
+};
+
+/*
+ * Names of the known LBA range types, indexed by type. Types beyond the end
+ * of this table are printed numerically by nvme_print_feat_lba_range().
+ */
+static const char *lba_range_types[] = {
+ "Reserved", "Filesystem", "RAID", "Cache", "Page/Swap File"
+};
+
+/*
+ * nvme_print
+ *
+ * Print one line of output: the name, indented by the given number of
+ * spaces, followed by a space and the index if the index is >= 0. If a format
+ * string is given, a colon and as many spaces as are needed to line the value
+ * up in a common column are printed next, and the format string and all
+ * remaining arguments are passed to vprintf. The line always ends with a
+ * newline.
+ *
+ * NVME_PRINT_ALIGN was chosen so that all values will be lined up nicely even
+ * for the longest name at its default indentation.
+ */
+
+#define NVME_PRINT_ALIGN 43
+
+void
+nvme_print(int indent, char *name, int index, const char *fmt, ...)
+{
+	int pad = NVME_PRINT_ALIGN - (indent + strlen(name) + 1);
+	va_list args;
+
+	/* The optional " <index>" suffix uses up part of the padding. */
+	if (index >= 0)
+		pad -= snprintf(NULL, 0, " %d", index);
+
+	/* Names that are too long simply get no padding at all. */
+	if (pad < 0)
+		pad = 0;
+
+	(void) printf("%*s%s", indent, "", name);
+	if (index >= 0)
+		(void) printf(" %d", index);
+
+	va_start(args, fmt);
+	if (fmt != NULL) {
+		(void) printf(": %*s", pad, "");
+		(void) vprintf(fmt, args);
+	}
+	va_end(args);
+
+	(void) printf("\n");
+}
+
+/*
+ * nvme_strlen -- return length of string without trailing whitespace
+ *
+ * Only the first len bytes of str are considered and only the space
+ * character counts as whitespace. A len <= 0 or a field consisting entirely
+ * of spaces yields 0; str is never read outside of [0, len).
+ */
+static int
+nvme_strlen(const char *str, int len)
+{
+	/* Test len before indexing so that we never look at str[-1]. */
+	while (len > 0 && str[len - 1] == ' ')
+		len--;
+
+	return (len > 0 ? len : 0);
+}
+
+/*
+ * nvme_print_str -- print a string value, limited to the specified length
+ *
+ * A len of 0 means "use strlen(value)". Trailing spaces within the length are
+ * not printed.
+ */
+static void
+nvme_print_str(int indent, char *name, int index, const char *value, int len)
+{
+	int limit = (len == 0) ? (int)strlen(value) : len;
+	int shown = nvme_strlen(value, limit);
+
+	nvme_print(indent, name, index, "%.*s", shown, value);
+}
+
+/*
+ * nvme_print_double -- print a double using the "%g" conversion with the
+ * given precision, directly followed by an optional unit (NULL for none)
+ */
+static void
+nvme_print_double(int indent, char *name, double value, int places, char *unit)
+{
+	nvme_print(indent, name, -1, "%.*g%s", places, value,
+	    unit != NULL ? unit : "");
+}
+
+/*
+ * nvme_print_uint64 -- print a uint64_t directly followed by an optional unit
+ *
+ * fmt is the conversion used for the value; NULL selects "%"PRId64. A NULL
+ * unit prints nothing after the value. Failure to build the combined format
+ * string is fatal.
+ */
+static void
+nvme_print_uint64(int indent, char *name, uint64_t value, const char *fmt,
+    char *unit)
+{
+	const char *num_fmt = (fmt != NULL) ? fmt : "%"PRId64;
+	const char *suffix = (unit != NULL) ? unit : "";
+	char *full_fmt;
+
+	/* Append a "%s" for the unit to the value's conversion. */
+	if (asprintf(&full_fmt, "%s%%s", num_fmt) < 0)
+		err(-1, "nvme_print_uint64()");
+
+	nvme_print(indent, name, -1, full_fmt, value, suffix);
+
+	free(full_fmt);
+}
+
+/*
+ * nvme_print_uint128 -- print a 128bit uint with optional unit, after applying
+ * binary and/or decimal shifting
+ *
+ * scale_bits is the number of low-order bits that are shifted out before the
+ * conversion to decimal. A positive scale_tens appends that many decimal
+ * digits computed from the bits that were shifted out; a negative scale_tens
+ * cuts that many digits off the end of the decimal number. A NULL unit prints
+ * nothing after the number.
+ */
+static void
+nvme_print_uint128(int indent, char *name, nvme_uint128_t value, char *unit,
+    int scale_bits, int scale_tens)
+{
+	const char hex[] = "0123456789abcdef";
+	uint8_t o[(128 + scale_bits) / 3];	/* packed BCD, 2 digits/byte */
+	char p[sizeof (o) * 2];			/* the digits as characters */
+	char *pp = &p[0];
+	int i, x;
+	uint64_t rem = 0;
+
+	if (unit == NULL)
+		unit = "";
+
+	/*
+	 * Don't allow binary shifting by more than 64 bits to keep the
+	 * arithmetic simple. Also limit decimal shifting based on the size
+	 * of any possible remainder from binary shifting.
+	 */
+	assert(scale_bits <= 64);
+	assert(scale_tens <= (64 - scale_bits) / 3);
+
+	bzero(o, sizeof (o));
+	bzero(p, sizeof (p));
+
+	/*
+	 * Convert the two 64-bit numbers into a series of BCD digits using
+	 * a double-dabble algorithm. By using more or less iterations than
+	 * 128 we can do a binary shift in either direction.
+	 */
+	for (x = 0; x != 128 - scale_bits; x++) {
+		/* Add 3 to every BCD digit that is greater than 4 ... */
+		for (i = 0; i != sizeof (o); i++) {
+			if ((o[i] & 0xf0) > 0x40)
+				o[i] += 0x30;
+
+			if ((o[i] & 0xf) > 4)
+				o[i] += 3;
+		}
+
+		/* ... then shift the BCD digits and the value left by one. */
+		for (i = 0; i != sizeof (o) - 1; i++)
+			o[i] = (o[i] << 1) + (o[i+1] >> 7);
+
+		o[i] = (o[i] << 1) + (value.hi >> 63);
+
+		value.hi = (value.hi << 1) + (value.lo >> 63);
+		value.lo = (value.lo << 1);
+	}
+
+	/*
+	 * If we're supposed to do a decimal left shift (* 10^x), too,
+	 * calculate the remainder of the previous binary shift operation.
+	 */
+	if (scale_tens > 0) {
+		rem = value.hi >> (64 - scale_bits);
+
+		for (i = 0; i != scale_tens; i++)
+			rem *= 10;
+
+		rem >>= scale_bits;
+	}
+
+	/*
+	 * Construct the decimal number for printing. Skip leading zeros.
+	 */
+	for (i = 0; i < sizeof (o); i++)
+		if (o[i] != 0)
+			break;
+
+	if (i == sizeof (o)) {
+		/*
+		 * The converted number is 0. Just print the calculated
+		 * remainder and return.
+		 */
+		nvme_print(indent, name, -1, "%"PRId64"%s", rem, unit);
+		return;
+	} else {
+		/* Leave out the high digit of the first byte if it is 0. */
+		if (o[i] > 0xf)
+			*pp++ = hex[o[i] >> 4];
+
+		*pp++ = hex[o[i] & 0xf];
+
+		for (i++; i < sizeof (o); i++) {
+			*pp++ = hex[o[i] >> 4];
+			*pp++ = hex[o[i] & 0xf];
+		}
+	}
+
+	/*
+	 * For negative decimal scaling, use the printf precision specifier to
+	 * truncate the results according to the requested decimal scaling. For
+	 * positive decimal scaling we print the remainder padded with 0.
+	 *
+	 * The argument consumed by a '*' precision has to be an int, so the
+	 * string length is converted before the (possibly negative) decimal
+	 * scale is added.
+	 */
+	nvme_print(indent, name, -1, "%.*s%0.*"PRId64"%s",
+	    (int)strlen(p) + scale_tens, p,
+	    scale_tens > 0 ? scale_tens : 0, rem,
+	    unit);
+}
+
+/*
+ * nvme_print_bit -- print the state of a flag
+ *
+ * s_true is printed for a non-zero value and s_false for zero. If NULL they
+ * default to "supported" and "unsupported", respectively.
+ */
+static void
+nvme_print_bit(int indent, char *name, int value, char *s_true, char *s_false)
+{
+	char *state;
+
+	if (value)
+		state = (s_true != NULL) ? s_true : "supported";
+	else
+		state = (s_false != NULL) ? s_false : "unsupported";
+
+	nvme_print(indent, name, -1, "%s", state);
+}
+
+/*
+ * nvme_print_ctrl_summary -- print a 1-line summary of the IDENTIFY CONTROLLER
+ * data structure
+ *
+ * Model, serial and firmware revision are printed without their trailing
+ * spaces, followed by the major and minor number of the given version.
+ */
+void
+nvme_print_ctrl_summary(nvme_identify_ctrl_t *idctl, nvme_version_t *version)
+{
+	int model_len = nvme_strlen(idctl->id_model, sizeof (idctl->id_model));
+	int serial_len = nvme_strlen(idctl->id_serial,
+	    sizeof (idctl->id_serial));
+	int fwrev_len = nvme_strlen(idctl->id_fwrev, sizeof (idctl->id_fwrev));
+
+	(void) printf("model: %.*s, serial: %.*s, FW rev: %.*s, NVMe v%d.%d\n",
+	    model_len, idctl->id_model,
+	    serial_len, idctl->id_serial,
+	    fwrev_len, idctl->id_fwrev,
+	    version->v_major, version->v_minor);
+}
+
+/*
+ * nvme_print_nsid_summary -- print a 1-line summary of the IDENTIFY NAMESPACE
+ * data structure
+ *
+ * Size, capacity and utilization are multiplied by the block size of the
+ * LBA format currently in use and divided down to MB.
+ */
+void
+nvme_print_nsid_summary(nvme_identify_nsid_t *idns)
+{
+	/* lbaf_lbads is the log2 of the block size. */
+	int bsize = 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
+
+	(void) printf("Size = %"PRId64" MB, Capacity = %"PRId64" MB, "
+	    "Used = %"PRId64" MB\n",
+	    idns->id_nsize * bsize / 1024 / 1024,
+	    idns->id_ncap * bsize / 1024 / 1024,
+	    idns->id_nuse * bsize / 1024 / 1024);
+}
+
+/*
+ * nvme_print_identify_ctrl
+ *
+ * This function pretty-prints the structure returned by the IDENTIFY CONTROLLER
+ * command.
+ *
+ * cap is only used for its mpsmin member, which scales the maximum data
+ * transfer size. Fields guarded by NVME_VERSION_ATLEAST(version, 1, 1) are
+ * printed only if that check passes, and some less interesting fields are
+ * printed only if the global verbose flag is set.
+ */
+void
+nvme_print_identify_ctrl(nvme_identify_ctrl_t *idctl,
+ nvme_capabilities_t *cap, nvme_version_t *version)
+{
+ int i;
+
+ nvme_print(0, "Identify Controller", -1, NULL);
+ nvme_print(2, "Controller Capabilities and Features", -1, NULL);
+ nvme_print_str(4, "Model", -1,
+ idctl->id_model, sizeof (idctl->id_model));
+ nvme_print_str(4, "Serial", -1,
+ idctl->id_serial, sizeof (idctl->id_serial));
+ nvme_print_str(4, "Firmware Revision", -1,
+ idctl->id_fwrev, sizeof (idctl->id_fwrev));
+ if (verbose) {
+ nvme_print_uint64(4, "PCI vendor ID",
+ idctl->id_vid, "0x%0.4"PRIx64, NULL);
+ nvme_print_uint64(4, "subsystem vendor ID",
+ idctl->id_ssvid, "0x%0.4"PRIx64, NULL);
+ nvme_print_uint64(4, "Recommended Arbitration Burst",
+ idctl->id_rab, NULL, NULL);
+ nvme_print(4, "Vendor IEEE OUI", -1, "%0.2X-%0.2X-%0.2X",
+ idctl->id_oui[0], idctl->id_oui[1], idctl->id_oui[2]);
+ }
+ nvme_print(4, "Multi-Interface Capabilities", -1, NULL);
+ nvme_print_bit(6, "Multiple PCI Express ports",
+ idctl->id_mic.m_multi_pci, NULL, NULL);
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print_bit(6, "Multiple Controllers",
+ idctl->id_mic.m_multi_ctrl, NULL, NULL);
+ nvme_print_bit(6, "Is SR-IOV virtual function",
+ idctl->id_mic.m_sr_iov, "yes", "no");
+ }
+ /*
+ * id_mdts is used as a power-of-two multiplier of cap->mpsmin; a value
+ * of 0 is reported as "unlimited".
+ */
+ if (idctl->id_mdts > 0)
+ nvme_print_uint64(4, "Maximum Data Transfer Size",
+ (1 << idctl->id_mdts) * cap->mpsmin / 1024, NULL, "kB");
+ else
+ nvme_print_str(4, "Maximum Data Transfer Size", -1,
+ "unlimited", 0);
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print_uint64(4, "Unique Controller Identifier",
+ idctl->id_cntlid, "0x%0.4"PRIx64, NULL);
+ }
+
+ nvme_print(2, "Admin Command Set Attributes", -1, NULL);
+ nvme_print(4, "Optional Admin Command Support", -1, NULL);
+ nvme_print_bit(6, "Security Send & Receive",
+ idctl->id_oacs.oa_security, NULL, NULL);
+ nvme_print_bit(6, "Format NVM",
+ idctl->id_oacs.oa_format, NULL, NULL);
+ nvme_print_bit(6, "Firmware Activate & Download",
+ idctl->id_oacs.oa_firmware, NULL, NULL);
+ /* The limits and counts below are printed as the field value plus 1. */
+ if (verbose) {
+ nvme_print_uint64(4, "Abort Command Limit",
+ (uint16_t)idctl->id_acl + 1, NULL, NULL);
+ nvme_print_uint64(4, "Asynchronous Event Request Limit",
+ (uint16_t)idctl->id_aerl + 1, NULL, NULL);
+ }
+ nvme_print(4, "Firmware Updates", -1, NULL);
+ nvme_print_bit(6, "Firmware Slot 1",
+ idctl->id_frmw.fw_readonly, "read-only", "writable");
+ nvme_print_uint64(6, "No. of Firmware Slots",
+ idctl->id_frmw.fw_nslot, NULL, NULL);
+ nvme_print(2, "Log Page Attributes", -1, NULL);
+ nvme_print_bit(6, "per Namespace SMART/Health info",
+ idctl->id_lpa.lp_smart, NULL, NULL);
+ nvme_print_uint64(4, "Error Log Page Entries",
+ (uint16_t)idctl->id_elpe + 1, NULL, NULL);
+ nvme_print_uint64(4, "Number of Power States",
+ (uint16_t)idctl->id_npss + 1, NULL, NULL);
+ if (verbose) {
+ nvme_print_bit(4, "Admin Vendor-specific Command Format",
+ idctl->id_avscc.av_spec, "standard", "vendor-specific");
+ }
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print_bit(4, "Autonomous Power State Transitions",
+ idctl->id_apsta.ap_sup, NULL, NULL);
+ }
+
+ nvme_print(2, "NVM Command Set Attributes", -1, NULL);
+ /* The queue entry sizes are printed as 2 to the power of the field. */
+ if (verbose) {
+ nvme_print(4, "Submission Queue Entry Size", -1,
+ "min %d, max %d",
+ 1 << idctl->id_sqes.qes_min, 1 << idctl->id_sqes.qes_max);
+ nvme_print(4, "Completion Queue Entry Size", -1,
+ "min %d, max %d",
+ 1 << idctl->id_cqes.qes_min, 1 << idctl->id_cqes.qes_max);
+ }
+ nvme_print_uint64(4, "Number of Namespaces",
+ idctl->id_nn, NULL, NULL);
+ nvme_print(4, "Optional NVM Command Support", -1, NULL);
+ nvme_print_bit(6, "Compare",
+ idctl->id_oncs.on_compare, NULL, NULL);
+ nvme_print_bit(6, "Write Uncorrectable",
+ idctl->id_oncs.on_wr_unc, NULL, NULL);
+ nvme_print_bit(6, "Dataset Management",
+ idctl->id_oncs.on_dset_mgmt, NULL, NULL);
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print_bit(6, "Write Zeros",
+ idctl->id_oncs.on_wr_zero, NULL, NULL);
+ nvme_print_bit(6, "Save/Select in Get/Set Features",
+ idctl->id_oncs.on_save, NULL, NULL);
+ nvme_print_bit(6, "Reservations",
+ idctl->id_oncs.on_reserve, NULL, NULL);
+ }
+
+ nvme_print(4, "Fused Operation Support", -1, NULL);
+ nvme_print_bit(6, "Compare and Write",
+ idctl->id_fuses.f_cmp_wr, NULL, NULL);
+ nvme_print(4, "Format NVM Attributes", -1, NULL);
+ /*
+ * The two "per Namespace" lines are inverted: they are reported as
+ * supported when the respective bit is 0.
+ */
+ nvme_print_bit(6, "per Namespace Format",
+ idctl->id_fna.fn_format == 0, NULL, NULL);
+ nvme_print_bit(6, "per Namespace Secure Erase",
+ idctl->id_fna.fn_sec_erase == 0, NULL, NULL);
+ nvme_print_bit(6, "Cryptographic Erase",
+ idctl->id_fna.fn_crypt_erase, NULL, NULL);
+ nvme_print_bit(4, "Volatile Write Cache",
+ idctl->id_vwc.vwc_present, "present", "not present");
+ /* A field value of 0 prints as 1, hence the singular " block". */
+ nvme_print_uint64(4, "Atomic Write Unit Normal",
+ (uint32_t)idctl->id_awun + 1, NULL,
+ idctl->id_awun == 0 ? " block" : " blocks");
+ nvme_print_uint64(4, "Atomic Write Unit Power Fail",
+ (uint32_t)idctl->id_awupf + 1, NULL,
+ idctl->id_awupf == 0 ? " block" : " blocks");
+
+ if (verbose != 0)
+ nvme_print_bit(4, "NVM Vendor-specific Command Format",
+ idctl->id_nvscc.nv_spec, "standard", "vendor-specific");
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print_uint64(4, "Atomic Compare & Write Size",
+ (uint32_t)idctl->id_acwu + 1, NULL,
+ idctl->id_acwu == 0 ? " block" : " blocks");
+ nvme_print(4, "SGL Support", -1, NULL);
+ nvme_print_bit(6, "SGLs in NVM commands",
+ idctl->id_sgls.sgl_sup, NULL, NULL);
+ nvme_print_bit(6, "SGL Bit Bucket Descriptor",
+ idctl->id_sgls.sgl_bucket, NULL, NULL);
+ }
+
+ /* One descriptor is printed for each of the id_npss + 1 power states. */
+ for (i = 0; i != idctl->id_npss + 1; i++) {
+ double scale = 0.01;
+ double power = 0;
+ int places = 2;
+ char *unit = "W";
+
+ /*
+ * psd_mp is scaled by 0.01, or by 0.0001 if psd_mps is 1 on
+ * NVMe 1.1 or later.
+ */
+ if (NVME_VERSION_ATLEAST(version, 1, 1) &&
+ idctl->id_psd[i].psd_mps == 1) {
+ scale = 0.0001;
+ places = 4;
+ }
+
+ /* Values below 1 W are shown in mW instead. */
+ power = (double)idctl->id_psd[i].psd_mp * scale;
+ if (power < 1.0) {
+ power *= 1000.0;
+ unit = "mW";
+ }
+
+ nvme_print(4, "Power State Descriptor", i, NULL);
+ nvme_print_double(6, "Maximum Power", power, places, unit);
+ nvme_print_bit(6, "Non-Operational State",
+ idctl->id_psd[i].psd_nops, "yes", "no");
+ nvme_print_uint64(6, "Entry Latency",
+ idctl->id_psd[i].psd_enlat, NULL, "us");
+ nvme_print_uint64(6, "Exit Latency",
+ idctl->id_psd[i].psd_exlat, NULL, "us");
+ nvme_print_uint64(6, "Relative Read Throughput (0 = best)",
+ idctl->id_psd[i].psd_rrt, NULL, NULL);
+ nvme_print_uint64(6, "Relative Read Latency (0 = best)",
+ idctl->id_psd[i].psd_rrl, NULL, NULL);
+ nvme_print_uint64(6, "Relative Write Throughput (0 = best)",
+ idctl->id_psd[i].psd_rwt, NULL, NULL);
+ nvme_print_uint64(6, "Relative Write Latency (0 = best)",
+ idctl->id_psd[i].psd_rwl, NULL, NULL);
+ }
+}
+
+/*
+ * nvme_print_identify_nsid
+ *
+ * This function pretty-prints the structure returned by the IDENTIFY NAMESPACE
+ * command.
+ *
+ * Fields guarded by NVME_VERSION_ATLEAST(version, 1, 1) are printed only if
+ * that check passes. Unless the global verbose flag is set, LBA formats with
+ * a non-zero metadata size are left out of the list of LBA formats.
+ */
+void
+nvme_print_identify_nsid(nvme_identify_nsid_t *idns, nvme_version_t *version)
+{
+ /* Block size of the LBA format in use; lbaf_lbads is its log2. */
+ int bsize = 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
+ int i;
+
+ nvme_print(0, "Identify Namespace", -1, NULL);
+ nvme_print(2, "Namespace Capabilities and Features", -1, NULL);
+ /* The three sizes are block counts; convert them to MB for printing. */
+ nvme_print_uint64(4, "Namespace Size",
+ idns->id_nsize * bsize / 1024 / 1024, NULL, "MB");
+ nvme_print_uint64(4, "Namespace Capacity",
+ idns->id_ncap * bsize / 1024 / 1024, NULL, "MB");
+ nvme_print_uint64(4, "Namespace Utilization",
+ idns->id_nuse * bsize / 1024 / 1024, NULL, "MB");
+ nvme_print(4, "Namespace Features", -1, NULL);
+ nvme_print_bit(6, "Thin Provisioning",
+ idns->id_nsfeat.f_thin, NULL, NULL);
+ nvme_print_uint64(4, "Number of LBA Formats",
+ (uint16_t)idns->id_nlbaf + 1, NULL, NULL);
+ nvme_print(4, "Formatted LBA Size", -1, NULL);
+ nvme_print_uint64(6, "LBA Format",
+ (uint16_t)idns->id_flbas.lba_format, NULL, NULL);
+ nvme_print_bit(6, "Extended Data LBA",
+ idns->id_flbas.lba_extlba, "yes", "no");
+ nvme_print(4, "Metadata Capabilities", -1, NULL);
+ nvme_print_bit(6, "Extended Data LBA",
+ idns->id_mc.mc_extlba, NULL, NULL);
+ nvme_print_bit(6, "Separate Metadata",
+ idns->id_mc.mc_separate, NULL, NULL);
+ nvme_print(4, "End-to-End Data Protection Capabilities", -1, NULL);
+ nvme_print_bit(6, "Protection Information Type 1",
+ idns->id_dpc.dp_type1, NULL, NULL);
+ nvme_print_bit(6, "Protection Information Type 2",
+ idns->id_dpc.dp_type2, NULL, NULL);
+ nvme_print_bit(6, "Protection Information Type 3",
+ idns->id_dpc.dp_type3, NULL, NULL);
+ nvme_print_bit(6, "Protection Information first",
+ idns->id_dpc.dp_first, NULL, NULL);
+ nvme_print_bit(6, "Protection Information last",
+ idns->id_dpc.dp_last, NULL, NULL);
+ nvme_print(4, "End-to-End Data Protection Settings", -1, NULL);
+ /* A dp_pinfo of 0 is shown as "disabled", anything else as the type. */
+ if (idns->id_dps.dp_pinfo == 0)
+ nvme_print_str(6, "Protection Information", -1,
+ "disabled", 0);
+ else
+ nvme_print_uint64(6, "Protection Information Type",
+ idns->id_dps.dp_pinfo, NULL, NULL);
+ nvme_print_bit(6, "Protection Information in Metadata",
+ idns->id_dps.dp_first, "first 8 bytes", "last 8 bytes");
+
+ if (NVME_VERSION_ATLEAST(version, 1, 1)) {
+ nvme_print(4, "Namespace Multi-Path I/O and Namespace Sharing "
+ "Capabilities", -1, NULL);
+ nvme_print_bit(6, "Namespace is shared",
+ idns->id_nmic.nm_shared, "yes", "no");
+ nvme_print(2, "Reservation Capabilities", -1, NULL);
+ nvme_print_bit(6, "Persist Through Power Loss",
+ idns->id_rescap.rc_persist, NULL, NULL);
+ nvme_print_bit(6, "Write Exclusive",
+ idns->id_rescap.rc_wr_excl, NULL, NULL);
+ nvme_print_bit(6, "Exclusive Access",
+ idns->id_rescap.rc_excl, NULL, NULL);
+ nvme_print_bit(6, "Write Exclusive - Registrants Only",
+ idns->id_rescap.rc_wr_excl_r, NULL, NULL);
+ nvme_print_bit(6, "Exclusive Access - Registrants Only",
+ idns->id_rescap.rc_excl_r, NULL, NULL);
+ nvme_print_bit(6, "Write Exclusive - All Registrants",
+ idns->id_rescap.rc_wr_excl_a, NULL, NULL);
+ nvme_print_bit(6, "Exclusive Access - All Registrants",
+ idns->id_rescap.rc_excl_a, NULL, NULL);
+
+ /* The eight EUI-64 bytes are printed as 16 hex digits. */
+ nvme_print(4, "IEEE Extended Unique Identifier", -1,
+ "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X",
+ idns->id_eui64[0], idns->id_eui64[1],
+ idns->id_eui64[2], idns->id_eui64[3],
+ idns->id_eui64[4], idns->id_eui64[5],
+ idns->id_eui64[6], idns->id_eui64[7]);
+ }
+
+ /* id_nlbaf is 0-based, so entries 0 through id_nlbaf are listed. */
+ for (i = 0; i <= idns->id_nlbaf; i++) {
+ if (verbose == 0 && idns->id_lbaf[i].lbaf_ms != 0)
+ continue;
+
+ nvme_print(4, "LBA Format", i, NULL);
+ nvme_print_uint64(6, "Metadata Size",
+ idns->id_lbaf[i].lbaf_ms, NULL, " bytes");
+ nvme_print_uint64(6, "LBA Data Size",
+ 1 << idns->id_lbaf[i].lbaf_lbads, NULL, " bytes");
+ nvme_print_str(6, "Relative Performance", -1,
+ lbaf_relative_performance[idns->id_lbaf[i].lbaf_rp], 0);
+ }
+}
+
+/*
+ * nvme_print_error_log
+ *
+ * This function pretty-prints all non-zero error log entries, or all entries
+ * if verbose is set.
+ *
+ * nlog is the number of entries in elog. An entry with an error count of 0
+ * ends both the count of valid entries and, unless verbose is set, the
+ * listing. Status codes without a known name are printed with an empty
+ * description.
+ */
+void
+nvme_print_error_log(int nlog, nvme_error_log_entry_t *elog)
+{
+	int i;
+
+	nvme_print(0, "Error Log", -1, NULL);
+
+	/* Count the entries in front of the first unused one. */
+	for (i = 0; i != nlog; i++)
+		if (elog[i].el_count == 0)
+			break;
+	nvme_print_uint64(2, "Number of Error Log Entries", i, NULL, NULL);
+
+	for (i = 0; i != nlog; i++) {
+		int sc = elog[i].el_sf.sf_sc;
+		const char *sc_str = "";
+
+		if (elog[i].el_count == 0 && verbose == 0)
+			break;
+
+		/*
+		 * Look up the name of the status code. Each status code type
+		 * has one table for codes starting at 0 and/or one table for
+		 * codes starting at 0x80.
+		 */
+		switch (elog[i].el_sf.sf_sct) {
+		case 0: /* Generic Command Status */
+			if (sc < ARRAYSIZE(generic_status_codes))
+				sc_str = generic_status_codes[sc];
+			else if (sc >= 0x80 &&
+			    sc - 0x80 < ARRAYSIZE(generic_nvm_status_codes))
+				sc_str = generic_nvm_status_codes[sc - 0x80];
+			break;
+		case 1: /* Specific Command Status */
+			if (sc < ARRAYSIZE(specific_status_codes))
+				sc_str = specific_status_codes[sc];
+			else if (sc >= 0x80 &&
+			    sc - 0x80 < ARRAYSIZE(specific_nvm_status_codes))
+				sc_str = specific_nvm_status_codes[sc - 0x80];
+			break;
+		case 2: /* Media Errors */
+			if (sc >= 0x80 &&
+			    sc - 0x80 < ARRAYSIZE(media_nvm_status_codes))
+				sc_str = media_nvm_status_codes[sc - 0x80];
+			break;
+		case 7: /* Vendor Specific */
+			sc_str = "Unknown Vendor Specific";
+			break;
+		default:
+			sc_str = "Reserved";
+			break;
+		}
+
+		nvme_print(2, "Entry", i, NULL);
+		nvme_print_uint64(4, "Error Count",
+		    elog[i].el_count, NULL, NULL);
+		nvme_print_uint64(4, "Submission Queue ID",
+		    elog[i].el_sqid, NULL, NULL);
+		nvme_print_uint64(4, "Command ID",
+		    elog[i].el_cid, NULL, NULL);
+		nvme_print(4, "Status Field", -1, NULL);
+		nvme_print_uint64(6, "Phase Tag",
+		    elog[i].el_sf.sf_p, NULL, NULL);
+		nvme_print(6, "Status Code", -1, "0x%0.2x (%s)",
+		    sc, sc_str);
+		nvme_print(6, "Status Code Type", -1, "0x%x (%s)",
+		    elog[i].el_sf.sf_sct,
+		    status_code_types[elog[i].el_sf.sf_sct]);
+		nvme_print_bit(6, "More",
+		    elog[i].el_sf.sf_m, "yes", "no");
+		/* Do Not Retry has a bit of its own, distinct from More. */
+		nvme_print_bit(6, "Do Not Retry",
+		    elog[i].el_sf.sf_dnr, "yes", "no");
+		nvme_print_uint64(4, "Parameter Error Location byte",
+		    elog[i].el_byte, "0x%0.2"PRIx64, NULL);
+		nvme_print_uint64(4, "Parameter Error Location bit",
+		    elog[i].el_bit, NULL, NULL);
+		nvme_print_uint64(4, "Logical Block Address",
+		    elog[i].el_lba, NULL, NULL);
+		/* A namespace ID of 0xffffffff is shown as 0. */
+		nvme_print(4, "Namespace ID", -1, "%d",
+		    elog[i].el_nsid == 0xffffffff ?
+		    0 : elog[i].el_nsid);
+		nvme_print_uint64(4,
+		    "Vendor Specific Information Available",
+		    elog[i].el_vendor, NULL, NULL);
+	}
+}
+
+/*
+ * nvme_print_health_log
+ *
+ * This function pretty-prints a summary of the SMART/Health log, or all
+ * of the log if verbose is set.
+ *
+ * idctl is only consulted to find out whether a volatile write cache is
+ * present; the "Volatile Memory Backup" warning is printed only if it is.
+ */
+void
+nvme_print_health_log(nvme_health_log_t *hlog, nvme_identify_ctrl_t *idctl)
+{
+ nvme_print(0, "SMART/Health Information", -1, NULL);
+ nvme_print(2, "Critical Warnings", -1, NULL);
+ nvme_print_bit(4, "Available Space",
+ hlog->hl_crit_warn.cw_avail, "low", "OK");
+ nvme_print_bit(4, "Temperature",
+ hlog->hl_crit_warn.cw_temp, "too high", "OK");
+ nvme_print_bit(4, "Device Reliability",
+ hlog->hl_crit_warn.cw_reliab, "degraded", "OK");
+ nvme_print_bit(4, "Media",
+ hlog->hl_crit_warn.cw_readonly, "read-only", "OK");
+ if (idctl->id_vwc.vwc_present != 0)
+ nvme_print_bit(4, "Volatile Memory Backup",
+ hlog->hl_crit_warn.cw_volatile, "failed", "OK");
+
+ /* 273 is subtracted from the raw temperature to print it in C. */
+ nvme_print_uint64(2, "Temperature",
+ hlog->hl_temp - 273, NULL, "C");
+ nvme_print_uint64(2, "Available Spare Capacity",
+ hlog->hl_avail_spare, NULL, "%");
+
+ if (verbose != 0)
+ nvme_print_uint64(2, "Available Spare Threshold",
+ hlog->hl_avail_spare_thr, NULL, "%");
+
+ nvme_print_uint64(2, "Device Life Used",
+ hlog->hl_used, NULL, "%");
+
+ /* Everything below is printed in verbose mode only. */
+ if (verbose == 0)
+ return;
+
+ /*
+ * The following two fields are in 1000 512 byte units. Convert that to
+ * GB by doing binary shifts (9 left and 30 right) and multiply by 10^3.
+ */
+ nvme_print_uint128(2, "Data Read",
+ hlog->hl_data_read, "GB", 30 - 9, 3);
+ nvme_print_uint128(2, "Data Written",
+ hlog->hl_data_write, "GB", 30 - 9, 3);
+
+ /* The remaining counters are printed unscaled. */
+ nvme_print_uint128(2, "Read Commands",
+ hlog->hl_host_read, NULL, 0, 0);
+ nvme_print_uint128(2, "Write Commands",
+ hlog->hl_host_write, NULL, 0, 0);
+ nvme_print_uint128(2, "Controller Busy",
+ hlog->hl_ctrl_busy, "min", 0, 0);
+ nvme_print_uint128(2, "Power Cycles",
+ hlog->hl_power_cycles, NULL, 0, 0);
+ nvme_print_uint128(2, "Power On",
+ hlog->hl_power_on_hours, "h", 0, 0);
+ nvme_print_uint128(2, "Unsafe Shutdowns",
+ hlog->hl_unsafe_shutdn, NULL, 0, 0);
+ nvme_print_uint128(2, "Uncorrectable Media Errors",
+ hlog->hl_media_errors, NULL, 0, 0);
+ nvme_print_uint128(2, "Errors Logged",
+ hlog->hl_errors_logged, NULL, 0, 0);
+}
+
+/*
+ * nvme_print_fwslot_log
+ *
+ * This function pretty-prints the firmware slot information: the active
+ * slot, followed by the firmware revision of each slot up to, but not
+ * including, the first slot whose revision string is empty. Slots are
+ * numbered starting at 1.
+ */
+void
+nvme_print_fwslot_log(nvme_fwslot_log_t *fwlog)
+{
+	int slot;
+
+	nvme_print(0, "Firmware Slot Information", -1, NULL);
+	nvme_print_uint64(2, "Active Firmware Slot", fwlog->fw_afi, NULL, NULL);
+
+	for (slot = 0; slot != ARRAYSIZE(fwlog->fw_frs) &&
+	    fwlog->fw_frs[slot][0] != '\0'; slot++) {
+		nvme_print_str(2, "Firmware Revision for Slot", slot + 1,
+		    fwlog->fw_frs[slot], sizeof (fwlog->fw_frs[slot]));
+	}
+}
+
+/*
+ * nvme_print_feat_*
+ *
+ * These functions pretty-print the data structures returned by GET FEATURES.
+ * They share one signature: res is the result value of the command, b and s
+ * are an optional data buffer and its size, and id is the IDENTIFY CONTROLLER
+ * data. Arguments a particular function has no use for are marked as unused.
+ */
+
+/*
+ * nvme_print_feat_arbitration -- print the arbitration burst and the three
+ * priority weights found in the low 32 bits of res
+ */
+void
+nvme_print_feat_arbitration(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+	nvme_arbitration_t arb;
+
+	arb.r = (uint32_t)res;
+
+	/* The burst is a power of two, except that 7 stands for no limit. */
+	if (arb.b.arb_ab != 7)
+		nvme_print_uint64(4, "Arbitration Burst",
+		    1 << arb.b.arb_ab, NULL, NULL);
+	else
+		nvme_print_str(4, "Arbitration Burst", -1,
+		    "no limit", 0);
+
+	/* The weights are printed as the field value plus 1. */
+	nvme_print_uint64(4, "Low Priority Weight",
+	    (uint16_t)arb.b.arb_lpw + 1, NULL, NULL);
+	nvme_print_uint64(4, "Medium Priority Weight",
+	    (uint16_t)arb.b.arb_mpw + 1, NULL, NULL);
+	nvme_print_uint64(4, "High Priority Weight",
+	    (uint16_t)arb.b.arb_hpw + 1, NULL, NULL);
+}
+
+/*
+ * nvme_print_feat_power_mgmt -- print the power state found in the low 32
+ * bits of res
+ */
+void
+nvme_print_feat_power_mgmt(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_power_mgmt_t feat;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	feat.r = (uint32_t)res;
+
+	nvme_print_uint64(4, "Power State", (uint8_t)feat.b.pm_ps, NULL, NULL);
+}
+
+/*
+ * nvme_print_feat_lba_range -- print the LBA ranges found in buf
+ *
+ * Nothing is printed if buf is NULL. The number of ranges listed is the
+ * lesser of what fits into bufsize and the count reported in res (a 0-based
+ * value). Unless verbose is set, ranges without any logical blocks are
+ * skipped.
+ */
+void
+nvme_print_feat_lba_range(uint64_t res, void *buf, size_t bufsize,
+    nvme_identify_ctrl_t *id)
+{
+	_NOTE(ARGUNUSED(id));
+
+	nvme_lba_range_type_t lrt;
+	nvme_lba_range_t *range;
+	size_t n_ranges;
+	int i;
+
+	if (buf == NULL)
+		return;
+
+	range = buf;
+	lrt.r = res;
+
+	n_ranges = bufsize / sizeof (nvme_lba_range_t);
+	if (n_ranges > lrt.b.lr_num + 1)
+		n_ranges = lrt.b.lr_num + 1;
+
+	nvme_print_uint64(4, "Number of LBA Ranges",
+	    (uint8_t)lrt.b.lr_num + 1, NULL, NULL);
+
+	for (i = 0; i != n_ranges; i++) {
+		if (verbose == 0 && range[i].lr_nlb == 0)
+			continue;
+
+		nvme_print(4, "LBA Range", i, NULL);
+
+		/* Types without a known name are printed as a number. */
+		if (range[i].lr_type < ARRAYSIZE(lba_range_types)) {
+			nvme_print_str(6, "Type", -1,
+			    lba_range_types[range[i].lr_type], 0);
+		} else {
+			nvme_print_uint64(6, "Type",
+			    range[i].lr_type, NULL, NULL);
+		}
+
+		nvme_print(6, "Attributes", -1, NULL);
+		nvme_print_bit(8, "Writable",
+		    range[i].lr_attr.lr_write, "yes", "no");
+		nvme_print_bit(8, "Hidden",
+		    range[i].lr_attr.lr_hidden, "yes", "no");
+		nvme_print_uint64(6, "Starting LBA",
+		    range[i].lr_slba, NULL, NULL);
+		nvme_print_uint64(6, "Number of Logical Blocks",
+		    range[i].lr_nlb, NULL, NULL);
+
+		/* The 16 GUID bytes are printed as 32 hex digits. */
+		nvme_print(6, "Unique Identifier", -1,
+		    "%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x"
+		    "%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x",
+		    range[i].lr_guid[0], range[i].lr_guid[1],
+		    range[i].lr_guid[2], range[i].lr_guid[3],
+		    range[i].lr_guid[4], range[i].lr_guid[5],
+		    range[i].lr_guid[6], range[i].lr_guid[7],
+		    range[i].lr_guid[8], range[i].lr_guid[9],
+		    range[i].lr_guid[10], range[i].lr_guid[11],
+		    range[i].lr_guid[12], range[i].lr_guid[13],
+		    range[i].lr_guid[14], range[i].lr_guid[15]);
+	}
+}
+
+/*
+ * nvme_print_feat_temperature -- print the temperature threshold found in the
+ * low 32 bits of res, after subtracting 273, with unit "C"
+ */
+void
+nvme_print_feat_temperature(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_temp_threshold_t thresh;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	thresh.r = (uint32_t)res;
+
+	nvme_print_uint64(4, "Temperature Threshold",
+	    thresh.b.tt_tmpth - 273, NULL, "C");
+}
+
+/*
+ * nvme_print_feat_error -- print the time limit for error recovery found in
+ * the low 32 bits of res
+ *
+ * The limit is printed as 100 times the field value with unit "ms"; a value
+ * that is not greater than 0 is printed as "no time limit".
+ */
+void
+nvme_print_feat_error(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_error_recovery_t recov;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	recov.r = (uint32_t)res;
+
+	if (recov.b.er_tler <= 0) {
+		nvme_print_str(4, "Time Limited Error Recovery", -1,
+		    "no time limit", 0);
+		return;
+	}
+
+	nvme_print_uint64(4, "Time Limited Error Recovery",
+	    (uint32_t)recov.b.er_tler * 100, NULL, "ms");
+}
+
+/*
+ * nvme_print_feat_write_cache -- print whether the volatile write cache is
+ * enabled, according to the low 32 bits of res
+ */
+void
+nvme_print_feat_write_cache(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_write_cache_t cache;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	cache.r = (uint32_t)res;
+
+	nvme_print_bit(4, "Volatile Write Cache", cache.b.wc_wce,
+	    "enabled", "disabled");
+}
+
+/*
+ * nvme_print_feat_nqueues -- print the number of submission and completion
+ * queues found in the low 32 bits of res, each as the field value plus 1
+ */
+void
+nvme_print_feat_nqueues(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_nqueues_t queues;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	queues.r = (uint32_t)res;
+
+	nvme_print_uint64(4, "Number of Submission Queues",
+	    queues.b.nq_nsq + 1, NULL, NULL);
+	nvme_print_uint64(4, "Number of Completion Queues",
+	    queues.b.nq_ncq + 1, NULL, NULL);
+}
+
+/*
+ * nvme_print_feat_intr_coal -- print the interrupt coalescing settings found
+ * in the low 32 bits of res
+ *
+ * The threshold is printed as the field value plus 1, the time as 100 times
+ * the field value with unit "us".
+ */
+void
+nvme_print_feat_intr_coal(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_intr_coal_t coal;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	coal.r = (uint32_t)res;
+
+	nvme_print_uint64(4, "Aggregation Threshold",
+	    coal.b.ic_thr + 1, NULL, NULL);
+	nvme_print_uint64(4, "Aggregation Time",
+	    (uint16_t)coal.b.ic_time * 100, NULL, "us");
+}
+
+/*
+ * nvme_print_feat_intr_vect -- print whether coalescing is disabled for the
+ * interrupt vector found in the low 32 bits of res
+ *
+ * The vector number is made part of the printed name. Failure to build that
+ * name is fatal.
+ */
+void
+nvme_print_feat_intr_vect(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+	nvme_intr_vect_t iv;
+	char *tmp;
+
+	iv.r = (uint32_t)res;
+	if (asprintf(&tmp, "Vector %d Coalescing Disable", iv.b.iv_iv) < 0)
+		err(-1, "nvme_print_feat_intr_vect()");
+
+	nvme_print_bit(4, tmp, iv.b.iv_cd, "yes", "no");
+
+	/* The name was allocated by asprintf() and is ours to release. */
+	free(tmp);
+}
+
+/*
+ * nvme_print_feat_write_atom -- print the "Disable Normal" setting found in
+ * the low 32 bits of res
+ */
+void
+nvme_print_feat_write_atom(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_write_atomicity_t atom;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	atom.r = (uint32_t)res;
+
+	nvme_print_bit(4, "Disable Normal", atom.b.wa_dn, "yes", "no");
+}
+
+/*
+ * nvme_print_feat_async_event -- print which asynchronous events are enabled
+ * according to the low 32 bits of res
+ *
+ * The "Volatile Memory Backup failed" event is printed only if idctl reports
+ * a volatile write cache as present.
+ */
+void
+nvme_print_feat_async_event(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *idctl)
+{
+	nvme_async_event_conf_t conf;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+
+	conf.r = (uint32_t)res;
+
+	nvme_print_bit(4, "Available Space below threshold",
+	    conf.b.aec_avail, "enabled", "disabled");
+	nvme_print_bit(4, "Temperature above threshold",
+	    conf.b.aec_temp, "enabled", "disabled");
+	nvme_print_bit(4, "Device Reliability compromised",
+	    conf.b.aec_reliab, "enabled", "disabled");
+	nvme_print_bit(4, "Media read-only",
+	    conf.b.aec_readonly, "enabled", "disabled");
+
+	if (idctl->id_vwc.vwc_present == 0)
+		return;
+
+	nvme_print_bit(4, "Volatile Memory Backup failed",
+	    conf.b.aec_volatile, "enabled", "disabled");
+}
+
+/*
+ * nvme_print_feat_auto_pst -- print the autonomous power state transition
+ * settings
+ *
+ * Nothing is printed if buf is NULL. res says whether the feature is enabled
+ * and buf holds the per power state entries. The listing stops at the end of
+ * the buffer or at the first entry in which both the idle transition power
+ * state and the idle time are 0.
+ */
+void
+nvme_print_feat_auto_pst(uint64_t res, void *buf, size_t bufsize,
+    nvme_identify_ctrl_t *id)
+{
+	_NOTE(ARGUNUSED(id));
+
+	nvme_auto_power_state_trans_t apst;
+	nvme_auto_power_state_t *entry;
+	int n_entries;
+	int i;
+
+	if (buf == NULL)
+		return;
+
+	n_entries = bufsize / sizeof (nvme_auto_power_state_t);
+	entry = buf;
+	apst.r = res;
+
+	nvme_print_bit(4, "Autonomous Power State Transition",
+	    apst.b.apst_apste, "enabled", "disabled");
+
+	for (i = 0; i != n_entries; i++) {
+		if (entry[i].apst_itps == 0 && entry[i].apst_itpt == 0)
+			break;
+
+		nvme_print(4, "Power State", i, NULL);
+		nvme_print_uint64(6, "Idle Transition Power State",
+		    (uint16_t)entry[i].apst_itps, NULL, NULL);
+		nvme_print_uint64(6, "Idle Time Prior to Transition",
+		    entry[i].apst_itpt, NULL, "ms");
+	}
+}
+
+/*
+ * nvme_print_feat_progress -- print the pre-boot software load count found in
+ * the low 32 bits of res
+ */
+void
+nvme_print_feat_progress(uint64_t res, void *b, size_t s,
+    nvme_identify_ctrl_t *id)
+{
+	nvme_software_progress_marker_t marker;
+
+	_NOTE(ARGUNUSED(b));
+	_NOTE(ARGUNUSED(s));
+	_NOTE(ARGUNUSED(id));
+
+	marker.r = (uint32_t)res;
+
+	nvme_print_uint64(4, "Pre-Boot Software Load Count",
+	    marker.b.spm_pbslc, NULL, NULL);
+}
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index 543928c16b..19a8cd0f88 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -554,7 +554,8 @@ _MANFILES= 6to4relay.1m \
i386_MANFILES= \
acpidump.1m \
- acpixtract.1m
+ acpixtract.1m \
+ nvmeadm.1m
sparc_MANFILES= cvcd.1m \
dcs.1m \
diff --git a/usr/src/man/man1m/nvmeadm.1m b/usr/src/man/man1m/nvmeadm.1m
new file mode 100644
index 0000000000..9e1cfc1014
--- /dev/null
+++ b/usr/src/man/man1m/nvmeadm.1m
@@ -0,0 +1,410 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+.\"
+.Dd May 04, 2016
+.Dt NVMEADM 1M
+.Os
+.Sh NAME
+.Nm nvmeadm
+.Nd NVMe administration utility
+.Sh SYNOPSIS
+.Nm
+.Fl h
+.Op Ar command
+.Nm
+.Op Fl dv
+.Cm list
+.Op Ar ctl[/ns][,...]
+.Nm
+.Op Fl dv
+.Cm identify
+.Ar ctl[/ns][,...]
+.Nm
+.Op Fl dv
+.Cm get-logpage
+.Ar ctl[/ns][,...]
+.Ar logpage
+.Nm
+.Op Fl dv
+.Cm get-features
+.Ar ctl[/ns][,...]
+.Op Ar feature-list
+.Nm
+.Op Fl dv
+.Cm format
+.Ar ctl[/ns]
+.Op Ar lba-format
+.Nm
+.Op Fl dv
+.Cm secure-erase
+.Ar ctl[/ns]
+.Op Fl c
+.Nm
+.Op Fl dv
+.Cm detach
+.Ar ctl[/ns]
+.Nm
+.Op Fl dv
+.Cm attach
+.Ar ctl[/ns]
+.Sh DESCRIPTION
+The
+.Nm
+utility can be used to enumerate the NVMe controllers and their
+namespaces, query hardware information from an NVMe controller or
+namespace, and to format or secure-erase an NVMe controller or
+namespace.
+.Pp
+The information returned by the hardware is printed by
+.Nm
+in a human-readable form where applicable.
+Generally all 0-based counts are normalized and values may be
+converted to human-readable units such as MB (megabytes), W (watts),
+or C (degrees Celsius).
+.Sh OPTIONS
+The following options are supported:
+.Bl -tag -width Ds
+.It Fl h
+Print a short help text for
+.Nm ,
+or for an optionally specified
+.Nm
+command.
+.It Fl d
+Enable debugging output.
+.It Fl v
+Enable verbose output.
+.El
+.Sh ARGUMENTS
+.Nm
+expects the following kinds of arguments:
+.Bl -tag -width "ctl/[ns]"
+.It Ar command
+Any command
+.Nm
+understands.
+See section
+.Sx COMMANDS .
+.It Ar ctl[/ns]
+Specifies an NVMe controller and optionally a namespace within that
+controller.
+The controller name consists of the driver name
+.Qq nvme
+followed by an instance number.
+A namespace is specified by appending a single
+.Qq /
+followed by the namespace ID to the controller name.
+The namespace ID is the EUI64 of the namespace, or a positive non-zero
+decimal number if the namespace doesn't have an EUI64.
+For commands that don't change the device state multiple controllers
+and namespaces can be specified as a comma-separated list.
+.Pp
+The list of controllers and namespaces present in the system can be
+queried with the
+.Cm list
+command without any arguments.
+.It Ar logpage
+Specifies the log page name for the
+.Cm get-logpage
+command.
+.It Ar feature-list
+A comma-separated list of feature names for the
+.Cm get-features
+command.
+Feature names can be specified in upper or lower case and can be
+shortened to the shortest unique name.
+Some features may also have an alternative short name.
+.It Ar lba-format
+A non-zero integer specifying the LBA format for the
+.Cm format
+command.
+The list of supported LBA formats on a namespace can be retrieved
+with the
+.Nm
+.Cm identify
+command.
+.El
+.Sh COMMANDS
+.Bl -tag -width ""
+.It Xo
+.Nm
+.Cm list
+.Op Ar ctl[/ns][,...]
+.Xc
+Lists the NVMe controllers and their namespaces in the system and
+prints a 1-line summary of their basic properties for each.
+If a list of controllers and/or namespaces is given then the listing
+is limited to those devices.
+.It Xo
+.Nm
+.Cm identify
+.Ar ctl[/ns][,...]
+.Xc
+Print detailed information about the specified controllers and/or
+namespaces.
+The information returned differs depending on whether a controller or
+a namespace is specified.
+For an explanation of the data printed by this command refer to the
+description of the
+.Qq IDENTIFY
+admin command in the NVMe specification.
+.It Xo
+.Nm
+.Cm get-logpage
+.Ar ctl[/ns][,...]
+.Ar logpage
+.Xc
+Print the specified log page of the specified controllers and/or namespaces.
+Most log pages are only available on a per-controller basis.
+Known log pages are:
+.Bl -tag -width "firmware"
+.It error
+Error Information
+.It health
+SMART/Health Information.
+A controller may support this log page on a per-namespace basis.
+.It firmware
+Firmware Slot Information
+.El
+.Pp
+For an explanation of the contents of the log pages refer to the
+description of the
+.Qq GET LOGPAGE
+admin command in the NVMe specification.
+.It Xo
+.Nm
+.Cm get-features
+.Ar ctl[/ns][,...]
+.Op Ar feature-list
+.Xc
+Prints information about the specified features, or all features if
+none are given, of the specified controllers and/or namespaces.
+Feature names are case-insensitive, and they can be shortened as long
+as they remain unique.
+Some features also have alternative short names to which the same
+rules apply.
+The following features are supported:
+.Pp
+.TS
+tab(:);
+l l l.
+FULL NAME:SHORT NAME:CONTROLLER/NAMESPACE
+Arbitration::controller
+Power Management::controller
+LBA Range Type:range:namespace
+Temperature Threshold::controller
+Error Recovery::controller
+Volatile Write Cache:cache:controller
+Number of Queues:queues:controller
+Interrupt Coalescing:coalescing:controller
+Interrupt Vector Configuration:vector:controller
+Write Atomicity:atomicity:controller
+Asynchronous Event Configuration:event:controller
+Autonomous Power State Transition::controller
+Software Progress Marker:progress:controller
+.TE
+.Pp
+For an explanation of the individual features refer to the description
+of the
+.Qq SET FEATURES
+admin command in the NVMe specification.
+.It Xo
+.Nm
+.Cm format
+.Ar ctl[/ns]
+.Op Ar lba-format
+.Xc
+Formats the specified namespace or all namespaces of the specified
+controller.
+This command implies a
+.Nm
+.Cm detach
+and subsequent
+.Nm
+.Cm attach
+of the specified namespace(s), which will cause a changed LBA format
+to be detected.
+If no LBA format is specified the LBA format currently used by the
+namespace will be used.
+When formatting all namespaces without specifying an LBA format the LBA
+format of namespace 1 will be used.
+A list of LBA formats supported by a namespace can be queried with the
+.Nm
+.Cm identify
+command.
+.Pp
+Note that not all devices support formatting individual or all
+namespaces, or support formatting at all.
+.Pp
+LBA formats using a non-zero metadata size are not supported by
+.Nm
+or
+.Xr nvme 7D .
+.Pp
+The list of supported LBA formats on a namespace can be retrieved
+with the
+.Nm
+.Cm identify
+command.
+.It Xo
+.Nm
+.Cm secure-erase
+.Ar ctl[/ns]
+.Op Fl c
+.Xc
+Erases the specified namespace or all namespaces of the controller.
+The flag
+.Fl c
+will cause a cryptographic erase instead of a normal erase.
+This command implies a
+.Nm
+.Cm detach
+and
+.Nm
+.Cm attach
+of the specified namespace(s).
+.Pp
+Note that not all devices support erasing individual or all
+namespaces, or support erasing at all.
+.It Xo
+.Nm
+.Cm detach
+.Ar ctl[/ns]
+.Xc
+Temporarily detaches the
+.Xr blkdev 7D
+instance from the specified namespace or all namespaces of the controller.
+This will prevent I/O access to the affected namespace(s).
+Detach will only succeed if the affected namespace(s) are not
+currently opened.
+The detached state will not persist across reboots or reloads of the
+.Xr nvme 7D
+driver.
+.It Xo
+.Nm
+.Cm attach
+.Ar ctl[/ns]
+.Xc
+Attaches the
+.Xr blkdev 7D
+instance to the specified namespace or all namespaces of the controller.
+This will make I/O accesses to the namespace(s) possible again after a
+previous
+.Nm
+.Cm detach
+command.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bl -tag -width ""
+.It Sy Example 1: List all NVMe controllers and namespaces
+.Bd -literal
+# nvmeadm list
+nvme1: model: INTEL SSDPEDMD800G4, serial: CVFT4134001R800CGN, FW rev: 8DV10049, NVMe v1.0
+ nvme1/1 (c1t1d0): Size = 763097 MB, Capacity = 763097 MB, Used = 763097 MB
+nvme4: model: SAMSUNG MZVPV128HDGM-00000, serial: S1XVNYAGA00640, FW rev: BXW7300Q, NVMe v1.1
+ nvme4/1 (c2t2d0): Size = 122104 MB, Capacity = 122104 MB, Used = 5127 MB
+.Ed
+.It Sy Example 2: Identify a namespace
+.Bd -literal
+# nvmeadm identify nvme4/1
+nvme4/1: Identify Namespace
+ Namespace Capabilities and Features
+ Namespace Size: 122104MB
+ Namespace Capacity: 122104MB
+ Namespace Utilization: 5127MB
+ Namespace Features
+ Thin Provisioning: unsupported
+ Number of LBA Formats: 1
+ Formatted LBA Size
+ LBA Format: 1
+ Extended Data LBA: no
+ Metadata Capabilities
+ Extended Data LBA: unsupported
+ Separate Metadata: unsupported
+ End-to-End Data Protection Capabilities
+ Protection Information Type 1: unsupported
+ Protection Information Type 2: unsupported
+ Protection Information Type 3: unsupported
+ Protection Information first: unsupported
+ Protection Information last: unsupported
+ End-to-End Data Protection Settings
+ Protection Information: disabled
+ Protection Information in Metadata: last 8 bytes
+ LBA Format 1
+ Metadata Size: 0 bytes
+ LBA Data Size: 512 bytes
+ Relative Performance: Best
+.Ed
+.It Sy Example 3: Get SMART/Health information (verbose)
+.Bd -literal
+# nvmeadm -v get-logpage nvme4/1 health
+nvme4/1: SMART/Health Information
+ Critical Warnings
+ Available Space: OK
+ Temperature: OK
+ Device Reliability: OK
+ Media: OK
+ Volatile Memory Backup: OK
+ Temperature: 37C
+ Available Spare Capacity: 100%
+ Available Spare Threshold: 10%
+ Device Life Used: 0%
+ Data Read: 0GB
+ Data Written: 64GB
+ Read Commands: 52907
+ Write Commands: 567874
+ Controller Busy: 1min
+ Power Cycles: 6
+ Power On: 141h
+ Unsafe Shutdowns: 1
+ Uncorrectable Media Errors: 0
+ Errors Logged: 1
+.Ed
+.It Sy Example 4: Get Asynchronous Event Configuration and Power Management information
+.Bd -literal
+# nvmeadm get-features nvme0,nvme4 event,power
+nvme0: Get Features
+ Asynchronous Event Configuration
+ Available Space below threshold: disabled
+ Temperature above threshold: disabled
+ Device Reliability compromised: disabled
+ Media read-only: disabled
+ Power Management
+ Power State: 0
+nvme4: Get Features
+ Asynchronous Event Configuration
+ Available Space below threshold: disabled
+ Temperature above threshold: disabled
+ Device Reliability compromised: disabled
+ Media read-only: disabled
+ Volatile Memory Backup failed: disabled
+ Power Management
+ Power State: 0
+.Ed
+.El
+.Sh INTERFACE STABILITY
+The command line interface of
+.Nm
+is
+.Sy Evolving .
+The output of
+.Nm
+is
+.Sy Not-an-Interface
+and may change at any time.
+.Sh SEE ALSO
+.Xr nvme 7D
+.Pp
+.Lk http://www.nvmexpress.org/specifications/ "NVMe specifications"
diff --git a/usr/src/pkg/manifests/driver-storage-nvme.mf b/usr/src/pkg/manifests/driver-storage-nvme.mf
index 87b0954b7f..45b5a333f7 100644
--- a/usr/src/pkg/manifests/driver-storage-nvme.mf
+++ b/usr/src/pkg/manifests/driver-storage-nvme.mf
@@ -34,6 +34,7 @@ dir path=kernel group=sys
dir path=kernel/drv group=sys
dir path=kernel/drv/$(ARCH64) group=sys
dir path=usr group=sys
+dir path=usr/sbin
dir path=usr/share
dir path=usr/share/man
dir path=usr/share/man/man7d
@@ -41,5 +42,7 @@ driver name=nvme alias=pciexclass,010802 class=disk perms="* 0600 root sys"
file path=kernel/drv/$(ARCH64)/nvme group=sys
file path=kernel/drv/nvme group=sys
file path=kernel/drv/nvme.conf group=sys
+file path=usr/sbin/nvmeadm mode=0555
+file path=usr/share/man/man1m/nvmeadm.1m
file path=usr/share/man/man7d/nvme.7d
license lic_CDDL license=lic_CDDL
diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c
index 0c80d15cfe..d3b96c9f8a 100644
--- a/usr/src/uts/common/io/blkdev/blkdev.c
+++ b/usr/src/uts/common/io/blkdev/blkdev.c
@@ -1819,6 +1819,16 @@ bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
dev_info_t *child;
bd_drive_t drive = { 0 };
+ /*
+ * It's not an error if bd_attach_handle() is called on a handle that
+ * already is attached. We just ignore the request to attach and return.
+ * This way drivers using blkdev don't have to keep track of blkdev
+ * state; they can just call this function to make sure it is attached.
+ */
+ if (hdl->h_child != NULL) {
+ return (DDI_SUCCESS);
+ }
+
/* if drivers don't override this, make it assume none */
drive.d_lun = -1;
hdl->h_ops.o_drive_info(hdl->h_private, &drive);
@@ -1882,6 +1892,12 @@ bd_detach_handle(bd_handle_t hdl)
int rv;
char *devnm;
+ /*
+ * It's not an error if bd_detach_handle() is called on a handle that
+ * already is detached. We just ignore the request to detach and return.
+ * This way drivers using blkdev don't have to keep track of blkdev
+ * state; they can just call this function to make sure it is detached.
+ */
if (hdl->h_child == NULL) {
return (DDI_SUCCESS);
}
diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c
index cb2e9bdd22..c87be0d3f0 100644
--- a/usr/src/uts/common/io/nvme/nvme.c
+++ b/usr/src/uts/common/io/nvme/nvme.c
@@ -83,6 +83,19 @@
* passes it to blkdev to use it in the device node names. As this is currently
* untested namespaces with EUI64 are ignored by default.
*
+ * We currently support only (1 << NVME_MINOR_INST_SHIFT) - 2 namespaces in a
+ * single controller. This is an artificial limit imposed by the driver to be
+ * able to address a reasonable number of controllers and namespaces using a
+ * 32bit minor node number.
+ *
+ *
+ * Minor nodes:
+ *
+ * For each NVMe device the driver exposes one minor node for the controller and
+ * one minor node for each namespace. The only operations supported by those
+ * minor nodes are open(9E), close(9E), and ioctl(9E). This serves as the
+ * interface for the nvmeadm(1M) utility.
+ *
*
* Blkdev Interface:
*
@@ -164,7 +177,6 @@
* - polled I/O support to support kernel core dumping
* - FMA handling of media errors
* - support for devices supporting very large I/O requests using chained PRPs
- * - support for querying log pages from user space
* - support for configuring hardware parameters like interrupt coalescing
* - support for media formatting and hard partitioning into namespaces
* - support for big-endian systems
@@ -186,6 +198,7 @@
#include <sys/devops.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
+#include <sys/sunndi.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
@@ -196,6 +209,10 @@
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/sata/sata_hba.h>
+#include <sys/stat.h>
+#include <sys/policy.h>
+
+#include <sys/nvme.h>
#ifdef __x86
#include <sys/x86_archext.h>
@@ -210,7 +227,10 @@ static const int nvme_version_major = 1;
static const int nvme_version_minor = 2;
/* tunable for admin command timeout in seconds, default is 1s */
-static volatile int nvme_admin_cmd_timeout = 1;
+int nvme_admin_cmd_timeout = 1;
+
+/* tunable for FORMAT NVM command timeout in seconds, default is 600s */
+int nvme_format_cmd_timeout = 600;
static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
@@ -243,10 +263,14 @@ static inline int nvme_check_cmd_status(nvme_cmd_t *);
static void nvme_abort_cmd(nvme_cmd_t *);
static int nvme_async_event(nvme_t *);
-static void *nvme_get_logpage(nvme_t *, uint8_t, ...);
+static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
+ boolean_t, uint8_t);
+static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
uint32_t *);
+static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
+ void **, size_t *);
static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
static int nvme_set_nqueues(nvme_t *, uint16_t);
@@ -283,6 +307,16 @@ static void nvme_prp_dma_destructor(void *, void *);
static void nvme_prepare_devid(nvme_t *, uint32_t);
+static int nvme_open(dev_t *, int, int, cred_t *);
+static int nvme_close(dev_t, int, int, cred_t *);
+static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+/*
+ * Minor number layout: the instance number occupies the bits from
+ * NVME_MINOR_INST_SHIFT upwards, the namespace ID the bits below it.
+ * NVME_MINOR() combines the two, NVME_MINOR_INST() and NVME_MINOR_NSID()
+ * extract them again. NVME_MINOR_MAX evaluates to
+ * (1 << NVME_MINOR_INST_SHIFT) - 2.
+ */
+#define NVME_MINOR_INST_SHIFT 14
+#define NVME_MINOR(inst, nsid) (((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
+#define NVME_MINOR_INST(minor) ((minor) >> NVME_MINOR_INST_SHIFT)
+#define NVME_MINOR_NSID(minor) ((minor) & ((1 << NVME_MINOR_INST_SHIFT) - 1))
+#define NVME_MINOR_MAX (NVME_MINOR(1, 0) - 2)
+
static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;
@@ -358,6 +392,27 @@ static ddi_device_acc_attr_t nvme_reg_acc_attr = {
.devacc_attr_dataorder = DDI_STRICTORDER_ACC
};
+/*
+ * Character/block entry points of the driver. Only open, close and ioctl
+ * are implemented by the driver itself; property operations are handled by
+ * ddi_prop_op and every other entry point is nodev (nochpoll for chpoll).
+ */
+static struct cb_ops nvme_cb_ops = {
+ .cb_open = nvme_open,
+ .cb_close = nvme_close,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = nvme_ioctl,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_str = 0,
+ .cb_flag = D_NEW | D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
static struct dev_ops nvme_dev_ops = {
.devo_rev = DEVO_REV,
.devo_refcnt = 0,
@@ -367,7 +422,7 @@ static struct dev_ops nvme_dev_ops = {
.devo_attach = nvme_attach,
.devo_detach = nvme_detach,
.devo_reset = nodev,
- .devo_cb_ops = NULL,
+ .devo_cb_ops = &nvme_cb_ops,
.devo_bus_ops = NULL,
.devo_power = NULL,
.devo_quiesce = nvme_quiesce,
@@ -844,7 +899,8 @@ nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
if (cmd->nc_nvme->n_strict_version) {
cmd->nc_nvme->n_dead = B_TRUE;
@@ -881,13 +937,15 @@ nvme_check_integrity_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_INT_NVM_WRITE:
/* write fail */
/* TODO: post ereport */
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
case NVME_CQE_SC_INT_NVM_READ:
/* read fail */
/* TODO: post ereport */
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
default:
@@ -915,9 +973,11 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_INV_FLD:
/* Invalid Field in Command */
- dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
- "invalid field in cmd %p", (void *)cmd);
- return (0);
+ if (!cmd->nc_dontpanic)
+ dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
+ "programming error: invalid field in cmd %p",
+ (void *)cmd);
+ return (EIO);
case NVME_CQE_SC_GEN_ID_CNFL:
/* Command ID Conflict */
@@ -927,9 +987,11 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_INV_NS:
/* Invalid Namespace or Format */
- dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
- "invalid NS/format in cmd %p", (void *)cmd);
- return (0);
+ if (!cmd->nc_dontpanic)
+ dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
+ "programming error: " "invalid NS/format in cmd %p",
+ (void *)cmd);
+ return (EINVAL);
case NVME_CQE_SC_GEN_NVM_LBA_RANGE:
/* LBA Out Of Range */
@@ -944,7 +1006,8 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
/* Data Transfer Error (DMA) */
/* TODO: post ereport */
atomic_inc_32(&cmd->nc_nvme->n_data_xfr_err);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
case NVME_CQE_SC_GEN_INTERNAL_ERR:
@@ -955,7 +1018,8 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
* in the async event handler.
*/
atomic_inc_32(&cmd->nc_nvme->n_internal_err);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
case NVME_CQE_SC_GEN_ABORT_REQUEST:
@@ -981,13 +1045,15 @@ nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
case NVME_CQE_SC_GEN_NVM_CAP_EXC:
/* Capacity Exceeded */
atomic_inc_32(&cmd->nc_nvme->n_nvm_cap_exc);
- bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
return (EIO);
case NVME_CQE_SC_GEN_NVM_NS_NOTRDY:
/* Namespace Not Ready */
atomic_inc_32(&cmd->nc_nvme->n_nvm_ns_notrdy);
- bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
return (EIO);
default:
@@ -1048,14 +1114,14 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
/* Invalid Log Page */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_GET_LOG_PAGE);
atomic_inc_32(&cmd->nc_nvme->n_inv_log_page);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_INV_FORMAT:
/* Invalid Format */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_FORMAT);
atomic_inc_32(&cmd->nc_nvme->n_inv_format);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_INV_Q_DEL:
@@ -1070,7 +1136,8 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_cnfl_attr);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_NVM_INV_PROT:
@@ -1079,14 +1146,16 @@ nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_inv_prot);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EINVAL);
case NVME_CQE_SC_SPC_NVM_READONLY:
/* Write to Read Only Range */
ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
atomic_inc_32(&cmd->nc_nvme->n_readonly);
- bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
+ if (cmd->nc_xfer != NULL)
+ bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
return (EROFS);
default:
@@ -1281,6 +1350,7 @@ nvme_async_event_task(void *arg)
nvme_t *nvme = cmd->nc_nvme;
nvme_error_log_entry_t *error_log = NULL;
nvme_health_log_t *health_log = NULL;
+ size_t logsize = 0;
nvme_async_event_t event;
int ret;
@@ -1328,8 +1398,8 @@ nvme_async_event_task(void *arg)
switch (event.b.ae_type) {
case NVME_ASYNC_TYPE_ERROR:
if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
- error_log = (nvme_error_log_entry_t *)
- nvme_get_logpage(nvme, event.b.ae_logpage);
+ (void) nvme_get_logpage(nvme, (void **)&error_log,
+ &logsize, event.b.ae_logpage);
} else {
dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
"async event reply: %d", event.b.ae_logpage);
@@ -1379,8 +1449,8 @@ nvme_async_event_task(void *arg)
case NVME_ASYNC_TYPE_HEALTH:
if (event.b.ae_logpage == NVME_LOGPAGE_HEALTH) {
- health_log = (nvme_health_log_t *)
- nvme_get_logpage(nvme, event.b.ae_logpage, -1);
+ (void) nvme_get_logpage(nvme, (void **)&health_log,
+ &logsize, event.b.ae_logpage, -1);
} else {
dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
"async event reply: %d", event.b.ae_logpage);
@@ -1427,11 +1497,10 @@ nvme_async_event_task(void *arg)
}
if (error_log)
- kmem_free(error_log, sizeof (nvme_error_log_entry_t) *
- nvme->n_error_log_len);
+ kmem_free(error_log, logsize);
if (health_log)
- kmem_free(health_log, sizeof (nvme_health_log_t));
+ kmem_free(health_log, logsize);
}
static int
@@ -1485,14 +1554,58 @@ nvme_async_event(nvme_t *nvme)
return (DDI_SUCCESS);
}
-static void *
-nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
+/*
+ * Issue a FORMAT NVM admin command for namespace `nsid'.
+ *
+ * lbaf, ms, pi, pil and ses are packed into command dword 10: lbaf is
+ * masked to 4 bits, pi and ses to 3 bits each, and ms and pil are reduced
+ * to 0 or 1. The command is submitted with nvme_format_cmd_timeout instead
+ * of the regular admin command timeout.
+ *
+ * Returns 0 on success, EIO if nvme_admin_cmd() fails, and otherwise the
+ * value returned by nvme_check_cmd_status().
+ */
+static int
+nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
+ uint8_t pi, boolean_t pil, uint8_t ses)
+{
+ nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
+ nvme_format_nvm_t format_nvm = { 0 };
+ int ret;
+
+ format_nvm.b.fm_lbaf = lbaf & 0xf;
+ format_nvm.b.fm_ms = ms ? 1 : 0;
+ format_nvm.b.fm_pi = pi & 0x7;
+ format_nvm.b.fm_pil = pil ? 1 : 0;
+ format_nvm.b.fm_ses = ses & 0x7;
+
+ cmd->nc_sqid = 0;
+ cmd->nc_callback = nvme_wakeup_cmd;
+ cmd->nc_sqe.sqe_nsid = nsid;
+ cmd->nc_sqe.sqe_opc = NVME_OPC_NVM_FORMAT;
+ cmd->nc_sqe.sqe_cdw10 = format_nvm.r;
+
+ /*
+ * Some devices like Samsung SM951 don't allow formatting of all
+ * namespaces in one command. Handle that gracefully: with nc_dontpanic
+ * set, the generic status check returns an error for "invalid field"
+ * and "invalid namespace or format" instead of panicking.
+ */
+ if (nsid == (uint32_t)-1)
+ cmd->nc_dontpanic = B_TRUE;
+
+ if ((ret = nvme_admin_cmd(cmd, nvme_format_cmd_timeout))
+ != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_admin_cmd failed for FORMAT NVM");
+ /*
+ * NOTE(review): cmd is not released with nvme_free_cmd() on
+ * this path; confirm that this is intentional.
+ */
+ return (EIO);
+ }
+
+ if ((ret = nvme_check_cmd_status(cmd)) != 0) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!FORMAT failed with sct = %x, sc = %x",
+ cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
+ }
+
+ nvme_free_cmd(cmd);
+ return (ret);
+}
+
+static int
+nvme_get_logpage(nvme_t *nvme, void **buf, size_t *bufsize, uint8_t logpage,
+ ...)
{
nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
- void *buf = NULL;
nvme_getlogpage_t getlogpage = { 0 };
- size_t bufsize;
va_list ap;
+ int ret = DDI_FAILURE;
va_start(ap, logpage);
@@ -1505,18 +1618,22 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
switch (logpage) {
case NVME_LOGPAGE_ERROR:
cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
- bufsize = nvme->n_error_log_len *
- sizeof (nvme_error_log_entry_t);
+ /*
+ * The GET LOG PAGE command can use at most 2 pages to return
+ * data, PRP lists are not supported.
+ */
+ *bufsize = MIN(2 * nvme->n_pagesize,
+ nvme->n_error_log_len * sizeof (nvme_error_log_entry_t));
break;
case NVME_LOGPAGE_HEALTH:
cmd->nc_sqe.sqe_nsid = va_arg(ap, uint32_t);
- bufsize = sizeof (nvme_health_log_t);
+ *bufsize = sizeof (nvme_health_log_t);
break;
case NVME_LOGPAGE_FWSLOT:
cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
- bufsize = sizeof (nvme_fwslot_log_t);
+ *bufsize = sizeof (nvme_fwslot_log_t);
break;
default:
@@ -1528,7 +1645,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
va_end(ap);
- getlogpage.b.lp_numd = bufsize / sizeof (uint32_t) - 1;
+ getlogpage.b.lp_numd = *bufsize / sizeof (uint32_t) - 1;
cmd->nc_sqe.sqe_cdw10 = getlogpage.r;
@@ -1557,7 +1674,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
dev_err(nvme->n_dip, CE_WARN,
"!nvme_admin_cmd failed for GET LOG PAGE");
- return (NULL);
+ return (ret);
}
if (nvme_check_cmd_status(cmd)) {
@@ -1567,13 +1684,15 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
goto fail;
}
- buf = kmem_alloc(bufsize, KM_SLEEP);
- bcopy(cmd->nc_dma->nd_memp, buf, bufsize);
+ *buf = kmem_alloc(*bufsize, KM_SLEEP);
+ bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
+
+ ret = DDI_SUCCESS;
fail:
nvme_free_cmd(cmd);
- return (buf);
+ return (ret);
}
static void *
@@ -1684,6 +1803,130 @@ fail:
}
+/*
+ * Issue a GET FEATURES admin command for `feature'.
+ *
+ * On entry *res supplies command dword 11; on success it is overwritten
+ * with dword 0 of the completion queue entry. For the LBA Range Type and
+ * Autonomous Power State Transition features the data returned by the
+ * controller is copied into a buffer allocated with kmem_alloc(), which is
+ * returned through *buf with its size in *bufsize. For all other features
+ * *bufsize is set to 0 (if bufsize is non-NULL) and *buf is not touched.
+ * nsid is only used for the LBA Range Type feature.
+ *
+ * Returns B_TRUE on success. Returns B_FALSE if the feature is unknown or
+ * marked as unsupported in the nvme_t, or if any step of the command fails.
+ */
static boolean_t
+nvme_get_features(nvme_t *nvme, uint32_t nsid, uint8_t feature, uint32_t *res,
+ void **buf, size_t *bufsize)
+{
+ nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
+ boolean_t ret = B_FALSE;
+
+ ASSERT(res != NULL);
+
+ if (bufsize != NULL)
+ *bufsize = 0;
+
+ cmd->nc_sqid = 0;
+ cmd->nc_callback = nvme_wakeup_cmd;
+ cmd->nc_sqe.sqe_opc = NVME_OPC_GET_FEATURES;
+ cmd->nc_sqe.sqe_cdw10 = feature;
+ cmd->nc_sqe.sqe_cdw11 = *res;
+
+ /*
+ * Per-feature setup: reject unsupported features and request a data
+ * buffer (by setting *bufsize) for the features that return one.
+ */
+ switch (feature) {
+ case NVME_FEAT_ARBITRATION:
+ case NVME_FEAT_POWER_MGMT:
+ case NVME_FEAT_TEMPERATURE:
+ case NVME_FEAT_ERROR:
+ case NVME_FEAT_NQUEUES:
+ case NVME_FEAT_INTR_COAL:
+ case NVME_FEAT_INTR_VECT:
+ case NVME_FEAT_WRITE_ATOM:
+ case NVME_FEAT_ASYNC_EVENT:
+ case NVME_FEAT_PROGRESS:
+ break;
+
+ case NVME_FEAT_WRITE_CACHE:
+ if (!nvme->n_write_cache_present)
+ goto fail;
+ break;
+
+ case NVME_FEAT_LBA_RANGE:
+ if (!nvme->n_lba_range_supported)
+ goto fail;
+
+ /*
+ * The LBA Range Type feature is optional. There doesn't seem
+ * to be a method of detecting whether it is supported other
+ * than using it. This will cause a "invalid field in command"
+ * error, which is normally considered a programming error and
+ * causes panic in nvme_check_generic_cmd_status().
+ */
+ cmd->nc_dontpanic = B_TRUE;
+ cmd->nc_sqe.sqe_nsid = nsid;
+ ASSERT(bufsize != NULL);
+ *bufsize = NVME_LBA_RANGE_BUFSIZE;
+
+ break;
+
+ case NVME_FEAT_AUTO_PST:
+ if (!nvme->n_auto_pst_supported)
+ goto fail;
+
+ ASSERT(bufsize != NULL);
+ *bufsize = NVME_AUTO_PST_BUFSIZE;
+ break;
+
+ default:
+ goto fail;
+ }
+
+ /*
+ * Set up the DMA buffer for features returning data. At most two DMA
+ * cookies are accepted; their addresses are passed in the two PRP
+ * entries of the command.
+ */
+ if (bufsize != NULL && *bufsize != 0) {
+ if (nvme_zalloc_dma(nvme, *bufsize, DDI_DMA_READ,
+ &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_zalloc_dma failed for GET FEATURES");
+ goto fail;
+ }
+
+ if (cmd->nc_dma->nd_ncookie > 2) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!too many DMA cookies for GET FEATURES");
+ atomic_inc_32(&nvme->n_too_many_cookies);
+ goto fail;
+ }
+
+ cmd->nc_sqe.sqe_dptr.d_prp[0] =
+ cmd->nc_dma->nd_cookie.dmac_laddress;
+ if (cmd->nc_dma->nd_ncookie > 1) {
+ ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
+ &cmd->nc_dma->nd_cookie);
+ cmd->nc_sqe.sqe_dptr.d_prp[1] =
+ cmd->nc_dma->nd_cookie.dmac_laddress;
+ }
+ }
+
+ if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!nvme_admin_cmd failed for GET FEATURES");
+ /*
+ * NOTE(review): cmd is not released with nvme_free_cmd() on
+ * this path; confirm that this is intentional.
+ */
+ return (ret);
+ }
+
+ if (nvme_check_cmd_status(cmd)) {
+ /*
+ * An "invalid field" status for LBA Range Type is taken to
+ * mean the feature is unsupported: remember that and don't
+ * warn. Every other failure is logged.
+ */
+ if (feature == NVME_FEAT_LBA_RANGE &&
+ cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
+ cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INV_FLD)
+ nvme->n_lba_range_supported = B_FALSE;
+ else
+ dev_err(nvme->n_dip, CE_WARN,
+ "!GET FEATURES %d failed with sct = %x, sc = %x",
+ feature, cmd->nc_cqe.cqe_sf.sf_sct,
+ cmd->nc_cqe.cqe_sf.sf_sc);
+ goto fail;
+ }
+
+ /* Hand a copy of the DMA buffer contents back to the caller. */
+ if (bufsize != NULL && *bufsize != 0) {
+ ASSERT(buf != NULL);
+ *buf = kmem_alloc(*bufsize, KM_SLEEP);
+ bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
+ }
+
+ *res = cmd->nc_cqe.cqe_dw0;
+ ret = B_TRUE;
+
+fail:
+ nvme_free_cmd(cmd);
+ return (ret);
+}
+
+static boolean_t
nvme_write_cache_set(nvme_t *nvme, boolean_t enable)
{
nvme_write_cache_t nwc = { 0 };
@@ -1700,7 +1943,7 @@ nvme_write_cache_set(nvme_t *nvme, boolean_t enable)
static int
nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues)
{
- nvme_nqueue_t nq = { 0 };
+ nvme_nqueues_t nq = { 0 };
nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1;
@@ -1866,6 +2109,89 @@ nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
}
+/*
+ * Initialize the nvme_namespace_t of namespace `nsid' from its IDENTIFY
+ * data. Namespace IDs are 1-based; the state is kept in n_ns[nsid - 1].
+ *
+ * This records the identify data, block count, current and best block
+ * size and the namespace name, and sets ns_ignore for namespaces using
+ * features the driver doesn't support.
+ *
+ * Returns DDI_FAILURE if nvme_identify() returns NULL, DDI_SUCCESS
+ * otherwise.
+ */
static int
+nvme_init_ns(nvme_t *nvme, int nsid)
+{
+ nvme_namespace_t *ns = &nvme->n_ns[nsid - 1];
+ nvme_identify_nsid_t *idns;
+ int last_rp;
+
+ ns->ns_nvme = nvme;
+ idns = nvme_identify(nvme, nsid);
+
+ if (idns == NULL) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!failed to identify namespace %d", nsid);
+ return (DDI_FAILURE);
+ }
+
+ ns->ns_idns = idns;
+ ns->ns_id = nsid;
+ ns->ns_block_count = idns->id_nsize;
+ /* The block size is 2^lbaf_lbads of the LBA format currently in use. */
+ ns->ns_block_size =
+ 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
+ ns->ns_best_block_size = ns->ns_block_size;
+
+ /*
+ * Get the EUI64 if present. Use it for devid and device node names.
+ * The EUI64 is only read if the controller version is at least 1.1.
+ */
+ if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
+ bcopy(idns->id_eui64, ns->ns_eui64, sizeof (ns->ns_eui64));
+
+ /*
+ * With a non-zero EUI64 the namespace name is its hex representation,
+ * otherwise it is the decimal namespace ID and the devid is prepared
+ * by nvme_prepare_devid().
+ */
+ /*LINTED: E_BAD_PTR_CAST_ALIGN*/
+ if (*(uint64_t *)ns->ns_eui64 != 0) {
+ uint8_t *eui64 = ns->ns_eui64;
+
+ (void) snprintf(ns->ns_name, sizeof (ns->ns_name),
+ "%02x%02x%02x%02x%02x%02x%02x%02x",
+ eui64[0], eui64[1], eui64[2], eui64[3],
+ eui64[4], eui64[5], eui64[6], eui64[7]);
+ } else {
+ (void) snprintf(ns->ns_name, sizeof (ns->ns_name), "%d",
+ ns->ns_id);
+
+ nvme_prepare_devid(nvme, ns->ns_id);
+ }
+
+ /*
+ * Find the LBA format with no metadata and the best relative
+ * performance. A value of 3 means "degraded", 0 is best.
+ * The scan stops at the first format with lbaf_lbads == 0. Only a
+ * format with a relative performance value strictly lower than the
+ * best one seen so far replaces the best block size.
+ */
+ last_rp = 3;
+ for (int j = 0; j <= idns->id_nlbaf; j++) {
+ if (idns->id_lbaf[j].lbaf_lbads == 0)
+ break;
+ if (idns->id_lbaf[j].lbaf_ms != 0)
+ continue;
+ if (idns->id_lbaf[j].lbaf_rp >= last_rp)
+ continue;
+ last_rp = idns->id_lbaf[j].lbaf_rp;
+ ns->ns_best_block_size =
+ 1 << idns->id_lbaf[j].lbaf_lbads;
+ }
+
+ /* Never report a best block size below the controller-wide minimum. */
+ if (ns->ns_best_block_size < nvme->n_min_block_size)
+ ns->ns_best_block_size = nvme->n_min_block_size;
+
+ /*
+ * We currently don't support namespaces that use either:
+ * - thin provisioning
+ * - protection information
+ */
+ if (idns->id_nsfeat.f_thin ||
+ idns->id_dps.dp_pinfo) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!ignoring namespace %d, unsupported features: "
+ "thin = %d, pinfo = %d", nsid,
+ idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
+ ns->ns_ignore = B_TRUE;
+ } else {
+ ns->ns_ignore = B_FALSE;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static int
nvme_init(nvme_t *nvme)
{
nvme_reg_cc_t cc = { 0 };
@@ -2150,90 +2476,37 @@ nvme_init(nvme_t *nvme)
nvme->n_write_cache_enabled ? 1 : 0);
/*
- * Grab a copy of all mandatory log pages.
- *
- * TODO: should go away once user space tool exists to print logs
+ * Assume LBA Range Type feature is supported. If it isn't this
+ * will be set to B_FALSE by nvme_get_features().
*/
- nvme->n_error_log = (nvme_error_log_entry_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_ERROR);
- nvme->n_health_log = (nvme_health_log_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_HEALTH, -1);
- nvme->n_fwslot_log = (nvme_fwslot_log_t *)
- nvme_get_logpage(nvme, NVME_LOGPAGE_FWSLOT);
+ nvme->n_lba_range_supported = B_TRUE;
+
+ /*
+ * Check support for Autonomous Power State Transition.
+ */
+ if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
+ nvme->n_auto_pst_supported =
+ nvme->n_idctl->id_apsta.ap_sup == 0 ? B_FALSE : B_TRUE;
/*
* Identify Namespaces
*/
nvme->n_namespace_count = nvme->n_idctl->id_nn;
+ if (nvme->n_namespace_count > NVME_MINOR_MAX) {
+ dev_err(nvme->n_dip, CE_WARN,
+ "!too many namespaces: %d, limiting to %d\n",
+ nvme->n_namespace_count, NVME_MINOR_MAX);
+ nvme->n_namespace_count = NVME_MINOR_MAX;
+ }
+
nvme->n_ns = kmem_zalloc(sizeof (nvme_namespace_t) *
nvme->n_namespace_count, KM_SLEEP);
for (i = 0; i != nvme->n_namespace_count; i++) {
- nvme_identify_nsid_t *idns;
- int last_rp;
-
- nvme->n_ns[i].ns_nvme = nvme;
- nvme->n_ns[i].ns_idns = idns = nvme_identify(nvme, i + 1);
-
- if (idns == NULL) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to identify namespace %d", i + 1);
+ mutex_init(&nvme->n_ns[i].ns_minor.nm_mutex, NULL, MUTEX_DRIVER,
+ NULL);
+ if (nvme_init_ns(nvme, i + 1) != DDI_SUCCESS)
goto fail;
- }
-
- nvme->n_ns[i].ns_id = i + 1;
- nvme->n_ns[i].ns_block_count = idns->id_nsize;
- nvme->n_ns[i].ns_block_size =
- 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
- nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size;
-
- /*
- * Get the EUI64 if present. If not present prepare the devid
- * from other device data.
- */
- if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
- bcopy(idns->id_eui64, nvme->n_ns[i].ns_eui64,
- sizeof (nvme->n_ns[i].ns_eui64));
-
- /*LINTED: E_BAD_PTR_CAST_ALIGN*/
- if (*(uint64_t *)nvme->n_ns[i].ns_eui64 == 0) {
- nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);
- }
-
- /*
- * Find the LBA format with no metadata and the best relative
- * performance. A value of 3 means "degraded", 0 is best.
- */
- last_rp = 3;
- for (int j = 0; j <= idns->id_nlbaf; j++) {
- if (idns->id_lbaf[j].lbaf_lbads == 0)
- break;
- if (idns->id_lbaf[j].lbaf_ms != 0)
- continue;
- if (idns->id_lbaf[j].lbaf_rp >= last_rp)
- continue;
- last_rp = idns->id_lbaf[j].lbaf_rp;
- nvme->n_ns[i].ns_best_block_size =
- 1 << idns->id_lbaf[j].lbaf_lbads;
- }
-
- if (nvme->n_ns[i].ns_best_block_size < nvme->n_min_block_size)
- nvme->n_ns[i].ns_best_block_size =
- nvme->n_min_block_size;
-
- /*
- * We currently don't support namespaces that use either:
- * - thin provisioning
- * - protection information
- */
- if (idns->id_nsfeat.f_thin ||
- idns->id_dps.dp_pinfo) {
- dev_err(nvme->n_dip, CE_WARN,
- "!ignoring namespace %d, unsupported features: "
- "thin = %d, pinfo = %d", i + 1,
- idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
- nvme->n_ns[i].ns_ignore = B_TRUE;
- }
}
/*
@@ -2520,6 +2793,8 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_set_driver_private(dip, nvme);
nvme->n_dip = dip;
+ mutex_init(&nvme->n_minor.nm_mutex, NULL, MUTEX_DRIVER, NULL);
+
nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
nvme->n_ignore_unknown_vendor_status = ddi_prop_get_int(DDI_DEV_T_ANY,
@@ -2640,6 +2915,14 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
* Attach the blkdev driver for each namespace.
*/
for (i = 0; i != nvme->n_namespace_count; i++) {
+ if (ddi_create_minor_node(nvme->n_dip, nvme->n_ns[i].ns_name,
+ S_IFCHR, NVME_MINOR(ddi_get_instance(nvme->n_dip), i + 1),
+ DDI_NT_NVME_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN,
+ "!failed to create minor node for namespace %d", i);
+ goto fail;
+ }
+
if (nvme->n_ns[i].ns_ignore)
continue;
@@ -2661,6 +2944,14 @@ nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
}
+ if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
+ NVME_MINOR(ddi_get_instance(dip), 0), DDI_NT_NVME_NEXUS, 0)
+ != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "nvme_attach: "
+ "cannot create devctl minor node");
+ goto fail;
+ }
+
return (DDI_SUCCESS);
fail:
@@ -2689,8 +2980,14 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
if (nvme == NULL)
return (DDI_FAILURE);
+ ddi_remove_minor_node(dip, "devctl");
+ mutex_destroy(&nvme->n_minor.nm_mutex);
+
if (nvme->n_ns) {
for (i = 0; i != nvme->n_namespace_count; i++) {
+ ddi_remove_minor_node(dip, nvme->n_ns[i].ns_name);
+ mutex_destroy(&nvme->n_ns[i].ns_minor.nm_mutex);
+
if (nvme->n_ns[i].ns_bd_hdl) {
(void) bd_detach_handle(
nvme->n_ns[i].ns_bd_hdl);
@@ -2745,7 +3042,7 @@ nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
nvme_free_qpair(nvme->n_adminq);
if (nvme->n_idctl)
- kmem_free(nvme->n_idctl, sizeof (nvme_identify_ctrl_t));
+ kmem_free(nvme->n_idctl, NVME_IDENTIFY_BUFSIZE);
if (nvme->n_progress & NVME_REGS_MAPPED)
ddi_regs_map_free(&nvme->n_regh);
@@ -3042,3 +3339,531 @@ nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
strlen(ns->ns_devid), ns->ns_devid, devid));
}
}
+
+/*
+ * open(9E) entry point for the devctl node (namespace id 0) and the
+ * per-namespace attachment point nodes.
+ *
+ * Only character opens are accepted (EINVAL otherwise). ENXIO is returned
+ * when the minor number does not resolve to a known instance or namespace.
+ * A node that is currently held exclusively cannot be opened again, and an
+ * FEXCL open of a node that is already open is refused; both give EBUSY.
+ */
+static int
+nvme_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+#ifndef __lock_lint
+	_NOTE(ARGUNUSED(cred_p));
+#endif
+	minor_t mnum = getminor(*devp);
+	nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(mnum));
+	int nsid = NVME_MINOR_NSID(mnum);
+	nvme_minor_state_t *state;
+	int err;
+
+	if (otyp != OTYP_CHR)
+		return (EINVAL);
+
+	if (nvme == NULL || nsid > nvme->n_namespace_count)
+		return (ENXIO);
+
+	/* nsid 0 is the controller (devctl) node, 1..n are namespaces */
+	if (nsid == 0)
+		state = &nvme->n_minor;
+	else
+		state = &nvme->n_ns[nsid - 1].ns_minor;
+
+	mutex_enter(&state->nm_mutex);
+	if (state->nm_oexcl || ((flag & FEXCL) && state->nm_ocnt != 0)) {
+		err = EBUSY;
+	} else {
+		if (flag & FEXCL)
+			state->nm_oexcl = B_TRUE;
+		state->nm_ocnt++;
+		err = 0;
+	}
+	mutex_exit(&state->nm_mutex);
+
+	return (err);
+}
+
+/*
+ * close(9E) entry point, the counterpart of nvme_open().
+ *
+ * Drops one reference on the minor node's open count and clears the
+ * exclusive-open flag. Returns ENXIO for a non-character close or when the
+ * minor number does not resolve to a known instance or namespace.
+ */
+static int
+nvme_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+#ifndef __lock_lint
+	_NOTE(ARGUNUSED(cred_p));
+	_NOTE(ARGUNUSED(flag));
+#endif
+	minor_t mnum = getminor(dev);
+	nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(mnum));
+	int nsid = NVME_MINOR_NSID(mnum);
+	nvme_minor_state_t *state;
+
+	if (otyp != OTYP_CHR || nvme == NULL ||
+	    nsid > nvme->n_namespace_count)
+		return (ENXIO);
+
+	if (nsid == 0)
+		state = &nvme->n_minor;
+	else
+		state = &nvme->n_ns[nsid - 1].ns_minor;
+
+	mutex_enter(&state->nm_mutex);
+	/* whoever held the node exclusively is gone once it is closed */
+	state->nm_oexcl = B_FALSE;
+
+	ASSERT(state->nm_ocnt > 0);
+	state->nm_ocnt--;
+	mutex_exit(&state->nm_mutex);
+
+	return (0);
+}
+
+/*
+ * Handler for NVME_IOC_IDENTIFY_CTRL and NVME_IOC_IDENTIFY_NSID.
+ *
+ * Fetches identify data for nsid through nvme_identify() and copies
+ * NVME_IDENTIFY_BUFSIZE bytes of it to the caller's buffer.
+ *
+ * Returns EPERM if the node was not opened for reading, EINVAL if the
+ * caller's buffer is smaller than NVME_IDENTIFY_BUFSIZE, EIO if the identify
+ * data could not be obtained and EFAULT if the copyout failed.
+ */
+static int
+nvme_ioctl_identify(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(cred_p));
+	void *data;
+	int err = 0;
+
+	if (!(mode & FREAD))
+		return (EPERM);
+
+	if (nioc->n_len < NVME_IDENTIFY_BUFSIZE)
+		return (EINVAL);
+
+	if ((data = nvme_identify(nvme, nsid)) == NULL)
+		return (EIO);
+
+	if (ddi_copyout(data, (void *)nioc->n_buf, NVME_IDENTIFY_BUFSIZE,
+	    mode) != 0)
+		err = EFAULT;
+
+	/* the identify buffer is ours to release, success or not */
+	kmem_free(data, NVME_IDENTIFY_BUFSIZE);
+
+	return (err);
+}
+
+/*
+ * Handler for NVME_IOC_CAPABILITIES.
+ *
+ * Reads the controller's CAP register and reports the minimum and maximum
+ * memory page sizes to the caller as an nvme_capabilities_t.
+ *
+ * Returns EPERM if the node was not opened for reading, EINVAL if the
+ * caller's buffer cannot hold an nvme_capabilities_t and EFAULT if the
+ * copyout failed.
+ */
+static int
+nvme_ioctl_capabilities(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+    int mode, cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(nsid, cred_p));
+	nvme_reg_cap_t capreg = { 0 };
+	nvme_capabilities_t nc;
+
+	if (!(mode & FREAD))
+		return (EPERM);
+
+	if (nioc->n_len < sizeof (nc))
+		return (EINVAL);
+
+	capreg.r = nvme_get64(nvme, NVME_REG_CAP);
+
+	/*
+	 * MPSMIN and MPSMAX are encoded relative to the 4k base page size
+	 * (1<<12): a register value of 0 means 4k, so the shift count is
+	 * the register value plus 12.
+	 */
+	nc.mpsmin = 1 << (12 + capreg.b.cap_mpsmin);
+	nc.mpsmax = 1 << (12 + capreg.b.cap_mpsmax);
+
+	if (ddi_copyout(&nc, (void *)nioc->n_buf, sizeof (nc), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Handler for NVME_IOC_GET_LOGPAGE; nioc->n_arg selects the log page.
+ *
+ * The error and firmware slot pages can only be requested through the
+ * devctl node (nsid 0). The health page can be requested through the
+ * devctl node, in which case the namespace id passed on is (uint32_t)-1,
+ * or through a namespace node if the controller's id_lpa.lp_smart bit is
+ * set. Any other page is rejected.
+ *
+ * On success the page is copied to the caller's buffer and nioc->n_len is
+ * set to its size. Returns EPERM if the node was not opened for reading,
+ * EINVAL for a disallowed page/node combination or a buffer that is too
+ * small, EIO if the page could not be retrieved and EFAULT if the copyout
+ * failed.
+ */
+static int
+nvme_ioctl_get_logpage(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+    int mode, cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(cred_p));
+	void *page = NULL;
+	size_t pagesize = 0;
+	int err = 0;
+
+	if (!(mode & FREAD))
+		return (EPERM);
+
+	switch (nioc->n_arg) {
+	case NVME_LOGPAGE_ERROR:
+	case NVME_LOGPAGE_FWSLOT:
+		if (nsid != 0)
+			return (EINVAL);
+		break;
+	case NVME_LOGPAGE_HEALTH:
+		if (nsid == 0)
+			nsid = (uint32_t)-1;
+		else if (nvme->n_idctl->id_lpa.lp_smart == 0)
+			return (EINVAL);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if (nvme_get_logpage(nvme, &page, &pagesize, nioc->n_arg, nsid)
+	    != DDI_SUCCESS)
+		return (EIO);
+
+	if (nioc->n_len < pagesize) {
+		err = EINVAL;
+	} else {
+		if (ddi_copyout(page, (void *)nioc->n_buf, pagesize, mode)
+		    != 0)
+			err = EFAULT;
+
+		nioc->n_len = pagesize;
+	}
+
+	kmem_free(page, pagesize);
+
+	return (err);
+}
+
+/*
+ * Handler for NVME_IOC_GET_FEATURES.
+ *
+ * The upper 32 bits of nioc->n_arg carry the feature id (which must fit in
+ * 8 bits); for NVME_FEAT_INTR_VECT the lower 32 bits carry the interrupt
+ * vector being queried. All features except NVME_FEAT_LBA_RANGE must be
+ * requested through the devctl node (nsid 0); LBA Range Type must be
+ * requested through a namespace node. Features the driver has recorded as
+ * unsupported, and unknown features, are rejected.
+ *
+ * On return nioc->n_arg holds the 32-bit result value and nioc->n_len the
+ * size of any data copied to the caller's buffer. Returns EPERM if the node
+ * was not opened for reading, EINVAL for a rejected request or a buffer
+ * that is too small, EIO if the command failed and EFAULT if the copyout
+ * failed.
+ */
+static int
+nvme_ioctl_get_features(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
+    int mode, cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(cred_p));
+	uint64_t fid = nioc->n_arg >> 32;
+	void *data = NULL;
+	size_t datalen = 0;
+	uint32_t dw = 0;
+	uint8_t feature;
+	int err = 0;
+
+	if (!(mode & FREAD))
+		return (EPERM);
+
+	if (fid > 0xff)
+		return (EINVAL);
+
+	feature = (uint8_t)fid;
+
+	switch (feature) {
+	case NVME_FEAT_ARBITRATION:
+	case NVME_FEAT_POWER_MGMT:
+	case NVME_FEAT_TEMPERATURE:
+	case NVME_FEAT_ERROR:
+	case NVME_FEAT_NQUEUES:
+	case NVME_FEAT_INTR_COAL:
+	case NVME_FEAT_WRITE_ATOM:
+	case NVME_FEAT_ASYNC_EVENT:
+	case NVME_FEAT_PROGRESS:
+		if (nsid != 0)
+			return (EINVAL);
+		break;
+
+	case NVME_FEAT_INTR_VECT:
+		if (nsid != 0)
+			return (EINVAL);
+
+		/* the vector to query travels in the low half of n_arg */
+		dw = nioc->n_arg & 0xffffffffUL;
+		if (dw >= nvme->n_intr_cnt)
+			return (EINVAL);
+		break;
+
+	case NVME_FEAT_LBA_RANGE:
+		if (nvme->n_lba_range_supported == B_FALSE ||
+		    nsid == 0 || nsid > nvme->n_namespace_count)
+			return (EINVAL);
+		break;
+
+	case NVME_FEAT_WRITE_CACHE:
+		if (nsid != 0 || !nvme->n_write_cache_present)
+			return (EINVAL);
+		break;
+
+	case NVME_FEAT_AUTO_PST:
+		if (nsid != 0 || !nvme->n_auto_pst_supported)
+			return (EINVAL);
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	if (nvme_get_features(nvme, nsid, feature, &dw, &data, &datalen) ==
+	    B_FALSE)
+		return (EIO);
+
+	if (nioc->n_len < datalen) {
+		kmem_free(data, datalen);
+		return (EINVAL);
+	}
+
+	/* not every feature comes with a data buffer */
+	if (data != NULL &&
+	    ddi_copyout(data, (void *)nioc->n_buf, datalen, mode) != 0)
+		err = EFAULT;
+
+	kmem_free(data, datalen);
+	nioc->n_arg = dw;
+	nioc->n_len = datalen;
+
+	return (err);
+}
+
+/*
+ * Handler for NVME_IOC_INTR_CNT.
+ *
+ * Stores the controller's interrupt count (nvme->n_intr_cnt) in
+ * nioc->n_arg. Returns EPERM if the node was not opened for reading,
+ * 0 otherwise.
+ */
+static int
+nvme_ioctl_intr_cnt(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	/* mode is checked below, so it must not be annotated as unused */
+	_NOTE(ARGUNUSED(nsid, cred_p));
+
+	if ((mode & FREAD) == 0)
+		return (EPERM);
+
+	nioc->n_arg = nvme->n_intr_cnt;
+	return (0);
+}
+
+/*
+ * Handler for NVME_IOC_VERSION.
+ *
+ * Copies the controller's nvme_version_t (nvme->n_version) to the caller's
+ * buffer. Returns EPERM if the node was not opened for reading, ENOMEM if
+ * the caller's buffer is too small and EFAULT if the copyout failed.
+ */
+static int
+nvme_ioctl_version(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(nsid, cred_p));
+
+	if (!(mode & FREAD))
+		return (EPERM);
+
+	if (nioc->n_len < sizeof (nvme->n_version))
+		return (ENOMEM);
+
+	if (ddi_copyout(&nvme->n_version, (void *)nioc->n_buf,
+	    sizeof (nvme->n_version), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Handler for NVME_IOC_FORMAT.
+ *
+ * The low 32 bits of nioc->n_arg are interpreted as an nvme_format_nvm_t.
+ * Issued on the devctl node (nsid 0) the request is passed on with a
+ * namespace id of (uint32_t)-1; in that case the LBA format is validated
+ * against the identify data of the first namespace.
+ *
+ * Requires the node to be open for writing and the sys_config privilege
+ * (EPERM otherwise). EINVAL is returned if the controller does not support
+ * FORMAT NVM, if formatting or secure-erasing a single namespace would
+ * affect all namespaces, or if the requested protection information,
+ * metadata, LBA format or secure erase settings are not acceptable.
+ * Otherwise the result of nvme_format_nvm() is returned.
+ */
+static int
+nvme_ioctl_format(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	nvme_format_nvm_t frmt = { 0 };
+	/* index into n_ns[]; the devctl node validates against n_ns[0] */
+	int c_nsid = nsid != 0 ? nsid - 1 : 0;
+
+	if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+		return (EPERM);
+
+	frmt.r = nioc->n_arg & 0xffffffff;
+
+	/*
+	 * Check whether the FORMAT NVM command is supported.
+	 */
+	if (nvme->n_idctl->id_oacs.oa_format == 0)
+		return (EINVAL);
+
+	/*
+	 * Don't allow format or secure erase of individual namespace if that
+	 * would cause a format or secure erase of all namespaces.
+	 */
+	if (nsid != 0 && nvme->n_idctl->id_fna.fn_format != 0)
+		return (EINVAL);
+
+	if (nsid != 0 && frmt.b.fm_ses != NVME_FRMT_SES_NONE &&
+	    nvme->n_idctl->id_fna.fn_sec_erase != 0)
+		return (EINVAL);
+
+	/*
+	 * Don't allow formatting with Protection Information.
+	 */
+	if (frmt.b.fm_pi != 0 || frmt.b.fm_pil != 0 || frmt.b.fm_ms != 0)
+		return (EINVAL);
+
+	/*
+	 * Don't allow formatting using an illegal LBA format, or any LBA format
+	 * that uses metadata.
+	 */
+	if (frmt.b.fm_lbaf > nvme->n_ns[c_nsid].ns_idns->id_nlbaf ||
+	    nvme->n_ns[c_nsid].ns_idns->id_lbaf[frmt.b.fm_lbaf].lbaf_ms != 0)
+		return (EINVAL);
+
+	/*
+	 * Don't allow formatting using an illegal Secure Erase setting.
+	 */
+	if (frmt.b.fm_ses > NVME_FRMT_MAX_SES ||
+	    (frmt.b.fm_ses == NVME_FRMT_SES_CRYPTO &&
+	    nvme->n_idctl->id_fna.fn_crypt_erase == 0))
+		return (EINVAL);
+
+	if (nsid == 0)
+		nsid = (uint32_t)-1;
+
+	return (nvme_format_nvm(nvme, nsid, frmt.b.fm_lbaf, B_FALSE, 0, B_FALSE,
+	    frmt.b.fm_ses));
+}
+
+/*
+ * Handler for NVME_IOC_DETACH: detach blkdev from a namespace.
+ *
+ * Requires the node to be open for writing and the sys_config privilege
+ * (EPERM otherwise). The devctl node (nsid 0) is not a namespace and is
+ * rejected with EINVAL. Returns EBUSY if blkdev refuses to detach, and 0
+ * on success or when there is nothing to detach.
+ */
+static int
+nvme_ioctl_detach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(nioc));
+	nvme_namespace_t *ns;
+
+	if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+		return (EPERM);
+
+	if (nsid == 0)
+		return (EINVAL);
+
+	ns = &nvme->n_ns[nsid - 1];
+
+	/*
+	 * Ignored namespaces are skipped when blkdev handles are set up at
+	 * attach time, so they have no handle. There is nothing to detach,
+	 * and bd_detach_handle() must not be handed a NULL handle.
+	 */
+	if (ns->ns_ignore || ns->ns_bd_hdl == NULL)
+		return (0);
+
+	if (bd_detach_handle(ns->ns_bd_hdl) != DDI_SUCCESS)
+		return (EBUSY);
+
+	return (0);
+}
+
+/*
+ * Handler for NVME_IOC_ATTACH: re-identify a namespace and attach blkdev
+ * to it.
+ *
+ * Requires the node to be open for writing and the sys_config privilege
+ * (EPERM otherwise). The devctl node (nsid 0) is not a namespace and is
+ * rejected with EINVAL. Returns EIO if the namespace cannot be identified,
+ * ENOTSUP if the namespace is one the driver ignores or has no blkdev
+ * handle, and EBUSY if blkdev refuses to attach.
+ */
+static int
+nvme_ioctl_attach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
+    cred_t *cred_p)
+{
+	_NOTE(ARGUNUSED(nioc));
+	nvme_namespace_t *ns;
+	nvme_identify_nsid_t *old_idns;
+
+	if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
+		return (EPERM);
+
+	if (nsid == 0)
+		return (EINVAL);
+
+	ns = &nvme->n_ns[nsid - 1];
+
+	/*
+	 * Identify namespace again, free old identify data.
+	 */
+	old_idns = ns->ns_idns;
+	if (nvme_init_ns(nvme, nsid) != DDI_SUCCESS)
+		return (EIO);
+
+	kmem_free(old_idns, sizeof (nvme_identify_nsid_t));
+
+	/*
+	 * The fresh identify data may show features the driver does not
+	 * support, and namespaces ignored at attach time never got a blkdev
+	 * handle. Refuse instead of handing blkdev an unsupported namespace
+	 * or a NULL handle.
+	 */
+	if (ns->ns_ignore || ns->ns_bd_hdl == NULL)
+		return (ENOTSUP);
+
+	if (bd_attach_handle(nvme->n_dip, ns->ns_bd_hdl) != DDI_SUCCESS)
+		return (EBUSY);
+
+	return (0);
+}
+
+/*
+ * ioctl(9E) entry point for the devctl and namespace minor nodes.
+ *
+ * Resolves the controller and namespace id from the minor number, hands
+ * devctl ioctls to ndi_devctl_ioctl(), and for everything else copies in
+ * the caller's nvme_ioctl_t (converting from nvme_ioctl32_t for ILP32
+ * callers), dispatches to the handler selected by NVME_IOC_CMD(cmd) and
+ * copies the structure back out.
+ *
+ * Returns ENXIO for an unknown instance or namespace, EFAULT if the
+ * argument structure cannot be copied in or out, EINVAL for an unknown
+ * command, and otherwise the handler's return value.
+ */
+static int
+nvme_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p,
+ int *rval_p)
+{
+#ifndef __lock_lint
+ _NOTE(ARGUNUSED(rval_p));
+#endif
+ minor_t minor = getminor(dev);
+ nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
+ int nsid = NVME_MINOR_NSID(minor);
+ int rv = 0;
+ nvme_ioctl_t nioc;
+
+ /*
+ * Handler table indexed by NVME_IOC_CMD(cmd). Slot 0 is unused; slots
+ * 1 and 2 (NVME_IOC_IDENTIFY_CTRL and NVME_IOC_IDENTIFY_NSID) share one
+ * handler and differ only in the nsid chosen below.
+ */
+ int (*nvme_ioctl[])(nvme_t *, int, nvme_ioctl_t *, int, cred_t *) = {
+ NULL,
+ nvme_ioctl_identify,
+ nvme_ioctl_identify,
+ nvme_ioctl_capabilities,
+ nvme_ioctl_get_logpage,
+ nvme_ioctl_get_features,
+ nvme_ioctl_intr_cnt,
+ nvme_ioctl_version,
+ nvme_ioctl_format,
+ nvme_ioctl_detach,
+ nvme_ioctl_attach
+ };
+
+ if (nvme == NULL)
+ return (ENXIO);
+
+ if (nsid > nvme->n_namespace_count)
+ return (ENXIO);
+
+ /* devctl ioctls take a different argument; pass them on untouched */
+ if (IS_DEVCTL(cmd))
+ return (ndi_devctl_ioctl(nvme->n_dip, cmd, arg, mode, 0));
+
+ /*
+ * Copy in the argument structure, widening the 32-bit layout if needed.
+ * NOTE(review): this happens before cmd is validated, so an unknown cmd
+ * with a bad arg pointer yields EFAULT rather than EINVAL.
+ */
+#ifdef _MULTI_DATAMODEL
+ switch (ddi_model_convert_from(mode & FMODELS)) {
+ case DDI_MODEL_ILP32: {
+ nvme_ioctl32_t nioc32;
+ if (ddi_copyin((void*)arg, &nioc32, sizeof (nvme_ioctl32_t),
+ mode) != 0)
+ return (EFAULT);
+ nioc.n_len = nioc32.n_len;
+ nioc.n_buf = nioc32.n_buf;
+ nioc.n_arg = nioc32.n_arg;
+ break;
+ }
+ case DDI_MODEL_NONE:
+#endif
+ if (ddi_copyin((void*)arg, &nioc, sizeof (nvme_ioctl_t), mode)
+ != 0)
+ return (EFAULT);
+#ifdef _MULTI_DATAMODEL
+ break;
+ }
+#endif
+
+ if (cmd == NVME_IOC_IDENTIFY_CTRL) {
+ /*
+ * This makes NVME_IOC_IDENTIFY_CTRL work the same on devctl and
+ * attachment point nodes.
+ */
+ nsid = 0;
+ } else if (cmd == NVME_IOC_IDENTIFY_NSID && nsid == 0) {
+ /*
+ * This makes NVME_IOC_IDENTIFY_NSID work on a devctl node, it
+ * will always return identify data for namespace 1.
+ */
+ nsid = 1;
+ }
+
+ /*
+ * IS_NVME_IOC() bounds cmd to the defined commands, which keeps the
+ * table index within range.
+ */
+ if (IS_NVME_IOC(cmd) && nvme_ioctl[NVME_IOC_CMD(cmd)] != NULL)
+ rv = nvme_ioctl[NVME_IOC_CMD(cmd)](nvme, nsid, &nioc, mode,
+ cred_p);
+ else
+ rv = EINVAL;
+
+ /*
+ * Copy the (possibly updated) argument structure back to the caller.
+ * This is done whether or not the handler succeeded; a failing copyout
+ * replaces the handler's return value with EFAULT.
+ */
+#ifdef _MULTI_DATAMODEL
+ switch (ddi_model_convert_from(mode & FMODELS)) {
+ case DDI_MODEL_ILP32: {
+ nvme_ioctl32_t nioc32;
+
+ nioc32.n_len = (size32_t)nioc.n_len;
+ nioc32.n_buf = (uintptr32_t)nioc.n_buf;
+ nioc32.n_arg = nioc.n_arg;
+
+ if (ddi_copyout(&nioc32, (void *)arg, sizeof (nvme_ioctl32_t),
+ mode) != 0)
+ return (EFAULT);
+ break;
+ }
+ case DDI_MODEL_NONE:
+#endif
+ if (ddi_copyout(&nioc, (void *)arg, sizeof (nvme_ioctl_t), mode)
+ != 0)
+ return (EFAULT);
+#ifdef _MULTI_DATAMODEL
+ break;
+ }
+#endif
+
+ return (rv);
+}
diff --git a/usr/src/uts/common/io/nvme/nvme_reg.h b/usr/src/uts/common/io/nvme/nvme_reg.h
index 3e4b77079b..acff0e2362 100644
--- a/usr/src/uts/common/io/nvme/nvme_reg.h
+++ b/usr/src/uts/common/io/nvme/nvme_reg.h
@@ -20,6 +20,8 @@
#ifndef _NVME_REG_H
#define _NVME_REG_H
+#include <sys/nvme.h>
+
#pragma pack(1)
#ifdef __cplusplus
@@ -33,22 +35,6 @@ extern "C" {
#define NVME_MAX_ADMIN_QUEUE_LEN 4096
/*
- * NVMe version
- */
-typedef struct {
- uint16_t v_minor;
- uint16_t v_major;
-} nvme_version_t;
-
-#define NVME_VERSION_ATLEAST(v, maj, min) \
- (((v)->v_major) > (maj) || \
- ((v)->v_major == (maj) && (v)->v_minor >= (min)))
-
-#define NVME_VERSION_HIGHER(v, maj, min) \
- (((v)->v_major) > (maj) || \
- ((v)->v_major == (maj) && (v)->v_minor > (min)))
-
-/*
* NVMe registers and register fields
*/
#define NVME_REG_CAP 0x0 /* Controller Capabilities */
@@ -258,15 +244,6 @@ typedef struct {
* NVMe completion queue entry
*/
typedef struct {
- uint16_t sf_p:1; /* Phase Tag */
- uint16_t sf_sc:8; /* Status Code */
- uint16_t sf_sct:3; /* Status Code Type */
- uint16_t sf_rsvd2:2;
- uint16_t sf_m:1; /* More */
- uint16_t sf_dnr:1; /* Do Not Retry */
-} nvme_cqe_sf_t;
-
-typedef struct {
uint32_t cqe_dw0; /* Command Specific */
uint32_t cqe_rsvd1;
uint16_t cqe_sqhd; /* SQ Head Pointer */
@@ -408,203 +385,6 @@ typedef union {
#define NVME_IDENTIFY_CTRL 0x1 /* Identify Controller */
#define NVME_IDENTIFY_LIST 0x2 /* Identify List Namespaces */
-#define NVME_IDENTIFY_BUFSIZE 4096 /* buffer size for Identify */
-
-/* NVMe Queue Entry Size bitfield */
-typedef struct {
- uint8_t qes_min:4; /* minimum entry size */
- uint8_t qes_max:4; /* maximum entry size */
-} nvme_idctl_qes_t;
-
-/* NVMe Power State Descriptor */
-typedef struct {
- uint16_t psd_mp; /* Maximum Power */
- uint8_t psd_rsvd1;
- uint8_t psd_mps:1; /* Max Power Scale (1.1) */
- uint8_t psd_nops:1; /* Non-Operational State (1.1) */
- uint8_t psd_rsvd2:6;
- uint32_t psd_enlat; /* Entry Latency */
- uint32_t psd_exlat; /* Exit Latency */
- uint8_t psd_rrt:5; /* Relative Read Throughput */
- uint8_t psd_rsvd3:3;
- uint8_t psd_rrl:5; /* Relative Read Latency */
- uint8_t psd_rsvd4:3;
- uint8_t psd_rwt:5; /* Relative Write Throughput */
- uint8_t psd_rsvd5:3;
- uint8_t psd_rwl:5; /* Relative Write Latency */
- uint8_t psd_rsvd6:3;
- uint8_t psd_rsvd7[16];
-} nvme_idctl_psd_t;
-
-/* NVMe Identify Controller Data Structure */
-typedef struct {
- /* Controller Capabilities & Features */
- uint16_t id_vid; /* PCI vendor ID */
- uint16_t id_ssvid; /* PCI subsystem vendor ID */
- char id_serial[20]; /* Serial Number */
- char id_model[40]; /* Model Number */
- char id_fwrev[8]; /* Firmware Revision */
- uint8_t id_rab; /* Recommended Arbitration Burst */
- uint8_t id_oui[3]; /* vendor IEEE OUI */
- struct { /* Multi-Interface Capabilities */
- uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */
- uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
- uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */
- uint8_t m_rsvd:5;
- } id_mic;
- uint8_t id_mdts; /* Maximum Data Transfer Size */
- uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */
- uint8_t id_rsvd_cc[256 - 80];
-
- /* Admin Command Set Attributes */
- struct { /* Optional Admin Command Support */
- uint16_t oa_security:1; /* Security Send & Receive */
- uint16_t oa_format:1; /* Format NVM */
- uint16_t oa_firmare:1; /* Firmware Activate & Download */
- uint16_t oa_rsvd:13;
- } id_oacs;
- uint8_t id_acl; /* Abort Command Limit */
- uint8_t id_aerl; /* Asynchronous Event Request Limit */
- struct { /* Firmware Updates */
- uint8_t fw_readonly:1; /* Slot 1 is Read-Only */
- uint8_t fw_nslot:3; /* number of firmware slots */
- uint8_t fw_rsvd:4;
- } id_frmw;
- struct { /* Log Page Attributes */
- uint8_t lp_smart:1; /* SMART/Health information per NS */
- uint8_t lp_rsvd:7;
- } id_lpa;
- uint8_t id_elpe; /* Error Log Page Entries */
- uint8_t id_npss; /* Number of Power States */
- struct { /* Admin Vendor Specific Command Conf */
- uint8_t av_spec:1; /* use format from spec */
- uint8_t av_rsvd:7;
- } id_avscc;
- struct { /* Autonomous Power State Trans (1.1) */
- uint8_t ap_sup:1; /* APST supported (1.1) */
- uint8_t ap_rsvd:7;
- } id_apsta;
- uint8_t id_rsvd_ac[256 - 10];
-
- /* NVM Command Set Attributes */
- nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */
- nvme_idctl_qes_t id_cqes; /* Completion Queue Entry Size */
- uint16_t id_rsvd_nc_1;
- uint32_t id_nn; /* Number of Namespaces */
- struct { /* Optional NVM Command Support */
- uint16_t on_compare:1; /* Compare */
- uint16_t on_wr_unc:1; /* Write Uncorrectable */
- uint16_t on_dset_mgmt:1; /* Dataset Management */
- uint16_t on_wr_zero:1; /* Write Zeros (1.1) */
- uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */
- uint16_t on_reserve:1; /* Reservations (1.1) */
- uint16_t on_rsvd:10;
- } id_oncs;
- struct { /* Fused Operation Support */
- uint16_t f_cmp_wr:1; /* Compare and Write */
- uint16_t f_rsvd:15;
- } id_fuses;
- struct { /* Format NVM Attributes */
- uint8_t fn_format:1; /* Format applies to all NS */
- uint8_t fn_sec_erase:1; /* Secure Erase applies to all NS */
- uint8_t fn_crypt_erase:1; /* Cryptographic Erase supported */
- uint8_t fn_rsvd:5;
- } id_fna;
- struct { /* Volatile Write Cache */
- uint8_t vwc_present:1; /* Volatile Write Cache present */
- uint8_t rsvd:7;
- } id_vwc;
- uint16_t id_awun; /* Atomic Write Unit Normal */
- uint16_t id_awupf; /* Atomic Write Unit Power Fail */
- struct { /* NVM Vendor Specific Command Conf */
- uint8_t nv_spec:1; /* use format from spec */
- uint8_t nv_rsvd:7;
- } id_nvscc;
- uint8_t id_rsvd_nc_2;
- uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */
- uint16_t id_rsvd_nc_3;
- struct { /* SGL Support (1.1) */
- uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */
- uint16_t sgl_rsvd1:15;
- uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */
- uint16_t sgl_rsvd2:15;
- } id_sgls;
- uint8_t id_rsvd_nc_4[192 - 28];
-
- /* I/O Command Set Attributes */
- uint8_t id_rsvd_ioc[1344];
-
- /* Power State Descriptors */
- nvme_idctl_psd_t id_psd[32];
-
- /* Vendor Specific */
- uint8_t id_vs[1024];
-} nvme_identify_ctrl_t;
-
-/* NVMe Identify Namespace LBA Format */
-typedef struct {
- uint16_t lbaf_ms; /* Metadata Size */
- uint8_t lbaf_lbads; /* LBA Data Size */
- uint8_t lbaf_rp:2; /* Relative Performance */
- uint8_t lbaf_rsvd1:6;
-} nvme_idns_lbaf_t;
-
-/* NVMe Identify Namespace Data Structure */
-typedef struct {
- uint64_t id_nsize; /* Namespace Size */
- uint64_t id_ncap; /* Namespace Capacity */
- uint64_t id_nuse; /* Namespace Utilization */
- struct { /* Namespace Features */
- uint8_t f_thin:1; /* Thin Provisioning */
- uint8_t f_rsvd:7;
- } id_nsfeat;
- uint8_t id_nlbaf; /* Number of LBA formats */
- struct { /* Formatted LBA size */
- uint8_t lba_format:4; /* LBA format */
- uint8_t lba_extlba:1; /* extended LBA (includes metadata) */
- uint8_t lba_rsvd:3;
- } id_flbas;
- struct { /* Metadata Capabilities */
- uint8_t mc_extlba:1; /* extended LBA transfers */
- uint8_t mc_separate:1; /* separate metadata transfers */
- uint8_t mc_rsvd:6;
- } id_mc;
- struct { /* Data Protection Capabilities */
- uint8_t dp_type1:1; /* Protection Information Type 1 */
- uint8_t dp_type2:1; /* Protection Information Type 2 */
- uint8_t dp_type3:1; /* Protection Information Type 3 */
- uint8_t dp_first:1; /* first 8 bytes of metadata */
- uint8_t dp_last:1; /* last 8 bytes of metadata */
- uint8_t dp_rsvd:3;
- } id_dpc;
- struct { /* Data Protection Settings */
- uint8_t dp_pinfo:3; /* Protection Information enabled */
- uint8_t dp_first:1; /* first 8 bytes of metadata */
- uint8_t dp_rsvd:4;
- } id_dps;
- struct { /* NS Multi-Path/Sharing Cap (1.1) */
- uint8_t nm_shared:1; /* NS is shared (1.1) */
- uint8_t nm_rsvd:7;
- } id_nmic;
- struct { /* Reservation Capabilities (1.1) */
- uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */
- uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */
- uint8_t rc_excl:1; /* Exclusive Access (1.1) */
- uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */
- uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */
- uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */
- uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */
- uint8_t rc_rsvd:1;
- } id_rescap;
- uint8_t id_rsvd1[120 - 32];
- uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */
- nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */
-
- uint8_t id_rsvd2[192];
-
- uint8_t id_vs[3712]; /* Vendor Specific */
-} nvme_identify_nsid_t;
-
/*
* NVMe Abort Command
@@ -619,79 +399,8 @@ typedef union {
/*
- * NVMe Get / Set Features
- */
-#define NVME_FEAT_ARBITRATION 0x1 /* Command Arbitration */
-#define NVME_FEAT_POWER_MGMT 0x2 /* Power Management */
-#define NVME_FEAT_LBA_RANGE 0x3 /* LBA Range Type */
-#define NVME_FEAT_TEMPERATURE 0x4 /* Temperature Threshold */
-#define NVME_FEAT_ERROR 0x5 /* Error Recovery */
-#define NVME_FEAT_WRITE_CACHE 0x6 /* Volatile Write Cache */
-#define NVME_FEAT_NQUEUES 0x7 /* Number of Queues */
-#define NVME_FEAT_INTR_COAL 0x8 /* Interrupt Coalescing */
-#define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */
-#define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */
-#define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */
-#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */
- /* (1.1) */
-
-#define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */
-
-/* Arbitration Feature */
-typedef struct {
- uint8_t arb_ab:3; /* Arbitration Burst */
- uint8_t arb_rsvd:5;
- uint8_t arb_lpw; /* Low Priority Weight */
- uint8_t arb_mpw; /* Medium Priority Weight */
- uint8_t arb_hpw; /* High Priority Weight */
-} nvme_arbitration_dw11_t;
-
-/* LBA Range Type Feature */
-typedef struct {
- uint32_t lr_num:6; /* Number of LBA ranges */
- uint32_t lr_rsvd:26;
-} nvme_lba_range_type_dw11_t;
-
-typedef struct {
- uint8_t lr_type; /* Type */
- struct { /* Attributes */
- uint8_t lr_write:1; /* may be overwritten */
- uint8_t lr_hidden:1; /* hidden from OS/EFI/BIOS */
- uint8_t lr_rsvd1:6;
- } lr_attr;
- uint8_t lr_rsvd2[14];
- uint64_t lr_slba; /* Starting LBA */
- uint64_t lr_nlb; /* Number of Logical Blocks */
- uint8_t lr_guid[16]; /* Unique Identifier */
- uint8_t lr_rsvd3[16];
-} nvme_lba_range_type_t;
-
-/* Volatile Write Cache Feature */
-typedef union {
- struct {
- uint32_t wc_wce:1; /* Volatile Write Cache Enable */
- uint32_t wc_rsvd:31;
- } b;
- uint32_t r;
-} nvme_write_cache_t;
-
-/* Number of Queues */
-typedef union {
- struct {
- uint16_t nq_nsq; /* Number of Submission Queues */
- uint16_t nq_ncq; /* Number of Completion Queues */
- } b;
- uint32_t r;
-} nvme_nqueue_t;
-
-
-/*
* NVMe Get Log Page
*/
-#define NVME_LOGPAGE_ERROR 0x1 /* Error Information */
-#define NVME_LOGPAGE_HEALTH 0x2 /* SMART/Health Information */
-#define NVME_LOGPAGE_FWSLOT 0x3 /* Firmware Slot Information */
-
typedef union {
struct {
uint8_t lp_lid; /* Log Page Identifier */
@@ -702,52 +411,6 @@ typedef union {
uint32_t r;
} nvme_getlogpage_t;
-typedef struct {
- uint64_t el_count; /* Error Count */
- uint16_t el_sqid; /* Submission Queue ID */
- uint16_t el_cid; /* Command ID */
- nvme_cqe_sf_t el_sf; /* Status Field */
- uint8_t el_byte; /* Parameter Error Location byte */
- uint8_t el_bit:3; /* Parameter Error Location bit */
- uint8_t el_rsvd1:5;
- uint64_t el_lba; /* Logical Block Address */
- uint32_t el_nsid; /* Namespace ID */
- uint8_t el_vendor; /* Vendor Specific Information avail */
- uint8_t el_rsvd2[64 - 29];
-} nvme_error_log_entry_t;
-
-typedef struct {
- uint64_t lo;
- uint64_t hi;
-} nvme_uint128_t;
-
-typedef struct {
- uint8_t hl_crit_warn; /* Critical Warning */
- uint16_t hl_temp; /* Temperature */
- uint8_t hl_avail_spare; /* Available Spare */
- uint8_t hl_avail_spare_thr; /* Available Spare Threshold */
- uint8_t hl_used; /* Percentage Used */
- uint8_t hl_rsvd1[32 - 6];
- nvme_uint128_t hl_data_read; /* Data Units Read */
- nvme_uint128_t hl_data_write; /* Data Units Written */
- nvme_uint128_t hl_host_read; /* Host Read Commands */
- nvme_uint128_t hl_host_write; /* Host Write Commands */
- nvme_uint128_t hl_ctrl_busy; /* Controller Busy Time */
- nvme_uint128_t hl_power_cycles; /* Power Cycles */
- nvme_uint128_t hl_power_on_hours; /* Power On Hours */
- nvme_uint128_t hl_unsafe_shutdn; /* Unsafe Shutdowns */
- nvme_uint128_t hl_media_errors; /* Media Errors */
- nvme_uint128_t hl_errors_logged; /* Number of errors logged */
- uint8_t hl_rsvd2[512 - 192];
-} nvme_health_log_t;
-
-typedef struct {
- uint8_t fw_afi:3; /* Active Firmware Slot */
- uint8_t fw_rsvd1:5;
- uint8_t fw_rsvd2[7];
- char fw_frs[7][8]; /* Firmware Revision / Slot */
- uint8_t fw_rsvd3[512 - 64];
-} nvme_fwslot_log_t;
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h
index fd6f93af88..651adaec8c 100644
--- a/usr/src/uts/common/io/nvme/nvme_var.h
+++ b/usr/src/uts/common/io/nvme/nvme_var.h
@@ -27,7 +27,7 @@
*/
#ifdef __cplusplus
-/* extern "C" { */
+extern "C" {
#endif
#define NVME_FMA_INIT 0x1
@@ -47,11 +47,18 @@
typedef struct nvme nvme_t;
typedef struct nvme_namespace nvme_namespace_t;
+typedef struct nvme_minor_state nvme_minor_state_t;
typedef struct nvme_dma nvme_dma_t;
typedef struct nvme_cmd nvme_cmd_t;
typedef struct nvme_qpair nvme_qpair_t;
typedef struct nvme_task_arg nvme_task_arg_t;
+/*
+ * Open state of one character minor node: the devctl node (nvme_t n_minor)
+ * or a namespace attachment point (nvme_namespace_t ns_minor).
+ */
+struct nvme_minor_state {
+ kmutex_t nm_mutex; /* held while nm_oexcl/nm_ocnt are read or changed */
+ boolean_t nm_oexcl; /* set by an FEXCL open, cleared on close */
+ uint_t nm_ocnt; /* incremented on open, decremented on close */
+};
+
struct nvme_dma {
ddi_dma_handle_t nd_dmah;
ddi_acc_handle_t nd_acch;
@@ -69,6 +76,7 @@ struct nvme_cmd {
void (*nc_callback)(void *);
bd_xfer_t *nc_xfer;
boolean_t nc_completed;
+ boolean_t nc_dontpanic;
uint16_t nc_sqid;
nvme_dma_t *nc_dma;
@@ -137,6 +145,8 @@ struct nvme {
boolean_t n_write_cache_present;
boolean_t n_write_cache_enabled;
int n_error_log_len;
+ boolean_t n_lba_range_supported;
+ boolean_t n_auto_pst_supported;
int n_nssr_supported;
int n_doorbell_stride;
@@ -168,9 +178,8 @@ struct nvme {
ddi_taskq_t *n_cmd_taskq;
- nvme_error_log_entry_t *n_error_log;
- nvme_health_log_t *n_health_log;
- nvme_fwslot_log_t *n_fwslot_log;
+ /* state for devctl minor node */
+ nvme_minor_state_t n_minor;
/* errors detected by driver */
uint32_t n_dma_bind_err;
@@ -217,6 +226,7 @@ struct nvme {
struct nvme_namespace {
nvme_t *ns_nvme;
uint8_t ns_eui64[8];
+ char ns_name[17];
bd_handle_t ns_bd_hdl;
@@ -229,6 +239,9 @@ struct nvme_namespace {
nvme_identify_nsid_t *ns_idns;
+ /* state for attachment point minor node */
+ nvme_minor_state_t ns_minor;
+
/*
* If a namespace has no EUI64, we create a devid in
* nvme_prepare_devid().
@@ -241,8 +254,9 @@ struct nvme_task_arg {
nvme_cmd_t *nt_cmd;
};
+
#ifdef __cplusplus
-/* } */
+}
#endif
#endif /* _NVME_VAR_H */
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 1c7662c28a..7ce40a658a 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -38,6 +38,7 @@ FILEMODE=644
# neither installed or shipped as part of the product:
# cpuid_drv.h: Private interface for cpuid consumers
# unix_bb_info.h: Private interface to kcov
+# nvme.h: Private interface to nvme
#
i386_HDRS= \
@@ -54,6 +55,7 @@ i386_HDRS= \
firmload.h \
gfx_private.h \
mouse.h \
+ nvme.h \
ucode.h
sparc_HDRS= \
@@ -422,6 +424,7 @@ CHKHDRS= \
nexusdefs.h \
note.h \
null.h \
+ nvme.h \
nvpair.h \
nvpair_impl.h \
objfs.h \
diff --git a/usr/src/uts/common/sys/nvme.h b/usr/src/uts/common/sys/nvme.h
new file mode 100644
index 0000000000..916b439f3f
--- /dev/null
+++ b/usr/src/uts/common/sys/nvme.h
@@ -0,0 +1,574 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#ifndef _SYS_NVME_H
+#define _SYS_NVME_H
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/types32.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Declarations used for communication between nvmeadm(1M) and nvme(7D)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * NVMe ioctl definitions
+ */
+
+#define NVME_IOC (('N' << 24) | ('V' << 16) | ('M' << 8)) /* base; low byte is 0 */
+#define NVME_IOC_IDENTIFY_CTRL (NVME_IOC | 1)
+#define NVME_IOC_IDENTIFY_NSID (NVME_IOC | 2)
+#define NVME_IOC_CAPABILITIES (NVME_IOC | 3)
+#define NVME_IOC_GET_LOGPAGE (NVME_IOC | 4)
+#define NVME_IOC_GET_FEATURES (NVME_IOC | 5)
+#define NVME_IOC_INTR_CNT (NVME_IOC | 6)
+#define NVME_IOC_VERSION (NVME_IOC | 7)
+#define NVME_IOC_FORMAT (NVME_IOC | 8)
+#define NVME_IOC_DETACH (NVME_IOC | 9)
+#define NVME_IOC_ATTACH (NVME_IOC | 10)
+#define NVME_IOC_MAX NVME_IOC_ATTACH /* highest command defined here */
+
+#define IS_NVME_IOC(x) ((x) > NVME_IOC && (x) <= NVME_IOC_MAX) /* base itself excluded; x evaluated twice */
+#define NVME_IOC_CMD(x) ((x) & 0xff) /* low byte, i.e. the command number */
+
+typedef struct {
+ size_t n_len; /* NOTE(review): presumably size of the buffer at n_buf; confirm */
+ uintptr_t n_buf; /* NOTE(review): presumably a buffer address; confirm */
+ uint64_t n_arg; /* NOTE(review): meaning looks command-specific; confirm */
+} nvme_ioctl_t; /* n_len/n_buf widths depend on the data model */
+
+#ifdef _KERNEL
+typedef struct { /* nvme_ioctl_t members with fixed 32-bit size/pointer types */
+ size32_t n_len;
+ uintptr32_t n_buf;
+ uint64_t n_arg;
+} nvme_ioctl32_t;
+#endif /* _KERNEL */
+
+/*
+ * NVMe capabilities
+ */
+typedef struct {
+ uint32_t mpsmax; /* Memory Page Size Maximum */
+ uint32_t mpsmin; /* Memory Page Size Minimum */
+} nvme_capabilities_t; /* NOTE(review): units of both fields not stated here; confirm */
+
+/*
+ * NVMe version
+ */
+typedef struct {
+ uint16_t v_minor; /* minor number; stored ahead of the major number */
+ uint16_t v_major; /* major number */
+} nvme_version_t;
+
+#define NVME_VERSION_ATLEAST(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor >= (min))) /* *v >= maj.min; v is a pointer, evaluated more than once */
+
+#define NVME_VERSION_HIGHER(v, maj, min) \
+ (((v)->v_major) > (maj) || \
+ ((v)->v_major == (maj) && (v)->v_minor > (min))) /* *v > maj.min; v is a pointer, evaluated more than once */
+
+
+#pragma pack(1)
+
+/*
+ * NVMe Identify data structures
+ */
+
+#define NVME_IDENTIFY_BUFSIZE 4096 /* buffer size for Identify */
+
+/* NVMe Queue Entry Size bitfield */
+typedef struct {
+ uint8_t qes_min:4; /* minimum entry size */
+ uint8_t qes_max:4; /* maximum entry size */
+} nvme_idctl_qes_t; /* two 4-bit fields sharing one byte */
+
+/* NVMe Power State Descriptor */
+typedef struct {
+ uint16_t psd_mp; /* Maximum Power */
+ uint8_t psd_rsvd1;
+ uint8_t psd_mps:1; /* Max Power Scale (1.1) */
+ uint8_t psd_nops:1; /* Non-Operational State (1.1) */
+ uint8_t psd_rsvd2:6;
+ uint32_t psd_enlat; /* Entry Latency */
+ uint32_t psd_exlat; /* Exit Latency */
+ uint8_t psd_rrt:5; /* Relative Read Throughput */
+ uint8_t psd_rsvd3:3;
+ uint8_t psd_rrl:5; /* Relative Read Latency */
+ uint8_t psd_rsvd4:3;
+ uint8_t psd_rwt:5; /* Relative Write Throughput */
+ uint8_t psd_rsvd5:3;
+ uint8_t psd_rwl:5; /* Relative Write Latency */
+ uint8_t psd_rsvd6:3;
+ uint8_t psd_rsvd7[16]; /* pads the descriptor from 16 to 32 bytes */
+} nvme_idctl_psd_t;
+
+/* NVMe Identify Controller Data Structure */
+typedef struct {
+ /* Controller Capabilities & Features */
+ uint16_t id_vid; /* PCI vendor ID */
+ uint16_t id_ssvid; /* PCI subsystem vendor ID */
+ char id_serial[20]; /* Serial Number */
+ char id_model[40]; /* Model Number */
+ char id_fwrev[8]; /* Firmware Revision */
+ uint8_t id_rab; /* Recommended Arbitration Burst */
+ uint8_t id_oui[3]; /* vendor IEEE OUI */
+ struct { /* Multi-Interface Capabilities */
+ uint8_t m_multi_pci:1; /* HW has multiple PCIe interfaces */
+ uint8_t m_multi_ctrl:1; /* HW has multiple controllers (1.1) */
+ uint8_t m_sr_iov:1; /* controller is SR-IOV virt fn (1.1) */
+ uint8_t m_rsvd:5;
+ } id_mic;
+ uint8_t id_mdts; /* Maximum Data Transfer Size */
+ uint16_t id_cntlid; /* Unique Controller Identifier (1.1) */
+ uint8_t id_rsvd_cc[256 - 80]; /* pads this section to 256 bytes (80 used) */
+
+ /* Admin Command Set Attributes */
+ struct { /* Optional Admin Command Support */
+ uint16_t oa_security:1; /* Security Send & Receive */
+ uint16_t oa_format:1; /* Format NVM */
+ uint16_t oa_firmware:1; /* Firmware Activate & Download */
+ uint16_t oa_rsvd:13;
+ } id_oacs;
+ uint8_t id_acl; /* Abort Command Limit */
+ uint8_t id_aerl; /* Asynchronous Event Request Limit */
+ struct { /* Firmware Updates */
+ uint8_t fw_readonly:1; /* Slot 1 is Read-Only */
+ uint8_t fw_nslot:3; /* number of firmware slots */
+ uint8_t fw_rsvd:4;
+ } id_frmw;
+ struct { /* Log Page Attributes */
+ uint8_t lp_smart:1; /* SMART/Health information per NS */
+ uint8_t lp_rsvd:7;
+ } id_lpa;
+ uint8_t id_elpe; /* Error Log Page Entries */
+ uint8_t id_npss; /* Number of Power States */
+ struct { /* Admin Vendor Specific Command Conf */
+ uint8_t av_spec:1; /* use format from spec */
+ uint8_t av_rsvd:7;
+ } id_avscc;
+ struct { /* Autonomous Power State Trans (1.1) */
+ uint8_t ap_sup:1; /* APST supported (1.1) */
+ uint8_t ap_rsvd:7;
+ } id_apsta;
+ uint8_t id_rsvd_ac[256 - 10]; /* pads this section to 256 bytes (10 used) */
+
+ /* NVM Command Set Attributes */
+ nvme_idctl_qes_t id_sqes; /* Submission Queue Entry Size */
+ nvme_idctl_qes_t id_cqes; /* Completion Queue Entry Size */
+ uint16_t id_rsvd_nc_1;
+ uint32_t id_nn; /* Number of Namespaces */
+ struct { /* Optional NVM Command Support */
+ uint16_t on_compare:1; /* Compare */
+ uint16_t on_wr_unc:1; /* Write Uncorrectable */
+ uint16_t on_dset_mgmt:1; /* Dataset Management */
+ uint16_t on_wr_zero:1; /* Write Zeroes (1.1) */
+ uint16_t on_save:1; /* Save/Select in Get/Set Feat (1.1) */
+ uint16_t on_reserve:1; /* Reservations (1.1) */
+ uint16_t on_rsvd:10;
+ } id_oncs;
+ struct { /* Fused Operation Support */
+ uint16_t f_cmp_wr:1; /* Compare and Write */
+ uint16_t f_rsvd:15;
+ } id_fuses;
+ struct { /* Format NVM Attributes */
+ uint8_t fn_format:1; /* Format applies to all NS */
+ uint8_t fn_sec_erase:1; /* Secure Erase applies to all NS */
+ uint8_t fn_crypt_erase:1; /* Cryptographic Erase supported */
+ uint8_t fn_rsvd:5;
+ } id_fna;
+ struct { /* Volatile Write Cache */
+ uint8_t vwc_present:1; /* Volatile Write Cache present */
+ uint8_t rsvd:7;
+ } id_vwc;
+ uint16_t id_awun; /* Atomic Write Unit Normal */
+ uint16_t id_awupf; /* Atomic Write Unit Power Fail */
+ struct { /* NVM Vendor Specific Command Conf */
+ uint8_t nv_spec:1; /* use format from spec */
+ uint8_t nv_rsvd:7;
+ } id_nvscc;
+ uint8_t id_rsvd_nc_2;
+ uint16_t id_acwu; /* Atomic Compare & Write Unit (1.1) */
+ uint16_t id_rsvd_nc_3;
+ struct { /* SGL Support (1.1) */
+ uint16_t sgl_sup:1; /* SGL Supported in NVM cmds (1.1) */
+ uint16_t sgl_rsvd1:15;
+ uint16_t sgl_bucket:1; /* SGL Bit Bucket supported (1.1) */
+ uint16_t sgl_rsvd2:15;
+ } id_sgls;
+ uint8_t id_rsvd_nc_4[192 - 28]; /* pads this section to 192 bytes (28 used) */
+
+ /* I/O Command Set Attributes */
+ uint8_t id_rsvd_ioc[1344]; /* byte offsets 704-2047 */
+
+ /* Power State Descriptors */
+ nvme_idctl_psd_t id_psd[32]; /* 32 descriptors, byte offsets 2048-3071 */
+
+ /* Vendor Specific */
+ uint8_t id_vs[1024]; /* byte offsets 3072-4095 */
+} nvme_identify_ctrl_t; /* members add up to 4096 bytes, the value of NVME_IDENTIFY_BUFSIZE */
+
+/* NVMe Identify Namespace LBA Format */
+typedef struct {
+ uint16_t lbaf_ms; /* Metadata Size */
+ uint8_t lbaf_lbads; /* LBA Data Size */
+ uint8_t lbaf_rp:2; /* Relative Performance */
+ uint8_t lbaf_rsvd1:6;
+} nvme_idns_lbaf_t; /* 4 bytes per LBA format */
+
+/* NVMe Identify Namespace Data Structure */
+typedef struct {
+ uint64_t id_nsize; /* Namespace Size */
+ uint64_t id_ncap; /* Namespace Capacity */
+ uint64_t id_nuse; /* Namespace Utilization */
+ struct { /* Namespace Features */
+ uint8_t f_thin:1; /* Thin Provisioning */
+ uint8_t f_rsvd:7;
+ } id_nsfeat;
+ uint8_t id_nlbaf; /* Number of LBA formats */
+ struct { /* Formatted LBA size */
+ uint8_t lba_format:4; /* LBA format */
+ uint8_t lba_extlba:1; /* extended LBA (includes metadata) */
+ uint8_t lba_rsvd:3;
+ } id_flbas;
+ struct { /* Metadata Capabilities */
+ uint8_t mc_extlba:1; /* extended LBA transfers */
+ uint8_t mc_separate:1; /* separate metadata transfers */
+ uint8_t mc_rsvd:6;
+ } id_mc;
+ struct { /* Data Protection Capabilities */
+ uint8_t dp_type1:1; /* Protection Information Type 1 */
+ uint8_t dp_type2:1; /* Protection Information Type 2 */
+ uint8_t dp_type3:1; /* Protection Information Type 3 */
+ uint8_t dp_first:1; /* first 8 bytes of metadata */
+ uint8_t dp_last:1; /* last 8 bytes of metadata */
+ uint8_t dp_rsvd:3;
+ } id_dpc;
+ struct { /* Data Protection Settings */
+ uint8_t dp_pinfo:3; /* Protection Information enabled */
+ uint8_t dp_first:1; /* first 8 bytes of metadata */
+ uint8_t dp_rsvd:4;
+ } id_dps;
+ struct { /* NS Multi-Path/Sharing Cap (1.1) */
+ uint8_t nm_shared:1; /* NS is shared (1.1) */
+ uint8_t nm_rsvd:7;
+ } id_nmic;
+ struct { /* Reservation Capabilities (1.1) */
+ uint8_t rc_persist:1; /* Persist Through Power Loss (1.1) */
+ uint8_t rc_wr_excl:1; /* Write Exclusive (1.1) */
+ uint8_t rc_excl:1; /* Exclusive Access (1.1) */
+ uint8_t rc_wr_excl_r:1; /* Wr Excl - Registrants Only (1.1) */
+ uint8_t rc_excl_r:1; /* Excl Acc - Registrants Only (1.1) */
+ uint8_t rc_wr_excl_a:1; /* Wr Excl - All Registrants (1.1) */
+ uint8_t rc_excl_a:1; /* Excl Acc - All Registrants (1.1) */
+ uint8_t rc_rsvd:1;
+ } id_rescap;
+ uint8_t id_rsvd1[120 - 32]; /* pads up to byte offset 120 (32 used) */
+ uint8_t id_eui64[8]; /* IEEE Extended Unique Id (1.1) */
+ nvme_idns_lbaf_t id_lbaf[16]; /* LBA Formats */
+
+ uint8_t id_rsvd2[192]; /* byte offsets 192-383 */
+
+ uint8_t id_vs[3712]; /* Vendor Specific */
+} nvme_identify_nsid_t; /* members add up to 4096 bytes, the value of NVME_IDENTIFY_BUFSIZE */
+
+
+/*
+ * NVMe completion queue entry status field
+ */
+typedef struct {
+ uint16_t sf_p:1; /* Phase Tag */
+ uint16_t sf_sc:8; /* Status Code */
+ uint16_t sf_sct:3; /* Status Code Type */
+ uint16_t sf_rsvd2:2;
+ uint16_t sf_m:1; /* More */
+ uint16_t sf_dnr:1; /* Do Not Retry */
+} nvme_cqe_sf_t; /* bit widths add up to 16 */
+
+
+/*
+ * NVMe Get Log Page: log page identifiers
+ */
+#define NVME_LOGPAGE_ERROR 0x1 /* Error Information */
+#define NVME_LOGPAGE_HEALTH 0x2 /* SMART/Health Information */
+#define NVME_LOGPAGE_FWSLOT 0x3 /* Firmware Slot Information */
+
+typedef struct {
+ uint64_t el_count; /* Error Count */
+ uint16_t el_sqid; /* Submission Queue ID */
+ uint16_t el_cid; /* Command ID */
+ nvme_cqe_sf_t el_sf; /* Status Field */
+ uint8_t el_byte; /* Parameter Error Location byte */
+ uint8_t el_bit:3; /* Parameter Error Location bit */
+ uint8_t el_rsvd1:5;
+ uint64_t el_lba; /* Logical Block Address */
+ uint32_t el_nsid; /* Namespace ID */
+ uint8_t el_vendor; /* Vendor Specific Information avail */
+ uint8_t el_rsvd2[64 - 29]; /* pads the entry to 64 bytes (29 used) */
+} nvme_error_log_entry_t;
+
+typedef struct {
+ uint64_t lo; /* lower half, stored first */
+ uint64_t hi; /* upper half */
+} nvme_uint128_t;
+
+typedef struct {
+ struct { /* Critical Warning */
+ uint8_t cw_avail:1; /* available space too low */
+ uint8_t cw_temp:1; /* temperature too high */
+ uint8_t cw_reliab:1; /* degraded reliability */
+ uint8_t cw_readonly:1; /* media is read-only */
+ uint8_t cw_volatile:1; /* volatile memory backup failed */
+ uint8_t cw_rsvd:3;
+ } hl_crit_warn;
+ uint16_t hl_temp; /* Temperature */
+ uint8_t hl_avail_spare; /* Available Spare */
+ uint8_t hl_avail_spare_thr; /* Available Spare Threshold */
+ uint8_t hl_used; /* Percentage Used */
+ uint8_t hl_rsvd1[32 - 6]; /* pads up to byte offset 32 (6 used) */
+ nvme_uint128_t hl_data_read; /* Data Units Read */
+ nvme_uint128_t hl_data_write; /* Data Units Written */
+ nvme_uint128_t hl_host_read; /* Host Read Commands */
+ nvme_uint128_t hl_host_write; /* Host Write Commands */
+ nvme_uint128_t hl_ctrl_busy; /* Controller Busy Time */
+ nvme_uint128_t hl_power_cycles; /* Power Cycles */
+ nvme_uint128_t hl_power_on_hours; /* Power On Hours */
+ nvme_uint128_t hl_unsafe_shutdn; /* Unsafe Shutdowns */
+ nvme_uint128_t hl_media_errors; /* Media Errors */
+ nvme_uint128_t hl_errors_logged; /* Number of errors logged */
+ uint8_t hl_rsvd2[512 - 192]; /* pads the page to 512 bytes (192 used) */
+} nvme_health_log_t;
+
+typedef struct {
+ uint8_t fw_afi:3; /* Active Firmware Slot */
+ uint8_t fw_rsvd1:5;
+ uint8_t fw_rsvd2[7];
+ char fw_frs[7][8]; /* Firmware Revision / Slot: 7 entries of 8 chars */
+ uint8_t fw_rsvd3[512 - 64]; /* pads the page to 512 bytes (64 used) */
+} nvme_fwslot_log_t;
+
+
+/*
+ * NVMe Format NVM
+ */
+#define NVME_FRMT_SES_NONE 0
+#define NVME_FRMT_SES_USER 1
+#define NVME_FRMT_SES_CRYPTO 2
+#define NVME_FRMT_MAX_SES 2 /* highest NVME_FRMT_SES_* value above */
+
+#define NVME_FRMT_MAX_LBAF 15 /* largest value the 4-bit fm_lbaf field can hold */
+
+typedef union {
+ struct {
+ uint32_t fm_lbaf:4; /* LBA Format */
+ uint32_t fm_ms:1; /* Metadata Settings */
+ uint32_t fm_pi:3; /* Protection Information */
+ uint32_t fm_pil:1; /* Prot. Information Location */
+ uint32_t fm_ses:3; /* Secure Erase Settings */
+ uint32_t fm_resvd:20;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_format_nvm_t;
+
+
+/*
+ * NVMe Get / Set Features: feature identifiers
+ */
+#define NVME_FEAT_ARBITRATION 0x1 /* Command Arbitration */
+#define NVME_FEAT_POWER_MGMT 0x2 /* Power Management */
+#define NVME_FEAT_LBA_RANGE 0x3 /* LBA Range Type */
+#define NVME_FEAT_TEMPERATURE 0x4 /* Temperature Threshold */
+#define NVME_FEAT_ERROR 0x5 /* Error Recovery */
+#define NVME_FEAT_WRITE_CACHE 0x6 /* Volatile Write Cache */
+#define NVME_FEAT_NQUEUES 0x7 /* Number of Queues */
+#define NVME_FEAT_INTR_COAL 0x8 /* Interrupt Coalescing */
+#define NVME_FEAT_INTR_VECT 0x9 /* Interrupt Vector Configuration */
+#define NVME_FEAT_WRITE_ATOM 0xa /* Write Atomicity */
+#define NVME_FEAT_ASYNC_EVENT 0xb /* Asynchronous Event Configuration */
+#define NVME_FEAT_AUTO_PST 0xc /* Autonomous Power State Transition */
+ /* (1.1) */
+
+#define NVME_FEAT_PROGRESS 0x80 /* Software Progress Marker */
+
+/* Arbitration Feature */
+typedef union {
+ struct {
+ uint8_t arb_ab:3; /* Arbitration Burst */
+ uint8_t arb_rsvd:5;
+ uint8_t arb_lpw; /* Low Priority Weight */
+ uint8_t arb_mpw; /* Medium Priority Weight */
+ uint8_t arb_hpw; /* High Priority Weight */
+ } b;
+ uint32_t r; /* the same four bytes as one 32-bit word */
+} nvme_arbitration_t;
+
+/* Power Management Feature */
+typedef union {
+ struct {
+ uint32_t pm_ps:5; /* Power State */
+ uint32_t pm_rsvd:27;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_power_mgmt_t;
+
+/* LBA Range Type Feature */
+typedef union {
+ struct {
+ uint32_t lr_num:6; /* Number of LBA ranges */
+ uint32_t lr_rsvd:26;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_lba_range_type_t;
+
+typedef struct {
+ uint8_t lr_type; /* Type */
+ struct { /* Attributes */
+ uint8_t lr_write:1; /* may be overwritten */
+ uint8_t lr_hidden:1; /* hidden from OS/EFI/BIOS */
+ uint8_t lr_rsvd1:6;
+ } lr_attr;
+ uint8_t lr_rsvd2[14];
+ uint64_t lr_slba; /* Starting LBA */
+ uint64_t lr_nlb; /* Number of Logical Blocks */
+ uint8_t lr_guid[16]; /* Unique Identifier */
+ uint8_t lr_rsvd3[16];
+} nvme_lba_range_t; /* 64 bytes per entry */
+
+#define NVME_LBA_RANGE_BUFSIZE 4096 /* 64 times the size of nvme_lba_range_t */
+
+/* Temperature Threshold Feature */
+typedef union {
+ struct {
+ uint16_t tt_tmpth; /* Temperature Threshold */
+ uint16_t tt_rsvd;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_temp_threshold_t;
+
+/* Error Recovery Feature */
+typedef union {
+ struct {
+ uint16_t er_tler; /* Time-Limited Error Recovery */
+ uint16_t er_rsvd;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_error_recovery_t;
+
+/* Volatile Write Cache Feature */
+typedef union {
+ struct {
+ uint32_t wc_wce:1; /* Volatile Write Cache Enable */
+ uint32_t wc_rsvd:31;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_write_cache_t;
+
+/* Number of Queues Feature */
+typedef union {
+ struct {
+ uint16_t nq_nsq; /* Number of Submission Queues */
+ uint16_t nq_ncq; /* Number of Completion Queues */
+ } b;
+ uint32_t r; /* both counts as one 32-bit word */
+} nvme_nqueues_t;
+
+/* Interrupt Coalescing Feature */
+typedef union {
+ struct {
+ uint8_t ic_thr; /* Aggregation Threshold */
+ uint8_t ic_time; /* Aggregation Time */
+ uint16_t ic_rsvd;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_intr_coal_t;
+
+/* Interrupt Vector Configuration Feature */
+typedef union {
+ struct {
+ uint16_t iv_iv; /* Interrupt Vector */
+ uint16_t iv_cd:1; /* Coalescing Disable */
+ uint16_t iv_rsvd:15;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_intr_vect_t;
+
+/* Write Atomicity Feature */
+typedef union {
+ struct {
+ uint32_t wa_dn:1; /* Disable Normal */
+ uint32_t wa_rsvd:31;
+ } b;
+ uint32_t r; /* the same 32 bits as one word */
+} nvme_write_atomicity_t;
+
+/* Asynchronous Event Configuration Feature */
+typedef union {
+ struct { /* flags listed in the same order as hl_crit_warn in nvme_health_log_t */
+ uint8_t aec_avail:1; /* available space too low */
+ uint8_t aec_temp:1; /* temperature too high */
+ uint8_t aec_reliab:1; /* degraded reliability */
+ uint8_t aec_readonly:1; /* media is read-only */
+ uint8_t aec_volatile:1; /* volatile memory backup failed */
+ uint8_t aec_rsvd1:3;
+ uint8_t aec_rsvd2[3];
+ } b;
+ uint32_t r; /* the same four bytes as one 32-bit word */
+} nvme_async_event_conf_t;
+
+/* Autonomous Power State Transition Feature (1.1) */
+typedef union {
+ struct {
+ uint8_t apst_apste:1; /* APST enabled */
+ uint8_t apst_rsvd:7;
+ } b;
+ uint8_t r; /* NOTE(review): 8 bits wide, unlike the 32-bit 'r' of the other feature unions; confirm intended */
+} nvme_auto_power_state_trans_t;
+
+typedef struct {
+ uint32_t apst_rsvd1:3;
+ uint32_t apst_itps:5; /* Idle Transition Power State */
+ uint32_t apst_itpt:24; /* Idle Time Prior to Transition */
+ uint32_t apst_rsvd2;
+} nvme_auto_power_state_t; /* 8 bytes per entry */
+
+#define NVME_AUTO_PST_BUFSIZE 256 /* 32 times the size of nvme_auto_power_state_t */
+
+/* Software Progress Marker Feature */
+typedef union {
+ struct {
+ uint8_t spm_pbslc; /* Pre-Boot Software Load Count */
+ uint8_t spm_rsvd[3];
+ } b;
+ uint32_t r; /* the same four bytes as one 32-bit word */
+} nvme_software_progress_marker_t;
+
+#pragma pack() /* pack(1) */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_NVME_H */
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index d5e52dbbfc..1d94c8fd2c 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -212,6 +212,8 @@ extern "C" {
#define DDI_NT_NEXUS "ddi_ctl:devctl" /* nexus drivers */
+#define DDI_NT_NVME_NEXUS "ddi_ctl:devctl:nvme" /* nexus drivers */
+
#define DDI_NT_SCSI_NEXUS "ddi_ctl:devctl:scsi" /* nexus drivers */
#define DDI_NT_SATA_NEXUS "ddi_ctl:devctl:sata" /* nexus drivers */
@@ -220,6 +222,9 @@ extern "C" {
#define DDI_NT_ATTACHMENT_POINT "ddi_ctl:attachment_point" /* attachment pt */
+#define DDI_NT_NVME_ATTACHMENT_POINT "ddi_ctl:attachment_point:nvme"
+ /* nvme attachment pt */
+
#define DDI_NT_SCSI_ATTACHMENT_POINT "ddi_ctl:attachment_point:scsi"
/* scsi attachment pt */