diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-08-26 11:19:45 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-08-26 11:19:45 +0000 |
commit | b27adfefbf4be653598563c490246150f9ce471d (patch) | |
tree | 242edb1e209decd5756874adb7dd356359d15b38 /usr/src | |
parent | c89aa2cda9c4c5a89db5c7b85f420548cd437c12 (diff) | |
parent | 0ddc0ebb74cedb0ac394818c6e166c47eb8e62e5 (diff) | |
download | illumos-joyent-b27adfefbf4be653598563c490246150f9ce471d.tar.gz |
[illumos-gate merge]
commit ff892b7ce7155120a08759aeabce0b4cd24bc867
7258 6951 caused uberdata32_t size to be incorrect
commit fbc2697c538d75e4d5d938d24a995afa043c99d2
7286 sata doesn't support 4knative disks
commit 910f0d12b47aeda4ed059254cc4af0d8c272d0ba
7315 nvme queue DMA attribute count_max is 0-based
commit e8ba2a389f6ca6999ca72dabbe2871e894bf6b67
7313 bump nvme admin command timeout to 1s
commit 2f95345b6f2a0bd2d48718fe10e82e351cb920c6
7312 zfs checksum errors observed in a zpool full of NVMe SSDs
6908 Samsung SSD SM951-NVMe shows checksum errors
commit 75b41617efad806d8ab3d1866425189c0b0bc6aa
7306 nvme ignores interrupt enabling failure
commit bf26ea4b8945ba545fde8b47a2a31ce2d11b82cd
7296 nvme initial interrupt issues
commit 34c938c74e6f278ee870d39330b571ffea1b808e
7294 several small nvme fixes from Tegile
commit 0d140ff944190d3ff2fdba65e9864f3c03248162
7279 nvme.c: idns->id_nlbaf is a 0's based value.
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/lib/libc/inc/thr_uberdata.h | 9 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme.c | 104 | ||||
-rw-r--r-- | usr/src/uts/common/io/nvme/nvme_var.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/io/sata/impl/sata.c | 42 |
4 files changed, 97 insertions, 61 deletions
diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index ac7af4ce29..678f68895f 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -905,15 +905,24 @@ typedef struct _qexthdlr { _quick_exithdlr_func_t hdlr; /* handler itself */ } _qexthdlr_t; +/* + * We add a pad on 32-bit systems to allow us to always have the structure size + * be 32-bytes which helps us deal with the compiler's alignment when building + * in ILP32 / LP64 systems. + */ typedef struct { mutex_t exitfns_lock; _qexthdlr_t *head; +#if !defined(_LP64) + uint32_t pad; +#endif } quickexit_root_t; #ifdef _SYSCALL32 typedef struct { mutex_t exitfns_lock; caddr32_t head; + uint32_t pad; } quickexit_root32_t; #endif /* _SYSCALL32 */ diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c index 1cb0389063..41ee79f0ed 100644 --- a/usr/src/uts/common/io/nvme/nvme.c +++ b/usr/src/uts/common/io/nvme/nvme.c @@ -11,6 +11,8 @@ /* * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Tegile Systems, Inc. All rights reserved. + * Copyright (c) 2016 The MathWorks, Inc. All rights reserved. */ /* @@ -190,6 +192,9 @@ static const int nvme_version_major = 1; static const int nvme_version_minor = 0; +/* tunable for admin command timeout in seconds, default is 1s */ +static volatile int nvme_admin_cmd_timeout = 1; + static int nvme_attach(dev_info_t *, ddi_attach_cmd_t); static int nvme_detach(dev_info_t *, ddi_detach_cmd_t); static int nvme_quiesce(dev_info_t *); @@ -268,7 +273,7 @@ static ddi_dma_attr_t nvme_queue_dma_attr = { .dma_attr_version = DMA_ATTR_V0, .dma_attr_addr_lo = 0, .dma_attr_addr_hi = 0xffffffffffffffffULL, - .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t), + .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1, .dma_attr_align = 0x1000, .dma_attr_burstsizes = 0x7ff, .dma_attr_minxfer = 0x1000, @@ -296,7 +301,7 @@ static ddi_dma_attr_t nvme_prp_dma_attr = { .dma_attr_burstsizes = 0x7ff, .dma_attr_minxfer = 0x1000, .dma_attr_maxxfer = 0x1000, - .dma_attr_seg = 0xffffffffffffffffULL, + .dma_attr_seg = 0xfff, .dma_attr_sgllen = -1, .dma_attr_granular = 1, .dma_attr_flags = 0, @@ -782,7 +787,7 @@ nvme_check_vendor_cmd_status(nvme_cmd_t *cmd) "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc, cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct, cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m); - if (cmd->nc_nvme->n_ignore_unknown_vendor_status) { + if (!cmd->nc_nvme->n_ignore_unknown_vendor_status) { cmd->nc_nvme->n_dead = B_TRUE; ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST); } @@ -1083,7 +1088,7 @@ nvme_abort_cmd(nvme_cmd_t *abort_cmd) * Send the ABORT to the hardware. The ABORT command will return _after_ * the aborted command has completed (aborted or otherwise). */ - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { sema_v(&nvme->n_abort_sema); dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for ABORT"); @@ -1117,9 +1122,9 @@ nvme_abort_cmd(nvme_cmd_t *abort_cmd) * will be declared dead and FMA will be notified. */ static boolean_t -nvme_wait_cmd(nvme_cmd_t *cmd, uint_t usec) +nvme_wait_cmd(nvme_cmd_t *cmd, uint_t sec) { - clock_t timeout = ddi_get_lbolt() + drv_usectohz(usec); + clock_t timeout = ddi_get_lbolt() + drv_usectohz(sec * MICROSEC); nvme_t *nvme = cmd->nc_nvme; nvme_reg_csts_t csts; @@ -1353,7 +1358,7 @@ nvme_async_event_task(void *arg) } static int -nvme_admin_cmd(nvme_cmd_t *cmd, int usec) +nvme_admin_cmd(nvme_cmd_t *cmd, int sec) { int ret; @@ -1369,7 +1374,7 @@ nvme_admin_cmd(nvme_cmd_t *cmd, int usec) return (DDI_FAILURE); } - if (nvme_wait_cmd(cmd, usec) == B_FALSE) { + if (nvme_wait_cmd(cmd, sec) == B_FALSE) { /* * The command timed out. An abort command was posted that * will take care of the cleanup. @@ -1408,7 +1413,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...) { nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); void *buf = NULL; - nvme_getlogpage_t getlogpage; + nvme_getlogpage_t getlogpage = { 0 }; size_t bufsize; va_list ap; @@ -1472,7 +1477,7 @@ nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...) cmd->nc_dma->nd_cookie.dmac_laddress; } - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for GET LOG PAGE"); return (NULL); @@ -1528,7 +1533,7 @@ nvme_identify(nvme_t *nvme, uint32_t nsid) cmd->nc_dma->nd_cookie.dmac_laddress; } - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for IDENTIFY"); return (NULL); @@ -1556,7 +1561,7 @@ nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues) nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); nvme_nqueue_t nq = { 0 }; - nq.b.nq_nsq = nq.b.nq_ncq = nqueues; + nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1; cmd->nc_sqid = 0; cmd->nc_callback = nvme_wakeup_cmd; @@ -1564,7 +1569,7 @@ nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues) cmd->nc_sqe.sqe_cdw10 = NVME_FEAT_NQUEUES; cmd->nc_sqe.sqe_cdw11 = nq.r; - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for SET FEATURES (NQUEUES)"); return (0); @@ -1585,7 +1590,7 @@ nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues) * Always use the same number of submission and completion queues, and * never use more than the requested number of queues. */ - return (MIN(nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq))); + return (MIN(nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq) + 1)); } static int @@ -1610,7 +1615,7 @@ nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx) cmd->nc_sqe.sqe_cdw11 = c_dw11.r; cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_cqdma->nd_cookie.dmac_laddress; - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for CREATE CQUEUE"); return (DDI_FAILURE); @@ -1637,7 +1642,7 @@ nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx) cmd->nc_sqe.sqe_cdw11 = s_dw11.r; cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress; - if (nvme_admin_cmd(cmd, NVME_ADMIN_CMD_TIMEOUT) != DDI_SUCCESS) { + if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!nvme_admin_cmd failed for CREATE SQUEUE"); return (DDI_FAILURE); @@ -1746,14 +1751,6 @@ nvme_init(nvme_t *nvme) char model[sizeof (nvme->n_idctl->id_model) + 1]; char *vendor, *product; - /* Setup fixed interrupt for admin queue. */ - if (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1) - != DDI_SUCCESS) { - dev_err(nvme->n_dip, CE_WARN, - "!failed to setup fixed interrupt"); - goto fail; - } - /* Check controller version */ vs.r = nvme_get32(nvme, NVME_REG_VS); dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d", @@ -1806,6 +1803,7 @@ nvme_init(nvme_t *nvme) nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_pagesize; nvme->n_prp_dma_attr.dma_attr_minxfer = nvme->n_pagesize; nvme->n_prp_dma_attr.dma_attr_align = nvme->n_pagesize; + nvme->n_prp_dma_attr.dma_attr_seg = nvme->n_pagesize - 1; /* * Reset controller if it's still in ready state. @@ -1845,11 +1843,13 @@ nvme_init(nvme_t *nvme) nvme_put64(nvme, NVME_REG_ASQ, asq); nvme_put64(nvme, NVME_REG_ACQ, acq); - cc.b.cc_ams = 0; /* use Round-Robin arbitration */ - cc.b.cc_css = 0; /* use NVM command set */ + cc.b.cc_ams = 0; /* use Round-Robin arbitration */ + cc.b.cc_css = 0; /* use NVM command set */ cc.b.cc_mps = nvme->n_pageshift - 12; - cc.b.cc_shn = 0; /* no shutdown in progress */ - cc.b.cc_en = 1; /* enable controller */ + cc.b.cc_shn = 0; /* no shutdown in progress */ + cc.b.cc_en = 1; /* enable controller */ + cc.b.cc_iosqes = 6; /* submission queue entry is 2^6 bytes long */ + cc.b.cc_iocqes = 4; /* completion queue entry is 2^4 bytes long */ nvme_put32(nvme, NVME_REG_CC, cc.r); @@ -1890,6 +1890,20 @@ nvme_init(nvme_t *nvme) sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL); /* + * Setup initial interrupt for admin queue. + */ + if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1) + != DDI_SUCCESS) && + (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1) + != DDI_SUCCESS) && + (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1) + != DDI_SUCCESS)) { + dev_err(nvme->n_dip, CE_WARN, + "!failed to setup initial interrupt"); + goto fail; + } + + /* * Post an asynchronous event command to catch errors. */ if (nvme_async_event(nvme) != DDI_SUCCESS) { @@ -2038,7 +2052,7 @@ nvme_init(nvme_t *nvme) * performance. A value of 3 means "degraded", 0 is best. */ last_rp = 3; - for (int j = 0; j != idns->id_nlbaf; j++) { + for (int j = 0; j <= idns->id_nlbaf; j++) { if (idns->id_lbaf[j].lbaf_lbads == 0) break; if (idns->id_lbaf[j].lbaf_ms != 0) @@ -2114,8 +2128,8 @@ nvme_init(nvme_t *nvme) if (nvme->n_ioq_count < nqueues) { nvme_release_interrupts(nvme); - if (nvme_setup_interrupts(nvme, nvme->n_intr_type, nqueues) - != DDI_SUCCESS) { + if (nvme_setup_interrupts(nvme, nvme->n_intr_type, + nvme->n_ioq_count) != DDI_SUCCESS) { dev_err(nvme->n_dip, CE_WARN, "!failed to reduce number of interrupts"); goto fail; @@ -2171,6 +2185,7 @@ nvme_intr(caddr_t arg1, caddr_t arg2) /*LINTED: E_PTR_BAD_CAST_ALIGN*/ nvme_t *nvme = (nvme_t *)arg1; int inum = (int)(uintptr_t)arg2; + int ccnt = 0; int qnum; nvme_cmd_t *cmd; @@ -2188,10 +2203,11 @@ nvme_intr(caddr_t arg1, caddr_t arg2) while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) { taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq, cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent); + ccnt++; } } - return (DDI_INTR_CLAIMED); + return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); } static void @@ -2222,7 +2238,6 @@ nvme_release_interrupts(nvme_t *nvme) static int nvme_setup_interrupts(nvme_t *nvme, int intr_type, int nqpairs) { - int failed = 0; int nintrs, navail, count; int ret; int i; @@ -2292,25 +2307,18 @@ nvme_setup_interrupts(nvme_t *nvme, int intr_type, int nqpairs) (void) ddi_intr_get_cap(nvme->n_inth[0], &nvme->n_intr_cap); for (i = 0; i < count; i++) { - if (nvme->n_inth[i] == NULL) - break; + if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK) + ret = ddi_intr_block_enable(&nvme->n_inth[i], 1); + else + ret = ddi_intr_enable(nvme->n_inth[i]); - if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK) { - if (ddi_intr_block_enable(&nvme->n_inth[i], 1) != - DDI_SUCCESS) - failed++; - } else { - if (ddi_intr_enable(nvme->n_inth[i]) != DDI_SUCCESS) - failed++; + if (ret != DDI_SUCCESS) { + dev_err(nvme->n_dip, CE_WARN, + "!%s: enabling interrupt %d failed", __func__, i); + goto fail; } } - if (failed != 0) { - dev_err(nvme->n_dip, CE_WARN, - "!%s: enabling interrupts failed", __func__); - goto fail; - } - nvme->n_intr_type = intr_type; nvme->n_progress |= NVME_INTERRUPTS; diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h index 8071da3872..f23e63b5db 100644 --- a/usr/src/uts/common/io/nvme/nvme_var.h +++ b/usr/src/uts/common/io/nvme/nvme_var.h @@ -10,7 +10,7 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #ifndef _NVME_VAR_H @@ -42,7 +42,6 @@ #define NVME_DEFAULT_ASYNC_EVENT_LIMIT 10 #define NVME_MIN_ASYNC_EVENT_LIMIT 1 -#define NVME_ADMIN_CMD_TIMEOUT 100000 typedef struct nvme nvme_t; typedef struct nvme_namespace nvme_namespace_t; diff --git a/usr/src/uts/common/io/sata/impl/sata.c b/usr/src/uts/common/io/sata/impl/sata.c index 66c141bf83..c4013d0efd 100644 --- a/usr/src/uts/common/io/sata/impl/sata.c +++ b/usr/src/uts/common/io/sata/impl/sata.c @@ -24,6 +24,7 @@ */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Argo Technologies SA */ /* @@ -4516,6 +4517,7 @@ sata_txlt_read_capacity(sata_pkt_txlate_t *spx) struct buf *bp = spx->txlt_sata_pkt->satapkt_cmd.satacmd_bp; sata_drive_info_t *sdinfo; uint64_t val; + uint32_t lbsize = DEV_BSIZE; uchar_t *rbuf; int rval, reason; kmutex_t *cport_mutex = &(SATA_TXLT_CPORT_MUTEX(spx)); @@ -4554,17 +4556,28 @@ sata_txlt_read_capacity(sata_pkt_txlate_t *spx) */ val = MIN(sdinfo->satadrv_capacity - 1, UINT32_MAX); + if (sdinfo->satadrv_id.ai_phys_sect_sz & SATA_L2PS_CHECK_BIT) { + /* physical/logical sector size word is valid */ + + if (sdinfo->satadrv_id.ai_phys_sect_sz & + SATA_L2PS_BIG_SECTORS) { + /* if this set 117-118 words are valid */ + lbsize = sdinfo->satadrv_id.ai_words_lsec[0] | + (sdinfo->satadrv_id.ai_words_lsec[1] << 16); + lbsize <<= 1; /* convert from words to bytes */ + } + } rbuf = (uchar_t *)bp->b_un.b_addr; /* Need to swap endians to match scsi format */ rbuf[0] = (val >> 24) & 0xff; rbuf[1] = (val >> 16) & 0xff; rbuf[2] = (val >> 8) & 0xff; rbuf[3] = val & 0xff; - /* block size - always 512 bytes, for now */ - rbuf[4] = 0; - rbuf[5] = 0; - rbuf[6] = 0x02; - rbuf[7] = 0; + rbuf[4] = (lbsize >> 24) & 0xff; + rbuf[5] = (lbsize >> 16) & 0xff; + rbuf[6] = (lbsize >> 8) & 0xff; + rbuf[7] = lbsize & 0xff; + scsipkt->pkt_state |= STATE_XFERRED_DATA; scsipkt->pkt_resid = 0; @@ -4614,6 +4627,7 @@ sata_txlt_read_capacity16(sata_pkt_txlate_t *spx) sata_drive_info_t *sdinfo; uint64_t val; uint16_t l2p_exp; + uint32_t lbsize = DEV_BSIZE; uchar_t *rbuf; int rval, reason; #define TPE 0x80 @@ -4697,6 +4711,14 @@ sata_txlt_read_capacity16(sata_pkt_txlate_t *spx) sdinfo->satadrv_id.ai_phys_sect_sz & SATA_L2PS_EXP_MASK; } + + if (sdinfo->satadrv_id.ai_phys_sect_sz & + SATA_L2PS_BIG_SECTORS) { + /* if this set 117-118 words are valid */ + lbsize = sdinfo->satadrv_id.ai_words_lsec[0] | + (sdinfo->satadrv_id.ai_words_lsec[1] << 16); + lbsize <<= 1; /* convert from words to bytes */ + } } rbuf = (uchar_t *)bp->b_un.b_addr; @@ -4711,12 +4733,10 @@ sata_txlt_read_capacity16(sata_pkt_txlate_t *spx) rbuf[5] = (val >> 16) & 0xff; rbuf[6] = (val >> 8) & 0xff; rbuf[7] = val & 0xff; - - /* logical block length in bytes = 512 (for now) */ - /* rbuf[8] = 0; */ - /* rbuf[9] = 0; */ - rbuf[10] = 0x02; - /* rbuf[11] = 0; */ + rbuf[8] = (lbsize >> 24) & 0xff; + rbuf[9] = (lbsize >> 16) & 0xff; + rbuf[10] = (lbsize >> 8) & 0xff; + rbuf[11] = lbsize & 0xff; /* p_type, prot_en, unspecified by SAT-2 */ /* rbuf[12] = 0; */ |