-rw-r--r-- | manifest | 2
-rw-r--r-- | usr/src/cmd/dladm/dladm.c | 15
-rw-r--r-- | usr/src/common/avl/avl.c | 24
-rw-r--r-- | usr/src/man/man4d/Makefile | 2
-rw-r--r-- | usr/src/man/man4d/vioblk.4d | 3
-rw-r--r-- | usr/src/man/man4d/vioscsi.4d | 92
-rw-r--r-- | usr/src/man/man4d/virtio.4d | 5
-rw-r--r-- | usr/src/pkg/manifests/driver-storage-vioscsi.p5m | 36
-rw-r--r-- | usr/src/uts/common/Makefile.files | 5
-rw-r--r-- | usr/src/uts/common/Makefile.rules | 5
-rw-r--r-- | usr/src/uts/common/io/vioscsi/vioscsi.c | 1636
-rw-r--r-- | usr/src/uts/common/io/vioscsi/vioscsi.h | 313
-rw-r--r-- | usr/src/uts/intel/Makefile.intel | 7
-rw-r--r-- | usr/src/uts/intel/vioscsi/Makefile | 47
14 files changed, 2155 insertions, 37 deletions
@@ -813,6 +813,7 @@ f kernel/drv/amd64/usbsprl 0755 root sys f kernel/drv/amd64/vgatext 0755 root sys f kernel/drv/amd64/vioblk 0755 root sys f kernel/drv/amd64/vioif 0755 root sys +f kernel/drv/amd64/vioscsi 0755 root sys f kernel/drv/amd64/vmxnet 0755 root sys f kernel/drv/amd64/vmxnet3s 0755 root sys f kernel/drv/amd64/vnd 0755 root sys @@ -19013,6 +19014,7 @@ f usr/share/man/man4d/usbsksp.4d 0444 root bin f usr/share/man/man4d/usbsprl.4d 0444 root bin f usr/share/man/man4d/vioblk.4d 0444 root bin f usr/share/man/man4d/vioif.4d 0444 root bin +f usr/share/man/man4d/vioscsi.4d 0444 root bin f usr/share/man/man4d/virtio.4d 0444 root bin f usr/share/man/man4d/virtualkm.4d 0444 root bin f usr/share/man/man4d/vnd.4d 0444 root bin diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c index 4de300ef65..dac9006a22 100644 --- a/usr/src/cmd/dladm/dladm.c +++ b/usr/src/cmd/dladm/dladm.c @@ -26,6 +26,7 @@ * Copyright (c) 2015 Joyent, Inc. All rights reserved. * Copyright 2020 Peter Tribble. * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2021 RackTop Systems, Inc. */ #include <stdio.h> @@ -787,9 +788,9 @@ static const ofmt_field_t aggr_x_fields[] = { /* name, field width, index callback */ { "LINK", 12, AGGR_X_LINK, print_xaggr_cb}, { "PORT", 15, AGGR_X_PORT, print_xaggr_cb}, -{ "SPEED", 5, AGGR_X_SPEED, print_xaggr_cb}, -{ "DUPLEX", 10, AGGR_X_DUPLEX, print_xaggr_cb}, -{ "STATE", 10, AGGR_X_STATE, print_xaggr_cb}, +{ "SPEED", 9, AGGR_X_SPEED, print_xaggr_cb}, +{ "DUPLEX", 9, AGGR_X_DUPLEX, print_xaggr_cb}, +{ "STATE", 9, AGGR_X_STATE, print_xaggr_cb}, { "ADDRESS", 19, AGGR_X_ADDRESS, print_xaggr_cb}, { "PORTSTATE", 16, AGGR_X_PORTSTATE, print_xaggr_cb}, { NULL, 0, 0, NULL}} @@ -860,9 +861,9 @@ static const ofmt_field_t phys_fields[] = { offsetof(link_fields_buf_t, link_phys_media), print_default_cb}, { "STATE", 11, offsetof(link_fields_buf_t, link_phys_state), print_default_cb}, -{ "SPEED", 7, +{ "SPEED", 9, offsetof(link_fields_buf_t, link_phys_speed), print_default_cb}, -{ "DUPLEX", 10, +{ "DUPLEX", 9, offsetof(link_fields_buf_t, link_phys_duplex), print_default_cb}, { "DEVICE", 13, offsetof(link_fields_buf_t, link_phys_device), print_default_cb}, @@ -1042,7 +1043,7 @@ typedef struct vnic_fields_buf_s { char vnic_link[DLPI_LINKNAME_MAX]; char vnic_over[DLPI_LINKNAME_MAX]; - char vnic_speed[6]; + char vnic_speed[10]; char vnic_macaddr[18]; char vnic_macaddrtype[19]; char vnic_vid[6]; @@ -1054,7 +1055,7 @@ static const ofmt_field_t vnic_fields[] = { offsetof(vnic_fields_buf_t, vnic_link), print_default_cb}, { "OVER", 11, offsetof(vnic_fields_buf_t, vnic_over), print_default_cb}, -{ "SPEED", 6, +{ "SPEED", 9, offsetof(vnic_fields_buf_t, vnic_speed), print_default_cb}, { "MACADDRESS", 18, offsetof(vnic_fields_buf_t, vnic_macaddr), print_default_cb}, diff --git a/usr/src/common/avl/avl.c b/usr/src/common/avl/avl.c index 0411afb4c5..ed752bde3d 100644 --- a/usr/src/common/avl/avl.c +++ b/usr/src/common/avl/avl.c @@ -105,21 +105,6 @@ #include <sys/cmn_err.h> /* - * Small arrays to translate between balance (or diff) values and child indices. - * - * Code that deals with binary tree data structures will randomly use - * left and right children when examining a tree. C "if()" statements - * which evaluate randomly suffer from very poor hardware branch prediction. - * In this code we avoid some of the branch mispredictions by using the - * following translation arrays. They replace random branches with an - * additional memory reference. 
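For reference (an illustration, not part of the patch): the avl.c hunk above replaces the two translation arrays with conditional expressions. A minimal standalone check of that equivalence, assuming only the table definitions that were removed, is:

#include <assert.h>

static const int avl_child2balance[2] = {-1, 1};
static const int avl_balance2child[] = {0, 0, 1};

int
main(void)
{
	int diff, which;

	/* avl_balance2child[1 + diff] == (diff > 0) for diff in {-1, 0, 1} */
	for (diff = -1; diff <= 1; diff++)
		assert(avl_balance2child[1 + diff] == (diff > 0));

	/* avl_child2balance[which] == (which ? 1 : -1) for which in {0, 1} */
	for (which = 0; which <= 1; which++)
		assert(avl_child2balance[which] == (which ? 1 : -1));

	return (0);
}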
Since the translation arrays are both very - * small the data should remain efficiently in cache. - */ -static const int avl_child2balance[2] = {-1, 1}; -static const int avl_balance2child[] = {0, 0, 1}; - - -/* * Walk from one node to the previous valued node (ie. an infix walk * towards the left). At any given node we do one of 2 things: * @@ -274,8 +259,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where) #endif return (AVL_NODE2DATA(node, off)); } - child = avl_balance2child[1 + diff]; - + child = (diff > 0); } if (where != NULL) @@ -528,7 +512,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where) * Compute the new balance */ old_balance = AVL_XBALANCE(node); - new_balance = old_balance + avl_child2balance[which_child]; + new_balance = old_balance + (which_child ? 1 : -1); /* * If we introduced equal balance, then we are done immediately @@ -708,7 +692,7 @@ avl_remove(avl_tree_t *tree, void *data) * choose node to swap from whichever side is taller */ old_balance = AVL_XBALANCE(delete); - left = avl_balance2child[old_balance + 1]; + left = (old_balance > 0); right = 1 - left; /* @@ -792,7 +776,7 @@ avl_remove(avl_tree_t *tree, void *data) */ node = parent; old_balance = AVL_XBALANCE(node); - new_balance = old_balance - avl_child2balance[which_child]; + new_balance = old_balance - (which_child ? 1 : -1); parent = AVL_XPARENT(node); which_child = AVL_XCHILD(node); diff --git a/usr/src/man/man4d/Makefile b/usr/src/man/man4d/Makefile index 2b20c60b32..4a6402de9c 100644 --- a/usr/src/man/man4d/Makefile +++ b/usr/src/man/man4d/Makefile @@ -17,6 +17,7 @@ # Copyright 2018 Nexenta Systems, Inc. # Copyright 2020 Peter Tribble # Copyright 2021 Oxide Computer Company +# Copyright 2022 RackTop Systems, Inc. # include $(SRC)/Makefile.master @@ -245,6 +246,7 @@ i386_MANFILES= ahci.4d \ usmn.4d \ vioblk.4d \ vioif.4d \ + vioscsi.4d \ virtio.4d \ wpi.4d \ xhci.4d \ diff --git a/usr/src/man/man4d/vioblk.4d b/usr/src/man/man4d/vioblk.4d index 4d7a2b1d9a..c8a751ec8d 100644 --- a/usr/src/man/man4d/vioblk.4d +++ b/usr/src/man/man4d/vioblk.4d @@ -11,7 +11,7 @@ .\" .\" Copyright 2020 Oxide Computer Company .\" -.Dd August 28, 2021 +.Dd June 14, 2022 .Dt VIOBLK 4D .Os .Sh NAME @@ -82,6 +82,7 @@ x86 device driver. .El .Sh SEE ALSO .Xr blkdev 4D , +.Xr vioscsi 4D , .Xr virtio 4D , .Xr dkio 4I , .Xr diskinfo 8 diff --git a/usr/src/man/man4d/vioscsi.4d b/usr/src/man/man4d/vioscsi.4d new file mode 100644 index 0000000000..2058fd65ed --- /dev/null +++ b/usr/src/man/man4d/vioscsi.4d @@ -0,0 +1,92 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2022 RackTop Systems, Inc. +.\" +.Dd June 17, 2022 +.Dt VIOSCSI 4D +.Os +.Sh NAME +.Nm vioscsi +.Nd virtio SCSI driver +.Sh DESCRIPTION +The +.Nm +driver provides a virtual SCSI transport, allowing +access to +.Xr virtio 4D +based SCSI targets and logical units. 
+The driver supports the following capabilities: +.Bl -dash +.It +Dynamic hot-plug (if supported by the host) +.It +Honors packet timeouts specified in +.Xr scsi_pkt 9S +.It +Reset of target or logical unit via +.Xr scsi_reset 9F +.It +Abort for individual commands via +.Xr scsi_abort 9F +.It +Command queueing (both tagged and untagged) +.It +Honors per logical unit queue depth from device +.It +Up to 255 targets, and 16384 logical units per PCI function +.El +.Pp +The +.Nm +driver is based on +.Xr iport 9 , +and uses a single iport per PCI function, +with a unit-address of "iport0". +Children of the iport use a unit-address with the +format "target,lun", where both target and lun are +presented as hexadecimal values. +.Sh NOTES +The +.Nm +driver may be slightly less efficient than +.Xr vioblk 4D , +but it may support use as boot media, as well +as attachments to SCSI pass-through devices, which +may include devices such as tape drives via +.Xr st 4D +and enclosures via +.Xr ses 4D . +.Sh ARCHITECTURE +The +.Nm +driver is only supported on +.Sy x86 . +.Sh FILES +.Bl -tag -width Pa +.It Pa /kernel/drv/amd64/vioscsi +x86 device driver. +.El +.Sh SEE ALSO +.Xr sd 4D , +.Xr ses 4D , +.Xr st 4D , +.Xr vioblk 4D , +.Xr virtio 4D , +.Xr iport 9 , +.Xr scsi_abort 9F , +.Xr scsi_reset 9F , +.Xr scsi_pkt 9S +.Rs +.%T Virtual I/O Device (VIRTIO) Version 1.1 +.%D April, 2019 +.%U https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html +.Re diff --git a/usr/src/man/man4d/virtio.4d b/usr/src/man/man4d/virtio.4d index 4a424ba518..47cd03535f 100644 --- a/usr/src/man/man4d/virtio.4d +++ b/usr/src/man/man4d/virtio.4d @@ -11,7 +11,7 @@ .\" .\" Copyright 2020 Oxide Computer Company .\" -.Dd October 3, 2020 +.Dd June 14, 2022 .Dt VIRTIO 4D .Os .Sh NAME @@ -39,7 +39,8 @@ x86 device driver. .El .Sh SEE ALSO .Xr vioblk 4D , -.Xr vioif 4D +.Xr vioif 4D , +.Xr vioscsi 4D .Rs .%T Virtual I/O Device (VIRTIO) Version 1.1 .%D April, 2019 diff --git a/usr/src/pkg/manifests/driver-storage-vioscsi.p5m b/usr/src/pkg/manifests/driver-storage-vioscsi.p5m new file mode 100644 index 0000000000..9693a137b8 --- /dev/null +++ b/usr/src/pkg/manifests/driver-storage-vioscsi.p5m @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Nexenta by DDN, Inc. All rights reserved. +# Copyright 2022 RackTop Systems, Inc. +# + +# +# The default for payload-bearing actions in this package is to appear in the +# global zone only. See the include file for greater detail, as well as +# information about overriding the defaults. 
+# +<include global_zone_only_component> +set name=pkg.fmri value=pkg:/driver/storage/vioscsi@$(PKGVERS) +set name=pkg.summary value="Virtio SCSI" +set name=pkg.description value="Virtio SCSI driver" +set name=info.classification value=org.opensolaris.category.2008:Drivers/Storage +set name=variant.arch value=$(ARCH) +dir path=kernel group=sys +dir path=kernel/drv group=sys +dir path=kernel/drv/$(ARCH64) group=sys +file path=kernel/drv/$(ARCH64)/vioscsi group=sys +dir path=usr/share/man +dir path=usr/share/man/man4d +file path=usr/share/man/man4d/vioscsi.4d +driver name=vioscsi class=scsi-self-identifying alias=pci1af4,1004 +license lic_CDDL license=lic_CDDL diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 3aacd2dde2..800bf9fbde 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -28,7 +28,7 @@ # Copyright 2021 Joyent, Inc. # Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> -# Copyright 2020 RackTop Systems, Inc. +# Copyright 2022 RackTop Systems, Inc. # Copyright 2021 Oxide Computer Company # @@ -2133,6 +2133,9 @@ VIOBLK_OBJS = vioblk.o # Virtio network driver VIOIF_OBJS = vioif.o +# Virtio SCSI driver +VIOSCSI_OBJS = vioscsi.o + # # kiconv modules # diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index 0ae5c754b4..b047275e06 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -27,6 +27,7 @@ # Copyright 2018 Nexenta Systems, Inc. # Copyright (c) 2017 by Delphix. All rights reserved. # Copyright 2021 Oxide Computer Company +# Copyright 2022 RackTop Systems, Inc. # # @@ -1552,6 +1553,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioif/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioscsi/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.c b/usr/src/uts/common/io/vioscsi/vioscsi.c new file mode 100644 index 0000000000..1e35252a6c --- /dev/null +++ b/usr/src/uts/common/io/vioscsi/vioscsi.c @@ -0,0 +1,1636 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. + * Copyright 2022 RackTop Systems, Inc. 
+ */ + +#include "vioscsi.h" + +static char vioscsi_ident[] = "VIRTIO SCSI driver"; + +static uint_t vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2); +static uint_t vioscsi_evt_handler(caddr_t arg1, caddr_t arg2); +static uint_t vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2); + +static int vioscsi_tran_getcap(struct scsi_address *, char *, int); +static int vioscsi_tran_setcap(struct scsi_address *, char *, int, int); +static int vioscsi_tran_reset(struct scsi_address *, int); + +static int vioscsi_tran_start(struct scsi_address *, struct scsi_pkt *); +static int vioscsi_tran_abort(struct scsi_address *, struct scsi_pkt *); + +static int vioscsi_iport_attach(dev_info_t *); +static int vioscsi_iport_detach(dev_info_t *); + +static int vioscsi_req_init(vioscsi_softc_t *, vioscsi_request_t *, + virtio_queue_t *, int); +static void vioscsi_req_fini(vioscsi_request_t *); +static boolean_t vioscsi_req_abort(vioscsi_softc_t *, vioscsi_request_t *); +static void vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target); +static void vioscsi_discover(void *); + +/* + * DMA attributes. We support a linked list, but most of our uses require a + * single aligned buffer. The HBA buffers will use a copy of this adjusted for + * the actual virtio limits. + */ +static ddi_dma_attr_t virtio_dma_attr = { + .dma_attr_version = DMA_ATTR_V0, + .dma_attr_addr_lo = 0, + .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull, + .dma_attr_count_max = 0x00000000FFFFFFFFull, + .dma_attr_align = 1, + .dma_attr_burstsizes = 1, + .dma_attr_minxfer = 1, + .dma_attr_maxxfer = 0xFFFFFFFFull, + .dma_attr_seg = 0xFFFFFFFFFFFFFFFFull, + .dma_attr_sgllen = 1, + .dma_attr_granular = 1, + .dma_attr_flags = 0, +}; + +/* + * this avoids calls to drv_usectohz that might be expensive: + */ +static clock_t vioscsi_hz; + +static boolean_t +vioscsi_poll_until(vioscsi_softc_t *sc, vioscsi_request_t *req, + ddi_intr_handler_t func, clock_t until) +{ + until *= 1000000; /* convert to usec */ + while (until > 0) { + (void) func((caddr_t)sc, NULL); + if (req->vr_done) { + return (B_TRUE); + } + drv_usecwait(10); + until -= 10; + } + atomic_or_8(&req->vr_expired, 1); + return (B_FALSE); +} + +static boolean_t +vioscsi_tmf(vioscsi_softc_t *sc, uint32_t func, uint8_t target, uint16_t lun, + vioscsi_request_t *task) +{ + vioscsi_request_t req; + vioscsi_tmf_res_t *res; + vioscsi_tmf_req_t *tmf; + + bzero(&req, sizeof (req)); + + if (vioscsi_req_init(sc, &req, sc->vs_ctl_vq, KM_NOSLEEP) != 0) { + return (B_FALSE); + } + + tmf = &req.vr_req->tmf; + res = &req.vr_res->tmf; + + tmf->type = VIRTIO_SCSI_T_TMF; + tmf->subtype = func; + tmf->lun[0] = 1; + tmf->lun[1] = target; + tmf->lun[2] = 0x40 | (lun >> 8); + tmf->lun[3] = lun & 0xff; + tmf->tag = (uint64_t)task; + + virtio_chain_clear(req.vr_vic); + if (virtio_chain_append(req.vr_vic, req.vr_req_pa, sizeof (*tmf), + VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { + return (B_FALSE); + } + + if (virtio_chain_append(req.vr_vic, req.vr_res_pa, sizeof (*res), + VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + return (B_FALSE); + } + + /* + * Make sure the device can see our request: + */ + virtio_dma_sync(req.vr_dma, DDI_DMA_SYNC_FORDEV); + + /* + * Push chain into the queue: + */ + virtio_chain_submit(req.vr_vic, B_TRUE); + + /* + * Wait for it to complete -- these should always complete in a tiny + * amount of time. Give it 5 seconds to be sure. + */ + if (!vioscsi_poll_until(sc, &req, vioscsi_ctl_handler, 5)) { + /* + * We timed out -- this should *NEVER* happen! 
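For reference, the lun[] bytes filled in by vioscsi_tmf() above (and again by vioscsi_start() further down) follow the virtio-scsi single-level LUN layout. A hypothetical helper, shown only to make the byte layout explicit (the name vioscsi_encode_lun is not part of the change):

/*
 * Illustrative only: encode a target/lun pair into the 8-byte LUN
 * field carried in virtio-scsi requests.
 */
static void
vioscsi_encode_lun(uint8_t buf[8], uint8_t target, uint16_t lun)
{
	bzero(buf, 8);
	buf[0] = 1;				/* fixed to 1 per the virtio-scsi spec */
	buf[1] = target;			/* target number */
	buf[2] = 0x40 | ((lun >> 8) & 0xff);	/* LUN high byte, flat-space addressing */
	buf[3] = lun & 0xff;			/* LUN low byte */
	/* bytes 4 through 7 stay zero */
}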
+ * There is no safe way to deal with this if it occurs, so we + * just warn and leak the resources. Plan for a reboot soon. + */ + dev_err(sc->vs_dip, CE_WARN, + "task mgmt timeout! (target %d lun %d)", target, lun); + return (B_FALSE); + } + + vioscsi_req_fini(&req); + + switch (res->response) { + case VIRTIO_SCSI_S_OK: + case VIRTIO_SCSI_S_FUNCTION_SUCCEEDED: + break; + default: + return (B_FALSE); + } + return (B_TRUE); +} + +static boolean_t +vioscsi_lun_reset(vioscsi_softc_t *sc, uint8_t target, uint16_t lun) +{ + return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET, + target, lun, NULL)); +} + +static boolean_t +vioscsi_target_reset(vioscsi_softc_t *sc, uint8_t target) +{ + return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET, + target, 0, NULL)); +} + +static boolean_t +vioscsi_req_abort(vioscsi_softc_t *sc, vioscsi_request_t *req) +{ + return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_ABORT_TASK, + req->vr_target, req->vr_lun, req)); +} + +static void +vioscsi_dev_abort(vioscsi_dev_t *vd) +{ + vioscsi_request_t *req; + list_t *l = &vd->vd_reqs; + + mutex_enter(&vd->vd_lock); + for (req = list_head(l); req != NULL; req = list_next(l, req)) { + (void) vioscsi_tmf(vd->vd_sc, VIRTIO_SCSI_T_TMF_ABORT_TASK, + req->vr_target, req->vr_lun, req); + } + mutex_exit(&vd->vd_lock); +} + +static void +vioscsi_dev_timeout(void *arg) +{ + vioscsi_dev_t *vd = arg; + vioscsi_softc_t *sc = vd->vd_sc; + vioscsi_request_t *req; + timeout_id_t tid; + clock_t now; + list_t *l; + + mutex_enter(&vd->vd_lock); + if ((tid = vd->vd_timeout) == 0) { + /* + * We are shutting down, stop and do not reschedule. + */ + mutex_exit(&vd->vd_lock); + return; + } + vd->vd_timeout = 0; + + now = ddi_get_lbolt(); + l = &vd->vd_reqs; + + for (req = list_head(l); req != NULL; req = list_next(l, req)) { + /* + * The list is sorted by expiration time, so if we reach an + * item that hasn't expired yet, we're done. + */ + if (now < req->vr_expire) { + break; + } + atomic_or_8(&req->vr_expired, 1); + + /* + * This command timed out, so send an abort. + */ + dev_err(sc->vs_dip, CE_WARN, "cmd timed out (%ds)", + (int)req->vr_time); + (void) vioscsi_req_abort(sc, req); + } + + if (!list_is_empty(l)) { + /* + * Check again in a second. + * If these wake ups are too expensive, we could + * calculate other timeouts, but that would require + * doing untimeout if we want to wake up earlier. + * This is probably cheaper, and certainly simpler. + */ + vd->vd_timeout = timeout(vioscsi_dev_timeout, vd, vioscsi_hz); + } + mutex_exit(&vd->vd_lock); +} + +static void +vioscsi_poll(vioscsi_softc_t *sc, vioscsi_request_t *req) +{ + if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, req->vr_time)) { + return; + } + + /* + * Try a "gentle" task abort -- timeouts may be quasi-normal for some + * types of requests and devices. + */ + if (vioscsi_req_abort(sc, req) && + vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) { + return; + } + + /* + * A little more forceful with a lun reset: + */ + if (vioscsi_lun_reset(sc, req->vr_target, req->vr_lun) && + vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) { + return; + } + + /* + * If all else fails, reset the target, and keep trying. + * This can wind up blocking forever, but if it does it means we are in + * a very bad situation (and the virtio device is busted). + * We may also be leaking request structures at this point, but only at + * the maximum rate of one per minute. 
+ */ + for (;;) { + dev_err(sc->vs_dip, CE_WARN, "request stuck, resetting target"); + (void) vioscsi_target_reset(sc, req->vr_target); + if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 60)) { + return; + } + } +} + +static void +vioscsi_start(vioscsi_softc_t *sc, vioscsi_request_t *req) +{ + vioscsi_cmd_req_t *cmd = &req->vr_req->cmd; + + req->vr_done = 0; + req->vr_expired = 0; + cmd->lun[0] = 1; + cmd->lun[1] = req->vr_target; + cmd->lun[2] = 0x40 | ((req->vr_lun >> 8) & 0xff); + cmd->lun[3] = req->vr_lun & 0xff; + cmd->lun[4] = 0; + cmd->lun[5] = 0; + cmd->lun[6] = 0; + cmd->lun[7] = 0; + cmd->tag = (uint64_t)req; + cmd->prio = 0; + cmd->crn = 0; + cmd->task_attr = req->vr_task_attr; + + /* + * Make sure the device can see our CDB data: + */ + virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORDEV); + + /* + * Determine whether we expect to poll before submitting (because we + * cannot touch the request after submission if we are not polling). + */ + if (req->vr_poll) { + /* + * Push chain into the queue: + */ + virtio_chain_submit(req->vr_vic, B_TRUE); + + /* + * NB: Interrupts may be enabled, or might not be. It is fine + * either way. + */ + vioscsi_poll(sc, req); + } else { + /* + * Push chain into the queue: + */ + virtio_chain_submit(req->vr_vic, B_TRUE); + } +} + +static int +vioscsi_tran_start(struct scsi_address *ap, struct scsi_pkt *pkt) +{ + struct scsi_device *sd = scsi_address_device(ap); + vioscsi_dev_t *vd = scsi_device_hba_private_get(sd); + vioscsi_request_t *req = pkt->pkt_ha_private; + virtio_chain_t *vic = req->vr_vic; + vioscsi_cmd_req_t *cmd = &req->vr_req->cmd; + vioscsi_cmd_res_t *res = &req->vr_res->cmd; + + if (pkt->pkt_cdbp == NULL) { + return (TRAN_BADPKT); + } + + bzero(cmd, sizeof (*cmd)); + bcopy(pkt->pkt_cdbp, cmd->cdb, pkt->pkt_cdblen); + + /* + * Default expiration is 10 seconds, clip at an hour. + * (order of operations here is to avoid wrapping, if run in a 32-bit + * kernel) + */ + req->vr_time = min(pkt->pkt_time ? pkt->pkt_time : 10, 3600); + req->vr_dev = vd; + req->vr_poll = ((pkt->pkt_flags & FLAG_NOINTR) != 0); + req->vr_target = vd->vd_target; + req->vr_lun = vd->vd_lun; + req->vr_start = ddi_get_lbolt(); + req->vr_expire = req->vr_start + req->vr_time * vioscsi_hz; + + /* + * Configure task queuing behavior: + */ + if (pkt->pkt_flags & (FLAG_HTAG|FLAG_HEAD)) { + req->vr_task_attr = VIRTIO_SCSI_S_HEAD; + } else if (pkt->pkt_flags & FLAG_OTAG) { + req->vr_task_attr = VIRTIO_SCSI_S_ORDERED; + } else if (pkt->pkt_flags & FLAG_SENSING) { + req->vr_task_attr = VIRTIO_SCSI_S_ACA; + } else { /* FLAG_STAG is also our default */ + req->vr_task_attr = VIRTIO_SCSI_S_SIMPLE; + } + + /* + * Make sure we start with a clear chain: + */ + virtio_chain_clear(vic); + + /* + * The KVM SCSI emulation requires that all outgoing buffers are added + * first with the request header being the first entry. After the + * outgoing have been added then the incoming buffers with the response + * buffer being the first of the incoming. This requirement is + * independent of using chained ring entries or one ring entry with + * indirect buffers. 
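To make the ordering requirement above concrete, this is the descriptor layout that vioscsi_tran_start() builds just below (an illustration, not additional patch content):

    write command:  [cmd header R] [data cookie 0 R] ... [data cookie N R] [resp header W]
    read command:   [cmd header R] [resp header W] [data cookie 0 W] ... [data cookie N W]

Here R means VIRTIO_DIR_DEVICE_READS and W means VIRTIO_DIR_DEVICE_WRITES; every device-readable descriptor is appended before every device-writable one.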
+ */ + + /* + * Add request header: + */ + if (virtio_chain_append(vic, req->vr_req_pa, sizeof (*cmd), + VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { + return (TRAN_BUSY); + } + + /* + * Add write buffers: + */ + if (pkt->pkt_dma_flags & DDI_DMA_WRITE) { + for (int i = 0; i < pkt->pkt_numcookies; i++) { + if (virtio_chain_append(vic, + pkt->pkt_cookies[i].dmac_laddress, + pkt->pkt_cookies[i].dmac_size, + VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { + return (TRAN_BUSY); + } + } + } + + /* + * Add response header: + */ + if (virtio_chain_append(vic, req->vr_res_pa, sizeof (*res), + VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + return (TRAN_BUSY); + } + + /* + * Add read buffers: + */ + if (pkt->pkt_dma_flags & DDI_DMA_READ) { + for (int i = 0; i < pkt->pkt_numcookies; i++) { + if (virtio_chain_append(vic, + pkt->pkt_cookies[i].dmac_laddress, + pkt->pkt_cookies[i].dmac_size, + VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + return (TRAN_BUSY); + } + } + } + + /* + * Check for queue depth, and add to the timeout list: + */ + mutex_enter(&vd->vd_lock); + if (vd->vd_num_cmd >= vd->vd_max_cmd) { + mutex_exit(&vd->vd_lock); + return (TRAN_BUSY); + } + vd->vd_num_cmd++; + + if (!req->vr_poll) { + /* + * Add the request to the timeout list. + * + * In order to minimize the work done during timeout handling, + * we keep requests sorted. This assumes that requests mostly + * have the same timeout, and requests with long timeouts are + * infrequent. + */ + list_t *l = &vd->vd_reqs; + vioscsi_request_t *r; + + for (r = list_tail(l); r != NULL; r = list_prev(l, r)) { + /* + * Avoids wrapping lbolt: + */ + if ((req->vr_expire - r->vr_expire) >= 0) { + list_insert_after(l, r, req); + break; + } + } + if (r == NULL) { + /* + * List empty, or this one expires before others: + */ + list_insert_tail(l, req); + } + if (vd->vd_timeout == 0) { + vd->vd_timeout = timeout(vioscsi_dev_timeout, vd, + vioscsi_hz); + } + } + + mutex_exit(&vd->vd_lock); + + vioscsi_start(vd->vd_sc, req); + return (TRAN_ACCEPT); +} + +static int +vioscsi_tran_abort(struct scsi_address *ap, struct scsi_pkt *pkt) +{ + struct scsi_device *sd; + vioscsi_dev_t *vd; + vioscsi_request_t *req; + + if ((ap == NULL) || + ((sd = scsi_address_device(ap)) == NULL) || + ((vd = scsi_device_hba_private_get(sd)) == NULL)) { + return (0); + } + if (pkt == NULL) { + /* + * Abort all requests for the LUN. + */ + vioscsi_dev_abort(vd); + return (1); + } + if ((req = pkt->pkt_ha_private) != NULL) { + return (vioscsi_req_abort(vd->vd_sc, req) ? 
1 : 0); + } + + return (0); +} + +static void +vioscsi_req_fini(vioscsi_request_t *req) +{ + if (req->vr_dma != NULL) { + virtio_dma_free(req->vr_dma); + req->vr_dma = NULL; + } + if (req->vr_vic != NULL) { + virtio_chain_free(req->vr_vic); + req->vr_vic = NULL; + } +} + +static int +vioscsi_req_init(vioscsi_softc_t *sc, vioscsi_request_t *req, + virtio_queue_t *vq, int sleep) +{ + uint64_t pa; + + bzero(req, sizeof (*req)); + list_link_init(&req->vr_node); + req->vr_vq = vq; + req->vr_dma = virtio_dma_alloc(sc->vs_virtio, sizeof (vioscsi_op_t), + &virtio_dma_attr, DDI_DMA_STREAMING | DDI_DMA_READ | DDI_DMA_WRITE, + sleep); + req->vr_vic = virtio_chain_alloc(vq, sleep); + if ((req->vr_dma == NULL) || (req->vr_vic == NULL)) { + return (-1); + } + virtio_chain_data_set(req->vr_vic, req); + req->vr_req = virtio_dma_va(req->vr_dma, VIOSCSI_REQ_OFFSET); + req->vr_res = virtio_dma_va(req->vr_dma, VIOSCSI_RES_OFFSET); + pa = virtio_dma_cookie_pa(req->vr_dma, 0); + req->vr_req_pa = pa + VIOSCSI_REQ_OFFSET; + req->vr_res_pa = pa + VIOSCSI_RES_OFFSET; + return (0); +} + +static void +vioscsi_tran_pkt_destructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran) +{ + vioscsi_request_t *req = pkt->pkt_ha_private; + + vioscsi_req_fini(req); +} + +static int +vioscsi_tran_pkt_constructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran, + int sleep) +{ + vioscsi_softc_t *sc = tran->tran_hba_private; + vioscsi_request_t *req = pkt->pkt_ha_private; + + if (vioscsi_req_init(sc, req, sc->vs_cmd_vq, sleep) != 0) { + vioscsi_req_fini(req); + return (-1); + } + req->vr_pkt = pkt; + return (0); +} + +static int +vioscsi_tran_setup_pkt(struct scsi_pkt *pkt, int (*cb)(caddr_t), caddr_t arg) +{ + if ((pkt->pkt_dma_flags & DDI_DMA_RDWR) == DDI_DMA_RDWR) { + /* + * We can do read, or write, but not both. 
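One detail from the timeout-list insertion a little above is worth spelling out: the sort test is written as a signed difference so the ordering stays correct even if ddi_get_lbolt() wraps. A standalone illustration (the helper name is assumed, not part of the change):

/*
 * Illustration only: (a - b) >= 0 orders two expiry times correctly
 * across a wrap of the tick counter, where a direct a >= b would not.
 */
static boolean_t
expire_at_or_after(clock_t a, clock_t b)
{
	return ((a - b) >= 0);
}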
+ */ + return (-1); + } + + return (0); +} + +static void +vioscsi_tran_teardown_pkt(struct scsi_pkt *pkt) +{ + vioscsi_request_t *req = pkt->pkt_ha_private; + virtio_chain_t *vic = req->vr_vic; + + virtio_chain_clear(vic); +} + +static int +vioscsi_tran_getcap(struct scsi_address *ap, char *cap, int whom) +{ + int rval = 0; + vioscsi_softc_t *sc = ap->a_hba_tran->tran_hba_private; + + if (cap == NULL) + return (-1); + + switch (scsi_hba_lookup_capstr(cap)) { + case SCSI_CAP_CDB_LEN: + rval = sc->vs_cdb_size; + break; + + case SCSI_CAP_ARQ: + case SCSI_CAP_LUN_RESET: + case SCSI_CAP_TAGGED_QING: + case SCSI_CAP_UNTAGGED_QING: + rval = 1; + break; + + default: + rval = -1; + } + return (rval); +} + +static int +vioscsi_tran_setcap(struct scsi_address *ap, char *cap, int value, int whom) +{ + int rval = 1; + + if (cap == NULL || whom == 0) { + return (-1); + } + + switch (scsi_hba_lookup_capstr(cap)) { + default: + rval = 1; + } + return (rval); +} + +static int +vioscsi_tran_reset(struct scsi_address *ap, int level) +{ + struct scsi_device *sd; + vioscsi_dev_t *vd; + + if ((ap == NULL) || + ((sd = scsi_address_device(ap)) == NULL) || + ((vd = scsi_device_hba_private_get(sd)) == NULL)) { + return (0); + } + + switch (level) { + case RESET_LUN: + if (vioscsi_lun_reset(vd->vd_sc, vd->vd_target, vd->vd_lun)) { + return (1); + } + break; + case RESET_TARGET: + if (vioscsi_target_reset(vd->vd_sc, vd->vd_target)) { + return (1); + } + break; + case RESET_ALL: + default: + break; + } + return (0); +} + +static boolean_t +vioscsi_parse_unit_address(const char *ua, int *tgt, int *lun) +{ + long num; + char *end; + + if ((ddi_strtol(ua, &end, 16, &num) != 0) || + ((*end != ',') && (*end != 0))) { + return (B_FALSE); + } + *tgt = (int)num; + if (*end == 0) { + *lun = 0; + return (B_TRUE); + } + end++; /* skip comma */ + if ((ddi_strtol(end, &end, 16, &num) != 0) || (*end != 0)) { + return (B_FALSE); + } + *lun = (int)num; + return (B_TRUE); +} + +uint_t +vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2) +{ + vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1; + virtio_chain_t *vic; + + while ((vic = virtio_queue_poll(sc->vs_ctl_vq)) != NULL) { + vioscsi_request_t *req; + + if ((req = virtio_chain_data(vic)) == NULL) { + dev_err(sc->vs_dip, CE_WARN, "missing ctl chain data"); + continue; + } + atomic_or_8(&req->vr_done, 1); + } + return (DDI_INTR_CLAIMED); +} + +uint_t +vioscsi_evt_handler(caddr_t arg1, caddr_t arg2) +{ + vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1; + virtio_chain_t *vic; + boolean_t missed = B_FALSE; + + while ((vic = virtio_queue_poll(sc->vs_evt_vq)) != NULL) { + vioscsi_evt_t *evt; + vioscsi_event_t *ve; + uint8_t target; + + if ((ve = virtio_chain_data(vic)) == NULL) { + /* + * This should never occur, it's a bug if it does. + */ + dev_err(sc->vs_dip, CE_WARN, "missing evt chain data"); + continue; + } + evt = ve->ve_evt; + + virtio_dma_sync(ve->ve_dma, DDI_DMA_SYNC_FORKERNEL); + + target = evt->lun[1]; + switch (evt->event & 0x7FFFFFFF) { + case VIRTIO_SCSI_T_TRANSPORT_RESET: + switch (evt->reason) { + case VIRTIO_SCSI_EVT_RESET_HARD: + /* + * We could reset-notify, but this doesn't seem + * to get fired for targets initiated from + * host. + */ + break; + case VIRTIO_SCSI_EVT_RESET_REMOVED: + case VIRTIO_SCSI_EVT_RESET_RESCAN: + /* + * We can treat these the same for the target, + * and not worry about the actual LUN id here. + */ + vioscsi_lun_changed(sc, target); + break; + default: + /* + * Some other event we don't know about. 
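As a usage note for vioscsi_parse_unit_address() above, the hexadecimal "target,lun" form documented in vioscsi.4d maps as in this sketch (example values only):

int tgt, lun;

VERIFY(vioscsi_parse_unit_address("a,1f", &tgt, &lun));	/* tgt = 10, lun = 31 */
VERIFY(vioscsi_parse_unit_address("a", &tgt, &lun));	/* tgt = 10, lun = 0 */
VERIFY(!vioscsi_parse_unit_address("a,1f,0", &tgt, &lun)); /* malformed, rejected */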
+ */ + break; + } + break; + case VIRTIO_SCSI_T_NO_EVENT: + /* + * If this happens, we missed some event(s). + */ + missed = B_TRUE; + break; + case VIRTIO_SCSI_T_ASYNC_NOTIFY: + /* + * We don't register for these, so we don't expect + * them. + */ + break; + } + + if (evt->event & VIRTIO_SCSI_T_EVENTS_MISSED) { + missed = B_TRUE; + } + + /* + * Resubmit the chain for the next event. + */ + virtio_chain_submit(vic, B_TRUE); + } + + if (missed) { + (void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc, + DDI_NOSLEEP); + } + + return (DDI_INTR_CLAIMED); +} + +uint_t +vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2) +{ + vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1; + virtio_chain_t *vic; + + while ((vic = virtio_queue_poll(sc->vs_cmd_vq)) != NULL) { + + vioscsi_request_t *req; + vioscsi_dev_t *vd; + struct scsi_pkt *pkt; + struct virtio_scsi_cmd_resp *res; + + if ((req = virtio_chain_data(vic)) == NULL) { + /* + * This should never occur, it's a bug if it does. + */ + dev_err(sc->vs_dip, CE_WARN, "missing cmd chain data"); + continue; + } + + virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORKERNEL); + res = &req->vr_res->cmd; + pkt = req->vr_pkt; + + if (pkt == NULL) { + /* + * This is an internal request (from discovery), and + * doesn't have an associated SCSI pkt structure. In + * this case, the notification we've done is + * sufficient, and the submitter will examine the + * response field directly. + */ + if (req->vr_poll) { + atomic_or_8(&req->vr_done, 1); + } + continue; + } + + if ((vd = req->vr_dev) != NULL) { + mutex_enter(&vd->vd_lock); + vd->vd_num_cmd--; + list_remove(&vd->vd_reqs, req); + mutex_exit(&vd->vd_lock); + } + + switch (res->response) { + + case VIRTIO_SCSI_S_OK: + /* + * Request processed successfully, check SCSI status. + */ + pkt->pkt_scbp[0] = res->status; + pkt->pkt_resid = 0; + pkt->pkt_reason = CMD_CMPLT; + pkt->pkt_state = + STATE_GOT_BUS | STATE_GOT_TARGET | + STATE_SENT_CMD | STATE_GOT_STATUS; + if ((pkt->pkt_numcookies > 0) && + (pkt->pkt_cookies[0].dmac_size > 0)) { + pkt->pkt_state |= STATE_XFERRED_DATA; + } + + /* + * For CHECK_CONDITION, fill out the ARQ details: + */ + if (res->status == STATUS_CHECK) { + /* + * ARQ status and arq structure: + */ + pkt->pkt_state |= STATE_ARQ_DONE; + pkt->pkt_scbp[1] = STATUS_GOOD; + struct scsi_arq_status *ars = + (void *)pkt->pkt_scbp; + ars->sts_rqpkt_reason = CMD_CMPLT; + ars->sts_rqpkt_resid = 0; + ars->sts_rqpkt_state = + STATE_GOT_BUS | STATE_GOT_TARGET | + STATE_GOT_STATUS | STATE_SENT_CMD | + STATE_XFERRED_DATA; + bcopy(res->sense, &ars->sts_sensedata, + res->sense_len); + } + break; + + case VIRTIO_SCSI_S_BAD_TARGET: + case VIRTIO_SCSI_S_INCORRECT_LUN: + pkt->pkt_reason = CMD_DEV_GONE; + break; + + case VIRTIO_SCSI_S_OVERRUN: + dev_err(sc->vs_dip, CE_WARN, "OVERRUN"); + pkt->pkt_reason = CMD_DATA_OVR; + break; + + case VIRTIO_SCSI_S_RESET: + pkt->pkt_reason = CMD_RESET; + pkt->pkt_statistics |= STAT_DEV_RESET; + break; + + case VIRTIO_SCSI_S_ABORTED: + if (req->vr_expired) { + pkt->pkt_statistics |= STAT_TIMEOUT; + pkt->pkt_reason = CMD_TIMEOUT; + } else { + pkt->pkt_reason = CMD_ABORTED; + pkt->pkt_statistics |= STAT_ABORTED; + } + break; + + case VIRTIO_SCSI_S_BUSY: + /* + * Busy, should have been caught at submission: + */ + pkt->pkt_reason = CMD_TRAN_ERR; + break; + + default: + dev_err(sc->vs_dip, CE_WARN, "Unknown response: 0x%x", + res->response); + pkt->pkt_reason = CMD_TRAN_ERR; + break; + } + + + if (!req->vr_poll) { + scsi_hba_pkt_comp(pkt); + } else { + atomic_or_8(&req->vr_done, 1); + } + } + 
return (DDI_INTR_CLAIMED); +} + +static int +vioscsi_tran_tgt_init(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran, + struct scsi_device *sd) +{ + const char *ua; + vioscsi_softc_t *sc; + int target; + int lun; + vioscsi_dev_t *vd; + + if (scsi_hba_iport_unit_address(hdip) == NULL) { + return (DDI_FAILURE); /* only iport has targets */ + } + if ((sc = tran->tran_hba_private) == NULL) { + return (DDI_FAILURE); + } + + if (((ua = scsi_device_unit_address(sd)) == NULL) || + (!vioscsi_parse_unit_address(ua, &target, &lun))) { + return (DDI_FAILURE); + } + + vd = kmem_zalloc(sizeof (*vd), KM_SLEEP); + list_create(&vd->vd_reqs, sizeof (vioscsi_request_t), + offsetof(vioscsi_request_t, vr_node)); + mutex_init(&vd->vd_lock, NULL, MUTEX_DRIVER, + virtio_intr_pri(sc->vs_virtio)); + + vd->vd_target = (uint8_t)target; + vd->vd_lun = (uint16_t)lun; + vd->vd_sc = sc; + vd->vd_sd = sd; + vd->vd_max_cmd = sc->vs_cmd_per_lun; + vd->vd_num_cmd = 0; + + scsi_device_hba_private_set(sd, vd); + + mutex_enter(&sc->vs_lock); + list_insert_tail(&sc->vs_devs, vd); + mutex_exit(&sc->vs_lock); + + return (DDI_SUCCESS); +} + +static void +vioscsi_tran_tgt_free(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran, + struct scsi_device *sd) +{ + vioscsi_dev_t *vd = scsi_device_hba_private_get(sd); + vioscsi_softc_t *sc = vd->vd_sc; + timeout_id_t tid; + + scsi_device_hba_private_set(sd, NULL); + + mutex_enter(&vd->vd_lock); + tid = vd->vd_timeout; + vd->vd_timeout = 0; + mutex_exit(&vd->vd_lock); + + if (tid != 0) { + (void) untimeout(tid); + } + + mutex_enter(&sc->vs_lock); + list_remove(&sc->vs_devs, vd); + mutex_exit(&sc->vs_lock); + + list_destroy(&vd->vd_reqs); + mutex_destroy(&vd->vd_lock); + kmem_free(vd, sizeof (*vd)); +} + +/* + * vioscsi_probe_target probes for existence of a valid target (LUN 0). + * It utilizes the supplied request, and sends TEST UNIT READY. + * (This command is used because it requires no data.) + * It returns 1 if the target is found, 0 if not, and -1 on error. + * It is expected additional LUNs will be discovered by the HBA framework using + * REPORT LUNS on LUN 0. + */ +static int +vioscsi_probe_target(vioscsi_softc_t *sc, vioscsi_request_t *req, + uint8_t target) +{ + struct virtio_scsi_cmd_req *cmd = &req->vr_req->cmd; + struct virtio_scsi_cmd_resp *res = &req->vr_res->cmd; + + bzero(cmd, sizeof (*cmd)); + cmd->cdb[0] = SCMD_TEST_UNIT_READY; + + virtio_chain_clear(req->vr_vic); + if (virtio_chain_append(req->vr_vic, req->vr_req_pa, + sizeof (*cmd), VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { + return (-1); + } + if (virtio_chain_append(req->vr_vic, req->vr_res_pa, + sizeof (*res), VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + return (-1); + } + req->vr_poll = B_TRUE; + req->vr_start = ddi_get_lbolt(); + req->vr_time = 10; /* seconds */ + req->vr_target = target; + req->vr_lun = 0; + req->vr_task_attr = VIRTIO_SCSI_S_HEAD; + vioscsi_start(sc, req); + switch (res->response) { + case VIRTIO_SCSI_S_OK: + return (1); + case VIRTIO_SCSI_S_INCORRECT_LUN: + case VIRTIO_SCSI_S_BAD_TARGET: + return (0); + default: + return (-1); + } +} + +static void +vioscsi_null_complete(struct scsi_pkt *pkt) +{ + /* + * This intentionally does nothing. + */ +} + +static void +vioscsi_dev_rescan(vioscsi_dev_t *vd) +{ + struct scsi_pkt *pkt; + struct scsi_arq_status *aqs; + uint8_t *sense; + + /* + * This routine is a bit of a hack, to workaround the fact that we have + * no other good way to convince the SCSI HBA framework to rescan for + * new LUNs. 
What we do is fake an inquiry and complete it with a UNIT + * ATTENTION indicating that the REPORT LUNS data has changed. + */ + + pkt = scsi_init_pkt(&vd->vd_sd->sd_address, NULL, NULL, 6, + sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC, NULL); + if (pkt == NULL) { /* should never happen with SLEEP_FUNC */ + return; + } + + /* + * Must have a non-null completion routine in order to get the HBA + * logic to check the UNIT ATTENTION STATUS. + */ + pkt->pkt_comp = vioscsi_null_complete; + + /* + * The default CDB is 0, TEST UNIT READY, which takes no data, and + * returns no data. + */ + pkt->pkt_state = CMD_CMPLT; + pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD | + STATE_GOT_STATUS | STATE_ARQ_DONE; + pkt->pkt_scbp[0] = STATUS_CHECK; + aqs = (void *)pkt->pkt_scbp; + aqs->sts_rqpkt_reason = CMD_CMPLT; + aqs->sts_rqpkt_resid = 0; + aqs->sts_rqpkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | + STATE_GOT_STATUS | STATE_SENT_CMD | STATE_XFERRED_DATA; + sense = (void *)&aqs->sts_sensedata; + + /* + * Descriptor format sense response: + */ + sense[0] = 0x72; + sense[1] = KEY_UNIT_ATTENTION; + sense[2] = 0x3f; /* ASC - reported LUNs data changed */ + sense[3] = 0x0e; /* ASCQ */ + sense[7] = 0x00; /* additional sense length (none) */ + + vd->vd_rescan = B_FALSE; + + scsi_hba_pkt_comp(pkt); +} + +static void +vioscsi_rescan_luns(void *arg) +{ + vioscsi_softc_t *sc = arg; + vioscsi_dev_t *vd; + list_t *l; + + l = &sc->vs_devs; + mutex_enter(&sc->vs_lock); + for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) { + if (vd->vd_rescan) { + vioscsi_dev_rescan(vd); + } + } + mutex_exit(&sc->vs_lock); +} + +static void +vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target) +{ + vioscsi_dev_t *vd; + list_t *l = &sc->vs_devs; + boolean_t found = B_FALSE; + + mutex_enter(&sc->vs_lock); + for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) { + if ((vd->vd_target == target) && (vd->vd_lun == 0)) { + vd->vd_rescan = B_TRUE; + found = B_TRUE; + break; + } + } + mutex_exit(&sc->vs_lock); + + if (found) { + /* + * We have lun 0 already, so report luns changed: + */ + (void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_rescan_luns, + sc, DDI_NOSLEEP); + } else { + /* + * We didn't find lun 0, so issue a new discovery: + */ + (void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, + sc, DDI_NOSLEEP); + } +} + +/* + * vioscsi_discover is our task function for performing target and lun + * discovery. This is done using active SCSI probes. 
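For readers decoding the fake sense data built in vioscsi_dev_rescan() above, the descriptor-format bytes break down as follows (reference annotation only):

    sense[0] = 0x72   response code: current error, descriptor format
    sense[1] = 0x06   sense key: UNIT ATTENTION (KEY_UNIT_ATTENTION)
    sense[2] = 0x3f   additional sense code (ASC)
    sense[3] = 0x0e   additional sense code qualifier (ASCQ)
                      ASC/ASCQ 3Fh/0Eh: REPORTED LUNS DATA HAS CHANGED
    sense[7] = 0x00   additional sense length: no descriptors follow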
+ */ +static void +vioscsi_discover(void *arg) +{ + vioscsi_softc_t *sc = arg; + scsi_hba_tgtmap_t *tm = sc->vs_tgtmap; + vioscsi_request_t req; + + if (vioscsi_req_init(sc, &req, sc->vs_cmd_vq, KM_SLEEP) != 0) { + vioscsi_req_fini(&req); + return; + } + + if (scsi_hba_tgtmap_set_begin(tm) != DDI_SUCCESS) { + vioscsi_req_fini(&req); + return; + } + for (uint8_t target = 0; target < sc->vs_max_target; target++) { + char ua[10]; + switch (vioscsi_probe_target(sc, &req, target)) { + case 1: + (void) snprintf(ua, sizeof (ua), "%x", target); + if (scsi_hba_tgtmap_set_add(tm, SCSI_TGT_SCSI_DEVICE, + ua, NULL) != DDI_SUCCESS) { + (void) scsi_hba_tgtmap_set_flush(tm); + vioscsi_req_fini(&req); + return; + } + break; + case 0: + continue; + case -1: + (void) scsi_hba_tgtmap_set_flush(tm); + vioscsi_req_fini(&req); + return; + } + } + (void) scsi_hba_tgtmap_set_end(tm, 0); + vioscsi_req_fini(&req); +} + +static void +vioscsi_teardown(vioscsi_softc_t *sc, boolean_t failed) +{ + if (sc->vs_virtio != NULL) { + virtio_fini(sc->vs_virtio, failed); + } + + /* + * Free up the event resources: + */ + for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) { + vioscsi_event_t *ve = &sc->vs_events[i]; + if (ve->ve_vic != NULL) { + virtio_chain_free(ve->ve_vic); + } + if (ve->ve_dma != NULL) { + virtio_dma_free(ve->ve_dma); + } + } + + if (sc->vs_tran != NULL) { + scsi_hba_tran_free(sc->vs_tran); + } + if (sc->vs_tq != NULL) { + ddi_taskq_destroy(sc->vs_tq); + } + if (sc->vs_intr_pri != NULL) { + mutex_destroy(&sc->vs_lock); + } + kmem_free(sc, sizeof (*sc)); +} + +static int +vioscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + scsi_hba_tran_t *tran = NULL; + vioscsi_softc_t *sc; + virtio_t *vio; + ddi_dma_attr_t attr; + + if (cmd != DDI_ATTACH) { /* no suspend/resume support */ + return (DDI_FAILURE); + } + + if (scsi_hba_iport_unit_address(dip) != NULL) { + return (vioscsi_iport_attach(dip)); + } + + sc = kmem_zalloc(sizeof (*sc), KM_SLEEP); + sc->vs_dip = dip; + + list_create(&sc->vs_devs, sizeof (vioscsi_dev_t), + offsetof(vioscsi_dev_t, vd_node)); + + tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP); + sc->vs_tran = tran; + + tran->tran_hba_len = sizeof (vioscsi_request_t); + tran->tran_hba_private = sc; + + /* + * We don't use WWN addressing, so advertise parallel. The underlying + * device might still be using a different transport, even in a + * pass-through, but we cannot discriminate that at this layer. + */ + tran->tran_interconnect_type = INTERCONNECT_PARALLEL; + + tran->tran_start = vioscsi_tran_start; + tran->tran_abort = vioscsi_tran_abort; + tran->tran_reset = vioscsi_tran_reset; + tran->tran_getcap = vioscsi_tran_getcap; + tran->tran_setcap = vioscsi_tran_setcap; + + tran->tran_tgt_init = vioscsi_tran_tgt_init; + tran->tran_tgt_free = vioscsi_tran_tgt_free; + + tran->tran_setup_pkt = vioscsi_tran_setup_pkt; + tran->tran_teardown_pkt = vioscsi_tran_teardown_pkt; + tran->tran_pkt_constructor = vioscsi_tran_pkt_constructor; + tran->tran_pkt_destructor = vioscsi_tran_pkt_destructor; + + /* + * We need to determine some device settings here, so we initialize the + * virtio in order to access those values. The rest of the setup we do + * in the iport attach. Note that this driver cannot support + * reattaching a child iport once it is removed -- the entire driver + * will need to be reset for that. 
+ */ + vio = virtio_init(dip, VIOSCSI_WANTED_FEATURES, B_TRUE); + if ((sc->vs_virtio = vio) == NULL) { + dev_err(dip, CE_WARN, "failed to init virtio"); + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + /* + * Get virtio parameters: + */ + sc->vs_max_target = virtio_dev_get16(vio, VIRTIO_SCSI_CFG_MAX_TARGET); + sc->vs_max_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_MAX_LUN); + sc->vs_cdb_size = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CDB_SIZE); + sc->vs_max_seg = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_SEG_MAX); + sc->vs_cmd_per_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CMD_PER_LUN); + + /* + * Adjust operating parameters to functional limits: + */ + sc->vs_max_target = min(VIOSCSI_MAX_TARGET, sc->vs_max_target); + sc->vs_cmd_per_lun = max(1, sc->vs_max_target); + sc->vs_max_seg = max(VIOSCSI_MIN_SEGS, sc->vs_max_seg); + + /* + * Allocate queues: + */ + sc->vs_ctl_vq = virtio_queue_alloc(vio, 0, "ctl", + vioscsi_ctl_handler, sc, B_FALSE, sc->vs_max_seg); + sc->vs_evt_vq = virtio_queue_alloc(vio, 1, "evt", + vioscsi_evt_handler, sc, B_FALSE, sc->vs_max_seg); + sc->vs_cmd_vq = virtio_queue_alloc(vio, 2, "cmd", + vioscsi_cmd_handler, sc, B_FALSE, sc->vs_max_seg); + + if ((sc->vs_ctl_vq == NULL) || (sc->vs_evt_vq == NULL) || + (sc->vs_cmd_vq == NULL)) { + dev_err(dip, CE_WARN, "failed allocating queue(s)"); + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + if (virtio_init_complete(vio, 0) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "virtio_init_complete failed"); + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + /* + * We cannot initialize this mutex before virtio_init_complete: + */ + sc->vs_intr_pri = virtio_intr_pri(vio); + mutex_init(&sc->vs_lock, NULL, MUTEX_DRIVER, sc->vs_intr_pri); + + /* + * Allocate events, but do not submit yet: + */ + for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) { + vioscsi_event_t *ve = &sc->vs_events[i]; + ve->ve_vic = virtio_chain_alloc(sc->vs_evt_vq, KM_SLEEP); + ve->ve_dma = virtio_dma_alloc(sc->vs_virtio, + sizeof (vioscsi_evt_t), &virtio_dma_attr, + DDI_DMA_STREAMING | DDI_DMA_READ, KM_SLEEP); + if ((ve->ve_vic == NULL) || (ve->ve_dma == NULL)) { + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + if (virtio_chain_append(ve->ve_vic, + virtio_dma_cookie_pa(ve->ve_dma, 0), sizeof (*ve->ve_evt), + VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + ve->ve_evt = virtio_dma_va(ve->ve_dma, 0); + virtio_chain_data_set(ve->ve_vic, ve); + } + + sc->vs_tq = ddi_taskq_create(dip, "task", 1, TASKQ_DEFAULTPRI, 0); + if (sc->vs_tq == NULL) { + dev_err(dip, CE_WARN, "failed to create taskq"); + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + /* + * Maximum number of segments, subtract two needed for headers: + */ + attr = virtio_dma_attr; + attr.dma_attr_sgllen = sc->vs_max_seg - 2; + + if (scsi_hba_attach_setup(dip, &attr, tran, + SCSI_HBA_ADDR_COMPLEX | SCSI_HBA_HBA | + SCSI_HBA_TRAN_CDB | SCSI_HBA_TRAN_SCB) != + DDI_SUCCESS) { + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + if (scsi_hba_iport_register(dip, "iport0") != DDI_SUCCESS) { + vioscsi_teardown(sc, B_TRUE); + return (DDI_FAILURE); + } + + ddi_report_dev(dip); + + return (DDI_SUCCESS); +} + +static void +vioscsi_iport_teardown(vioscsi_softc_t *sc) +{ + /* + * Stop the taskq -- ensures we don't try to access resources from a + * task while we are tearing down. 
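A worked example of the segment-limit adjustment in vioscsi_attach() above, using an assumed device value: if the device reports VIRTIO_SCSI_CFG_SEG_MAX as 126, the HBA DMA attribute becomes dma_attr_sgllen = 126 - 2 = 124, so a maximal transfer occupies 1 (command header) + 124 (data cookies) + 1 (response header) = 126 descriptors, which is exactly the advertised chain limit.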
+ */ + ddi_taskq_suspend(sc->vs_tq); + ddi_taskq_wait(sc->vs_tq); + + /* + * Shutdown all interrupts and device transfers: + */ + virtio_interrupts_disable(sc->vs_virtio); + virtio_shutdown(sc->vs_virtio); + + /* + * Common resources: + */ + if (sc->vs_tgtmap != NULL) { + scsi_hba_tgtmap_destroy(sc->vs_tgtmap); + sc->vs_tgtmap = NULL; + } +} + +/* + * vioscsi_iport_attach implements the attach of the iport. We do the final + * set up of interrupts, and posting of event buffers here, as we do not want + * any activity unless the iport is attached. This matches detach, and makes + * teardown safer. + */ +static int +vioscsi_iport_attach(dev_info_t *dip) +{ + const char *ua = scsi_hba_iport_unit_address(dip); + scsi_hba_tran_t *tran; + vioscsi_softc_t *sc; + + /* + * We only support a single iport -- all disks are virtual and all + * disks use target/lun addresses. + */ + if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) { + return (DDI_FAILURE); + } + + /* + * Get our parent's tran, and look up the sc from that: + */ + tran = ddi_get_driver_private(ddi_get_parent(dip)); + if ((tran == NULL) || + ((sc = tran->tran_hba_private) == NULL)) { + return (DDI_FAILURE); + } + + /* + * Save a copy of the soft state in our tran private area. + * (The framework clears this after cloning from parent.) + */ + tran = ddi_get_driver_private(dip); + tran->tran_hba_private = sc; + + /* + * We don't want interrupts on the control queue -- strictly polled + * (however if this handler is called from an interrupt, it should + * still be absolutely fine). + */ + virtio_queue_no_interrupt(sc->vs_ctl_vq, B_TRUE); + + if (scsi_hba_tgtmap_create(dip, SCSI_TM_FULLSET, MICROSEC, + 2 * MICROSEC, sc, NULL, NULL, &sc->vs_tgtmap) != DDI_SUCCESS) { + vioscsi_iport_teardown(sc); + return (DDI_FAILURE); + } + + /* + * Post events: + */ + for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) { + virtio_chain_submit(sc->vs_events[i].ve_vic, B_FALSE); + } + virtio_queue_flush(sc->vs_evt_vq); + + /* + * Start interrupts going now: + */ + if (virtio_interrupts_enable(sc->vs_virtio) != DDI_SUCCESS) { + vioscsi_iport_teardown(sc); + return (DDI_FAILURE); + } + + /* + * Start a discovery: + */ + (void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc, DDI_SLEEP); + + return (DDI_SUCCESS); +} + +static int +vioscsi_quiesce(dev_info_t *dip) +{ + vioscsi_softc_t *sc; + scsi_hba_tran_t *tran; + + if (((tran = ddi_get_driver_private(dip)) == NULL) || + ((sc = tran->tran_hba_private) == NULL)) { + return (DDI_FAILURE); + } + if (sc->vs_virtio == NULL) { + return (DDI_SUCCESS); /* not initialized yet */ + } + + return (virtio_quiesce(sc->vs_virtio)); +} + +/* + * vioscsi_iport_detach is used to perform the detach of the iport. It + * disables interrupts and the device, but does not free resources, other than + * the target map. Note that due to lack of a way to start virtio after + * virtio_shutdown(), it is not possible to reattach the iport after this is + * called, unless the underlying HBA is also detached and then re-attached. + */ +static int +vioscsi_iport_detach(dev_info_t *dip) +{ + const char *ua = scsi_hba_iport_unit_address(dip); + vioscsi_softc_t *sc; + scsi_hba_tran_t *tran; + + if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) { + return (DDI_FAILURE); + } + + if (((tran = ddi_get_driver_private(dip)) == NULL) || + ((sc = tran->tran_hba_private) == NULL)) { + return (DDI_FAILURE); + } + + mutex_enter(&sc->vs_lock); + if (!list_is_empty(&sc->vs_devs)) { + /* + * Cannot detach while we have target children. 
+ */ + mutex_exit(&sc->vs_lock); + return (DDI_FAILURE); + } + + vioscsi_iport_teardown(sc); + + return (DDI_SUCCESS); +} + +static int +vioscsi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + vioscsi_softc_t *sc; + scsi_hba_tran_t *tran; + + if (cmd != DDI_DETACH) { + return (DDI_FAILURE); + } + + if (scsi_hba_iport_unit_address(dip) != NULL) { + return (vioscsi_iport_detach(dip)); + } + + if (((tran = ddi_get_driver_private(dip)) == NULL) || + ((sc = tran->tran_hba_private) == NULL)) { + return (DDI_FAILURE); + } + + if (scsi_hba_detach(dip) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + vioscsi_teardown(sc, B_FALSE); + + return (DDI_SUCCESS); +} + +static struct dev_ops vioscsi_dev_ops = { + .devo_rev = DEVO_REV, + .devo_refcnt = 0, + .devo_getinfo = nodev, + .devo_identify = nulldev, + .devo_probe = nulldev, + .devo_attach = vioscsi_attach, + .devo_detach = vioscsi_detach, + .devo_reset = nodev, + .devo_cb_ops = NULL, + .devo_bus_ops = NULL, + .devo_power = NULL, + .devo_quiesce = vioscsi_quiesce, +}; + +static struct modldrv modldrv = { + .drv_modops = &mod_driverops, + .drv_linkinfo = vioscsi_ident, + .drv_dev_ops = &vioscsi_dev_ops, +}; + +static struct modlinkage modlinkage = { + .ml_rev = MODREV_1, + .ml_linkage = { &modldrv, NULL, }, +}; + + +int +_init(void) +{ + int err; + + /* + * Initialize this unconditionally: + */ + vioscsi_hz = drv_usectohz(1000000); + + if ((err = scsi_hba_init(&modlinkage)) != 0) { + return (err); + } + + if ((err = mod_install(&modlinkage)) != 0) { + scsi_hba_fini(&modlinkage); + return (err); + } + + return (err); +} + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modlinkage)) != 0) { + return (err); + } + + scsi_hba_fini(&modlinkage); + + return (DDI_SUCCESS); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.h b/usr/src/uts/common/io/vioscsi/vioscsi.h new file mode 100644 index 0000000000..b032ef28c8 --- /dev/null +++ b/usr/src/uts/common/io/vioscsi/vioscsi.h @@ -0,0 +1,313 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. + * Copyright 2022 RackTop Systems, Inc. 
+ */ + +#ifndef _VIOSCSI_H_ +#define _VIOSCSI_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/atomic.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/ksynch.h> +#include <sys/modctl.h> +#include <sys/debug.h> +#include <sys/list.h> +#include <sys/stddef.h> + +#include <sys/scsi/scsi.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> + +#include <virtio.h> + +#define VIRTIO_SCSI_CDB_SIZE 32 +#define VIRTIO_SCSI_SENSE_SIZE 96 + +/* + * Feature bits: + */ +#define VIRTIO_SCSI_F_INOUT (0x1 << 0) +#define VIRTIO_SCSI_F_HOTPLUG (0x1 << 1) +#define VIRTIO_SCSI_F_CHANGE (0x1 << 2) +#define VIRTIO_SCSI_F_T10_PI (0x1 << 3) + +/* + * Register offset in bytes: + */ +#define VIRTIO_SCSI_CFG_NUM_QUEUES 0 +#define VIRTIO_SCSI_CFG_SEG_MAX 4 +#define VIRTIO_SCSI_CFG_MAX_SECTORS 8 +#define VIRTIO_SCSI_CFG_CMD_PER_LUN 12 +#define VIRTIO_SCSI_CFG_EVI_SIZE 16 +#define VIRTIO_SCSI_CFG_SENSE_SIZE 20 +#define VIRTIO_SCSI_CFG_CDB_SIZE 24 +#define VIRTIO_SCSI_CFG_MAX_CHANNEL 28 +#define VIRTIO_SCSI_CFG_MAX_TARGET 30 +#define VIRTIO_SCSI_CFG_MAX_LUN 32 + +/* + * Response codes: + */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_FUNCTION_COMPLETED 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 + +/* + * Control queue type codes: + */ +#define VIRTIO_SCSI_T_TMF 0 +#define VIRTIO_SCSI_T_AN_QUERY 1 +#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2 + +/* + * Task management codes: + */ +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +/* + * Events: + */ +#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000 +#define VIRTIO_SCSI_T_NO_EVENT 0 +#define VIRTIO_SCSI_T_TRANSPORT_RESET 1 +#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 + +/* + * Task attributes: + */ +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 + +/* + * Reasons of reset event: + */ +#define VIRTIO_SCSI_EVT_RESET_HARD 0 +#define VIRTIO_SCSI_EVT_RESET_RESCAN 1 +#define VIRTIO_SCSI_EVT_RESET_REMOVED 2 + +/* + * We need to support INOUT, and we want hotplug notifications: + */ +#define VIOSCSI_WANTED_FEATURES (VIRTIO_SCSI_F_INOUT | VIRTIO_SCSI_F_HOTPLUG) + +#define VIOSCSI_MAX_TARGET 256 +#define VIOSCSI_MIN_SEGS 3 +#define VIOSCSI_NUM_EVENTS 16 + +/* + * Data structures: + */ + +#pragma pack(1) + +/* + * virtio SCSI command request: + */ +struct virtio_scsi_cmd_req { + uint8_t lun[8]; + uint64_t tag; + uint8_t task_attr; + uint8_t prio; + uint8_t crn; + uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; +}; + +/* + * Virtio SCSI response: + */ +struct virtio_scsi_cmd_resp { + uint32_t sense_len; + uint32_t res_id; + uint16_t status_qualifier; + uint8_t status; + uint8_t response; + uint8_t sense[VIRTIO_SCSI_SENSE_SIZE]; +}; + +/* + * Task management request: + */ +struct virtio_scsi_ctrl_tmf_req { + uint32_t type; + uint32_t subtype; + uint8_t lun[8]; + 
uint64_t tag; +}; + +/* + * Task management response: + */ +struct virtio_scsi_ctrl_tmf_resp { + uint8_t response; +}; + +/* + * Asynchronous notification request: + */ +struct virtio_scsi_ctrl_an_req { + uint32_t type; + uint8_t lun[8]; + uint32_t event_requested; +}; + +/* + * Asynchronous notification response: + */ +struct virtio_scsi_ctrl_an_resp { + uint32_t event_actual; + uint8_t response; +}; + +/* + * Events delivered on the event queue: + */ +struct virtio_scsi_event { + uint32_t event; + uint8_t lun[8]; + uint32_t reason; +}; + +#pragma pack() + +typedef union { + struct virtio_scsi_cmd_req cmd; + struct virtio_scsi_ctrl_tmf_req tmf; + struct virtio_scsi_ctrl_an_req anr; +} vioscsi_req_t; + +typedef union { + struct virtio_scsi_cmd_resp cmd; + struct virtio_scsi_ctrl_tmf_resp tmf; + struct virtio_scsi_ctrl_an_resp anr; +} vioscsi_res_t; + +struct virtio_scsi_op { + vioscsi_req_t req; + vioscsi_res_t res; +}; + +#define VIOSCSI_REQ_OFFSET offsetof(struct virtio_scsi_op, req) +#define VIOSCSI_RES_OFFSET offsetof(struct virtio_scsi_op, res) + +typedef struct vioscsi_request vioscsi_request_t; +typedef struct vioscsi_event vioscsi_event_t; +typedef struct vioscsi_softc vioscsi_softc_t; +typedef struct vioscsi_dev vioscsi_dev_t; +typedef struct virtio_scsi_event vioscsi_evt_t; +typedef struct virtio_scsi_ctrl_tmf_req vioscsi_tmf_req_t; +typedef struct virtio_scsi_ctrl_tmf_resp vioscsi_tmf_res_t; +typedef struct virtio_scsi_cmd_req vioscsi_cmd_req_t; +typedef struct virtio_scsi_cmd_resp vioscsi_cmd_res_t; +typedef struct virtio_scsi_op vioscsi_op_t; + +struct vioscsi_request { + list_node_t vr_node; + struct scsi_pkt *vr_pkt; + virtio_queue_t *vr_vq; + virtio_dma_t *vr_dma; + virtio_chain_t *vr_vic; + vioscsi_dev_t *vr_dev; + vioscsi_req_t *vr_req; + vioscsi_res_t *vr_res; + uint64_t vr_req_pa; + uint64_t vr_res_pa; + boolean_t vr_poll; + uint8_t vr_expired; /* access using atomics */ + uint8_t vr_done; /* access using atomics */ + uint8_t vr_task_attr; + uint8_t vr_target; + uint16_t vr_lun; + clock_t vr_time; /* seconds */ + clock_t vr_start; /* ticks */ + clock_t vr_expire; /* ticks */ +}; + +struct vioscsi_dev { + list_node_t vd_node; + uint8_t vd_target; + uint16_t vd_lun; + struct scsi_device *vd_sd; + vioscsi_softc_t *vd_sc; + int vd_num_cmd; + int vd_max_cmd; + boolean_t vd_rescan; + list_t vd_reqs; + timeout_id_t vd_timeout; + kmutex_t vd_lock; +}; + +struct vioscsi_event { + virtio_chain_t *ve_vic; + virtio_dma_t *ve_dma; + vioscsi_evt_t *ve_evt; +}; + +struct vioscsi_softc { + dev_info_t *vs_dip; + virtio_t *vs_virtio; + uint64_t vs_features; + + virtio_queue_t *vs_ctl_vq; + virtio_queue_t *vs_evt_vq; + virtio_queue_t *vs_cmd_vq; + + scsi_hba_tran_t *vs_tran; + scsi_hba_tgtmap_t *vs_tgtmap; + ddi_taskq_t *vs_tq; + + uint32_t vs_max_target; + uint32_t vs_max_lun; + uint32_t vs_cdb_size; + uint32_t vs_max_seg; + uint32_t vs_cmd_per_lun; + + vioscsi_event_t vs_events[VIOSCSI_NUM_EVENTS]; + + void *vs_intr_pri; + kmutex_t vs_lock; + list_t vs_devs; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _VIOSCSI_H_ */ diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 79f4c3f26b..2cb43d8beb 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -415,15 +415,10 @@ DRV_KMODS += vr # # Virtio drivers # - -# Virtio core DRV_KMODS += virtio - -# Virtio block driver DRV_KMODS += vioblk - -# Virtio network driver DRV_KMODS += vioif +DRV_KMODS += vioscsi # # DTrace and DTrace Providers diff --git 
a/usr/src/uts/intel/vioscsi/Makefile b/usr/src/uts/intel/vioscsi/Makefile
new file mode 100644
index 0000000000..105e4e08fb
--- /dev/null
+++ b/usr/src/uts/intel/vioscsi/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Nexenta Systems, Inc.
+# Copyright 2022 RackTop Systems, Inc.
+#
+
+UTSBASE= $(SRC)/uts
+
+MODULE= vioscsi
+OBJECTS= $(VIOSCSI_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE= $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET= $(BINARY)
+INSTALL_TARGET= $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -Nmisc/scsi -Nmisc/virtio
+
+#
+# Includes
+#
+INC_PATH += -I$(UTSBASE)/common/io/virtio -I$(UTSBASE)/common/io/vioscsi
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ