summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--manifest2
-rw-r--r--usr/src/cmd/dladm/dladm.c15
-rw-r--r--usr/src/common/avl/avl.c24
-rw-r--r--usr/src/man/man4d/Makefile2
-rw-r--r--usr/src/man/man4d/vioblk.4d3
-rw-r--r--usr/src/man/man4d/vioscsi.4d92
-rw-r--r--usr/src/man/man4d/virtio.4d5
-rw-r--r--usr/src/pkg/manifests/driver-storage-vioscsi.p5m36
-rw-r--r--usr/src/uts/common/Makefile.files5
-rw-r--r--usr/src/uts/common/Makefile.rules5
-rw-r--r--usr/src/uts/common/io/vioscsi/vioscsi.c1636
-rw-r--r--usr/src/uts/common/io/vioscsi/vioscsi.h313
-rw-r--r--usr/src/uts/intel/Makefile.intel7
-rw-r--r--usr/src/uts/intel/vioscsi/Makefile47
14 files changed, 2155 insertions, 37 deletions
diff --git a/manifest b/manifest
index 3b3ebab85b..9b65edb77e 100644
--- a/manifest
+++ b/manifest
@@ -813,6 +813,7 @@ f kernel/drv/amd64/usbsprl 0755 root sys
f kernel/drv/amd64/vgatext 0755 root sys
f kernel/drv/amd64/vioblk 0755 root sys
f kernel/drv/amd64/vioif 0755 root sys
+f kernel/drv/amd64/vioscsi 0755 root sys
f kernel/drv/amd64/vmxnet 0755 root sys
f kernel/drv/amd64/vmxnet3s 0755 root sys
f kernel/drv/amd64/vnd 0755 root sys
@@ -19013,6 +19014,7 @@ f usr/share/man/man4d/usbsksp.4d 0444 root bin
f usr/share/man/man4d/usbsprl.4d 0444 root bin
f usr/share/man/man4d/vioblk.4d 0444 root bin
f usr/share/man/man4d/vioif.4d 0444 root bin
+f usr/share/man/man4d/vioscsi.4d 0444 root bin
f usr/share/man/man4d/virtio.4d 0444 root bin
f usr/share/man/man4d/virtualkm.4d 0444 root bin
f usr/share/man/man4d/vnd.4d 0444 root bin
diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c
index 4de300ef65..dac9006a22 100644
--- a/usr/src/cmd/dladm/dladm.c
+++ b/usr/src/cmd/dladm/dladm.c
@@ -26,6 +26,7 @@
* Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright 2020 Peter Tribble.
* Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2021 RackTop Systems, Inc.
*/
#include <stdio.h>
@@ -787,9 +788,9 @@ static const ofmt_field_t aggr_x_fields[] = {
/* name, field width, index callback */
{ "LINK", 12, AGGR_X_LINK, print_xaggr_cb},
{ "PORT", 15, AGGR_X_PORT, print_xaggr_cb},
-{ "SPEED", 5, AGGR_X_SPEED, print_xaggr_cb},
-{ "DUPLEX", 10, AGGR_X_DUPLEX, print_xaggr_cb},
-{ "STATE", 10, AGGR_X_STATE, print_xaggr_cb},
+{ "SPEED", 9, AGGR_X_SPEED, print_xaggr_cb},
+{ "DUPLEX", 9, AGGR_X_DUPLEX, print_xaggr_cb},
+{ "STATE", 9, AGGR_X_STATE, print_xaggr_cb},
{ "ADDRESS", 19, AGGR_X_ADDRESS, print_xaggr_cb},
{ "PORTSTATE", 16, AGGR_X_PORTSTATE, print_xaggr_cb},
{ NULL, 0, 0, NULL}}
@@ -860,9 +861,9 @@ static const ofmt_field_t phys_fields[] = {
offsetof(link_fields_buf_t, link_phys_media), print_default_cb},
{ "STATE", 11,
offsetof(link_fields_buf_t, link_phys_state), print_default_cb},
-{ "SPEED", 7,
+{ "SPEED", 9,
offsetof(link_fields_buf_t, link_phys_speed), print_default_cb},
-{ "DUPLEX", 10,
+{ "DUPLEX", 9,
offsetof(link_fields_buf_t, link_phys_duplex), print_default_cb},
{ "DEVICE", 13,
offsetof(link_fields_buf_t, link_phys_device), print_default_cb},
@@ -1042,7 +1043,7 @@ typedef struct vnic_fields_buf_s
{
char vnic_link[DLPI_LINKNAME_MAX];
char vnic_over[DLPI_LINKNAME_MAX];
- char vnic_speed[6];
+ char vnic_speed[10];
char vnic_macaddr[18];
char vnic_macaddrtype[19];
char vnic_vid[6];
@@ -1054,7 +1055,7 @@ static const ofmt_field_t vnic_fields[] = {
offsetof(vnic_fields_buf_t, vnic_link), print_default_cb},
{ "OVER", 11,
offsetof(vnic_fields_buf_t, vnic_over), print_default_cb},
-{ "SPEED", 6,
+{ "SPEED", 9,
offsetof(vnic_fields_buf_t, vnic_speed), print_default_cb},
{ "MACADDRESS", 18,
offsetof(vnic_fields_buf_t, vnic_macaddr), print_default_cb},
diff --git a/usr/src/common/avl/avl.c b/usr/src/common/avl/avl.c
index 0411afb4c5..ed752bde3d 100644
--- a/usr/src/common/avl/avl.c
+++ b/usr/src/common/avl/avl.c
@@ -105,21 +105,6 @@
#include <sys/cmn_err.h>
/*
- * Small arrays to translate between balance (or diff) values and child indices.
- *
- * Code that deals with binary tree data structures will randomly use
- * left and right children when examining a tree. C "if()" statements
- * which evaluate randomly suffer from very poor hardware branch prediction.
- * In this code we avoid some of the branch mispredictions by using the
- * following translation arrays. They replace random branches with an
- * additional memory reference. Since the translation arrays are both very
- * small the data should remain efficiently in cache.
- */
-static const int avl_child2balance[2] = {-1, 1};
-static const int avl_balance2child[] = {0, 0, 1};
-
-
-/*
* Walk from one node to the previous valued node (ie. an infix walk
* towards the left). At any given node we do one of 2 things:
*
@@ -274,8 +259,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
#endif
return (AVL_NODE2DATA(node, off));
}
- child = avl_balance2child[1 + diff];
-
+ child = (diff > 0);
}
if (where != NULL)
@@ -528,7 +512,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
* Compute the new balance
*/
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance + avl_child2balance[which_child];
+ new_balance = old_balance + (which_child ? 1 : -1);
/*
* If we introduced equal balance, then we are done immediately
@@ -708,7 +692,7 @@ avl_remove(avl_tree_t *tree, void *data)
* choose node to swap from whichever side is taller
*/
old_balance = AVL_XBALANCE(delete);
- left = avl_balance2child[old_balance + 1];
+ left = (old_balance > 0);
right = 1 - left;
/*
@@ -792,7 +776,7 @@ avl_remove(avl_tree_t *tree, void *data)
*/
node = parent;
old_balance = AVL_XBALANCE(node);
- new_balance = old_balance - avl_child2balance[which_child];
+ new_balance = old_balance - (which_child ? 1 : -1);
parent = AVL_XPARENT(node);
which_child = AVL_XCHILD(node);
diff --git a/usr/src/man/man4d/Makefile b/usr/src/man/man4d/Makefile
index 2b20c60b32..4a6402de9c 100644
--- a/usr/src/man/man4d/Makefile
+++ b/usr/src/man/man4d/Makefile
@@ -17,6 +17,7 @@
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2020 Peter Tribble
# Copyright 2021 Oxide Computer Company
+# Copyright 2022 RackTop Systems, Inc.
#
include $(SRC)/Makefile.master
@@ -245,6 +246,7 @@ i386_MANFILES= ahci.4d \
usmn.4d \
vioblk.4d \
vioif.4d \
+ vioscsi.4d \
virtio.4d \
wpi.4d \
xhci.4d \
diff --git a/usr/src/man/man4d/vioblk.4d b/usr/src/man/man4d/vioblk.4d
index 4d7a2b1d9a..c8a751ec8d 100644
--- a/usr/src/man/man4d/vioblk.4d
+++ b/usr/src/man/man4d/vioblk.4d
@@ -11,7 +11,7 @@
.\"
.\" Copyright 2020 Oxide Computer Company
.\"
-.Dd August 28, 2021
+.Dd June 14, 2022
.Dt VIOBLK 4D
.Os
.Sh NAME
@@ -82,6 +82,7 @@ x86 device driver.
.El
.Sh SEE ALSO
.Xr blkdev 4D ,
+.Xr vioscsi 4D ,
.Xr virtio 4D ,
.Xr dkio 4I ,
.Xr diskinfo 8
diff --git a/usr/src/man/man4d/vioscsi.4d b/usr/src/man/man4d/vioscsi.4d
new file mode 100644
index 0000000000..2058fd65ed
--- /dev/null
+++ b/usr/src/man/man4d/vioscsi.4d
@@ -0,0 +1,92 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2022 RackTop Systems, Inc.
+.\"
+.Dd June 17, 2022
+.Dt VIOSCSI 4D
+.Os
+.Sh NAME
+.Nm vioscsi
+.Nd virtio SCSI driver
+.Sh DESCRIPTION
+The
+.Nm
+driver provides a virtual SCSI transport, allowing
+access to
+.Xr virtio 4D
+based SCSI targets and logical units.
+The driver supports the following capabilities:
+.Bl -dash
+.It
+Dynamic hot-plug (if supported by the host)
+.It
+Honors packet timeouts specified in
+.Xr scsi_pkt 9S
+.It
+Reset of target or logical unit via
+.Xr scsi_reset 9F
+.It
+Abort for individual commands via
+.Xr scsi_abort 9F
+.It
+Command queueing (both tagged and untagged)
+.It
+Honors per logical unit queue depth from device
+.It
+Up to 255 targets, and 16384 logical units per PCI function
+.El
+.Pp
+The
+.Nm
+driver is based on
+.Xr iport 9 ,
+and uses a single iport per PCI function,
+with a unit-address of "iport0".
+Children of the iport use a unit-address with the
+format "target,lun", where both target and lun are
+presented as hexadecimal values.
+.Sh NOTES
+The
+.Nm
+driver may be slightly less efficient than
+.Xr vioblk 4D ,
+but it may support use as boot media, as well
+as attachments to SCSI pass-through devices, which
+may include devices such as tape drives via
+.Xr st 4D
+and enclosures via
+.Xr ses 4D .
+.Sh ARCHITECTURE
+The
+.Nm
+driver is only supported on
+.Sy x86 .
+.Sh FILES
+.Bl -tag -width Pa
+.It Pa /kernel/drv/amd64/vioscsi
+x86 device driver.
+.El
+.Sh SEE ALSO
+.Xr sd 4D ,
+.Xr ses 4D ,
+.Xr st 4D ,
+.Xr vioblk 4D ,
+.Xr virtio 4D ,
+.Xr iport 9 ,
+.Xr scsi_abort 9F ,
+.Xr scsi_reset 9F ,
+.Xr scsi_pkt 9S
+.Rs
+.%T Virtual I/O Device (VIRTIO) Version 1.1
+.%D April, 2019
+.%U https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
+.Re
diff --git a/usr/src/man/man4d/virtio.4d b/usr/src/man/man4d/virtio.4d
index 4a424ba518..47cd03535f 100644
--- a/usr/src/man/man4d/virtio.4d
+++ b/usr/src/man/man4d/virtio.4d
@@ -11,7 +11,7 @@
.\"
.\" Copyright 2020 Oxide Computer Company
.\"
-.Dd October 3, 2020
+.Dd June 14, 2022
.Dt VIRTIO 4D
.Os
.Sh NAME
@@ -39,7 +39,8 @@ x86 device driver.
.El
.Sh SEE ALSO
.Xr vioblk 4D ,
-.Xr vioif 4D
+.Xr vioif 4D ,
+.Xr vioscsi 4D
.Rs
.%T Virtual I/O Device (VIRTIO) Version 1.1
.%D April, 2019
diff --git a/usr/src/pkg/manifests/driver-storage-vioscsi.p5m b/usr/src/pkg/manifests/driver-storage-vioscsi.p5m
new file mode 100644
index 0000000000..9693a137b8
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-storage-vioscsi.p5m
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
+# Copyright 2022 RackTop Systems, Inc.
+#
+
+#
+# The default for payload-bearing actions in this package is to appear in the
+# global zone only. See the include file for greater detail, as well as
+# information about overriding the defaults.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/driver/storage/vioscsi@$(PKGVERS)
+set name=pkg.summary value="Virtio SCSI"
+set name=pkg.description value="Virtio SCSI driver"
+set name=info.classification value=org.opensolaris.category.2008:Drivers/Storage
+set name=variant.arch value=$(ARCH)
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+file path=kernel/drv/$(ARCH64)/vioscsi group=sys
+dir path=usr/share/man
+dir path=usr/share/man/man4d
+file path=usr/share/man/man4d/vioscsi.4d
+driver name=vioscsi class=scsi-self-identifying alias=pci1af4,1004
+license lic_CDDL license=lic_CDDL
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 3aacd2dde2..800bf9fbde 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -28,7 +28,7 @@
# Copyright 2021 Joyent, Inc.
# Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
-# Copyright 2020 RackTop Systems, Inc.
+# Copyright 2022 RackTop Systems, Inc.
# Copyright 2021 Oxide Computer Company
#
@@ -2133,6 +2133,9 @@ VIOBLK_OBJS = vioblk.o
# Virtio network driver
VIOIF_OBJS = vioif.o
+# Virtio SCSI driver
+VIOSCSI_OBJS = vioscsi.o
+
#
# kiconv modules
#
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 0ae5c754b4..b047275e06 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -27,6 +27,7 @@
# Copyright 2018 Nexenta Systems, Inc.
# Copyright (c) 2017 by Delphix. All rights reserved.
# Copyright 2021 Oxide Computer Company
+# Copyright 2022 RackTop Systems, Inc.
#
#
@@ -1552,6 +1553,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioif/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioscsi/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.c b/usr/src/uts/common/io/vioscsi/vioscsi.c
new file mode 100644
index 0000000000..1e35252a6c
--- /dev/null
+++ b/usr/src/uts/common/io/vioscsi/vioscsi.c
@@ -0,0 +1,1636 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
+ * Copyright 2022 RackTop Systems, Inc.
+ */
+
+#include "vioscsi.h"
+
+static char vioscsi_ident[] = "VIRTIO SCSI driver";
+
+static uint_t vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2);
+static uint_t vioscsi_evt_handler(caddr_t arg1, caddr_t arg2);
+static uint_t vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2);
+
+static int vioscsi_tran_getcap(struct scsi_address *, char *, int);
+static int vioscsi_tran_setcap(struct scsi_address *, char *, int, int);
+static int vioscsi_tran_reset(struct scsi_address *, int);
+
+static int vioscsi_tran_start(struct scsi_address *, struct scsi_pkt *);
+static int vioscsi_tran_abort(struct scsi_address *, struct scsi_pkt *);
+
+static int vioscsi_iport_attach(dev_info_t *);
+static int vioscsi_iport_detach(dev_info_t *);
+
+static int vioscsi_req_init(vioscsi_softc_t *, vioscsi_request_t *,
+ virtio_queue_t *, int);
+static void vioscsi_req_fini(vioscsi_request_t *);
+static boolean_t vioscsi_req_abort(vioscsi_softc_t *, vioscsi_request_t *);
+static void vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target);
+static void vioscsi_discover(void *);
+
+/*
+ * DMA attributes. We support a linked list, but most of our uses require a
+ * single aligned buffer. The HBA buffers will use a copy of this adjusted for
+ * the actual virtio limits.
+ */
+static ddi_dma_attr_t virtio_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull,
+ .dma_attr_count_max = 0x00000000FFFFFFFFull,
+ .dma_attr_align = 1,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0xFFFFFFFFull,
+ .dma_attr_seg = 0xFFFFFFFFFFFFFFFFull,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0,
+};
+
+/*
+ * this avoids calls to drv_usectohz that might be expensive:
+ */
+static clock_t vioscsi_hz;
+
+static boolean_t
+vioscsi_poll_until(vioscsi_softc_t *sc, vioscsi_request_t *req,
+ ddi_intr_handler_t func, clock_t until)
+{
+ until *= 1000000; /* convert to usec */
+ while (until > 0) {
+ (void) func((caddr_t)sc, NULL);
+ if (req->vr_done) {
+ return (B_TRUE);
+ }
+ drv_usecwait(10);
+ until -= 10;
+ }
+ atomic_or_8(&req->vr_expired, 1);
+ return (B_FALSE);
+}
+
+/*
+ * Issue a task management function (task abort, LUN reset, or target reset)
+ * on the control queue and synchronously wait (up to 5 seconds) for the
+ * device to respond.  "task" is the request being aborted, or NULL for
+ * resets.  Returns B_TRUE only if the device reports success.
+ */
+static boolean_t
+vioscsi_tmf(vioscsi_softc_t *sc, uint32_t func, uint8_t target, uint16_t lun,
+    vioscsi_request_t *task)
+{
+	vioscsi_request_t req;
+	vioscsi_tmf_res_t *res;
+	vioscsi_tmf_req_t *tmf;
+	uint8_t response;
+
+	bzero(&req, sizeof (req));
+
+	if (vioscsi_req_init(sc, &req, sc->vs_ctl_vq, KM_NOSLEEP) != 0) {
+		return (B_FALSE);
+	}
+
+	tmf = &req.vr_req->tmf;
+	res = &req.vr_res->tmf;
+
+	tmf->type = VIRTIO_SCSI_T_TMF;
+	tmf->subtype = func;
+	tmf->lun[0] = 1;
+	tmf->lun[1] = target;
+	tmf->lun[2] = 0x40 | (lun >> 8);
+	tmf->lun[3] = lun & 0xff;
+	tmf->tag = (uint64_t)task;
+
+	virtio_chain_clear(req.vr_vic);
+
+	/*
+	 * Unlike the timeout path further down, it is safe to release the
+	 * request resources on these failure paths: the chain was never
+	 * submitted, so the device holds no reference to it.
+	 */
+	if (virtio_chain_append(req.vr_vic, req.vr_req_pa, sizeof (*tmf),
+	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+		vioscsi_req_fini(&req);
+		return (B_FALSE);
+	}
+
+	if (virtio_chain_append(req.vr_vic, req.vr_res_pa, sizeof (*res),
+	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+		vioscsi_req_fini(&req);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Make sure the device can see our request:
+	 */
+	virtio_dma_sync(req.vr_dma, DDI_DMA_SYNC_FORDEV);
+
+	/*
+	 * Push chain into the queue:
+	 */
+	virtio_chain_submit(req.vr_vic, B_TRUE);
+
+	/*
+	 * Wait for it to complete -- these should always complete in a tiny
+	 * amount of time.  Give it 5 seconds to be sure.
+	 */
+	if (!vioscsi_poll_until(sc, &req, vioscsi_ctl_handler, 5)) {
+		/*
+		 * We timed out -- this should *NEVER* happen!
+		 * There is no safe way to deal with this if it occurs, so we
+		 * just warn and leak the resources. Plan for a reboot soon.
+		 */
+		dev_err(sc->vs_dip, CE_WARN,
+		    "task mgmt timeout! (target %d lun %d)", target, lun);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Sync for the kernel and capture the device's response *before*
+	 * vioscsi_req_fini() frees the DMA memory backing it -- reading
+	 * res->response after the fini would be a use-after-free.
+	 */
+	virtio_dma_sync(req.vr_dma, DDI_DMA_SYNC_FORKERNEL);
+	response = res->response;
+
+	vioscsi_req_fini(&req);
+
+	switch (response) {
+	case VIRTIO_SCSI_S_OK:
+	case VIRTIO_SCSI_S_FUNCTION_SUCCEEDED:
+		break;
+	default:
+		return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+static boolean_t
+vioscsi_lun_reset(vioscsi_softc_t *sc, uint8_t target, uint16_t lun)
+{
+ return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET,
+ target, lun, NULL));
+}
+
+static boolean_t
+vioscsi_target_reset(vioscsi_softc_t *sc, uint8_t target)
+{
+ return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET,
+ target, 0, NULL));
+}
+
+static boolean_t
+vioscsi_req_abort(vioscsi_softc_t *sc, vioscsi_request_t *req)
+{
+ return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_ABORT_TASK,
+ req->vr_target, req->vr_lun, req));
+}
+
+static void
+vioscsi_dev_abort(vioscsi_dev_t *vd)
+{
+ vioscsi_request_t *req;
+ list_t *l = &vd->vd_reqs;
+
+ mutex_enter(&vd->vd_lock);
+ for (req = list_head(l); req != NULL; req = list_next(l, req)) {
+ (void) vioscsi_tmf(vd->vd_sc, VIRTIO_SCSI_T_TMF_ABORT_TASK,
+ req->vr_target, req->vr_lun, req);
+ }
+ mutex_exit(&vd->vd_lock);
+}
+
+static void
+vioscsi_dev_timeout(void *arg)
+{
+ vioscsi_dev_t *vd = arg;
+ vioscsi_softc_t *sc = vd->vd_sc;
+ vioscsi_request_t *req;
+ timeout_id_t tid;
+ clock_t now;
+ list_t *l;
+
+ mutex_enter(&vd->vd_lock);
+ if ((tid = vd->vd_timeout) == 0) {
+ /*
+ * We are shutting down, stop and do not reschedule.
+ */
+ mutex_exit(&vd->vd_lock);
+ return;
+ }
+ vd->vd_timeout = 0;
+
+ now = ddi_get_lbolt();
+ l = &vd->vd_reqs;
+
+ for (req = list_head(l); req != NULL; req = list_next(l, req)) {
+ /*
+ * The list is sorted by expiration time, so if we reach an
+ * item that hasn't expired yet, we're done.
+ */
+ if (now < req->vr_expire) {
+ break;
+ }
+ atomic_or_8(&req->vr_expired, 1);
+
+ /*
+ * This command timed out, so send an abort.
+ */
+ dev_err(sc->vs_dip, CE_WARN, "cmd timed out (%ds)",
+ (int)req->vr_time);
+ (void) vioscsi_req_abort(sc, req);
+ }
+
+ if (!list_is_empty(l)) {
+ /*
+ * Check again in a second.
+ * If these wake ups are too expensive, we could
+ * calculate other timeouts, but that would require
+ * doing untimeout if we want to wake up earlier.
+ * This is probably cheaper, and certainly simpler.
+ */
+ vd->vd_timeout = timeout(vioscsi_dev_timeout, vd, vioscsi_hz);
+ }
+ mutex_exit(&vd->vd_lock);
+}
+
+static void
+vioscsi_poll(vioscsi_softc_t *sc, vioscsi_request_t *req)
+{
+ if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, req->vr_time)) {
+ return;
+ }
+
+ /*
+ * Try a "gentle" task abort -- timeouts may be quasi-normal for some
+ * types of requests and devices.
+ */
+ if (vioscsi_req_abort(sc, req) &&
+ vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) {
+ return;
+ }
+
+ /*
+ * A little more forceful with a lun reset:
+ */
+ if (vioscsi_lun_reset(sc, req->vr_target, req->vr_lun) &&
+ vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) {
+ return;
+ }
+
+ /*
+ * If all else fails, reset the target, and keep trying.
+ * This can wind up blocking forever, but if it does it means we are in
+ * a very bad situation (and the virtio device is busted).
+ * We may also be leaking request structures at this point, but only at
+ * the maximum rate of one per minute.
+ */
+ for (;;) {
+ dev_err(sc->vs_dip, CE_WARN, "request stuck, resetting target");
+ (void) vioscsi_target_reset(sc, req->vr_target);
+ if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 60)) {
+ return;
+ }
+ }
+}
+
+static void
+vioscsi_start(vioscsi_softc_t *sc, vioscsi_request_t *req)
+{
+ vioscsi_cmd_req_t *cmd = &req->vr_req->cmd;
+
+ req->vr_done = 0;
+ req->vr_expired = 0;
+ cmd->lun[0] = 1;
+ cmd->lun[1] = req->vr_target;
+ cmd->lun[2] = 0x40 | ((req->vr_lun >> 8) & 0xff);
+ cmd->lun[3] = req->vr_lun & 0xff;
+ cmd->lun[4] = 0;
+ cmd->lun[5] = 0;
+ cmd->lun[6] = 0;
+ cmd->lun[7] = 0;
+ cmd->tag = (uint64_t)req;
+ cmd->prio = 0;
+ cmd->crn = 0;
+ cmd->task_attr = req->vr_task_attr;
+
+ /*
+ * Make sure the device can see our CDB data:
+ */
+ virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORDEV);
+
+ /*
+ * Determine whether we expect to poll before submitting (because we
+ * cannot touch the request after submission if we are not polling).
+ */
+ if (req->vr_poll) {
+ /*
+ * Push chain into the queue:
+ */
+ virtio_chain_submit(req->vr_vic, B_TRUE);
+
+ /*
+ * NB: Interrupts may be enabled, or might not be. It is fine
+ * either way.
+ */
+ vioscsi_poll(sc, req);
+ } else {
+ /*
+ * Push chain into the queue:
+ */
+ virtio_chain_submit(req->vr_vic, B_TRUE);
+ }
+}
+
+static int
+vioscsi_tran_start(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+	struct scsi_device *sd = scsi_address_device(ap);
+	vioscsi_dev_t *vd = scsi_device_hba_private_get(sd);
+	vioscsi_request_t *req = pkt->pkt_ha_private;
+	virtio_chain_t *vic = req->vr_vic;
+	vioscsi_cmd_req_t *cmd = &req->vr_req->cmd;
+	vioscsi_cmd_res_t *res = &req->vr_res->cmd;
+
+	if (pkt->pkt_cdbp == NULL) {
+		return (TRAN_BADPKT);
+	}
+
+	bzero(cmd, sizeof (*cmd));
+	bcopy(pkt->pkt_cdbp, cmd->cdb, pkt->pkt_cdblen);
+
+	/*
+	 * Default expiration is 10 seconds, clip at an hour.
+	 * (order of operations here is to avoid wrapping, if run in a 32-bit
+	 * kernel)
+	 */
+	req->vr_time = min(pkt->pkt_time ? pkt->pkt_time : 10, 3600);
+	req->vr_dev = vd;
+	req->vr_poll = ((pkt->pkt_flags & FLAG_NOINTR) != 0);
+	req->vr_target = vd->vd_target;
+	req->vr_lun = vd->vd_lun;
+	req->vr_start = ddi_get_lbolt();
+	req->vr_expire = req->vr_start + req->vr_time * vioscsi_hz;
+
+	/*
+	 * Configure task queuing behavior:
+	 */
+	if (pkt->pkt_flags & (FLAG_HTAG|FLAG_HEAD)) {
+		req->vr_task_attr = VIRTIO_SCSI_S_HEAD;
+	} else if (pkt->pkt_flags & FLAG_OTAG) {
+		req->vr_task_attr = VIRTIO_SCSI_S_ORDERED;
+	} else if (pkt->pkt_flags & FLAG_SENSING) {
+		req->vr_task_attr = VIRTIO_SCSI_S_ACA;
+	} else { /* FLAG_STAG is also our default */
+		req->vr_task_attr = VIRTIO_SCSI_S_SIMPLE;
+	}
+
+	/*
+	 * Make sure we start with a clear chain:
+	 */
+	virtio_chain_clear(vic);
+
+	/*
+	 * The KVM SCSI emulation requires that all outgoing buffers are added
+	 * first with the request header being the first entry. After the
+	 * outgoing have been added then the incoming buffers with the response
+	 * buffer being the first of the incoming. This requirement is
+	 * independent of using chained ring entries or one ring entry with
+	 * indirect buffers.
+	 */
+
+	/*
+	 * Add request header:
+	 */
+	if (virtio_chain_append(vic, req->vr_req_pa, sizeof (*cmd),
+	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+		return (TRAN_BUSY);
+	}
+
+	/*
+	 * Add write buffers:
+	 */
+	if (pkt->pkt_dma_flags & DDI_DMA_WRITE) {
+		for (int i = 0; i < pkt->pkt_numcookies; i++) {
+			if (virtio_chain_append(vic,
+			    pkt->pkt_cookies[i].dmac_laddress,
+			    pkt->pkt_cookies[i].dmac_size,
+			    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+				return (TRAN_BUSY);
+			}
+		}
+	}
+
+	/*
+	 * Add response header:
+	 */
+	if (virtio_chain_append(vic, req->vr_res_pa, sizeof (*res),
+	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+		return (TRAN_BUSY);
+	}
+
+	/*
+	 * Add read buffers:
+	 */
+	if (pkt->pkt_dma_flags & DDI_DMA_READ) {
+		for (int i = 0; i < pkt->pkt_numcookies; i++) {
+			if (virtio_chain_append(vic,
+			    pkt->pkt_cookies[i].dmac_laddress,
+			    pkt->pkt_cookies[i].dmac_size,
+			    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+				return (TRAN_BUSY);
+			}
+		}
+	}
+
+	/*
+	 * Check for queue depth, and add to the timeout list:
+	 */
+	mutex_enter(&vd->vd_lock);
+	if (vd->vd_num_cmd >= vd->vd_max_cmd) {
+		mutex_exit(&vd->vd_lock);
+		return (TRAN_BUSY);
+	}
+	vd->vd_num_cmd++;
+
+	if (!req->vr_poll) {
+		/*
+		 * Add the request to the timeout list.
+		 *
+		 * In order to minimize the work done during timeout handling,
+		 * we keep requests sorted. This assumes that requests mostly
+		 * have the same timeout, and requests with long timeouts are
+		 * infrequent.
+		 */
+		list_t *l = &vd->vd_reqs;
+		vioscsi_request_t *r;
+
+		for (r = list_tail(l); r != NULL; r = list_prev(l, r)) {
+			/*
+			 * Avoids wrapping lbolt:
+			 */
+			if ((req->vr_expire - r->vr_expire) >= 0) {
+				list_insert_after(l, r, req);
+				break;
+			}
+		}
+		if (r == NULL) {
+			/*
+			 * List empty, or this one expires before others:
+			 * it must become the new head, as the timeout scan
+			 * walks from the head and stops at the first entry
+			 * that has not yet expired.
+			 */
+			list_insert_head(l, req);
+		}
+		if (vd->vd_timeout == 0) {
+			vd->vd_timeout = timeout(vioscsi_dev_timeout, vd,
+			    vioscsi_hz);
+		}
+	}
+
+	mutex_exit(&vd->vd_lock);
+
+	vioscsi_start(vd->vd_sc, req);
+	return (TRAN_ACCEPT);
+}
+
+static int
+vioscsi_tran_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ struct scsi_device *sd;
+ vioscsi_dev_t *vd;
+ vioscsi_request_t *req;
+
+ if ((ap == NULL) ||
+ ((sd = scsi_address_device(ap)) == NULL) ||
+ ((vd = scsi_device_hba_private_get(sd)) == NULL)) {
+ return (0);
+ }
+ if (pkt == NULL) {
+ /*
+ * Abort all requests for the LUN.
+ */
+ vioscsi_dev_abort(vd);
+ return (1);
+ }
+ if ((req = pkt->pkt_ha_private) != NULL) {
+ return (vioscsi_req_abort(vd->vd_sc, req) ? 1 : 0);
+ }
+
+ return (0);
+}
+
+static void
+vioscsi_req_fini(vioscsi_request_t *req)
+{
+ if (req->vr_dma != NULL) {
+ virtio_dma_free(req->vr_dma);
+ req->vr_dma = NULL;
+ }
+ if (req->vr_vic != NULL) {
+ virtio_chain_free(req->vr_vic);
+ req->vr_vic = NULL;
+ }
+}
+
+static int
+vioscsi_req_init(vioscsi_softc_t *sc, vioscsi_request_t *req,
+ virtio_queue_t *vq, int sleep)
+{
+ uint64_t pa;
+
+ bzero(req, sizeof (*req));
+ list_link_init(&req->vr_node);
+ req->vr_vq = vq;
+ req->vr_dma = virtio_dma_alloc(sc->vs_virtio, sizeof (vioscsi_op_t),
+ &virtio_dma_attr, DDI_DMA_STREAMING | DDI_DMA_READ | DDI_DMA_WRITE,
+ sleep);
+ req->vr_vic = virtio_chain_alloc(vq, sleep);
+ if ((req->vr_dma == NULL) || (req->vr_vic == NULL)) {
+ return (-1);
+ }
+ virtio_chain_data_set(req->vr_vic, req);
+ req->vr_req = virtio_dma_va(req->vr_dma, VIOSCSI_REQ_OFFSET);
+ req->vr_res = virtio_dma_va(req->vr_dma, VIOSCSI_RES_OFFSET);
+ pa = virtio_dma_cookie_pa(req->vr_dma, 0);
+ req->vr_req_pa = pa + VIOSCSI_REQ_OFFSET;
+ req->vr_res_pa = pa + VIOSCSI_RES_OFFSET;
+ return (0);
+}
+
+static void
+vioscsi_tran_pkt_destructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran)
+{
+ vioscsi_request_t *req = pkt->pkt_ha_private;
+
+ vioscsi_req_fini(req);
+}
+
+static int
+vioscsi_tran_pkt_constructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran,
+ int sleep)
+{
+ vioscsi_softc_t *sc = tran->tran_hba_private;
+ vioscsi_request_t *req = pkt->pkt_ha_private;
+
+ if (vioscsi_req_init(sc, req, sc->vs_cmd_vq, sleep) != 0) {
+ vioscsi_req_fini(req);
+ return (-1);
+ }
+ req->vr_pkt = pkt;
+ return (0);
+}
+
+static int
+vioscsi_tran_setup_pkt(struct scsi_pkt *pkt, int (*cb)(caddr_t), caddr_t arg)
+{
+ if ((pkt->pkt_dma_flags & DDI_DMA_RDWR) == DDI_DMA_RDWR) {
+ /*
+ * We can do read, or write, but not both.
+ */
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+vioscsi_tran_teardown_pkt(struct scsi_pkt *pkt)
+{
+ vioscsi_request_t *req = pkt->pkt_ha_private;
+ virtio_chain_t *vic = req->vr_vic;
+
+ virtio_chain_clear(vic);
+}
+
+static int
+vioscsi_tran_getcap(struct scsi_address *ap, char *cap, int whom)
+{
+ int rval = 0;
+ vioscsi_softc_t *sc = ap->a_hba_tran->tran_hba_private;
+
+ if (cap == NULL)
+ return (-1);
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_CDB_LEN:
+ rval = sc->vs_cdb_size;
+ break;
+
+ case SCSI_CAP_ARQ:
+ case SCSI_CAP_LUN_RESET:
+ case SCSI_CAP_TAGGED_QING:
+ case SCSI_CAP_UNTAGGED_QING:
+ rval = 1;
+ break;
+
+ default:
+ rval = -1;
+ }
+ return (rval);
+}
+
+static int
+vioscsi_tran_setcap(struct scsi_address *ap, char *cap, int value, int whom)
+{
+ int rval = 1;
+
+ if (cap == NULL || whom == 0) {
+ return (-1);
+ }
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ default:
+ rval = 1;
+ }
+ return (rval);
+}
+
+static int
+vioscsi_tran_reset(struct scsi_address *ap, int level)
+{
+ struct scsi_device *sd;
+ vioscsi_dev_t *vd;
+
+ if ((ap == NULL) ||
+ ((sd = scsi_address_device(ap)) == NULL) ||
+ ((vd = scsi_device_hba_private_get(sd)) == NULL)) {
+ return (0);
+ }
+
+ switch (level) {
+ case RESET_LUN:
+ if (vioscsi_lun_reset(vd->vd_sc, vd->vd_target, vd->vd_lun)) {
+ return (1);
+ }
+ break;
+ case RESET_TARGET:
+ if (vioscsi_target_reset(vd->vd_sc, vd->vd_target)) {
+ return (1);
+ }
+ break;
+ case RESET_ALL:
+ default:
+ break;
+ }
+ return (0);
+}
+
+static boolean_t
+vioscsi_parse_unit_address(const char *ua, int *tgt, int *lun)
+{
+ long num;
+ char *end;
+
+ if ((ddi_strtol(ua, &end, 16, &num) != 0) ||
+ ((*end != ',') && (*end != 0))) {
+ return (B_FALSE);
+ }
+ *tgt = (int)num;
+ if (*end == 0) {
+ *lun = 0;
+ return (B_TRUE);
+ }
+ end++; /* skip comma */
+ if ((ddi_strtol(end, &end, 16, &num) != 0) || (*end != 0)) {
+ return (B_FALSE);
+ }
+ *lun = (int)num;
+ return (B_TRUE);
+}
+
+uint_t
+vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2)
+{
+ vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
+ virtio_chain_t *vic;
+
+ while ((vic = virtio_queue_poll(sc->vs_ctl_vq)) != NULL) {
+ vioscsi_request_t *req;
+
+ if ((req = virtio_chain_data(vic)) == NULL) {
+ dev_err(sc->vs_dip, CE_WARN, "missing ctl chain data");
+ continue;
+ }
+ atomic_or_8(&req->vr_done, 1);
+ }
+ return (DDI_INTR_CLAIMED);
+}
+
+uint_t
+vioscsi_evt_handler(caddr_t arg1, caddr_t arg2)
+{
+ vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
+ virtio_chain_t *vic;
+ boolean_t missed = B_FALSE;
+
+ while ((vic = virtio_queue_poll(sc->vs_evt_vq)) != NULL) {
+ vioscsi_evt_t *evt;
+ vioscsi_event_t *ve;
+ uint8_t target;
+
+ if ((ve = virtio_chain_data(vic)) == NULL) {
+ /*
+ * This should never occur, it's a bug if it does.
+ */
+ dev_err(sc->vs_dip, CE_WARN, "missing evt chain data");
+ continue;
+ }
+ evt = ve->ve_evt;
+
+ virtio_dma_sync(ve->ve_dma, DDI_DMA_SYNC_FORKERNEL);
+
+ target = evt->lun[1];
+ switch (evt->event & 0x7FFFFFFF) {
+ case VIRTIO_SCSI_T_TRANSPORT_RESET:
+ switch (evt->reason) {
+ case VIRTIO_SCSI_EVT_RESET_HARD:
+ /*
+ * We could reset-notify, but this doesn't seem
+ * to get fired for targets initiated from
+ * host.
+ */
+ break;
+ case VIRTIO_SCSI_EVT_RESET_REMOVED:
+ case VIRTIO_SCSI_EVT_RESET_RESCAN:
+ /*
+ * We can treat these the same for the target,
+ * and not worry about the actual LUN id here.
+ */
+ vioscsi_lun_changed(sc, target);
+ break;
+ default:
+ /*
+ * Some other event we don't know about.
+ */
+ break;
+ }
+ break;
+ case VIRTIO_SCSI_T_NO_EVENT:
+ /*
+ * If this happens, we missed some event(s).
+ */
+ missed = B_TRUE;
+ break;
+ case VIRTIO_SCSI_T_ASYNC_NOTIFY:
+ /*
+ * We don't register for these, so we don't expect
+ * them.
+ */
+ break;
+ }
+
+ if (evt->event & VIRTIO_SCSI_T_EVENTS_MISSED) {
+ missed = B_TRUE;
+ }
+
+ /*
+ * Resubmit the chain for the next event.
+ */
+ virtio_chain_submit(vic, B_TRUE);
+ }
+
+ if (missed) {
+ (void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc,
+ DDI_NOSLEEP);
+ }
+
+ return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * vioscsi_cmd_handler is the interrupt handler for the command virtqueue.
+ * It reaps completed request chains, translates the virtio response code
+ * into SCSA pkt_reason/pkt_state/pkt_statistics, fills in ARQ sense data
+ * for CHECK CONDITION, and completes (or marks done, for polled callers)
+ * each request.  Always returns DDI_INTR_CLAIMED.
+ */
+uint_t
+vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2)
+{
+	vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
+	virtio_chain_t *vic;
+
+	while ((vic = virtio_queue_poll(sc->vs_cmd_vq)) != NULL) {
+
+		vioscsi_request_t *req;
+		vioscsi_dev_t *vd;
+		struct scsi_pkt *pkt;
+		struct virtio_scsi_cmd_resp *res;
+
+		if ((req = virtio_chain_data(vic)) == NULL) {
+			/*
+			 * This should never occur, it's a bug if it does.
+			 */
+			dev_err(sc->vs_dip, CE_WARN, "missing cmd chain data");
+			continue;
+		}
+
+		virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORKERNEL);
+		res = &req->vr_res->cmd;
+		pkt = req->vr_pkt;
+
+		if (pkt == NULL) {
+			/*
+			 * This is an internal request (from discovery), and
+			 * doesn't have an associated SCSI pkt structure. In
+			 * this case, the notification we've done is
+			 * sufficient, and the submitter will examine the
+			 * response field directly.
+			 */
+			if (req->vr_poll) {
+				atomic_or_8(&req->vr_done, 1);
+			}
+			continue;
+		}
+
+		/*
+		 * Drop the request from its device's outstanding list and
+		 * release the per-LUN command slot it was consuming.
+		 */
+		if ((vd = req->vr_dev) != NULL) {
+			mutex_enter(&vd->vd_lock);
+			vd->vd_num_cmd--;
+			list_remove(&vd->vd_reqs, req);
+			mutex_exit(&vd->vd_lock);
+		}
+
+		switch (res->response) {
+
+		case VIRTIO_SCSI_S_OK:
+			/*
+			 * Request processed successfully, check SCSI status.
+			 */
+			pkt->pkt_scbp[0] = res->status;
+			pkt->pkt_resid = 0;
+			pkt->pkt_reason = CMD_CMPLT;
+			pkt->pkt_state =
+			    STATE_GOT_BUS | STATE_GOT_TARGET |
+			    STATE_SENT_CMD | STATE_GOT_STATUS;
+			if ((pkt->pkt_numcookies > 0) &&
+			    (pkt->pkt_cookies[0].dmac_size > 0)) {
+				pkt->pkt_state |= STATE_XFERRED_DATA;
+			}
+
+			/*
+			 * For CHECK_CONDITION, fill out the ARQ details:
+			 */
+			if (res->status == STATUS_CHECK) {
+				size_t slen;
+
+				/*
+				 * ARQ status and arq structure:
+				 */
+				pkt->pkt_state |= STATE_ARQ_DONE;
+				pkt->pkt_scbp[1] = STATUS_GOOD;
+				struct scsi_arq_status *ars =
+				    (void *)pkt->pkt_scbp;
+				ars->sts_rqpkt_reason = CMD_CMPLT;
+				ars->sts_rqpkt_resid = 0;
+				ars->sts_rqpkt_state =
+				    STATE_GOT_BUS | STATE_GOT_TARGET |
+				    STATE_GOT_STATUS | STATE_SENT_CMD |
+				    STATE_XFERRED_DATA;
+				/*
+				 * Clamp to the destination size: sense_len
+				 * comes from the device and the wire sense
+				 * buffer (VIRTIO_SCSI_SENSE_SIZE) is larger
+				 * than sts_sensedata, so an unbounded copy
+				 * could overrun the ARQ structure.
+				 */
+				slen = res->sense_len;
+				if (slen > sizeof (ars->sts_sensedata)) {
+					slen = sizeof (ars->sts_sensedata);
+				}
+				bcopy(res->sense, &ars->sts_sensedata, slen);
+			}
+			break;
+
+		case VIRTIO_SCSI_S_BAD_TARGET:
+		case VIRTIO_SCSI_S_INCORRECT_LUN:
+			pkt->pkt_reason = CMD_DEV_GONE;
+			break;
+
+		case VIRTIO_SCSI_S_OVERRUN:
+			dev_err(sc->vs_dip, CE_WARN, "OVERRUN");
+			pkt->pkt_reason = CMD_DATA_OVR;
+			break;
+
+		case VIRTIO_SCSI_S_RESET:
+			pkt->pkt_reason = CMD_RESET;
+			pkt->pkt_statistics |= STAT_DEV_RESET;
+			break;
+
+		case VIRTIO_SCSI_S_ABORTED:
+			/*
+			 * Distinguish aborts we issued for a timed-out
+			 * request from all other aborts.
+			 */
+			if (req->vr_expired) {
+				pkt->pkt_statistics |= STAT_TIMEOUT;
+				pkt->pkt_reason = CMD_TIMEOUT;
+			} else {
+				pkt->pkt_reason = CMD_ABORTED;
+				pkt->pkt_statistics |= STAT_ABORTED;
+			}
+			break;
+
+		case VIRTIO_SCSI_S_BUSY:
+			/*
+			 * Busy, should have been caught at submission:
+			 */
+			pkt->pkt_reason = CMD_TRAN_ERR;
+			break;
+
+		default:
+			dev_err(sc->vs_dip, CE_WARN, "Unknown response: 0x%x",
+			    res->response);
+			pkt->pkt_reason = CMD_TRAN_ERR;
+			break;
+		}
+
+		if (!req->vr_poll) {
+			scsi_hba_pkt_comp(pkt);
+		} else {
+			atomic_or_8(&req->vr_done, 1);
+		}
+	}
+	return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * vioscsi_tran_tgt_init is the SCSA target-init entry point.  It is only
+ * valid on the iport; it parses the unit address into target/lun,
+ * allocates the per-device state (vioscsi_dev_t), hangs it off the
+ * scsi_device, and links it onto the softc's device list.
+ */
+static int
+vioscsi_tran_tgt_init(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran,
+    struct scsi_device *sd)
+{
+	const char *ua;
+	vioscsi_softc_t *sc;
+	int target;
+	int lun;
+	vioscsi_dev_t *vd;
+
+	if (scsi_hba_iport_unit_address(hdip) == NULL) {
+		return (DDI_FAILURE); /* only iport has targets */
+	}
+	if ((sc = tran->tran_hba_private) == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	if (((ua = scsi_device_unit_address(sd)) == NULL) ||
+	    (!vioscsi_parse_unit_address(ua, &target, &lun))) {
+		return (DDI_FAILURE);
+	}
+
+	vd = kmem_zalloc(sizeof (*vd), KM_SLEEP);
+	list_create(&vd->vd_reqs, sizeof (vioscsi_request_t),
+	    offsetof(vioscsi_request_t, vr_node));
+	/* Lock must be usable from interrupt context (cmd handler). */
+	mutex_init(&vd->vd_lock, NULL, MUTEX_DRIVER,
+	    virtio_intr_pri(sc->vs_virtio));
+
+	vd->vd_target = (uint8_t)target;
+	vd->vd_lun = (uint16_t)lun;
+	vd->vd_sc = sc;
+	vd->vd_sd = sd;
+	/* Per-LUN queue depth comes from the device configuration. */
+	vd->vd_max_cmd = sc->vs_cmd_per_lun;
+	vd->vd_num_cmd = 0;
+
+	scsi_device_hba_private_set(sd, vd);
+
+	mutex_enter(&sc->vs_lock);
+	list_insert_tail(&sc->vs_devs, vd);
+	mutex_exit(&sc->vs_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * vioscsi_tran_tgt_free undoes vioscsi_tran_tgt_init: it cancels any
+ * pending timeout, unlinks the device from the softc list, and frees the
+ * per-device state.
+ */
+static void
+vioscsi_tran_tgt_free(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran,
+    struct scsi_device *sd)
+{
+	vioscsi_dev_t *vd = scsi_device_hba_private_get(sd);
+	vioscsi_softc_t *sc = vd->vd_sc;
+	timeout_id_t tid;
+
+	scsi_device_hba_private_set(sd, NULL);
+
+	/*
+	 * Snapshot and clear the timeout id under the lock, then cancel it
+	 * outside the lock (untimeout may block on a running callback).
+	 */
+	mutex_enter(&vd->vd_lock);
+	tid = vd->vd_timeout;
+	vd->vd_timeout = 0;
+	mutex_exit(&vd->vd_lock);
+
+	if (tid != 0) {
+		(void) untimeout(tid);
+	}
+
+	mutex_enter(&sc->vs_lock);
+	list_remove(&sc->vs_devs, vd);
+	mutex_exit(&sc->vs_lock);
+
+	list_destroy(&vd->vd_reqs);
+	mutex_destroy(&vd->vd_lock);
+	kmem_free(vd, sizeof (*vd));
+}
+
+/*
+ * vioscsi_probe_target probes for existence of a valid target (LUN 0).
+ * It utilizes the supplied request, and sends TEST UNIT READY.
+ * (This command is used because it requires no data.)
+ * It returns 1 if the target is found, 0 if not, and -1 on error.
+ * It is expected additional LUNs will be discovered by the HBA framework using
+ * REPORT LUNS on LUN 0.
+ */
+static int
+vioscsi_probe_target(vioscsi_softc_t *sc, vioscsi_request_t *req,
+    uint8_t target)
+{
+	struct virtio_scsi_cmd_req *cmd = &req->vr_req->cmd;
+	struct virtio_scsi_cmd_resp *res = &req->vr_res->cmd;
+
+	/* Zeroed CDB with cdb[0] == 0 is TEST UNIT READY. */
+	bzero(cmd, sizeof (*cmd));
+	cmd->cdb[0] = SCMD_TEST_UNIT_READY;
+
+	/*
+	 * Rebuild the descriptor chain: request header (device reads)
+	 * followed by response buffer (device writes).
+	 */
+	virtio_chain_clear(req->vr_vic);
+	if (virtio_chain_append(req->vr_vic, req->vr_req_pa,
+	    sizeof (*cmd), VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+		return (-1);
+	}
+	if (virtio_chain_append(req->vr_vic, req->vr_res_pa,
+	    sizeof (*res), VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+		return (-1);
+	}
+	req->vr_poll = B_TRUE;
+	req->vr_start = ddi_get_lbolt();
+	req->vr_time = 10; /* seconds */
+	req->vr_target = target;
+	req->vr_lun = 0;
+	req->vr_task_attr = VIRTIO_SCSI_S_HEAD;
+	/*
+	 * NOTE(review): vioscsi_start is presumed to poll to completion
+	 * when vr_poll is set, so res->response is valid afterwards.
+	 */
+	vioscsi_start(sc, req);
+	switch (res->response) {
+	case VIRTIO_SCSI_S_OK:
+		return (1);
+	case VIRTIO_SCSI_S_INCORRECT_LUN:
+	case VIRTIO_SCSI_S_BAD_TARGET:
+		return (0);
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * vioscsi_null_complete is a no-op pkt completion callback.  It exists
+ * because vioscsi_dev_rescan needs a non-NULL pkt_comp for the framework
+ * to examine the faked UNIT ATTENTION status.
+ */
+static void
+vioscsi_null_complete(struct scsi_pkt *pkt)
+{
+	/*
+	 * This intentionally does nothing.
+	 */
+}
+
+/*
+ * vioscsi_dev_rescan fakes a completed command carrying a UNIT ATTENTION
+ * (REPORTED LUNS DATA HAS CHANGED) so the SCSI HBA framework re-runs LUN
+ * enumeration for this target.
+ */
+static void
+vioscsi_dev_rescan(vioscsi_dev_t *vd)
+{
+	struct scsi_pkt *pkt;
+	struct scsi_arq_status *aqs;
+	uint8_t *sense;
+
+	/*
+	 * This routine is a bit of a hack, to workaround the fact that we have
+	 * no other good way to convince the SCSI HBA framework to rescan for
+	 * new LUNs. What we do is fake an inquiry and complete it with a UNIT
+	 * ATTENTION indicating that the REPORT LUNS data has changed.
+	 */
+
+	pkt = scsi_init_pkt(&vd->vd_sd->sd_address, NULL, NULL, 6,
+	    sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC, NULL);
+	if (pkt == NULL) { /* should never happen with SLEEP_FUNC */
+		return;
+	}
+
+	/*
+	 * Must have a non-null completion routine in order to get the HBA
+	 * logic to check the UNIT ATTENTION STATUS.
+	 */
+	pkt->pkt_comp = vioscsi_null_complete;
+
+	/*
+	 * The default CDB is 0, TEST UNIT READY, which takes no data, and
+	 * returns no data.
+	 *
+	 * Note: CMD_CMPLT belongs in pkt_reason (the original code stored it
+	 * into pkt_state, which was immediately overwritten below, leaving
+	 * pkt_reason unset).
+	 */
+	pkt->pkt_reason = CMD_CMPLT;
+	pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
+	    STATE_GOT_STATUS | STATE_ARQ_DONE;
+	pkt->pkt_scbp[0] = STATUS_CHECK;
+	aqs = (void *)pkt->pkt_scbp;
+	aqs->sts_rqpkt_reason = CMD_CMPLT;
+	aqs->sts_rqpkt_resid = 0;
+	aqs->sts_rqpkt_state = STATE_GOT_BUS | STATE_GOT_TARGET |
+	    STATE_GOT_STATUS | STATE_SENT_CMD | STATE_XFERRED_DATA;
+	sense = (void *)&aqs->sts_sensedata;
+
+	/*
+	 * Descriptor format sense response:
+	 */
+	sense[0] = 0x72;
+	sense[1] = KEY_UNIT_ATTENTION;
+	sense[2] = 0x3f; /* ASC - reported LUNs data changed */
+	sense[3] = 0x0e; /* ASCQ */
+	sense[7] = 0x00; /* additional sense length (none) */
+
+	vd->vd_rescan = B_FALSE;
+
+	scsi_hba_pkt_comp(pkt);
+}
+
+/*
+ * vioscsi_rescan_luns is a taskq function that walks the device list and
+ * issues a fake LUN-change rescan for each device flagged vd_rescan.
+ *
+ * NOTE(review): vioscsi_dev_rescan (which calls scsi_init_pkt with
+ * SLEEP_FUNC and scsi_hba_pkt_comp) is invoked while vs_lock is held;
+ * confirm this cannot block/recurse on vs_lock.
+ */
+static void
+vioscsi_rescan_luns(void *arg)
+{
+	vioscsi_softc_t *sc = arg;
+	vioscsi_dev_t *vd;
+	list_t *l;
+
+	l = &sc->vs_devs;
+	mutex_enter(&sc->vs_lock);
+	for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) {
+		if (vd->vd_rescan) {
+			vioscsi_dev_rescan(vd);
+		}
+	}
+	mutex_exit(&sc->vs_lock);
+}
+
+/*
+ * vioscsi_lun_changed reacts to a transport-reset event for a target.
+ * If LUN 0 of the target is already known, a LUN rescan is dispatched;
+ * otherwise a full target discovery is dispatched.  Called from the event
+ * queue interrupt handler, so work is deferred to the taskq.
+ */
+static void
+vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target)
+{
+	vioscsi_dev_t *vd;
+	list_t *l = &sc->vs_devs;
+	boolean_t found = B_FALSE;
+
+	mutex_enter(&sc->vs_lock);
+	for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) {
+		if ((vd->vd_target == target) && (vd->vd_lun == 0)) {
+			vd->vd_rescan = B_TRUE;
+			found = B_TRUE;
+			break;
+		}
+	}
+	mutex_exit(&sc->vs_lock);
+
+	if (found) {
+		/*
+		 * We have lun 0 already, so report luns changed:
+		 */
+		(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_rescan_luns,
+		    sc, DDI_NOSLEEP);
+	} else {
+		/*
+		 * We didn't find lun 0, so issue a new discovery:
+		 */
+		(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover,
+		    sc, DDI_NOSLEEP);
+	}
+}
+
+/*
+ * vioscsi_discover is our task function for performing target and lun
+ * discovery. This is done using active SCSI probes.
+ */
+static void
+vioscsi_discover(void *arg)
+{
+	vioscsi_softc_t *sc = arg;
+	scsi_hba_tgtmap_t *tm = sc->vs_tgtmap;
+	vioscsi_request_t req;
+
+	/* A single on-stack request is reused for every probe. */
+	if (vioscsi_req_init(sc, &req, sc->vs_cmd_vq, KM_SLEEP) != 0) {
+		vioscsi_req_fini(&req);
+		return;
+	}
+
+	/*
+	 * Report the full set of visible targets to the tgtmap; the
+	 * framework computes arrivals/departures from the delta.
+	 */
+	if (scsi_hba_tgtmap_set_begin(tm) != DDI_SUCCESS) {
+		vioscsi_req_fini(&req);
+		return;
+	}
+	for (uint8_t target = 0; target < sc->vs_max_target; target++) {
+		char ua[10];
+		switch (vioscsi_probe_target(sc, &req, target)) {
+		case 1:
+			(void) snprintf(ua, sizeof (ua), "%x", target);
+			if (scsi_hba_tgtmap_set_add(tm, SCSI_TGT_SCSI_DEVICE,
+			    ua, NULL) != DDI_SUCCESS) {
+				(void) scsi_hba_tgtmap_set_flush(tm);
+				vioscsi_req_fini(&req);
+				return;
+			}
+			break;
+		case 0:
+			continue;
+		case -1:
+			/* Probe error: abandon this discovery pass. */
+			(void) scsi_hba_tgtmap_set_flush(tm);
+			vioscsi_req_fini(&req);
+			return;
+		}
+	}
+	(void) scsi_hba_tgtmap_set_end(tm, 0);
+	vioscsi_req_fini(&req);
+}
+
+/*
+ * vioscsi_teardown releases all HBA-level resources.  It is used both for
+ * attach failure cleanup (failed == B_TRUE) and for normal detach; every
+ * field is checked so it is safe to call with a partially-initialized softc.
+ */
+static void
+vioscsi_teardown(vioscsi_softc_t *sc, boolean_t failed)
+{
+	if (sc->vs_virtio != NULL) {
+		virtio_fini(sc->vs_virtio, failed);
+	}
+
+	/*
+	 * Free up the event resources:
+	 */
+	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
+		vioscsi_event_t *ve = &sc->vs_events[i];
+		if (ve->ve_vic != NULL) {
+			virtio_chain_free(ve->ve_vic);
+		}
+		if (ve->ve_dma != NULL) {
+			virtio_dma_free(ve->ve_dma);
+		}
+	}
+
+	if (sc->vs_tran != NULL) {
+		scsi_hba_tran_free(sc->vs_tran);
+	}
+	if (sc->vs_tq != NULL) {
+		ddi_taskq_destroy(sc->vs_tq);
+	}
+	/* vs_intr_pri is only set after vs_lock is initialized. */
+	if (sc->vs_intr_pri != NULL) {
+		mutex_destroy(&sc->vs_lock);
+	}
+	kmem_free(sc, sizeof (*sc));
+}
+
+/*
+ * vioscsi_attach is the DDI attach entry point.  For the HBA node it sets
+ * up the SCSA tran, initializes virtio, reads device configuration,
+ * allocates the three virtqueues (control, event, command) and the event
+ * buffers, and registers a single child iport.  For an iport node it
+ * delegates to vioscsi_iport_attach.  Suspend/resume is not supported.
+ */
+static int
+vioscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	scsi_hba_tran_t *tran = NULL;
+	vioscsi_softc_t *sc;
+	virtio_t *vio;
+	ddi_dma_attr_t attr;
+
+	if (cmd != DDI_ATTACH) { /* no suspend/resume support */
+		return (DDI_FAILURE);
+	}
+
+	if (scsi_hba_iport_unit_address(dip) != NULL) {
+		return (vioscsi_iport_attach(dip));
+	}
+
+	sc = kmem_zalloc(sizeof (*sc), KM_SLEEP);
+	sc->vs_dip = dip;
+
+	list_create(&sc->vs_devs, sizeof (vioscsi_dev_t),
+	    offsetof(vioscsi_dev_t, vd_node));
+
+	tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
+	sc->vs_tran = tran;
+
+	tran->tran_hba_len = sizeof (vioscsi_request_t);
+	tran->tran_hba_private = sc;
+
+	/*
+	 * We don't use WWN addressing, so advertise parallel. The underlying
+	 * device might still be using a different transport, even in a
+	 * pass-through, but we cannot discriminate that at this layer.
+	 */
+	tran->tran_interconnect_type = INTERCONNECT_PARALLEL;
+
+	tran->tran_start = vioscsi_tran_start;
+	tran->tran_abort = vioscsi_tran_abort;
+	tran->tran_reset = vioscsi_tran_reset;
+	tran->tran_getcap = vioscsi_tran_getcap;
+	tran->tran_setcap = vioscsi_tran_setcap;
+
+	tran->tran_tgt_init = vioscsi_tran_tgt_init;
+	tran->tran_tgt_free = vioscsi_tran_tgt_free;
+
+	tran->tran_setup_pkt = vioscsi_tran_setup_pkt;
+	tran->tran_teardown_pkt = vioscsi_tran_teardown_pkt;
+	tran->tran_pkt_constructor = vioscsi_tran_pkt_constructor;
+	tran->tran_pkt_destructor = vioscsi_tran_pkt_destructor;
+
+	/*
+	 * We need to determine some device settings here, so we initialize the
+	 * virtio in order to access those values. The rest of the setup we do
+	 * in the iport attach. Note that this driver cannot support
+	 * reattaching a child iport once it is removed -- the entire driver
+	 * will need to be reset for that.
+	 */
+	vio = virtio_init(dip, VIOSCSI_WANTED_FEATURES, B_TRUE);
+	if ((sc->vs_virtio = vio) == NULL) {
+		dev_err(dip, CE_WARN, "failed to init virtio");
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Get virtio parameters:
+	 */
+	sc->vs_max_target = virtio_dev_get16(vio, VIRTIO_SCSI_CFG_MAX_TARGET);
+	sc->vs_max_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_MAX_LUN);
+	sc->vs_cdb_size = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CDB_SIZE);
+	sc->vs_max_seg = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_SEG_MAX);
+	sc->vs_cmd_per_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CMD_PER_LUN);
+
+	/*
+	 * Adjust operating parameters to functional limits.  Note that each
+	 * value is clamped against itself; the original code clamped
+	 * vs_cmd_per_lun with vs_max_target, which would wrongly inflate the
+	 * per-LUN queue depth to the target count.
+	 */
+	sc->vs_max_target = min(VIOSCSI_MAX_TARGET, sc->vs_max_target);
+	sc->vs_cmd_per_lun = max(1, sc->vs_cmd_per_lun);
+	sc->vs_max_seg = max(VIOSCSI_MIN_SEGS, sc->vs_max_seg);
+
+	/*
+	 * Allocate queues:
+	 */
+	sc->vs_ctl_vq = virtio_queue_alloc(vio, 0, "ctl",
+	    vioscsi_ctl_handler, sc, B_FALSE, sc->vs_max_seg);
+	sc->vs_evt_vq = virtio_queue_alloc(vio, 1, "evt",
+	    vioscsi_evt_handler, sc, B_FALSE, sc->vs_max_seg);
+	sc->vs_cmd_vq = virtio_queue_alloc(vio, 2, "cmd",
+	    vioscsi_cmd_handler, sc, B_FALSE, sc->vs_max_seg);
+
+	if ((sc->vs_ctl_vq == NULL) || (sc->vs_evt_vq == NULL) ||
+	    (sc->vs_cmd_vq == NULL)) {
+		dev_err(dip, CE_WARN, "failed allocating queue(s)");
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+		dev_err(dip, CE_WARN, "virtio_init_complete failed");
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * We cannot initialize this mutex before virtio_init_complete:
+	 */
+	sc->vs_intr_pri = virtio_intr_pri(vio);
+	mutex_init(&sc->vs_lock, NULL, MUTEX_DRIVER, sc->vs_intr_pri);
+
+	/*
+	 * Allocate events, but do not submit yet:
+	 */
+	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
+		vioscsi_event_t *ve = &sc->vs_events[i];
+		ve->ve_vic = virtio_chain_alloc(sc->vs_evt_vq, KM_SLEEP);
+		ve->ve_dma = virtio_dma_alloc(sc->vs_virtio,
+		    sizeof (vioscsi_evt_t), &virtio_dma_attr,
+		    DDI_DMA_STREAMING | DDI_DMA_READ, KM_SLEEP);
+		if ((ve->ve_vic == NULL) || (ve->ve_dma == NULL)) {
+			vioscsi_teardown(sc, B_TRUE);
+			return (DDI_FAILURE);
+		}
+		if (virtio_chain_append(ve->ve_vic,
+		    virtio_dma_cookie_pa(ve->ve_dma, 0), sizeof (*ve->ve_evt),
+		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+			vioscsi_teardown(sc, B_TRUE);
+			return (DDI_FAILURE);
+		}
+		ve->ve_evt = virtio_dma_va(ve->ve_dma, 0);
+		virtio_chain_data_set(ve->ve_vic, ve);
+	}
+
+	sc->vs_tq = ddi_taskq_create(dip, "task", 1, TASKQ_DEFAULTPRI, 0);
+	if (sc->vs_tq == NULL) {
+		dev_err(dip, CE_WARN, "failed to create taskq");
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Maximum number of segments, subtract two needed for headers:
+	 */
+	attr = virtio_dma_attr;
+	attr.dma_attr_sgllen = sc->vs_max_seg - 2;
+
+	if (scsi_hba_attach_setup(dip, &attr, tran,
+	    SCSI_HBA_ADDR_COMPLEX | SCSI_HBA_HBA |
+	    SCSI_HBA_TRAN_CDB | SCSI_HBA_TRAN_SCB) !=
+	    DDI_SUCCESS) {
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	if (scsi_hba_iport_register(dip, "iport0") != DDI_SUCCESS) {
+		vioscsi_teardown(sc, B_TRUE);
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(dip);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * vioscsi_iport_teardown quiesces iport-level activity: it drains the
+ * taskq, disables interrupts, shuts down the virtio device, and destroys
+ * the target map.  HBA-level resources are left for vioscsi_teardown.
+ */
+static void
+vioscsi_iport_teardown(vioscsi_softc_t *sc)
+{
+	/*
+	 * Stop the taskq -- ensures we don't try to access resources from a
+	 * task while we are tearing down.
+	 */
+	ddi_taskq_suspend(sc->vs_tq);
+	ddi_taskq_wait(sc->vs_tq);
+
+	/*
+	 * Shutdown all interrupts and device transfers:
+	 */
+	virtio_interrupts_disable(sc->vs_virtio);
+	virtio_shutdown(sc->vs_virtio);
+
+	/*
+	 * Common resources:
+	 */
+	if (sc->vs_tgtmap != NULL) {
+		scsi_hba_tgtmap_destroy(sc->vs_tgtmap);
+		sc->vs_tgtmap = NULL;
+	}
+}
+
+/*
+ * vioscsi_iport_attach implements the attach of the iport. We do the final
+ * set up of interrupts, and posting of event buffers here, as we do not want
+ * any activity unless the iport is attached. This matches detach, and makes
+ * teardown safer.
+ */
+static int
+vioscsi_iport_attach(dev_info_t *dip)
+{
+	const char *ua = scsi_hba_iport_unit_address(dip);
+	scsi_hba_tran_t *tran;
+	vioscsi_softc_t *sc;
+
+	/*
+	 * We only support a single iport -- all disks are virtual and all
+	 * disks use target/lun addresses.
+	 */
+	if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) {
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Get our parent's tran, and look up the sc from that:
+	 */
+	tran = ddi_get_driver_private(ddi_get_parent(dip));
+	if ((tran == NULL) ||
+	    ((sc = tran->tran_hba_private) == NULL)) {
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Save a copy of the soft state in our tran private area.
+	 * (The framework clears this after cloning from parent.)
+	 */
+	tran = ddi_get_driver_private(dip);
+	tran->tran_hba_private = sc;
+
+	/*
+	 * We don't want interrupts on the control queue -- strictly polled
+	 * (however if this handler is called from an interrupt, it should
+	 * still be absolutely fine).
+	 */
+	virtio_queue_no_interrupt(sc->vs_ctl_vq, B_TRUE);
+
+	/* Target map: 1s quiesce window, 2s deadline for reports. */
+	if (scsi_hba_tgtmap_create(dip, SCSI_TM_FULLSET, MICROSEC,
+	    2 * MICROSEC, sc, NULL, NULL, &sc->vs_tgtmap) != DDI_SUCCESS) {
+		vioscsi_iport_teardown(sc);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Post events:
+	 */
+	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
+		virtio_chain_submit(sc->vs_events[i].ve_vic, B_FALSE);
+	}
+	virtio_queue_flush(sc->vs_evt_vq);
+
+	/*
+	 * Start interrupts going now:
+	 */
+	if (virtio_interrupts_enable(sc->vs_virtio) != DDI_SUCCESS) {
+		vioscsi_iport_teardown(sc);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Start a discovery:
+	 */
+	(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc, DDI_SLEEP);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * vioscsi_quiesce is the fast-reboot quiesce entry point; it must stop all
+ * device DMA without using locks or blocking.  Delegates to virtio_quiesce.
+ */
+static int
+vioscsi_quiesce(dev_info_t *dip)
+{
+	vioscsi_softc_t *sc;
+	scsi_hba_tran_t *tran;
+
+	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
+	    ((sc = tran->tran_hba_private) == NULL)) {
+		return (DDI_FAILURE);
+	}
+	if (sc->vs_virtio == NULL) {
+		return (DDI_SUCCESS); /* not initialized yet */
+	}
+
+	return (virtio_quiesce(sc->vs_virtio));
+}
+
+/*
+ * vioscsi_iport_detach is used to perform the detach of the iport. It
+ * disables interrupts and the device, but does not free resources, other than
+ * the target map. Note that due to lack of a way to start virtio after
+ * virtio_shutdown(), it is not possible to reattach the iport after this is
+ * called, unless the underlying HBA is also detached and then re-attached.
+ */
+static int
+vioscsi_iport_detach(dev_info_t *dip)
+{
+	const char *ua = scsi_hba_iport_unit_address(dip);
+	vioscsi_softc_t *sc;
+	scsi_hba_tran_t *tran;
+
+	if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) {
+		return (DDI_FAILURE);
+	}
+
+	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
+	    ((sc = tran->tran_hba_private) == NULL)) {
+		return (DDI_FAILURE);
+	}
+
+	mutex_enter(&sc->vs_lock);
+	if (!list_is_empty(&sc->vs_devs)) {
+		/*
+		 * Cannot detach while we have target children.
+		 */
+		mutex_exit(&sc->vs_lock);
+		return (DDI_FAILURE);
+	}
+	/*
+	 * Drop the lock before teardown; the original code returned success
+	 * with vs_lock still held, deadlocking the next acquisition.
+	 */
+	mutex_exit(&sc->vs_lock);
+
+	vioscsi_iport_teardown(sc);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * vioscsi_detach is the DDI detach entry point.  Iport nodes are handled
+ * by vioscsi_iport_detach; for the HBA node we detach from SCSA and free
+ * everything via vioscsi_teardown.  Suspend is not supported.
+ */
+static int
+vioscsi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	vioscsi_softc_t *sc;
+	scsi_hba_tran_t *tran;
+
+	if (cmd != DDI_DETACH) {
+		return (DDI_FAILURE);
+	}
+
+	if (scsi_hba_iport_unit_address(dip) != NULL) {
+		return (vioscsi_iport_detach(dip));
+	}
+
+	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
+	    ((sc = tran->tran_hba_private) == NULL)) {
+		return (DDI_FAILURE);
+	}
+
+	if (scsi_hba_detach(dip) != DDI_SUCCESS) {
+		return (DDI_FAILURE);
+	}
+	vioscsi_teardown(sc, B_FALSE);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Device operations.  No cb_ops: this is a pure SCSA HBA with no
+ * character/block device interface of its own.
+ */
+static struct dev_ops vioscsi_dev_ops = {
+	.devo_rev = DEVO_REV,
+	.devo_refcnt = 0,
+	.devo_getinfo = nodev,
+	.devo_identify = nulldev,
+	.devo_probe = nulldev,
+	.devo_attach = vioscsi_attach,
+	.devo_detach = vioscsi_detach,
+	.devo_reset = nodev,
+	.devo_cb_ops = NULL,
+	.devo_bus_ops = NULL,
+	.devo_power = NULL,
+	.devo_quiesce = vioscsi_quiesce,
+};
+
+/*
+ * Module linkage: a single driver module.
+ */
+static struct modldrv modldrv = {
+	.drv_modops = &mod_driverops,
+	.drv_linkinfo = vioscsi_ident,
+	.drv_dev_ops = &vioscsi_dev_ops,
+};
+
+static struct modlinkage modlinkage = {
+	.ml_rev = MODREV_1,
+	.ml_linkage = { &modldrv, NULL, },
+};
+
+
+/*
+ * _init: loadable module entry.  SCSA HBA drivers must bracket
+ * mod_install with scsi_hba_init/scsi_hba_fini.
+ */
+int
+_init(void)
+{
+	int err;
+
+	/*
+	 * Initialize this unconditionally (ticks per second):
+	 */
+	vioscsi_hz = drv_usectohz(1000000);
+
+	if ((err = scsi_hba_init(&modlinkage)) != 0) {
+		return (err);
+	}
+
+	if ((err = mod_install(&modlinkage)) != 0) {
+		scsi_hba_fini(&modlinkage);
+		return (err);
+	}
+
+	return (err);
+}
+
+/*
+ * _fini: loadable module unload; only tears down SCSA state after the
+ * module is successfully removed.
+ */
+int
+_fini(void)
+{
+	int err;
+
+	if ((err = mod_remove(&modlinkage)) != 0) {
+		return (err);
+	}
+
+	scsi_hba_fini(&modlinkage);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * _info: report module information via the standard modinfo mechanism.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.h b/usr/src/uts/common/io/vioscsi/vioscsi.h
new file mode 100644
index 0000000000..b032ef28c8
--- /dev/null
+++ b/usr/src/uts/common/io/vioscsi/vioscsi.h
@@ -0,0 +1,313 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
+ * Copyright 2022 RackTop Systems, Inc.
+ */
+
+#ifndef _VIOSCSI_H_
+#define _VIOSCSI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/ksynch.h>
+#include <sys/modctl.h>
+#include <sys/debug.h>
+#include <sys/list.h>
+#include <sys/stddef.h>
+
+#include <sys/scsi/scsi.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#include <virtio.h>
+
+/*
+ * Protocol constants below follow the virtio-scsi device specification;
+ * CDB and sense sizes are the spec defaults negotiated at init time.
+ */
+#define	VIRTIO_SCSI_CDB_SIZE	32
+#define	VIRTIO_SCSI_SENSE_SIZE	96
+
+/*
+ * Feature bits:
+ */
+#define	VIRTIO_SCSI_F_INOUT	(0x1 << 0)
+#define	VIRTIO_SCSI_F_HOTPLUG	(0x1 << 1)
+#define	VIRTIO_SCSI_F_CHANGE	(0x1 << 2)
+#define	VIRTIO_SCSI_F_T10_PI	(0x1 << 3)
+
+/*
+ * Register offset in bytes (device configuration space layout):
+ */
+#define	VIRTIO_SCSI_CFG_NUM_QUEUES	0
+#define	VIRTIO_SCSI_CFG_SEG_MAX		4
+#define	VIRTIO_SCSI_CFG_MAX_SECTORS	8
+#define	VIRTIO_SCSI_CFG_CMD_PER_LUN	12
+#define	VIRTIO_SCSI_CFG_EVI_SIZE	16	/* event info size */
+#define	VIRTIO_SCSI_CFG_SENSE_SIZE	20
+#define	VIRTIO_SCSI_CFG_CDB_SIZE	24
+#define	VIRTIO_SCSI_CFG_MAX_CHANNEL	28
+#define	VIRTIO_SCSI_CFG_MAX_TARGET	30
+#define	VIRTIO_SCSI_CFG_MAX_LUN		32
+
+/*
+ * Response codes:
+ */
+#define	VIRTIO_SCSI_S_OK		0
+#define	VIRTIO_SCSI_S_FUNCTION_COMPLETED	0
+#define	VIRTIO_SCSI_S_OVERRUN		1
+#define	VIRTIO_SCSI_S_ABORTED		2
+#define	VIRTIO_SCSI_S_BAD_TARGET	3
+#define	VIRTIO_SCSI_S_RESET		4
+#define	VIRTIO_SCSI_S_BUSY		5
+#define	VIRTIO_SCSI_S_TRANSPORT_FAILURE	6
+#define	VIRTIO_SCSI_S_TARGET_FAILURE	7
+#define	VIRTIO_SCSI_S_NEXUS_FAILURE	8
+#define	VIRTIO_SCSI_S_FAILURE		9
+#define	VIRTIO_SCSI_S_FUNCTION_SUCCEEDED	10
+#define	VIRTIO_SCSI_S_FUNCTION_REJECTED	11
+#define	VIRTIO_SCSI_S_INCORRECT_LUN	12
+
+/*
+ * Control queue type codes:
+ */
+#define	VIRTIO_SCSI_T_TMF		0
+#define	VIRTIO_SCSI_T_AN_QUERY		1
+#define	VIRTIO_SCSI_T_AN_SUBSCRIBE	2
+
+/*
+ * Task management codes:
+ */
+#define	VIRTIO_SCSI_T_TMF_ABORT_TASK		0
+#define	VIRTIO_SCSI_T_TMF_ABORT_TASK_SET	1
+#define	VIRTIO_SCSI_T_TMF_CLEAR_ACA		2
+#define	VIRTIO_SCSI_T_TMF_CLEAR_ACA_TASK_SET	3
+#define	VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET	4
+#define	VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET	5
+#define	VIRTIO_SCSI_T_TMF_QUERY_TASK		6
+#define	VIRTIO_SCSI_T_TMF_QUERY_TASK_SET	7
+
+/*
+ * Events (the MISSED flag is or-ed into the event type):
+ */
+#define	VIRTIO_SCSI_T_EVENTS_MISSED	0x80000000
+#define	VIRTIO_SCSI_T_NO_EVENT		0
+#define	VIRTIO_SCSI_T_TRANSPORT_RESET	1
+#define	VIRTIO_SCSI_T_ASYNC_NOTIFY	2
+
+/*
+ * Task attributes:
+ */
+#define	VIRTIO_SCSI_S_SIMPLE		0
+#define	VIRTIO_SCSI_S_ORDERED		1
+#define	VIRTIO_SCSI_S_HEAD		2
+#define	VIRTIO_SCSI_S_ACA		3
+
+/*
+ * Reasons of reset event:
+ */
+#define	VIRTIO_SCSI_EVT_RESET_HARD	0
+#define	VIRTIO_SCSI_EVT_RESET_RESCAN	1
+#define	VIRTIO_SCSI_EVT_RESET_REMOVED	2
+
+/*
+ * We need to support INOUT, and we want hotplug notifications:
+ */
+#define	VIOSCSI_WANTED_FEATURES	(VIRTIO_SCSI_F_INOUT | VIRTIO_SCSI_F_HOTPLUG)
+
+#define	VIOSCSI_MAX_TARGET	256
+#define	VIOSCSI_MIN_SEGS	3
+#define	VIOSCSI_NUM_EVENTS	16
+
+/*
+ * Data structures:
+ */
+
+/*
+ * The structures below mirror the virtio-scsi wire format and must be
+ * packed (no padding) to match what the device reads and writes via DMA.
+ */
+#pragma pack(1)
+
+/*
+ * virtio SCSI command request:
+ */
+struct virtio_scsi_cmd_req {
+	uint8_t lun[8];
+	uint64_t tag;
+	uint8_t task_attr;
+	uint8_t prio;
+	uint8_t crn;
+	uint8_t cdb[VIRTIO_SCSI_CDB_SIZE];
+};
+
+/*
+ * Virtio SCSI response:
+ */
+struct virtio_scsi_cmd_resp {
+	uint32_t sense_len;
+	uint32_t res_id;
+	uint16_t status_qualifier;
+	uint8_t status;
+	uint8_t response;
+	uint8_t sense[VIRTIO_SCSI_SENSE_SIZE];
+};
+
+/*
+ * Task management request:
+ */
+struct virtio_scsi_ctrl_tmf_req {
+	uint32_t type;
+	uint32_t subtype;
+	uint8_t lun[8];
+	uint64_t tag;
+};
+
+/*
+ * Task management response:
+ */
+struct virtio_scsi_ctrl_tmf_resp {
+	uint8_t response;
+};
+
+/*
+ * Asynchronous notification request:
+ */
+struct virtio_scsi_ctrl_an_req {
+	uint32_t type;
+	uint8_t lun[8];
+	uint32_t event_requested;
+};
+
+/*
+ * Asynchronous notification response:
+ */
+struct virtio_scsi_ctrl_an_resp {
+	uint32_t event_actual;
+	uint8_t response;
+};
+
+/*
+ * Events delivered on the event queue:
+ */
+struct virtio_scsi_event {
+	uint32_t event;
+	uint8_t lun[8];
+	uint32_t reason;
+};
+
+#pragma pack()
+
+/*
+ * Unions overlaying the three request/response flavors, so one DMA
+ * allocation can serve command, TMF, and async-notify traffic.
+ */
+typedef union {
+	struct virtio_scsi_cmd_req cmd;
+	struct virtio_scsi_ctrl_tmf_req tmf;
+	struct virtio_scsi_ctrl_an_req anr;
+} vioscsi_req_t;
+
+typedef union {
+	struct virtio_scsi_cmd_resp cmd;
+	struct virtio_scsi_ctrl_tmf_resp tmf;
+	struct virtio_scsi_ctrl_an_resp anr;
+} vioscsi_res_t;
+
+/* Request and response placed back to back in one DMA buffer. */
+struct virtio_scsi_op {
+	vioscsi_req_t req;
+	vioscsi_res_t res;
+};
+
+#define	VIOSCSI_REQ_OFFSET	offsetof(struct virtio_scsi_op, req)
+#define	VIOSCSI_RES_OFFSET	offsetof(struct virtio_scsi_op, res)
+
+typedef struct vioscsi_request vioscsi_request_t;
+typedef struct vioscsi_event vioscsi_event_t;
+typedef struct vioscsi_softc vioscsi_softc_t;
+typedef struct vioscsi_dev vioscsi_dev_t;
+typedef struct virtio_scsi_event vioscsi_evt_t;
+typedef struct virtio_scsi_ctrl_tmf_req vioscsi_tmf_req_t;
+typedef struct virtio_scsi_ctrl_tmf_resp vioscsi_tmf_res_t;
+typedef struct virtio_scsi_cmd_req vioscsi_cmd_req_t;
+typedef struct virtio_scsi_cmd_resp vioscsi_cmd_res_t;
+typedef struct virtio_scsi_op vioscsi_op_t;
+
+/*
+ * Per-command state tracked by the driver; embedded in the SCSA pkt's
+ * HBA-private area (tran_hba_len) or allocated for internal requests.
+ */
+struct vioscsi_request {
+	list_node_t vr_node;		/* linkage on vioscsi_dev vd_reqs */
+	struct scsi_pkt *vr_pkt;	/* NULL for internal requests */
+	virtio_queue_t *vr_vq;
+	virtio_dma_t *vr_dma;		/* DMA for the req/res op pair */
+	virtio_chain_t *vr_vic;
+	vioscsi_dev_t *vr_dev;
+	vioscsi_req_t *vr_req;		/* KVA of request header */
+	vioscsi_res_t *vr_res;		/* KVA of response buffer */
+	uint64_t vr_req_pa;		/* PA of request header */
+	uint64_t vr_res_pa;		/* PA of response buffer */
+	boolean_t vr_poll;		/* polled (vs. callback) completion */
+	uint8_t vr_expired; /* access using atomics */
+	uint8_t vr_done; /* access using atomics */
+	uint8_t vr_task_attr;
+	uint8_t vr_target;
+	uint16_t vr_lun;
+	clock_t vr_time; /* seconds */
+	clock_t vr_start; /* ticks */
+	clock_t vr_expire; /* ticks */
+};
+
+/*
+ * Per-target/LUN state, created by tran_tgt_init and linked on the
+ * softc's vs_devs list.  vd_lock protects the request list and counters.
+ */
+struct vioscsi_dev {
+	list_node_t vd_node;		/* linkage on softc vs_devs */
+	uint8_t vd_target;
+	uint16_t vd_lun;
+	struct scsi_device *vd_sd;
+	vioscsi_softc_t *vd_sc;
+	int vd_num_cmd;			/* commands in flight */
+	int vd_max_cmd;			/* per-LUN queue depth limit */
+	boolean_t vd_rescan;		/* LUN rescan requested */
+	list_t vd_reqs;			/* outstanding requests */
+	timeout_id_t vd_timeout;
+	kmutex_t vd_lock;
+};
+
+/*
+ * One pre-posted event buffer on the event virtqueue.
+ */
+struct vioscsi_event {
+	virtio_chain_t *ve_vic;
+	virtio_dma_t *ve_dma;
+	vioscsi_evt_t *ve_evt;		/* KVA of the DMA'd event */
+};
+
+/*
+ * Per-HBA soft state, shared between the HBA node and its single iport.
+ * vs_lock protects the vs_devs list.
+ */
+struct vioscsi_softc {
+	dev_info_t *vs_dip;
+	virtio_t *vs_virtio;
+	uint64_t vs_features;
+
+	virtio_queue_t *vs_ctl_vq;	/* control (TMF) queue, polled */
+	virtio_queue_t *vs_evt_vq;	/* event queue */
+	virtio_queue_t *vs_cmd_vq;	/* command queue */
+
+	scsi_hba_tran_t *vs_tran;
+	scsi_hba_tgtmap_t *vs_tgtmap;
+	ddi_taskq_t *vs_tq;		/* discovery/rescan work */
+
+	/* Values read from device configuration space at attach: */
+	uint32_t vs_max_target;
+	uint32_t vs_max_lun;
+	uint32_t vs_cdb_size;
+	uint32_t vs_max_seg;
+	uint32_t vs_cmd_per_lun;
+
+	vioscsi_event_t vs_events[VIOSCSI_NUM_EVENTS];
+
+	void *vs_intr_pri;		/* also: vs_lock initialized marker */
+	kmutex_t vs_lock;
+	list_t vs_devs;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIOSCSI_H_ */
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 79f4c3f26b..2cb43d8beb 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -415,15 +415,10 @@ DRV_KMODS += vr
#
# Virtio drivers
#
-
-# Virtio core
DRV_KMODS += virtio
-
-# Virtio block driver
DRV_KMODS += vioblk
-
-# Virtio network driver
DRV_KMODS += vioif
+DRV_KMODS += vioscsi
#
# DTrace and DTrace Providers
diff --git a/usr/src/uts/intel/vioscsi/Makefile b/usr/src/uts/intel/vioscsi/Makefile
new file mode 100644
index 0000000000..105e4e08fb
--- /dev/null
+++ b/usr/src/uts/intel/vioscsi/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Nexenta Systems, Inc.
+# Copyright 2022 RackTop Systems, Inc.
+#
+
+UTSBASE=	$(SRC)/uts
+
+MODULE=		vioscsi
+OBJECTS=	$(VIOSCSI_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE=	$(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET=	$(BINARY)
+INSTALL_TARGET=	$(BINARY) $(ROOTMODULE)
+
+#
+# The driver depends on the SCSA framework and the virtio core module.
+#
+LDFLAGS		+= -Nmisc/scsi -Nmisc/virtio
+
+#
+# Includes
+#
+INC_PATH	+= -I$(UTSBASE)/common/io/virtio -I$(UTSBASE)/common/io/vioscsi
+
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ