summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2016-06-06 12:17:02 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2016-06-06 12:17:02 +0000
commit6831752f006768a0e1a2096ab97652e881a06f96 (patch)
tree0e5d4e6d5d2e9caa2bac170d79940fc1056c8c14
parent22f39cb92198262901262a7cb9a72e596b888110 (diff)
parent565657ca18725b8f8dbe5b93704cc1d173be9d65 (diff)
downloadillumos-joyent-6831752f006768a0e1a2096ab97652e881a06f96.tar.gz
[illumos-gate merge]
commit 565657ca18725b8f8dbe5b93704cc1d173be9d65 7017 integrate pvscsi commit 99189164df06057fb968ca7be701bb1a0d5da8c9 6940 Cannot unlink directories when over quota commit 002ec3e4c3efa65890ba89d290fe2cc209838c12 6999 fix 'Use of uninitialized value $picky in numeric eq (==)' in cstyle commit 0dd053d7d890618ea1fc697b07de364e69eb4190 7016 arc_available_memory is not 32-bit safe commit 86f617e90f3e06f970c3ea9b32836e29500836ab 7028 avl_destroy_nodes supports emptying, not just destroying, an avl tree commit 48cb8b9720f175ff5ed31ebd96a9f3d6922bdde9 7027 zfs_written_property_001_pos makes unreasonable assumptions about metadata space usage
-rw-r--r--manifest1
-rw-r--r--usr/src/common/avl/avl.c6
-rw-r--r--usr/src/pkg/manifests/driver-storage-pvscsi.mf36
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh10
-rw-r--r--usr/src/tools/scripts/cstyle.pl2
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c20
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vnops.c1
-rw-r--r--usr/src/uts/intel/Makefile.files6
-rw-r--r--usr/src/uts/intel/Makefile.intel5
-rw-r--r--usr/src/uts/intel/Makefile.rules8
-rw-r--r--usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE20
-rw-r--r--usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.c2693
-rw-r--r--usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.h486
-rw-r--r--usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi_var.h236
-rw-r--r--usr/src/uts/intel/pvscsi/Makefile51
16 files changed, 3565 insertions, 17 deletions
diff --git a/manifest b/manifest
index ba3a9982c8..40ffca4b0e 100644
--- a/manifest
+++ b/manifest
@@ -654,6 +654,7 @@ f kernel/drv/amd64/profile 0755 root sys
f kernel/drv/amd64/pseudo 0755 root sys
f kernel/drv/amd64/ptc 0755 root sys
f kernel/drv/amd64/ptsl 0755 root sys
+f kernel/drv/amd64/pvscsi 0755 root sys
f kernel/drv/amd64/qlc 0755 root sys
f kernel/drv/amd64/qlge 0755 root sys
f kernel/drv/amd64/ramdisk 0755 root sys
diff --git a/usr/src/common/avl/avl.c b/usr/src/common/avl/avl.c
index ead2fca2ce..0411afb4c5 100644
--- a/usr/src/common/avl/avl.c
+++ b/usr/src/common/avl/avl.c
@@ -24,8 +24,8 @@
*/
/*
- * Copyright (c) 2014 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015 by Delphix. All rights reserved.
*/
/*
@@ -952,8 +952,8 @@ avl_is_empty(avl_tree_t *tree)
/*
* Post-order tree walk used to visit all tree nodes and destroy the tree
- * in post order. This is used for destroying a tree without paying any cost
- * for rebalancing it.
+ * in post order. This is used for removing all the nodes from a tree without
+ * paying any cost for rebalancing it.
*
* example:
*
diff --git a/usr/src/pkg/manifests/driver-storage-pvscsi.mf b/usr/src/pkg/manifests/driver-storage-pvscsi.mf
new file mode 100644
index 0000000000..2efefefb73
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-storage-pvscsi.mf
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Nexenta Systems, Inc.
+#
+
+#
+# The default for payload-bearing actions in this package is to appear in the
+# global zone only. See the include file for greater detail, as well as
+# information about overriding the defaults.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/driver/storage/pvscsi@$(PKGVERS)
+set name=pkg.description value="VMware PVSCSI SCSI Controller driver"
+set name=pkg.summary value="VMware PVSCSI SCSI Controller driver"
+set name=info.classification \
+ value=org.opensolaris.category.2008:Drivers/Storage
+set name=variant.arch value=i386
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+driver name=pvscsi alias=pci15ad,7c0 class=scsi-self-identifying
+file path=kernel/drv/$(ARCH64)/pvscsi group=sys
+file path=kernel/drv/pvscsi group=sys
+license lic_CDDL license=lic_CDDL
+license usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE \
+ license=usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
index b5eae9c51c..fc58735972 100644
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_property/zfs_written_property_001_pos.ksh
@@ -11,7 +11,7 @@
#
#
-# Copyright (c) 2012 by Delphix. All rights reserved.
+# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
#
#
@@ -63,11 +63,8 @@ typeset -l total=0
typeset -l snap1_size=0
typeset -l snap2_size=0
typeset -l snap3_size=0
-typeset -l metadata=0
typeset -l mb_block=0
((mb_block = 1024 * 1024))
-# approximate metadata on dataset when empty is 32KB
-((metadata = 32 * 1024))
log_note "verify written property statistics for dataset"
log_must $ZFS create -p $TESTPOOL/$TESTFS1/$TESTFS2/$TESTFS3
@@ -89,7 +86,10 @@ blocks=0
for i in 1 2 3; do
written=$(get_prop written $TESTPOOL/$TESTFS1@snap$i)
if [[ $blocks -eq 0 ]]; then
- expected_written=$metadata
+ # Written value for the frist non-clone snapshot is
+ # expected to be equal to the referenced value.
+ expected_written=$( \
+ get_prop referenced $TESTPOOL/$TESTFS1@snap$i)
else
((expected_written = blocks * mb_block))
fi
diff --git a/usr/src/tools/scripts/cstyle.pl b/usr/src/tools/scripts/cstyle.pl
index f4d327e986..874235ef21 100644
--- a/usr/src/tools/scripts/cstyle.pl
+++ b/usr/src/tools/scripts/cstyle.pl
@@ -430,7 +430,7 @@ line: while (<$filehandle>) {
$function_header_full_indent = 0;
}
if ($in_function_header && /{$/ ) {
- if ($picky == 1) {
+ if ($picky) {
err("opening brace on same line as function header");
}
$in_function_header = 0;
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 96e8411251..5c06a1bb29 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -217,6 +217,11 @@ static int arc_dead;
static boolean_t arc_warm;
/*
+ * log2 fraction of the zio arena to keep free.
+ */
+int arc_zio_arena_free_shift = 2;
+
+/*
* These tunables are for performance analysis.
*/
uint64_t zfs_arc_max;
@@ -3236,7 +3241,7 @@ arc_available_memory(void)
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
- n = vmem_size(heap_arena, VMEM_FREE) -
+ n = (int64_t)vmem_size(heap_arena, VMEM_FREE) -
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
if (n < lowest) {
lowest = n;
@@ -3247,15 +3252,16 @@ arc_available_memory(void)
/*
* If zio data pages are being allocated out of a separate heap segment,
* then enforce that the size of available vmem for this arena remains
- * above about 1/16th free.
+ * above about 1/4th (1/(2^arc_zio_arena_free_shift)) free.
*
- * Note: The 1/16th arena free requirement was put in place
- * to aggressively evict memory from the arc in order to avoid
- * memory fragmentation issues.
+ * Note that reducing the arc_zio_arena_free_shift keeps more virtual
+ * memory (in the zio_arena) free, which can avoid memory
+ * fragmentation issues.
*/
if (zio_arena != NULL) {
- n = vmem_size(zio_arena, VMEM_FREE) -
- (vmem_size(zio_arena, VMEM_ALLOC) >> 4);
+ n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
+ (vmem_size(zio_arena, VMEM_ALLOC) >>
+ arc_zio_arena_free_shift);
if (n < lowest) {
lowest = n;
r = FMR_ZIO_ARENA;
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 1d3a6ed4e5..829d57b760 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -2115,6 +2115,7 @@ top:
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 4d491ea178..12b23b14df 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -22,6 +22,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2016, Joyent, Inc.
+# Copyright 2016 Nexenta Systems, Inc.
#
#
@@ -394,3 +395,8 @@ VMXNET3S_OBJS = vmxnet3_main.o \
vmxnet3_rx.o \
vmxnet3_tx.o \
vmxnet3_utils.o
+
+#
+# VMware PVSCSI SCSI Controller
+#
+PVSCSI_OBJS = pvscsi.o
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 5e473f4168..7f93a02262 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -18,11 +18,13 @@
# CDDL HEADER END
#
+#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
# Copyright 2016, Joyent, Inc.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
+# Copyright 2016 Nexenta Systems, Inc.
+#
#
# This makefile contains the common definitions for all intel
@@ -385,6 +387,7 @@ DRV_KMODS += ncall nsctl sdbc nskern sv
DRV_KMODS += ii rdc rdcsrv rdcstub
DRV_KMODS += iptun
DRV_KMODS += vmxnet3s
+DRV_KMODS += pvscsi
#
# Common code drivers
diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules
index cdbd27e92e..2c00dd3c1d 100644
--- a/usr/src/uts/intel/Makefile.rules
+++ b/usr/src/uts/intel/Makefile.rules
@@ -22,6 +22,7 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright 2012 Joyent, Inc. All rights reserved.
+# Copyright 2016 Nexenta Systems, Inc.
#
#
@@ -245,6 +246,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/vmxnet3s/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/scsi/adapters/pvscsi/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/nskern/%.s
$(COMPILE.s) -o $@ $<
@@ -474,6 +479,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/heci/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vmxnet3s/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/scsi/adapters/pvscsi/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/os/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE b/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..a5904d3673
--- /dev/null
+++ b/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE
@@ -0,0 +1,20 @@
+Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE.descrip b/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..8f7aabf325
--- /dev/null
+++ b/usr/src/uts/intel/io/scsi/adapters/pvscsi/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+VMware PVSCSI definitions
diff --git a/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.c b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.c
new file mode 100644
index 0000000000..d0a9db6736
--- /dev/null
+++ b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.c
@@ -0,0 +1,2693 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/cpuvar.h>
+#include <sys/ddi.h>
+#include <sys/errno.h>
+#include <sys/fs/dv_node.h>
+#include <sys/kmem.h>
+#include <sys/kmem_impl.h>
+#include <sys/list.h>
+#include <sys/modctl.h>
+#include <sys/pci.h>
+#include <sys/scsi/scsi.h>
+#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "pvscsi.h"
+#include "pvscsi_var.h"
+
+int pvscsi_enable_msi = 1;
+int pvscsi_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
+int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
+
+static int pvscsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt);
+
+static void *pvscsi_sstate;
+
+/* HBA DMA attributes */
+static ddi_dma_attr_t pvscsi_hba_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000ull,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull,
+ .dma_attr_count_max = 0x000000007FFFFFFFull,
+ .dma_attr_align = 0x0000000000000001ull,
+ .dma_attr_burstsizes = 0x7ff,
+ .dma_attr_minxfer = 0x00000001u,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFFull,
+ .dma_attr_seg = 0x00000000FFFFFFFFull,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 0x00000200u,
+ .dma_attr_flags = 0
+};
+
+/* DMA attributes for req/comp rings */
+static ddi_dma_attr_t pvscsi_ring_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000ull,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull,
+ .dma_attr_count_max = 0x000000007FFFFFFFull,
+ .dma_attr_align = 0x0000000000000001ull,
+ .dma_attr_burstsizes = 0x7ff,
+ .dma_attr_minxfer = 0x00000001u,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFFull,
+ .dma_attr_seg = 0x00000000FFFFFFFFull,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 0x00000001u,
+ .dma_attr_flags = 0
+};
+
+/* DMA attributes for buffer I/O */
+static ddi_dma_attr_t pvscsi_io_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000ull,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull,
+ .dma_attr_count_max = 0x000000007FFFFFFFull,
+ .dma_attr_align = 0x0000000000000001ull,
+ .dma_attr_burstsizes = 0x7ff,
+ .dma_attr_minxfer = 0x00000001u,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFFull,
+ .dma_attr_seg = 0x00000000FFFFFFFFull,
+ .dma_attr_sgllen = PVSCSI_MAX_SG_SIZE,
+ .dma_attr_granular = 0x00000200u,
+ .dma_attr_flags = 0
+};
+
+static ddi_device_acc_attr_t pvscsi_mmio_attr = {
+ DDI_DEVICE_ATTR_V1,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC,
+ DDI_DEFAULT_ACC
+};
+
+static ddi_device_acc_attr_t pvscsi_dma_attrs = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC,
+ DDI_DEFAULT_ACC,
+};
+
+static void
+pvscsi_add_to_queue(pvscsi_cmd_t *cmd)
+{
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+
+ ASSERT(pvs != NULL);
+ ASSERT(mutex_owned(&pvs->mutex));
+ ASSERT(!list_link_active(&(cmd)->cmd_queue_node));
+
+ list_insert_tail(&pvs->cmd_queue, cmd);
+ pvs->cmd_queue_len++;
+}
+
+static void
+pvscsi_remove_from_queue(pvscsi_cmd_t *cmd)
+{
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+
+ ASSERT(pvs != NULL);
+ ASSERT(mutex_owned(&pvs->mutex));
+ ASSERT(list_link_active(&cmd->cmd_queue_node));
+ ASSERT(pvs->cmd_queue_len > 0);
+
+ if (list_link_active(&cmd->cmd_queue_node)) {
+ list_remove(&pvs->cmd_queue, cmd);
+ pvs->cmd_queue_len--;
+ }
+}
+
+static uint64_t
+pvscsi_map_ctx(pvscsi_softc_t *pvs, pvscsi_cmd_ctx_t *io_ctx)
+{
+ return (io_ctx - pvs->cmd_ctx + 1);
+}
+
+static pvscsi_cmd_ctx_t *
+pvscsi_lookup_ctx(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
+{
+ pvscsi_cmd_ctx_t *ctx, *end;
+
+ end = &pvs->cmd_ctx[pvs->req_depth];
+ for (ctx = pvs->cmd_ctx; ctx < end; ctx++) {
+ if (ctx->cmd == cmd)
+ return (ctx);
+ }
+
+ return (NULL);
+}
+
+static pvscsi_cmd_ctx_t *
+pvscsi_resolve_ctx(pvscsi_softc_t *pvs, uint64_t ctx)
+{
+ if (ctx > 0 && ctx <= pvs->req_depth)
+ return (&pvs->cmd_ctx[ctx - 1]);
+ else
+ return (NULL);
+}
+
+static boolean_t
+pvscsi_acquire_ctx(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
+{
+ pvscsi_cmd_ctx_t *ctx;
+
+ if (list_is_empty(&pvs->cmd_ctx_pool))
+ return (B_FALSE);
+
+ ctx = (pvscsi_cmd_ctx_t *)list_remove_head(&pvs->cmd_ctx_pool);
+ ASSERT(ctx != NULL);
+
+ ctx->cmd = cmd;
+ cmd->ctx = ctx;
+
+ return (B_TRUE);
+}
+
+static void
+pvscsi_release_ctx(pvscsi_cmd_t *cmd)
+{
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+
+ ASSERT(mutex_owned(&pvs->mutex));
+
+ cmd->ctx->cmd = NULL;
+ list_insert_tail(&pvs->cmd_ctx_pool, cmd->ctx);
+ cmd->ctx = NULL;
+}
+
+static uint32_t
+pvscsi_reg_read(pvscsi_softc_t *pvs, uint32_t offset)
+{
+ uint32_t ret;
+
+ ASSERT((offset & (sizeof (uint32_t) - 1)) == 0);
+
+ ret = ddi_get32(pvs->mmio_handle,
+ (uint32_t *)(pvs->mmio_base + offset));
+
+ return (ret);
+}
+
+static void
+pvscsi_reg_write(pvscsi_softc_t *pvs, uint32_t offset, uint32_t value)
+{
+ ASSERT((offset & (sizeof (uint32_t) - 1)) == 0);
+
+ ddi_put32(pvs->mmio_handle, (uint32_t *)(pvs->mmio_base + offset),
+ value);
+}
+
+static void
+pvscsi_write_cmd_desc(pvscsi_softc_t *pvs, uint32_t cmd, void *desc, size_t len)
+{
+ len /= sizeof (uint32_t);
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_COMMAND, cmd);
+ ddi_rep_put32(pvs->mmio_handle, (uint32_t *)desc,
+ (uint32_t *)(pvs->mmio_base + PVSCSI_REG_OFFSET_COMMAND_DATA),
+ len, DDI_DEV_NO_AUTOINCR);
+}
+
+static uint32_t
+pvscsi_read_intr_status(pvscsi_softc_t *pvs)
+{
+ return (pvscsi_reg_read(pvs, PVSCSI_REG_OFFSET_INTR_STATUS));
+}
+
+static void
+pvscsi_write_intr_status(pvscsi_softc_t *pvs, uint32_t val)
+{
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_STATUS, val);
+}
+
+static void
+pvscsi_mask_intr(pvscsi_softc_t *pvs)
+{
+ mutex_enter(&pvs->intr_mutex);
+
+ VERIFY(pvs->intr_lock_counter >= 0);
+
+ if (++pvs->intr_lock_counter == 1)
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK, 0);
+
+ mutex_exit(&pvs->intr_mutex);
+}
+
+static void
+pvscsi_unmask_intr(pvscsi_softc_t *pvs)
+{
+ mutex_enter(&pvs->intr_mutex);
+
+ VERIFY(pvs->intr_lock_counter > 0);
+
+ if (--pvs->intr_lock_counter == 0) {
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK,
+ PVSCSI_INTR_CMPL_MASK | PVSCSI_INTR_MSG_MASK);
+ }
+
+ mutex_exit(&pvs->intr_mutex);
+}
+
+static void
+pvscsi_reset_hba(pvscsi_softc_t *pvs)
+{
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
+}
+
+static void
+pvscsi_reset_bus(pvscsi_softc_t *pvs)
+{
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_RESET_BUS, NULL, 0);
+}
+
+static void
+pvscsi_submit_nonrw_io(pvscsi_softc_t *pvs)
+{
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
+}
+
+static void
+pvscsi_submit_rw_io(pvscsi_softc_t *pvs)
+{
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
+}
+
+
+static int
+pvscsi_inquiry_target(pvscsi_softc_t *pvs, int target, struct scsi_inquiry *inq)
+{
+ int len = sizeof (struct scsi_inquiry);
+ int ret = -1;
+ struct buf *b;
+ struct scsi_address ap;
+ struct scsi_pkt *pkt;
+ uint8_t cdb[CDB_GROUP0];
+
+ ap.a_hba_tran = pvs->tran;
+ ap.a_target = (ushort_t)target;
+ ap.a_lun = (uchar_t)0;
+
+ if ((b = scsi_alloc_consistent_buf(&ap, (struct buf *)NULL, len,
+ B_READ, NULL_FUNC, NULL)) == NULL)
+ return (-1);
+
+ if ((pkt = scsi_init_pkt(&ap, (struct scsi_pkt *)NULL, b,
+ CDB_GROUP0, sizeof (struct scsi_arq_status), 0, 0,
+ NULL_FUNC, NULL)) == NULL)
+ goto free_buf;
+
+ cdb[0] = SCMD_INQUIRY;
+ cdb[1] = 0;
+ cdb[2] = 0;
+ cdb[3] = (len & 0xff00) >> 8;
+ cdb[4] = (len & 0x00ff);
+ cdb[5] = 0;
+
+ if (inq != NULL)
+ bzero(inq, sizeof (*inq));
+ bcopy(cdb, pkt->pkt_cdbp, CDB_GROUP0);
+ bzero((struct scsi_inquiry *)b->b_un.b_addr, sizeof (*inq));
+
+ if ((ret = scsi_poll(pkt)) == 0 && inq != NULL)
+ bcopy(b->b_un.b_addr, inq, sizeof (*inq));
+
+ scsi_destroy_pkt(pkt);
+
+free_buf:
+ scsi_free_consistent_buf(b);
+
+ return (ret);
+}
+
+static int
+pvscsi_config_one(dev_info_t *pdip, pvscsi_softc_t *pvs, int target,
+ dev_info_t **childp)
+{
+ char **compatible = NULL;
+ char *nodename = NULL;
+ dev_info_t *dip;
+ int inqrc;
+ int ncompatible = 0;
+ pvscsi_device_t *devnode;
+ struct scsi_inquiry inq;
+
+ /* Inquiry target */
+ inqrc = pvscsi_inquiry_target(pvs, target, &inq);
+
+ /* Find devnode */
+ for (devnode = list_head(&pvs->devnodes); devnode != NULL;
+ devnode = list_next(&pvs->devnodes, devnode)) {
+ if (devnode->target == target)
+ break;
+ }
+
+ if (devnode != NULL) {
+ if (inqrc != 0) {
+ /* Target disappeared, drop devnode */
+ if (i_ddi_devi_attached(devnode->pdip)) {
+ char *devname;
+ /* Get full devname */
+ devname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_deviname(devnode->pdip, devname);
+ /* Clean cache and name */
+ (void) devfs_clean(devnode->parent, devname + 1,
+ DV_CLEAN_FORCE);
+ kmem_free(devname, MAXPATHLEN);
+ }
+
+ (void) ndi_devi_offline(devnode->pdip, NDI_DEVI_REMOVE);
+
+ list_remove(&pvs->devnodes, devnode);
+ kmem_free(devnode, sizeof (*devnode));
+ } else if (childp != NULL) {
+ /* Target exists */
+ *childp = devnode->pdip;
+ }
+ return (NDI_SUCCESS);
+ } else if (inqrc != 0) {
+ /* Target doesn't exist */
+ return (NDI_FAILURE);
+ }
+
+ scsi_hba_nodename_compatible_get(&inq, NULL, inq.inq_dtype, NULL,
+ &nodename, &compatible, &ncompatible);
+ if (nodename == NULL)
+ goto free_nodename;
+
+ if (ndi_devi_alloc(pdip, nodename, DEVI_SID_NODEID,
+ &dip) != NDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to alloc device instance");
+ goto free_nodename;
+ }
+
+ if (ndi_prop_update_string(DDI_DEV_T_NONE, dip,
+ "device-type", "scsi") != DDI_PROP_SUCCESS ||
+ ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ "target", target) != DDI_PROP_SUCCESS ||
+ ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ "lun", 0) != DDI_PROP_SUCCESS ||
+ ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ "pm-capable", 1) != DDI_PROP_SUCCESS ||
+ ndi_prop_update_string_array(DDI_DEV_T_NONE, dip,
+ "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to update props for target %d", target);
+ goto free_devi;
+ }
+
+ if ((devnode = kmem_zalloc(sizeof (*devnode), KM_NOSLEEP)) == NULL)
+ goto free_devi;
+
+ if (ndi_devi_online(dip, NDI_ONLINE_ATTACH) != NDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to online target %d",
+ target);
+ kmem_free(devnode, sizeof (*devnode));
+ goto free_devi;
+ }
+
+ devnode->target = target;
+ devnode->pdip = dip;
+ devnode->parent = pdip;
+ list_insert_tail(&pvs->devnodes, devnode);
+
+ if (childp != NULL)
+ *childp = dip;
+
+ scsi_hba_nodename_compatible_free(nodename, compatible);
+
+ return (NDI_SUCCESS);
+
+free_devi:
+ ndi_prop_remove_all(dip);
+ (void) ndi_devi_free(dip);
+free_nodename:
+ scsi_hba_nodename_compatible_free(nodename, compatible);
+
+ return (NDI_FAILURE);
+}
+
+static int
+pvscsi_config_all(dev_info_t *pdip, pvscsi_softc_t *pvs)
+{
+ int target;
+
+ for (target = 0; target < PVSCSI_MAXTGTS; target++)
+ (void) pvscsi_config_one(pdip, pvs, target, NULL);
+
+ return (NDI_SUCCESS);
+}
+
+static pvscsi_cmd_t *
+pvscsi_process_comp_ring(pvscsi_softc_t *pvs)
+{
+ pvscsi_cmd_t **pnext_cmd = NULL;
+ pvscsi_cmd_t *cmd;
+ pvscsi_cmd_t *head = NULL;
+ struct PVSCSIRingsState *sdesc = RINGS_STATE(pvs);
+ uint32_t cmp_ne = sdesc->cmpNumEntriesLog2;
+
+ ASSERT(mutex_owned(&pvs->rx_mutex));
+
+ while (sdesc->cmpConsIdx != sdesc->cmpProdIdx) {
+ pvscsi_cmd_ctx_t *ctx;
+ struct PVSCSIRingCmpDesc *cdesc;
+
+ cdesc = CMP_RING(pvs) + (sdesc->cmpConsIdx & MASK(cmp_ne));
+ membar_consumer();
+
+ ctx = pvscsi_resolve_ctx(pvs, cdesc->context);
+ ASSERT(ctx != NULL);
+
+ if ((cmd = ctx->cmd) != NULL) {
+ cmd->next_cmd = NULL;
+
+ /* Save command status for further processing */
+ cmd->cmp_stat.host_status = cdesc->hostStatus;
+ cmd->cmp_stat.scsi_status = cdesc->scsiStatus;
+ cmd->cmp_stat.data_len = cdesc->dataLen;
+
+ /* Mark this command as arrived from hardware */
+ cmd->flags |= PVSCSI_FLAG_HW_STATUS;
+
+ if (head == NULL) {
+ head = cmd;
+ head->tail_cmd = cmd;
+ } else {
+ head->tail_cmd = cmd;
+ }
+
+ if (pnext_cmd == NULL) {
+ pnext_cmd = &cmd->next_cmd;
+ } else {
+ *pnext_cmd = cmd;
+ pnext_cmd = &cmd->next_cmd;
+ }
+ }
+
+ membar_consumer();
+ sdesc->cmpConsIdx++;
+ }
+
+ return (head);
+}
+
+static pvscsi_msg_t *
+pvscsi_process_msg_ring(pvscsi_softc_t *pvs)
+{
+ pvscsi_msg_t *msg;
+ struct PVSCSIRingsState *sdesc = RINGS_STATE(pvs);
+ struct PVSCSIRingMsgDesc *mdesc;
+ struct PVSCSIMsgDescDevStatusChanged *desc;
+ uint32_t msg_ne = sdesc->msgNumEntriesLog2;
+
+ ASSERT(mutex_owned(&pvs->rx_mutex));
+
+ if (sdesc->msgProdIdx == sdesc->msgConsIdx)
+ return (NULL);
+
+ mdesc = MSG_RING(pvs) + (sdesc->msgConsIdx & MASK(msg_ne));
+ membar_consumer();
+
+ switch (mdesc->type) {
+ case PVSCSI_MSG_DEV_ADDED:
+ case PVSCSI_MSG_DEV_REMOVED:
+ desc = (struct PVSCSIMsgDescDevStatusChanged *)mdesc;
+ msg = kmem_alloc(sizeof (pvscsi_msg_t), KM_NOSLEEP);
+ if (msg == NULL)
+ return (NULL);
+ msg->msg_pvs = pvs;
+ msg->type = mdesc->type;
+ msg->target = desc->target;
+ break;
+ default:
+ dev_err(pvs->dip, CE_WARN, "!unknown msg type: %d",
+ mdesc->type);
+ return (NULL);
+ }
+
+ membar_consumer();
+ sdesc->msgConsIdx++;
+
+ return (msg);
+}
+
+static void
+pvscsi_handle_msg(void *arg)
+{
+ pvscsi_msg_t *msg = (pvscsi_msg_t *)arg;
+
+ (void) pvscsi_config_one(msg->msg_pvs->dip, msg->msg_pvs, msg->target,
+ NULL);
+
+ kmem_free(msg, sizeof (pvscsi_msg_t));
+}
+
+static int
+pvscsi_abort_cmd(pvscsi_cmd_t *cmd, pvscsi_cmd_t **pending)
+{
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+ pvscsi_cmd_t *c;
+ pvscsi_cmd_t *done;
+ struct PVSCSICmdDescAbortCmd acmd;
+
+ dev_err(pvs->dip, CE_WARN, "!aborting command %p", (void *)cmd);
+
+ ASSERT(mutex_owned(&pvs->rx_mutex));
+ ASSERT(mutex_owned(&pvs->tx_mutex));
+
+ /* Check if the cmd was already completed by the HBA */
+ *pending = done = pvscsi_process_comp_ring(pvs);
+ for (c = done; c != NULL; c = c->next_cmd) {
+ if (c == cmd)
+ return (CMD_CMPLT);
+ }
+
+ /* Check if cmd was really scheduled by the HBA */
+ if (pvscsi_lookup_ctx(pvs, cmd) == NULL)
+ return (CMD_CMPLT);
+
+ /* Abort cmd in the HBA */
+ bzero(&acmd, sizeof (acmd));
+ acmd.target = cmd->cmd_target;
+ acmd.context = pvscsi_map_ctx(pvs, cmd->ctx);
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_ABORT_CMD, &acmd, sizeof (acmd));
+
+ /* Check if cmd was completed by the HBA before it could be aborted */
+ if ((done = pvscsi_process_comp_ring(pvs)) != NULL) {
+ done->tail_cmd->next_cmd = *pending;
+ *pending = done;
+ for (c = done; c != NULL; c = c->next_cmd) {
+ if (c == cmd)
+ return (CMD_CMPLT);
+ }
+ }
+
+ /* Release I/O ctx */
+ mutex_enter(&pvs->mutex);
+ if (cmd->ctx != NULL)
+ pvscsi_release_ctx(cmd);
+ /* Remove cmd from the queue */
+ pvscsi_remove_from_queue(cmd);
+ mutex_exit(&pvs->mutex);
+
+ /* Insert cmd at the beginning of the list */
+ cmd->next_cmd = *pending;
+ *pending = cmd;
+
+ dev_err(pvs->dip, CE_WARN, "!command %p aborted", (void *)cmd);
+
+ return (CMD_ABORTED);
+}
+
+static void
+pvscsi_map_buffers(pvscsi_cmd_t *cmd, struct PVSCSIRingReqDesc *rdesc)
+{
+ int i;
+
+ ASSERT(cmd->ctx);
+ ASSERT(cmd->cmd_dmaccount > 0 && cmd->cmd_dmaccount <=
+ PVSCSI_MAX_SG_SIZE);
+
+ rdesc->dataLen = cmd->cmd_dma_count;
+ rdesc->dataAddr = 0;
+
+ if (cmd->cmd_dma_count == 0)
+ return;
+
+ if (cmd->cmd_dmaccount > 1) {
+ struct PVSCSISGElement *sgl = CMD_CTX_SGLIST_VA(cmd->ctx);
+
+ for (i = 0; i < cmd->cmd_dmaccount; i++) {
+ sgl[i].addr = cmd->cached_cookies[i].dmac_laddress;
+ sgl[i].length = cmd->cached_cookies[i].dmac_size;
+ sgl[i].flags = 0;
+ }
+ rdesc->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
+ rdesc->dataAddr = (uint64_t)CMD_CTX_SGLIST_PA(cmd->ctx);
+ } else {
+ rdesc->dataAddr = cmd->cached_cookies[0].dmac_laddress;
+ }
+}
+
+static void
+pvscsi_comp_cmd(pvscsi_cmd_t *cmd, uint8_t status)
+{
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
+ STATE_GOT_STATUS);
+ if ((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0)
+ pkt->pkt_state |= STATE_XFERRED_DATA;
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_resid = 0;
+ *(pkt->pkt_scbp) = status;
+}
+
+static void
+pvscsi_set_status(pvscsi_cmd_t *cmd)
+{
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+ uchar_t scsi_status = cmd->cmp_stat.scsi_status;
+ uint32_t host_status = cmd->cmp_stat.host_status;
+
+ if (scsi_status != STATUS_GOOD &&
+ (host_status == BTSTAT_SUCCESS ||
+ (host_status == BTSTAT_LINKED_COMMAND_COMPLETED) ||
+ (host_status == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG))) {
+ if (scsi_status == STATUS_CHECK) {
+ struct scsi_arq_status *astat = (void*)(pkt->pkt_scbp);
+ uint8_t *sensedata;
+ int arq_size;
+
+ *pkt->pkt_scbp = scsi_status;
+ pkt->pkt_state |= STATE_ARQ_DONE;
+
+ if ((cmd->flags & PVSCSI_FLAG_XARQ) != 0) {
+ arq_size = (cmd->cmd_rqslen >=
+ SENSE_BUFFER_SIZE) ? SENSE_BUFFER_SIZE :
+ cmd->cmd_rqslen;
+
+ astat->sts_rqpkt_resid = SENSE_BUFFER_SIZE -
+ arq_size;
+ sensedata = (uint8_t *)&astat->sts_sensedata;
+ bcopy(cmd->arqbuf->b_un.b_addr, sensedata,
+ arq_size);
+
+ pkt->pkt_state |= STATE_XARQ_DONE;
+ } else {
+ astat->sts_rqpkt_resid = 0;
+ }
+
+ astat->sts_rqpkt_statistics = 0;
+ astat->sts_rqpkt_reason = CMD_CMPLT;
+ (*(uint8_t *)&astat->sts_rqpkt_status) = STATUS_GOOD;
+ astat->sts_rqpkt_state = STATE_GOT_BUS |
+ STATE_GOT_TARGET | STATE_SENT_CMD |
+ STATE_XFERRED_DATA | STATE_GOT_STATUS;
+ }
+ pvscsi_comp_cmd(cmd, scsi_status);
+
+ return;
+ }
+
+ switch (host_status) {
+ case BTSTAT_SUCCESS:
+ case BTSTAT_LINKED_COMMAND_COMPLETED:
+ case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
+ pvscsi_comp_cmd(cmd, STATUS_GOOD);
+ break;
+ case BTSTAT_DATARUN:
+ pkt->pkt_reason = CMD_DATA_OVR;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS |
+ STATE_XFERRED_DATA);
+ pkt->pkt_resid = 0;
+ break;
+ case BTSTAT_DATA_UNDERRUN:
+ pkt->pkt_reason = pkt->pkt_state |= (STATE_GOT_BUS |
+ STATE_GOT_TARGET | STATE_SENT_CMD | STATE_GOT_STATUS);
+ pkt->pkt_resid = cmd->dma_count - cmd->cmp_stat.data_len;
+ if (pkt->pkt_resid != cmd->dma_count)
+ pkt->pkt_state |= STATE_XFERRED_DATA;
+ break;
+ case BTSTAT_SELTIMEO:
+ pkt->pkt_reason = CMD_DEV_GONE;
+ pkt->pkt_state |= STATE_GOT_BUS;
+ break;
+ case BTSTAT_TAGREJECT:
+ pkt->pkt_reason = CMD_TAG_REJECT;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ case BTSTAT_BADMSG:
+ pkt->pkt_reason = CMD_BADMSG;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ case BTSTAT_SENTRST:
+ case BTSTAT_RECVRST:
+ case BTSTAT_BUSRESET:
+ pkt->pkt_reason = CMD_RESET;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ case BTSTAT_ABORTQUEUE:
+ pkt->pkt_reason = CMD_ABORTED;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ case BTSTAT_HAHARDWARE:
+ case BTSTAT_INVPHASE:
+ case BTSTAT_HATIMEOUT:
+ case BTSTAT_NORESPONSE:
+ case BTSTAT_DISCONNECT:
+ case BTSTAT_HASOFTWARE:
+ case BTSTAT_BUSFREE:
+ case BTSTAT_SENSFAILED:
+ pkt->pkt_reason = CMD_TRAN_ERR;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ default:
+ dev_err(pvs->dip, CE_WARN,
+ "!unknown host status code: %d", host_status);
+ pkt->pkt_reason = CMD_TRAN_ERR;
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
+ STATE_SENT_CMD | STATE_GOT_STATUS);
+ break;
+ }
+}
+
+static void
+pvscsi_complete_chained(void *arg)
+{
+ pvscsi_cmd_t *cmd = (pvscsi_cmd_t *)arg;
+ pvscsi_cmd_t *c;
+ struct scsi_pkt *pkt;
+
+ while (cmd != NULL) {
+ pvscsi_softc_t *pvs = cmd->cmd_pvs;
+
+ c = cmd->next_cmd;
+ cmd->next_cmd = NULL;
+
+ pkt = CMD2PKT(cmd);
+ if (pkt == NULL)
+ return;
+
+ if ((cmd->flags & PVSCSI_FLAG_IO_IOPB) != 0 &&
+ (cmd->flags & PVSCSI_FLAG_IO_READ) != 0) {
+ (void) ddi_dma_sync(cmd->cmd_dmahdl, 0, 0,
+ DDI_DMA_SYNC_FORCPU);
+ }
+
+ mutex_enter(&pvs->mutex);
+ /* Release I/O ctx */
+ if (cmd->ctx != NULL)
+ pvscsi_release_ctx(cmd);
+ /* Remove command from queue */
+ pvscsi_remove_from_queue(cmd);
+ mutex_exit(&pvs->mutex);
+
+ if ((cmd->flags & PVSCSI_FLAG_HW_STATUS) != 0) {
+ pvscsi_set_status(cmd);
+ } else {
+ ASSERT((cmd->flags & PVSCSI_FLAGS_NON_HW_COMPLETION) !=
+ 0);
+
+ if ((cmd->flags & PVSCSI_FLAG_TIMED_OUT) != 0) {
+ cmd->pkt->pkt_reason = CMD_TIMEOUT;
+ cmd->pkt->pkt_statistics |=
+ (STAT_TIMEOUT | STAT_ABORTED);
+ } else if ((cmd->flags & PVSCSI_FLAG_ABORTED) != 0) {
+ cmd->pkt->pkt_reason = CMD_ABORTED;
+ cmd->pkt->pkt_statistics |=
+ (STAT_TIMEOUT | STAT_ABORTED);
+ } else if ((cmd->flags & PVSCSI_FLAGS_RESET) != 0) {
+ cmd->pkt->pkt_reason = CMD_RESET;
+ if ((cmd->flags & PVSCSI_FLAG_RESET_BUS) != 0) {
+ cmd->pkt->pkt_statistics |=
+ STAT_BUS_RESET;
+ } else {
+ cmd->pkt->pkt_statistics |=
+ STAT_DEV_RESET;
+ }
+ }
+ }
+
+ cmd->flags |= PVSCSI_FLAG_DONE;
+ cmd->flags &= ~PVSCSI_FLAG_TRANSPORT;
+
+ if ((pkt->pkt_flags & FLAG_NOINTR) == 0 &&
+ pkt->pkt_comp != NULL)
+ (*pkt->pkt_comp)(pkt);
+
+ cmd = c;
+ }
+}
+
+static void
+pvscsi_dev_reset(pvscsi_softc_t *pvs, int target)
+{
+ struct PVSCSICmdDescResetDevice cmd = { 0 };
+
+ cmd.target = target;
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof (cmd));
+}
+
+static int
+pvscsi_poll_cmd(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
+{
+ boolean_t seen_intr;
+ int cycles = (cmd->pkt->pkt_time * 1000000) / USECS_TO_WAIT;
+ int i;
+ pvscsi_cmd_t *dcmd;
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+
+ /*
+ * Make sure we're not missing any commands completed
+ * concurrently before we have actually disabled interrupts.
+ */
+ mutex_enter(&pvs->rx_mutex);
+ dcmd = pvscsi_process_comp_ring(pvs);
+ mutex_exit(&pvs->rx_mutex);
+
+ pvscsi_complete_chained(dcmd);
+
+ while ((cmd->flags & PVSCSI_FLAG_DONE) == 0) {
+ seen_intr = B_FALSE;
+
+ /* Disable interrupts from H/W */
+ pvscsi_mask_intr(pvs);
+
+ /* Wait for interrupt to arrive */
+ for (i = 0; i < cycles; i++) {
+ uint32_t status;
+
+ mutex_enter(&pvs->rx_mutex);
+ mutex_enter(&pvs->intr_mutex);
+ status = pvscsi_read_intr_status(pvs);
+ if ((status & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
+ /* Check completion ring */
+ mutex_exit(&pvs->intr_mutex);
+ dcmd = pvscsi_process_comp_ring(pvs);
+ mutex_exit(&pvs->rx_mutex);
+ seen_intr = B_TRUE;
+ break;
+ } else {
+ mutex_exit(&pvs->intr_mutex);
+ mutex_exit(&pvs->rx_mutex);
+ drv_usecwait(USECS_TO_WAIT);
+ }
+ }
+
+ /* Enable interrupts from H/W */
+ pvscsi_unmask_intr(pvs);
+
+ if (!seen_intr) {
+ /* No interrupts seen from device during the timeout */
+ mutex_enter(&pvs->tx_mutex);
+ mutex_enter(&pvs->rx_mutex);
+ if ((cmd->flags & PVSCSI_FLAGS_COMPLETION) != 0) {
+ /* Command was cancelled asynchronously */
+ dcmd = NULL;
+ } else if ((pvscsi_abort_cmd(cmd,
+ &dcmd)) == CMD_ABORTED) {
+ /* Command was cancelled in hardware */
+ pkt->pkt_state |= (STAT_TIMEOUT | STAT_ABORTED);
+ pkt->pkt_statistics |= (STAT_TIMEOUT |
+ STAT_ABORTED);
+ pkt->pkt_reason = CMD_TIMEOUT;
+ }
+ mutex_exit(&pvs->rx_mutex);
+ mutex_exit(&pvs->tx_mutex);
+
+ /*
+ * Complete commands that might be on completion list.
+ * Target command can also be on the list in case it was
+ * completed before it could be actually cancelled.
+ */
+ break;
+ }
+
+ pvscsi_complete_chained(dcmd);
+
+ if (!seen_intr)
+ break;
+ }
+
+ return (TRAN_ACCEPT);
+}
+
+static void
+pvscsi_abort_all(struct scsi_address *ap, pvscsi_softc_t *pvs,
+ pvscsi_cmd_t **pending, int marker_flag)
+{
+ int qlen = pvs->cmd_queue_len;
+ pvscsi_cmd_t *cmd, *pcmd, *phead = NULL;
+
+ ASSERT(mutex_owned(&pvs->rx_mutex));
+ ASSERT(mutex_owned(&pvs->tx_mutex));
+
+ /*
+ * Try to abort all queued commands, merging commands waiting
+ * for completion into a single list to complete them at one
+ * time when mutex is released.
+ */
+ while (qlen > 0) {
+ mutex_enter(&pvs->mutex);
+ cmd = list_remove_head(&pvs->cmd_queue);
+ ASSERT(cmd != NULL);
+
+ qlen--;
+
+ if (ap == NULL || ap->a_target == cmd->cmd_target) {
+ int c = --pvs->cmd_queue_len;
+
+ mutex_exit(&pvs->mutex);
+
+ if (pvscsi_abort_cmd(cmd, &pcmd) == CMD_ABORTED) {
+ /*
+ * Assume command is completely cancelled now,
+ * so mark it as requested.
+ */
+ cmd->flags |= marker_flag;
+ }
+
+ qlen -= (c - pvs->cmd_queue_len);
+
+ /*
+ * Now merge current pending commands with
+ * previous ones.
+ */
+ if (phead == NULL) {
+ phead = pcmd;
+ } else if (pcmd != NULL) {
+ phead->tail_cmd->next_cmd = pcmd;
+ phead->tail_cmd = pcmd->tail_cmd;
+ }
+ } else {
+ list_insert_tail(&pvs->cmd_queue, cmd);
+ mutex_exit(&pvs->mutex);
+ }
+ }
+
+ *pending = phead;
+}
+
+static void
+pvscsi_quiesce_notify(pvscsi_softc_t *pvs)
+{
+ mutex_enter(&pvs->mutex);
+ if (pvs->cmd_queue_len == 0 &&
+ (pvs->flags & PVSCSI_HBA_QUIESCE_PENDING) != 0) {
+ pvs->flags &= ~PVSCSI_HBA_QUIESCE_PENDING;
+ cv_broadcast(&pvs->quiescevar);
+ }
+ mutex_exit(&pvs->mutex);
+}
+
+static int
+pvscsi_transport_command(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
+{
+ struct PVSCSIRingReqDesc *rdesc;
+ struct PVSCSIRingsState *sdesc = RINGS_STATE(pvs);
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+ uint32_t req_ne = sdesc->reqNumEntriesLog2;
+
+ mutex_enter(&pvs->tx_mutex);
+ mutex_enter(&pvs->mutex);
+ if (!pvscsi_acquire_ctx(pvs, cmd)) {
+ mutex_exit(&pvs->mutex);
+ mutex_exit(&pvs->tx_mutex);
+ dev_err(pvs->dip, CE_WARN, "!no free ctx available");
+ return (TRAN_BUSY);
+ }
+
+ if ((sdesc->reqProdIdx - sdesc->cmpConsIdx) >= (1 << req_ne)) {
+ pvscsi_release_ctx(cmd);
+ mutex_exit(&pvs->mutex);
+ mutex_exit(&pvs->tx_mutex);
+ dev_err(pvs->dip, CE_WARN, "!no free I/O slots available");
+ return (TRAN_BUSY);
+ }
+ mutex_exit(&pvs->mutex);
+
+ cmd->flags |= PVSCSI_FLAG_TRANSPORT;
+
+ rdesc = REQ_RING(pvs) + (sdesc->reqProdIdx & MASK(req_ne));
+
+ bzero(&rdesc->lun, sizeof (rdesc->lun));
+
+ rdesc->bus = 0;
+ rdesc->target = cmd->cmd_target;
+
+ if ((cmd->flags & PVSCSI_FLAG_XARQ) != 0) {
+ bzero((void*)cmd->arqbuf->b_un.b_addr, SENSE_BUFFER_SIZE);
+ rdesc->senseLen = SENSE_BUFFER_SIZE;
+ rdesc->senseAddr = cmd->arqc.dmac_laddress;
+ } else {
+ rdesc->senseLen = 0;
+ rdesc->senseAddr = 0;
+ }
+
+ rdesc->vcpuHint = CPU->cpu_id;
+ rdesc->cdbLen = cmd->cmdlen;
+ bcopy(cmd->cmd_cdb, rdesc->cdb, cmd->cmdlen);
+
+ /* Setup tag info */
+ if ((cmd->flags & PVSCSI_FLAG_TAG) != 0)
+ rdesc->tag = cmd->tag;
+ else
+ rdesc->tag = MSG_SIMPLE_QTAG;
+
+ /* Setup I/O direction and map data buffers */
+ if ((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0) {
+ if ((cmd->flags & PVSCSI_FLAG_IO_READ) != 0)
+ rdesc->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
+ else
+ rdesc->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
+ pvscsi_map_buffers(cmd, rdesc);
+ } else {
+ rdesc->flags = 0;
+ }
+
+ rdesc->context = pvscsi_map_ctx(pvs, cmd->ctx);
+ membar_producer();
+
+ sdesc->reqProdIdx++;
+ membar_producer();
+
+ mutex_enter(&pvs->mutex);
+ cmd->timeout_lbolt = ddi_get_lbolt() + SEC_TO_TICK(pkt->pkt_time);
+ pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD);
+ pvscsi_add_to_queue(cmd);
+
+ switch (cmd->pkt->pkt_cdbp[0]) {
+ case SCMD_READ:
+ case SCMD_WRITE:
+ case SCMD_READ_G1:
+ case SCMD_WRITE_G1:
+ case SCMD_READ_G4:
+ case SCMD_WRITE_G4:
+ case SCMD_READ_G5:
+ case SCMD_WRITE_G5:
+ ASSERT((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0);
+ pvscsi_submit_rw_io(pvs);
+ break;
+ default:
+ pvscsi_submit_nonrw_io(pvs);
+ break;
+ }
+ mutex_exit(&pvs->mutex);
+ mutex_exit(&pvs->tx_mutex);
+
+ return (TRAN_ACCEPT);
+}
+
+static int
+pvscsi_reset_generic(pvscsi_softc_t *pvs, struct scsi_address *ap)
+{
+ boolean_t bus_reset = (ap == NULL);
+ int flags;
+ pvscsi_cmd_t *done, *aborted;
+
+ flags = bus_reset ? PVSCSI_FLAG_RESET_BUS : PVSCSI_FLAG_RESET_DEV;
+
+ mutex_enter(&pvs->tx_mutex);
+ mutex_enter(&pvs->rx_mutex);
+ /* Try to process pending requests */
+ done = pvscsi_process_comp_ring(pvs);
+
+ /* Abort all pending requests */
+ pvscsi_abort_all(ap, pvs, &aborted, flags);
+
+ /* Reset at hardware level */
+ if (bus_reset) {
+ pvscsi_reset_bus(pvs);
+ /* Should never happen after bus reset */
+ ASSERT(pvscsi_process_comp_ring(pvs) == NULL);
+ } else {
+ pvscsi_dev_reset(pvs, ap->a_target);
+ }
+ mutex_exit(&pvs->rx_mutex);
+ mutex_exit(&pvs->tx_mutex);
+
+ pvscsi_complete_chained(done);
+ pvscsi_complete_chained(aborted);
+
+ return (1);
+}
+
+static void
+pvscsi_cmd_ext_free(pvscsi_cmd_t *cmd)
+{
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+
+ if ((cmd->flags & PVSCSI_FLAG_CDB_EXT) != 0) {
+ kmem_free(pkt->pkt_cdbp, cmd->cmdlen);
+ cmd->flags &= ~PVSCSI_FLAG_CDB_EXT;
+ }
+ if ((cmd->flags & PVSCSI_FLAG_SCB_EXT) != 0) {
+ kmem_free(pkt->pkt_scbp, cmd->statuslen);
+ cmd->flags &= ~PVSCSI_FLAG_SCB_EXT;
+ }
+ if ((cmd->flags & PVSCSI_FLAG_PRIV_EXT) != 0) {
+ kmem_free(pkt->pkt_private, cmd->tgtlen);
+ cmd->flags &= ~PVSCSI_FLAG_PRIV_EXT;
+ }
+}
+
+/* ARGSUSED pvs */
+static int
+pvscsi_cmd_ext_alloc(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd, int kf)
+{
+ struct scsi_pkt *pkt = CMD2PKT(cmd);
+ void *buf;
+
+ if (cmd->cmdlen > sizeof (cmd->cmd_cdb)) {
+ if ((buf = kmem_zalloc(cmd->cmdlen, kf)) == NULL)
+ return (NULL);
+ pkt->pkt_cdbp = buf;
+ cmd->flags |= PVSCSI_FLAG_CDB_EXT;
+ }
+
+ if (cmd->statuslen > sizeof (cmd->cmd_scb)) {
+ if ((buf = kmem_zalloc(cmd->statuslen, kf)) == NULL)
+ goto out;
+ pkt->pkt_scbp = buf;
+ cmd->flags |= PVSCSI_FLAG_SCB_EXT;
+ cmd->cmd_rqslen = (cmd->statuslen - sizeof (cmd->cmd_scb));
+ }
+
+ if (cmd->tgtlen > sizeof (cmd->tgt_priv)) {
+ if ((buf = kmem_zalloc(cmd->tgtlen, kf)) == NULL)
+ goto out;
+ pkt->pkt_private = buf;
+ cmd->flags |= PVSCSI_FLAG_PRIV_EXT;
+ }
+
+ return (DDI_SUCCESS);
+
+out:
+ pvscsi_cmd_ext_free(cmd);
+
+ return (NULL);
+}
+
+static int
+pvscsi_setup_dma_buffer(pvscsi_softc_t *pvs, size_t length,
+ pvscsi_dma_buf_t *buf)
+{
+ ddi_dma_cookie_t cookie;
+ uint_t ccount;
+
+ if ((ddi_dma_alloc_handle(pvs->dip, &pvscsi_ring_dma_attr,
+ DDI_DMA_SLEEP, NULL, &buf->dma_handle)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to allocate DMA handle");
+ return (DDI_FAILURE);
+ }
+
+ if ((ddi_dma_mem_alloc(buf->dma_handle, length, &pvscsi_dma_attrs,
+ DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &buf->addr,
+ &buf->real_length, &buf->acc_handle)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to allocate %ld bytes for DMA buffer", length);
+ ddi_dma_free_handle(&buf->dma_handle);
+ return (DDI_FAILURE);
+ }
+
+ if ((ddi_dma_addr_bind_handle(buf->dma_handle, NULL, buf->addr,
+ buf->real_length, DDI_DMA_CONSISTENT | DDI_DMA_RDWR, DDI_DMA_SLEEP,
+ NULL, &cookie, &ccount)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to bind DMA buffer");
+ ddi_dma_free_handle(&buf->dma_handle);
+ ddi_dma_mem_free(&buf->acc_handle);
+ return (DDI_FAILURE);
+ }
+
+ /* TODO Support multipart SG regions */
+ ASSERT(ccount == 1);
+
+ buf->pa = cookie.dmac_laddress;
+
+ return (DDI_SUCCESS);
+}
+
+static void
+pvscsi_free_dma_buffer(pvscsi_dma_buf_t *buf)
+{
+ ddi_dma_free_handle(&buf->dma_handle);
+ ddi_dma_mem_free(&buf->acc_handle);
+}
+
+static int
+pvscsi_setup_sg(pvscsi_softc_t *pvs)
+{
+ int i;
+ pvscsi_cmd_ctx_t *ctx;
+ size_t size = pvs->req_depth * sizeof (pvscsi_cmd_ctx_t);
+
+ ctx = pvs->cmd_ctx = kmem_zalloc(size, KM_SLEEP);
+
+ for (i = 0; i < pvs->req_depth; ++i, ++ctx) {
+ list_insert_tail(&pvs->cmd_ctx_pool, ctx);
+ if (pvscsi_setup_dma_buffer(pvs, PAGE_SIZE,
+ &ctx->dma_buf) != DDI_SUCCESS)
+ goto cleanup;
+ }
+
+ return (DDI_SUCCESS);
+
+cleanup:
+ for (; i >= 0; --i, --ctx) {
+ list_remove(&pvs->cmd_ctx_pool, ctx);
+ pvscsi_free_dma_buffer(&ctx->dma_buf);
+ }
+ kmem_free(pvs->cmd_ctx, size);
+
+ return (DDI_FAILURE);
+}
+
+static void
+pvscsi_free_sg(pvscsi_softc_t *pvs)
+{
+ int i;
+ pvscsi_cmd_ctx_t *ctx = pvs->cmd_ctx;
+
+ for (i = 0; i < pvs->req_depth; ++i, ++ctx) {
+ list_remove(&pvs->cmd_ctx_pool, ctx);
+ pvscsi_free_dma_buffer(&ctx->dma_buf);
+ }
+
+ kmem_free(pvs->cmd_ctx, pvs->req_pages << PAGE_SHIFT);
+}
+
+static int
+pvscsi_allocate_rings(pvscsi_softc_t *pvs)
+{
+ /* Allocate DMA buffer for rings state */
+ if (pvscsi_setup_dma_buffer(pvs, PAGE_SIZE,
+ &pvs->rings_state_buf) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ /* Allocate DMA buffer for request ring */
+ pvs->req_pages = MIN(pvscsi_ring_pages, PVSCSI_MAX_NUM_PAGES_REQ_RING);
+ pvs->req_depth = pvs->req_pages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
+ if (pvscsi_setup_dma_buffer(pvs, pvs->req_pages * PAGE_SIZE,
+ &pvs->req_ring_buf) != DDI_SUCCESS)
+ goto free_rings_state;
+
+ /* Allocate completion ring */
+ pvs->cmp_pages = MIN(pvscsi_ring_pages, PVSCSI_MAX_NUM_PAGES_CMP_RING);
+ if (pvscsi_setup_dma_buffer(pvs, pvs->cmp_pages * PAGE_SIZE,
+ &pvs->cmp_ring_buf) != DDI_SUCCESS)
+ goto free_req_buf;
+
+ /* Allocate message ring */
+ pvs->msg_pages = MIN(pvscsi_msg_ring_pages,
+ PVSCSI_MAX_NUM_PAGES_MSG_RING);
+ if (pvscsi_setup_dma_buffer(pvs, pvs->msg_pages * PAGE_SIZE,
+ &pvs->msg_ring_buf) != DDI_SUCCESS)
+ goto free_cmp_buf;
+
+ return (DDI_SUCCESS);
+
+free_cmp_buf:
+ pvscsi_free_dma_buffer(&pvs->cmp_ring_buf);
+free_req_buf:
+ pvscsi_free_dma_buffer(&pvs->req_ring_buf);
+free_rings_state:
+ pvscsi_free_dma_buffer(&pvs->rings_state_buf);
+
+ return (DDI_FAILURE);
+}
+
+static void
+pvscsi_free_rings(pvscsi_softc_t *pvs)
+{
+ pvscsi_free_dma_buffer(&pvs->msg_ring_buf);
+ pvscsi_free_dma_buffer(&pvs->cmp_ring_buf);
+ pvscsi_free_dma_buffer(&pvs->req_ring_buf);
+ pvscsi_free_dma_buffer(&pvs->rings_state_buf);
+}
+
+static void
+pvscsi_setup_rings(pvscsi_softc_t *pvs)
+{
+ int i;
+ struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };
+ struct PVSCSICmdDescSetupRings cmd = { 0 };
+ uint64_t base;
+
+ cmd.ringsStatePPN = pvs->rings_state_buf.pa >> PAGE_SHIFT;
+ cmd.reqRingNumPages = pvs->req_pages;
+ cmd.cmpRingNumPages = pvs->cmp_pages;
+
+ /* Setup request ring */
+ base = pvs->req_ring_buf.pa;
+ for (i = 0; i < pvs->req_pages; i++) {
+ cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
+ base += PAGE_SIZE;
+ }
+
+ /* Setup completion ring */
+ base = pvs->cmp_ring_buf.pa;
+ for (i = 0; i < pvs->cmp_pages; i++) {
+ cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
+ base += PAGE_SIZE;
+ }
+
+ bzero(RINGS_STATE(pvs), PAGE_SIZE);
+ bzero(REQ_RING(pvs), pvs->req_pages * PAGE_SIZE);
+ bzero(CMP_RING(pvs), pvs->cmp_pages * PAGE_SIZE);
+
+ /* Issue SETUP command */
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof (cmd));
+
+ /* Setup message ring */
+ cmd_msg.numPages = pvs->msg_pages;
+ base = pvs->msg_ring_buf.pa;
+
+ for (i = 0; i < pvs->msg_pages; i++) {
+ cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
+ base += PAGE_SIZE;
+ }
+ bzero(MSG_RING(pvs), pvs->msg_pages * PAGE_SIZE);
+
+ pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_SETUP_MSG_RING, &cmd_msg,
+ sizeof (cmd_msg));
+}
+
+static int
+pvscsi_setup_io(pvscsi_softc_t *pvs)
+{
+ int offset, rcount, rn, type;
+ int ret = DDI_FAILURE;
+ off_t regsize;
+ pci_regspec_t *regs;
+ uint_t regs_length;
+
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, pvs->dip,
+ DDI_PROP_DONTPASS, "reg", (int **)&regs,
+ &regs_length) != DDI_PROP_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to lookup 'reg' property");
+ return (DDI_FAILURE);
+ }
+
+ rcount = regs_length * sizeof (int) / sizeof (pci_regspec_t);
+
+ for (offset = PCI_CONF_BASE0; offset <= PCI_CONF_BASE5; offset += 4) {
+ for (rn = 0; rn < rcount; ++rn) {
+ if (PCI_REG_REG_G(regs[rn].pci_phys_hi) == offset) {
+ type = regs[rn].pci_phys_hi & PCI_ADDR_MASK;
+ break;
+ }
+ }
+
+ if (rn >= rcount)
+ continue;
+
+ if (type != PCI_ADDR_IO) {
+ if (ddi_dev_regsize(pvs->dip, rn,
+ &regsize) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to get size of reg %d", rn);
+ goto out;
+ }
+ if (regsize == PVSCSI_MEM_SPACE_SIZE) {
+ if (ddi_regs_map_setup(pvs->dip, rn,
+ &pvs->mmio_base, 0, 0,
+ &pvscsi_mmio_attr,
+ &pvs->mmio_handle) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to map MMIO BAR");
+ goto out;
+ }
+ ret = DDI_SUCCESS;
+ break;
+ }
+ }
+ }
+
+out:
+ ddi_prop_free(regs);
+
+ return (ret);
+}
+
+static void
+pvscsi_free_io(pvscsi_softc_t *pvs)
+{
+ ddi_regs_map_free(&pvs->mmio_handle);
+}
+
+static int
+pvscsi_enable_intrs(pvscsi_softc_t *pvs)
+{
+ int i, rc, intr_caps;
+
+ if ((rc = ddi_intr_get_cap(pvs->intr_htable[0], &intr_caps)) !=
+ DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to get interrupt caps");
+ return (DDI_FAILURE);
+ }
+
+ if ((intr_caps & DDI_INTR_FLAG_BLOCK) != 0) {
+ if ((rc = ddi_intr_block_enable(pvs->intr_htable,
+ pvs->intr_cnt)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to enable interrupt block");
+ }
+ } else {
+ for (i = 0; i < pvs->intr_cnt; i++) {
+ if ((rc = ddi_intr_enable(pvs->intr_htable[i])) ==
+ DDI_SUCCESS)
+ continue;
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to enable interrupt");
+ while (--i >= 0)
+ (void) ddi_intr_disable(pvs->intr_htable[i]);
+ break;
+ }
+ }
+
+ /* Unmask interrupts */
+ if (rc == DDI_SUCCESS) {
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK,
+ PVSCSI_INTR_CMPL_MASK | PVSCSI_INTR_MSG_MASK);
+ }
+
+ return (rc);
+}
+
+/* ARGSUSED arg2 */
+static uint32_t
+pvscsi_intr_handler(caddr_t arg1, caddr_t arg2)
+{
+ boolean_t handled;
+ pvscsi_softc_t *pvs = (pvscsi_softc_t *)arg1;
+ uint32_t status;
+
+ mutex_enter(&pvs->intr_mutex);
+ if (pvs->num_pollers > 0) {
+ mutex_exit(&pvs->intr_mutex);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ if (pvscsi_enable_msi) {
+ handled = B_TRUE;
+ } else {
+ status = pvscsi_read_intr_status(pvs);
+ handled = (status & PVSCSI_INTR_ALL_SUPPORTED) != 0;
+ if (handled)
+ pvscsi_write_intr_status(pvs, status);
+ }
+ mutex_exit(&pvs->intr_mutex);
+
+ if (handled) {
+ boolean_t qnotify;
+ pvscsi_cmd_t *pending;
+ pvscsi_msg_t *msg;
+
+ mutex_enter(&pvs->rx_mutex);
+ pending = pvscsi_process_comp_ring(pvs);
+ msg = pvscsi_process_msg_ring(pvs);
+ mutex_exit(&pvs->rx_mutex);
+
+ mutex_enter(&pvs->mutex);
+ qnotify = HBA_QUIESCE_PENDING(pvs);
+ mutex_exit(&pvs->mutex);
+
+ if (pending != NULL && ddi_taskq_dispatch(pvs->comp_tq,
+ pvscsi_complete_chained, pending,
+ DDI_NOSLEEP) == DDI_FAILURE)
+ pvscsi_complete_chained(pending);
+
+ if (msg != NULL && ddi_taskq_dispatch(pvs->msg_tq,
+ pvscsi_handle_msg, msg, DDI_NOSLEEP) == DDI_FAILURE) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to process msg type %d for target %d",
+ msg->type, msg->target);
+ kmem_free(msg, sizeof (pvscsi_msg_t));
+ }
+
+ if (qnotify)
+ pvscsi_quiesce_notify(pvs);
+ }
+
+ return (handled ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
+}
+
+static int
+pvscsi_register_isr(pvscsi_softc_t *pvs, int type)
+{
+ int navail, nactual;
+ int i;
+
+ if (ddi_intr_get_navail(pvs->dip, type, &navail) != DDI_SUCCESS ||
+ navail == 0) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to get number of available interrupts of type %d",
+ type);
+ return (DDI_FAILURE);
+ }
+ navail = MIN(navail, PVSCSI_MAX_INTRS);
+
+ pvs->intr_size = navail * sizeof (ddi_intr_handle_t);
+ if ((pvs->intr_htable = kmem_alloc(pvs->intr_size, KM_SLEEP)) == NULL) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to allocate %d bytes for interrupt hashtable",
+ pvs->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_intr_alloc(pvs->dip, pvs->intr_htable, type, 0, navail,
+ &nactual, DDI_INTR_ALLOC_NORMAL) != DDI_SUCCESS || nactual == 0) {
+ dev_err(pvs->dip, CE_WARN, "!failed to allocate %d interrupts",
+ navail);
+ goto free_htable;
+ }
+
+ pvs->intr_cnt = nactual;
+
+ if (ddi_intr_get_pri(pvs->intr_htable[0],
+ (uint_t *)&pvs->intr_pri) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to get interrupt priority");
+ goto free_intrs;
+ }
+
+ for (i = 0; i < nactual; i++) {
+ if (ddi_intr_add_handler(pvs->intr_htable[i],
+ pvscsi_intr_handler, (caddr_t)pvs, NULL) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to add interrupt handler");
+ goto free_intrs;
+ }
+ }
+
+ return (DDI_SUCCESS);
+
+free_intrs:
+ for (i = 0; i < nactual; i++)
+ (void) ddi_intr_free(pvs->intr_htable[i]);
+free_htable:
+ kmem_free(pvs->intr_htable, pvs->intr_size);
+
+ return (DDI_FAILURE);
+}
+
+static void
+pvscsi_free_intr_resources(pvscsi_softc_t *pvs)
+{
+ int i;
+
+ for (i = 0; i < pvs->intr_cnt; i++) {
+ (void) ddi_intr_disable(pvs->intr_htable[i]);
+ (void) ddi_intr_remove_handler(pvs->intr_htable[i]);
+ (void) ddi_intr_free(pvs->intr_htable[i]);
+ }
+ kmem_free(pvs->intr_htable, pvs->intr_size);
+}
+
+static int
+pvscsi_setup_isr(pvscsi_softc_t *pvs)
+{
+ int intr_types;
+
+ if (ddi_intr_get_supported_types(pvs->dip,
+ &intr_types) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to get supported interrupt types");
+ return (DDI_FAILURE);
+ }
+
+ if ((intr_types & DDI_INTR_TYPE_MSIX) != 0 && pvscsi_enable_msi) {
+ if (pvscsi_register_isr(pvs,
+ DDI_INTR_TYPE_MSIX) == DDI_SUCCESS) {
+ pvs->intr_type = DDI_INTR_TYPE_MSIX;
+ } else {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to install MSI-X interrupt handler");
+ }
+ } else if ((intr_types & DDI_INTR_TYPE_MSI) != 0 && pvscsi_enable_msi) {
+ if (pvscsi_register_isr(pvs,
+ DDI_INTR_TYPE_MSI) == DDI_SUCCESS) {
+ pvs->intr_type = DDI_INTR_TYPE_MSI;
+ } else {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to install MSI interrupt handler");
+ }
+ } else if ((intr_types & DDI_INTR_TYPE_FIXED) != 0) {
+ if (pvscsi_register_isr(pvs,
+ DDI_INTR_TYPE_FIXED) == DDI_SUCCESS) {
+ pvs->intr_type = DDI_INTR_TYPE_FIXED;
+ } else {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to install FIXED interrupt handler");
+ }
+ }
+
+ return (pvs->intr_type == 0 ? DDI_FAILURE : DDI_SUCCESS);
+}
+
+static void
+pvscsi_wd_thread(pvscsi_softc_t *pvs)
+{
+ clock_t now;
+ pvscsi_cmd_t *expired, *c, *cn, **pnext;
+
+ mutex_enter(&pvs->mutex);
+ for (;;) {
+ expired = NULL;
+ pnext = NULL;
+ now = ddi_get_lbolt();
+
+ for (c = list_head(&pvs->cmd_queue); c != NULL; ) {
+ cn = list_next(&pvs->cmd_queue, c);
+
+ /*
+ * Commands with 'FLAG_NOINTR' are watched using their
+ * own timeouts, so we should not touch them.
+ */
+ if ((c->pkt->pkt_flags & FLAG_NOINTR) == 0 &&
+ now > c->timeout_lbolt) {
+ dev_err(pvs->dip, CE_WARN,
+ "!expired command: %p (%ld > %ld)",
+ (void *)c, now, c->timeout_lbolt);
+ pvscsi_remove_from_queue(c);
+ if (expired == NULL)
+ expired = c;
+ if (pnext == NULL) {
+ pnext = &c->next_cmd;
+ } else {
+ *pnext = c;
+ pnext = &c->next_cmd;
+ }
+ }
+ c = cn;
+ }
+ mutex_exit(&pvs->mutex);
+
+ /* Now cancel all expired commands */
+ if (expired != NULL) {
+ struct scsi_address sa = {0};
+ /* Build a fake SCSI address */
+ sa.a_hba_tran = pvs->tran;
+ while (expired != NULL) {
+ c = expired->next_cmd;
+ sa.a_target = expired->cmd_target;
+ sa.a_lun = 0;
+ (void) pvscsi_abort(&sa, CMD2PKT(expired));
+ expired = c;
+ }
+ }
+
+ mutex_enter(&pvs->mutex);
+ if ((pvs->flags & PVSCSI_DRIVER_SHUTDOWN) != 0) {
+ /* Finish job */
+ break;
+ }
+ if (cv_reltimedwait(&pvs->wd_condvar, &pvs->mutex,
+ SEC_TO_TICK(1), TR_CLOCK_TICK) > 0) {
+ /* Explicitly woken up, finish job */
+ break;
+ }
+ }
+
+ /* Confirm thread termination */
+ cv_signal(&pvs->syncvar);
+ mutex_exit(&pvs->mutex);
+}
+
+static int
+pvscsi_ccache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ int (*callback)(caddr_t);
+ uint_t cookiec;
+ pvscsi_cmd_t *cmd = (pvscsi_cmd_t *)buf;
+ pvscsi_softc_t *pvs = cdrarg;
+ struct scsi_address ap;
+
+ callback = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
+ ap.a_hba_tran = pvs->tran;
+ ap.a_target = 0;
+ ap.a_lun = 0;
+
+ /* Allocate a DMA handle for data transfers */
+ if ((ddi_dma_alloc_handle(pvs->dip, &pvs->io_dma_attr, callback,
+ NULL, &cmd->cmd_dmahdl)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to allocate DMA handle");
+ return (-1);
+ }
+
+ /* Setup ARQ buffer */
+ if ((cmd->arqbuf = scsi_alloc_consistent_buf(&ap, (struct buf *)NULL,
+ SENSE_BUFFER_SIZE, B_READ, callback, NULL)) == NULL) {
+ dev_err(pvs->dip, CE_WARN, "!failed to allocate ARQ buffer");
+ goto free_handle;
+ }
+
+ if (ddi_dma_alloc_handle(pvs->dip, &pvs->hba_dma_attr,
+ callback, NULL, &cmd->arqhdl) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to allocate DMA handle for ARQ buffer");
+ goto free_arqbuf;
+ }
+
+ if (ddi_dma_buf_bind_handle(cmd->arqhdl, cmd->arqbuf,
+ (DDI_DMA_READ | DDI_DMA_CONSISTENT), callback, NULL,
+ &cmd->arqc, &cookiec) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to bind ARQ buffer");
+ goto free_arqhdl;
+ }
+
+ return (0);
+
+free_arqhdl:
+ ddi_dma_free_handle(&cmd->arqhdl);
+free_arqbuf:
+ scsi_free_consistent_buf(cmd->arqbuf);
+free_handle:
+ ddi_dma_free_handle(&cmd->cmd_dmahdl);
+
+ return (-1);
+}
+
+/* ARGSUSED cdrarg */
+static void
+pvscsi_ccache_destructor(void *buf, void *cdrarg)
+{
+ pvscsi_cmd_t *cmd = (pvscsi_cmd_t *)buf;
+
+ if (cmd->cmd_dmahdl != NULL) {
+ (void) ddi_dma_unbind_handle(cmd->cmd_dmahdl);
+ ddi_dma_free_handle(&cmd->cmd_dmahdl);
+ cmd->cmd_dmahdl = NULL;
+ }
+
+ if (cmd->arqhdl != NULL) {
+ (void) ddi_dma_unbind_handle(cmd->arqhdl);
+ ddi_dma_free_handle(&cmd->arqhdl);
+ cmd->arqhdl = NULL;
+ }
+
+ if (cmd->arqbuf != NULL) {
+ scsi_free_consistent_buf(cmd->arqbuf);
+ cmd->arqbuf = NULL;
+ }
+}
+
+/* tran_* entry points and setup */
+/* ARGSUSED hba_dip tgt_dip hba_tran */
+static int
+pvscsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
+ scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
+{
+ pvscsi_softc_t *pvs = SDEV2PRIV(sd);
+
+ ASSERT(pvs != NULL);
+
+ if (sd->sd_address.a_target >= PVSCSI_MAXTGTS)
+ return (DDI_FAILURE);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+pvscsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ boolean_t poll = ((pkt->pkt_flags & FLAG_NOINTR) != 0);
+ int rc;
+ pvscsi_cmd_t *cmd = PKT2CMD(pkt);
+ pvscsi_softc_t *pvs = ap->a_hba_tran->tran_hba_private;
+
+ ASSERT(cmd->pkt == pkt);
+ ASSERT(cmd->cmd_pvs == pvs);
+
+ /*
+ * Reinitialize some fields because the packet may
+ * have been resubmitted.
+ */
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_state = 0;
+ pkt->pkt_statistics = 0;
+
+ /* Zero status byte */
+ *(pkt->pkt_scbp) = 0;
+
+ if ((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0) {
+ ASSERT(cmd->cmd_dma_count != 0);
+ pkt->pkt_resid = cmd->cmd_dma_count;
+
+ /*
+ * Consistent packets need to be synced first
+ * (only for data going out).
+ */
+ if ((cmd->flags & PVSCSI_FLAG_IO_IOPB) != 0) {
+ (void) ddi_dma_sync(cmd->cmd_dmahdl, 0, 0,
+ DDI_DMA_SYNC_FORDEV);
+ }
+ }
+
+ cmd->cmd_target = ap->a_target;
+
+ mutex_enter(&pvs->mutex);
+ if (HBA_IS_QUIESCED(pvs) && !poll) {
+ mutex_exit(&pvs->mutex);
+ return (TRAN_BUSY);
+ }
+ mutex_exit(&pvs->mutex);
+
+ rc = pvscsi_transport_command(pvs, cmd);
+
+ if (poll) {
+ pvscsi_cmd_t *dcmd;
+ boolean_t qnotify;
+
+ if (rc == TRAN_ACCEPT)
+ rc = pvscsi_poll_cmd(pvs, cmd);
+
+ mutex_enter(&pvs->rx_mutex);
+ dcmd = pvscsi_process_comp_ring(pvs);
+ mutex_exit(&pvs->rx_mutex);
+
+ mutex_enter(&pvs->mutex);
+ qnotify = HBA_QUIESCE_PENDING(pvs);
+ mutex_exit(&pvs->mutex);
+
+ pvscsi_complete_chained(dcmd);
+
+ if (qnotify)
+ pvscsi_quiesce_notify(pvs);
+ }
+
+ return (rc);
+}
+
+static int
+pvscsi_reset(struct scsi_address *ap, int level)
+{
+ pvscsi_softc_t *pvs = AP2PRIV(ap);
+
+ switch (level) {
+ case RESET_ALL:
+ return (pvscsi_reset_generic(pvs, NULL));
+ case RESET_TARGET:
+ ASSERT(ap != NULL);
+ return (pvscsi_reset_generic(pvs, ap));
+ default:
+ return (0);
+ }
+}
+
+static int
+pvscsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ boolean_t qnotify = B_FALSE;
+ pvscsi_cmd_t *pending;
+ pvscsi_softc_t *pvs = ap->a_hba_tran->tran_hba_private;
+
+ mutex_enter(&pvs->tx_mutex);
+ mutex_enter(&pvs->rx_mutex);
+ if (pkt != NULL) {
+ /* Abort single command */
+ pvscsi_cmd_t *cmd = PKT2CMD(pkt);
+
+ if (pvscsi_abort_cmd(cmd, &pending) == CMD_ABORTED) {
+ /* Assume command is completely cancelled now */
+ cmd->flags |= PVSCSI_FLAG_ABORTED;
+ }
+ } else {
+ /* Abort all commands on the bus */
+ pvscsi_abort_all(ap, pvs, &pending, PVSCSI_FLAG_ABORTED);
+ }
+ qnotify = HBA_QUIESCE_PENDING(pvs);
+ mutex_exit(&pvs->rx_mutex);
+ mutex_exit(&pvs->tx_mutex);
+
+ pvscsi_complete_chained(pending);
+
+ if (qnotify)
+ pvscsi_quiesce_notify(pvs);
+
+ return (1);
+}
+
+/* ARGSUSED tgtonly */
+static int
+pvscsi_getcap(struct scsi_address *ap, char *cap, int tgtonly)
+{
+ pvscsi_softc_t *pvs = ap->a_hba_tran->tran_hba_private;
+
+ if (cap == NULL)
+ return (-1);
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_ARQ:
+ return ((pvs->flags & PVSCSI_HBA_AUTO_REQUEST_SENSE) != 0);
+ case SCSI_CAP_UNTAGGED_QING:
+ return (1);
+ default:
+ return (-1);
+ }
+}
+
+/* ARGSUSED tgtonly */
+static int
+pvscsi_setcap(struct scsi_address *ap, char *cap, int value, int tgtonly)
+{
+ pvscsi_softc_t *pvs = ap->a_hba_tran->tran_hba_private;
+
+ if (cap == NULL)
+ return (-1);
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_ARQ:
+ mutex_enter(&pvs->mutex);
+ if (value == 0)
+ pvs->flags &= ~PVSCSI_HBA_AUTO_REQUEST_SENSE;
+ else
+ pvs->flags |= PVSCSI_HBA_AUTO_REQUEST_SENSE;
+ mutex_exit(&pvs->mutex);
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+static void
+pvscsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ pvscsi_cmd_t *cmd = PKT2CMD(pkt);
+ pvscsi_softc_t *pvs = ap->a_hba_tran->tran_hba_private;
+
+ ASSERT(cmd->cmd_pvs == pvs);
+
+ if ((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0) {
+ cmd->flags &= ~PVSCSI_FLAG_DMA_VALID;
+ (void) ddi_dma_unbind_handle(cmd->cmd_dmahdl);
+ }
+
+ if (cmd->ctx != NULL) {
+ mutex_enter(&pvs->mutex);
+ pvscsi_release_ctx(cmd);
+ mutex_exit(&pvs->mutex);
+ }
+
+ if ((cmd->flags & PVSCSI_FLAGS_EXT) != 0)
+ pvscsi_cmd_ext_free(cmd);
+
+ kmem_cache_free(pvs->cmd_cache, cmd);
+}
+
+static struct scsi_pkt *
+pvscsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt, struct buf *bp,
+ int cmdlen, int statuslen, int tgtlen, int flags, int (*callback)(),
+ caddr_t arg)
+{
+ boolean_t is_new;
+ int kf = (callback == SLEEP_FUNC) ? KM_SLEEP: KM_NOSLEEP;
+ int rc, i;
+ pvscsi_cmd_t *cmd;
+ pvscsi_softc_t *pvs;
+
+ pvs = ap->a_hba_tran->tran_hba_private;
+ ASSERT(pvs != NULL);
+
+ /* Allocate a new SCSI packet */
+ if (pkt == NULL) {
+ ddi_dma_handle_t saved_dmahdl, saved_arqhdl;
+ struct buf *saved_arqbuf;
+ ddi_dma_cookie_t saved_arqc;
+
+ is_new = B_TRUE;
+
+ if ((cmd = kmem_cache_alloc(pvs->cmd_cache, kf)) == NULL)
+ return (NULL);
+
+ saved_dmahdl = cmd->cmd_dmahdl;
+ saved_arqhdl = cmd->arqhdl;
+ saved_arqbuf = cmd->arqbuf;
+ saved_arqc = cmd->arqc;
+
+ bzero(cmd, sizeof (pvscsi_cmd_t) -
+ sizeof (cmd->cached_cookies));
+
+ cmd->cmd_pvs = pvs;
+ cmd->cmd_dmahdl = saved_dmahdl;
+ cmd->arqhdl = saved_arqhdl;
+ cmd->arqbuf = saved_arqbuf;
+ cmd->arqc = saved_arqc;
+
+ pkt = &cmd->cached_pkt;
+ pkt->pkt_ha_private = (opaque_t)cmd;
+ pkt->pkt_address = *ap;
+ pkt->pkt_scbp = (uint8_t *)&cmd->cmd_scb;
+ pkt->pkt_cdbp = (uint8_t *)&cmd->cmd_cdb;
+ pkt->pkt_private = (opaque_t)&cmd->tgt_priv;
+
+ cmd->tgtlen = tgtlen;
+ cmd->statuslen = statuslen;
+ cmd->cmdlen = cmdlen;
+ cmd->pkt = pkt;
+ cmd->ctx = NULL;
+
+ /* Allocate extended buffers */
+ if ((cmdlen > sizeof (cmd->cmd_cdb)) ||
+ (statuslen > sizeof (cmd->cmd_scb)) ||
+ (tgtlen > sizeof (cmd->tgt_priv))) {
+ if (pvscsi_cmd_ext_alloc(pvs, cmd, kf) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN,
+ "!extent allocation failed");
+ goto out;
+ }
+ }
+ } else {
+ is_new = B_FALSE;
+
+ cmd = PKT2CMD(pkt);
+ cmd->flags &= PVSCSI_FLAGS_PERSISTENT;
+ }
+
+ ASSERT((cmd->flags & PVSCSI_FLAG_TRANSPORT) == 0);
+
+ if ((flags & PKT_XARQ) != 0)
+ cmd->flags |= PVSCSI_FLAG_XARQ;
+
+ /* Handle partial DMA transfers */
+ if (cmd->cmd_nwin > 0) {
+ if (++cmd->cmd_winindex >= cmd->cmd_nwin)
+ return (NULL);
+ if (ddi_dma_getwin(cmd->cmd_dmahdl, cmd->cmd_winindex,
+ &cmd->cmd_dma_offset, &cmd->cmd_dma_len,
+ &cmd->cmd_dmac, &cmd->cmd_dmaccount) == DDI_FAILURE)
+ return (NULL);
+ goto handle_dma_cookies;
+ }
+
+ /* Setup data buffer */
+ if (bp != NULL && bp->b_bcount > 0 &&
+ (cmd->flags & PVSCSI_FLAG_DMA_VALID) == 0) {
+ int dma_flags;
+
+ ASSERT(cmd->cmd_dmahdl != NULL);
+
+ if ((bp->b_flags & B_READ) != 0) {
+ cmd->flags |= PVSCSI_FLAG_IO_READ;
+ dma_flags = DDI_DMA_READ;
+ } else {
+ cmd->flags &= ~PVSCSI_FLAG_IO_READ;
+ dma_flags = DDI_DMA_WRITE;
+ }
+ if ((flags & PKT_CONSISTENT) != 0) {
+ cmd->flags |= PVSCSI_FLAG_IO_IOPB;
+ dma_flags |= DDI_DMA_CONSISTENT;
+ }
+ if ((flags & PKT_DMA_PARTIAL) != 0)
+ dma_flags |= DDI_DMA_PARTIAL;
+
+ rc = ddi_dma_buf_bind_handle(cmd->cmd_dmahdl, bp,
+ dma_flags, callback, arg, &cmd->cmd_dmac,
+ &cmd->cmd_dmaccount);
+ if (rc == DDI_DMA_PARTIAL_MAP) {
+ (void) ddi_dma_numwin(cmd->cmd_dmahdl,
+ &cmd->cmd_nwin);
+ cmd->cmd_winindex = 0;
+ (void) ddi_dma_getwin(cmd->cmd_dmahdl,
+ cmd->cmd_winindex, &cmd->cmd_dma_offset,
+ &cmd->cmd_dma_len, &cmd->cmd_dmac,
+ &cmd->cmd_dmaccount);
+ } else if (rc != 0 && rc != DDI_DMA_MAPPED) {
+ switch (rc) {
+ case DDI_DMA_NORESOURCES:
+ bioerror(bp, 0);
+ break;
+ case DDI_DMA_BADATTR:
+ case DDI_DMA_NOMAPPING:
+ bioerror(bp, EFAULT);
+ break;
+ case DDI_DMA_TOOBIG:
+ default:
+ bioerror(bp, EINVAL);
+ break;
+ }
+ cmd->flags &= ~PVSCSI_FLAG_DMA_VALID;
+ goto out;
+ }
+
+handle_dma_cookies:
+ ASSERT(cmd->cmd_dmaccount > 0);
+ if (cmd->cmd_dmaccount > PVSCSI_MAX_SG_SIZE) {
+ dev_err(pvs->dip, CE_WARN,
+ "!invalid cookie count: %d (max %d)",
+ cmd->cmd_dmaccount, PVSCSI_MAX_SG_SIZE);
+ bioerror(bp, EINVAL);
+ goto out;
+ }
+
+ cmd->flags |= PVSCSI_FLAG_DMA_VALID;
+ cmd->cmd_dma_count = cmd->cmd_dmac.dmac_size;
+ cmd->cmd_total_dma_count += cmd->cmd_dmac.dmac_size;
+
+ cmd->cached_cookies[0] = cmd->cmd_dmac;
+
+ /*
+ * Calculate total amount of bytes for this I/O and
+ * store cookies for further processing.
+ */
+ for (i = 1; i < cmd->cmd_dmaccount; i++) {
+ ddi_dma_nextcookie(cmd->cmd_dmahdl, &cmd->cmd_dmac);
+ cmd->cached_cookies[i] = cmd->cmd_dmac;
+ cmd->cmd_dma_count += cmd->cmd_dmac.dmac_size;
+ cmd->cmd_total_dma_count += cmd->cmd_dmac.dmac_size;
+ }
+
+ pkt->pkt_resid = (bp->b_bcount - cmd->cmd_total_dma_count);
+ }
+
+ return (pkt);
+
+out:
+ if (is_new)
+ pvscsi_destroy_pkt(ap, pkt);
+
+ return (NULL);
+}
+
+/* ARGSUSED ap */
+static void
+pvscsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ pvscsi_cmd_t *cmd = PKT2CMD(pkt);
+
+ if ((cmd->flags & PVSCSI_FLAG_DMA_VALID) != 0) {
+ (void) ddi_dma_unbind_handle(cmd->cmd_dmahdl);
+ cmd->flags &= ~PVSCSI_FLAG_DMA_VALID;
+ }
+}
+
+/* ARGSUSED ap */
+static void
+pvscsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ pvscsi_cmd_t *cmd = PKT2CMD(pkt);
+
+ if (cmd->cmd_dmahdl != NULL) {
+ (void) ddi_dma_sync(cmd->cmd_dmahdl, 0, 0,
+ (cmd->flags & PVSCSI_FLAG_IO_READ) ?
+ DDI_DMA_SYNC_FORCPU : DDI_DMA_SYNC_FORDEV);
+ }
+
+}
+
+/* ARGSUSED ap flag callback arg */
+static int
+pvscsi_reset_notify(struct scsi_address *ap, int flag,
+ void (*callback)(caddr_t), caddr_t arg)
+{
+ return (DDI_FAILURE);
+}
+
+static int
+pvscsi_quiesce_hba(dev_info_t *dip)
+{
+ pvscsi_softc_t *pvs;
+ scsi_hba_tran_t *tran;
+
+ if ((tran = ddi_get_driver_private(dip)) == NULL ||
+ (pvs = TRAN2PRIV(tran)) == NULL)
+ return (-1);
+
+ mutex_enter(&pvs->mutex);
+ if (!HBA_IS_QUIESCED(pvs))
+ pvs->flags |= PVSCSI_HBA_QUIESCED;
+
+ if (pvs->cmd_queue_len != 0) {
+ /* Outstanding commands present, wait */
+ pvs->flags |= PVSCSI_HBA_QUIESCE_PENDING;
+ cv_wait(&pvs->quiescevar, &pvs->mutex);
+ ASSERT(pvs->cmd_queue_len == 0);
+ }
+ mutex_exit(&pvs->mutex);
+
+ /* Suspend taskq delivery and complete all scheduled tasks */
+ ddi_taskq_suspend(pvs->msg_tq);
+ ddi_taskq_wait(pvs->msg_tq);
+ ddi_taskq_suspend(pvs->comp_tq);
+ ddi_taskq_wait(pvs->comp_tq);
+
+ return (0);
+}
+
+static int
+pvscsi_unquiesce_hba(dev_info_t *dip)
+{
+ pvscsi_softc_t *pvs;
+ scsi_hba_tran_t *tran;
+
+ if ((tran = ddi_get_driver_private(dip)) == NULL ||
+ (pvs = TRAN2PRIV(tran)) == NULL)
+ return (-1);
+
+ mutex_enter(&pvs->mutex);
+ if (!HBA_IS_QUIESCED(pvs)) {
+ mutex_exit(&pvs->mutex);
+ return (0);
+ }
+ ASSERT(pvs->cmd_queue_len == 0);
+ pvs->flags &= ~PVSCSI_HBA_QUIESCED;
+ mutex_exit(&pvs->mutex);
+
+ /* Resume taskq delivery */
+ ddi_taskq_resume(pvs->msg_tq);
+ ddi_taskq_resume(pvs->comp_tq);
+
+ return (0);
+}
+
+static int
+pvscsi_bus_config(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
+ void *arg, dev_info_t **childp)
+{
+ char *p;
+ int circ;
+ int ret = NDI_FAILURE;
+ long target = 0;
+ pvscsi_softc_t *pvs;
+ scsi_hba_tran_t *tran;
+
+ tran = ddi_get_driver_private(pdip);
+ pvs = tran->tran_hba_private;
+
+ ndi_devi_enter(pdip, &circ);
+ switch (op) {
+ case BUS_CONFIG_ONE:
+ if ((p = strrchr((char *)arg, '@')) != NULL &&
+ ddi_strtol(p + 1, NULL, 10, &target) == 0)
+ ret = pvscsi_config_one(pdip, pvs, (int)target, childp);
+ break;
+ case BUS_CONFIG_DRIVER:
+ case BUS_CONFIG_ALL:
+ ret = pvscsi_config_all(pdip, pvs);
+ break;
+ default:
+ break;
+ }
+
+ if (ret == NDI_SUCCESS)
+ ret = ndi_busop_bus_config(pdip, flags, op, arg, childp, 0);
+ ndi_devi_exit(pdip, circ);
+
+ return (ret);
+}
+
+static int
+pvscsi_hba_setup(pvscsi_softc_t *pvs)
+{
+ scsi_hba_tran_t *hba_tran;
+
+ hba_tran = pvs->tran = scsi_hba_tran_alloc(pvs->dip,
+ SCSI_HBA_CANSLEEP);
+ ASSERT(pvs->tran != NULL);
+
+ hba_tran->tran_hba_private = pvs;
+ hba_tran->tran_tgt_private = NULL;
+
+ hba_tran->tran_tgt_init = pvscsi_tgt_init;
+ hba_tran->tran_tgt_free = NULL;
+ hba_tran->tran_tgt_probe = scsi_hba_probe;
+
+ hba_tran->tran_start = pvscsi_start;
+ hba_tran->tran_reset = pvscsi_reset;
+ hba_tran->tran_abort = pvscsi_abort;
+ hba_tran->tran_getcap = pvscsi_getcap;
+ hba_tran->tran_setcap = pvscsi_setcap;
+ hba_tran->tran_init_pkt = pvscsi_init_pkt;
+ hba_tran->tran_destroy_pkt = pvscsi_destroy_pkt;
+
+ hba_tran->tran_dmafree = pvscsi_dmafree;
+ hba_tran->tran_sync_pkt = pvscsi_sync_pkt;
+ hba_tran->tran_reset_notify = pvscsi_reset_notify;
+
+ hba_tran->tran_quiesce = pvscsi_quiesce_hba;
+ hba_tran->tran_unquiesce = pvscsi_unquiesce_hba;
+ hba_tran->tran_bus_reset = NULL;
+
+ hba_tran->tran_add_eventcall = NULL;
+ hba_tran->tran_get_eventcookie = NULL;
+ hba_tran->tran_post_event = NULL;
+ hba_tran->tran_remove_eventcall = NULL;
+
+ hba_tran->tran_bus_config = pvscsi_bus_config;
+
+ hba_tran->tran_interconnect_type = INTERCONNECT_SAS;
+
+ if (scsi_hba_attach_setup(pvs->dip, &pvs->hba_dma_attr, hba_tran,
+ SCSI_HBA_TRAN_CDB | SCSI_HBA_TRAN_SCB | SCSI_HBA_TRAN_CLONE) !=
+ DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to attach HBA");
+ scsi_hba_tran_free(hba_tran);
+ pvs->tran = NULL;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+pvscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance;
+ pvscsi_softc_t *pvs;
+ char buf[32];
+
+ ASSERT(scsi_hba_iport_unit_address(dip) == NULL);
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ case DDI_RESUME:
+ break;
+ default:
+ return (DDI_FAILURE);
+ }
+
+ instance = ddi_get_instance(dip);
+
+ /* Allocate softstate information */
+ if (ddi_soft_state_zalloc(pvscsi_sstate, instance) != DDI_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!ddi_soft_state_zalloc() failed for instance %d",
+ instance);
+ return (DDI_FAILURE);
+ }
+
+ if ((pvs = ddi_get_soft_state(pvscsi_sstate, instance)) == NULL) {
+ cmn_err(CE_WARN, "!failed to get soft state for instance %d",
+ instance);
+ goto fail;
+ }
+
+ /*
+ * Indicate that we are 'sizeof (scsi_*(9S))' clean, we use
+ * scsi_pkt_size() instead.
+ */
+ scsi_size_clean(dip);
+
+ /* Setup HBA instance */
+ pvs->instance = instance;
+ pvs->dip = dip;
+ pvs->hba_dma_attr = pvscsi_hba_dma_attr;
+ pvs->ring_dma_attr = pvscsi_ring_dma_attr;
+ pvs->io_dma_attr = pvscsi_io_dma_attr;
+ mutex_init(&pvs->mutex, "pvscsi instance mutex", MUTEX_DRIVER, NULL);
+ mutex_init(&pvs->intr_mutex, "pvscsi instance interrupt mutex",
+ MUTEX_DRIVER, NULL);
+ mutex_init(&pvs->rx_mutex, "pvscsi rx ring mutex", MUTEX_DRIVER, NULL);
+ mutex_init(&pvs->tx_mutex, "pvscsi tx ring mutex", MUTEX_DRIVER, NULL);
+ list_create(&pvs->cmd_ctx_pool, sizeof (pvscsi_cmd_ctx_t),
+ offsetof(pvscsi_cmd_ctx_t, list));
+ list_create(&pvs->devnodes, sizeof (pvscsi_device_t),
+ offsetof(pvscsi_device_t, list));
+ list_create(&pvs->cmd_queue, sizeof (pvscsi_cmd_t),
+ offsetof(pvscsi_cmd_t, cmd_queue_node));
+ cv_init(&pvs->syncvar, "pvscsi synchronization cv", CV_DRIVER, NULL);
+ cv_init(&pvs->wd_condvar, "pvscsi watchdog cv", CV_DRIVER, NULL);
+ cv_init(&pvs->quiescevar, "pvscsi quiesce cv", CV_DRIVER, NULL);
+
+ (void) sprintf(buf, "pvscsi%d_cache", instance);
+ pvs->cmd_cache = kmem_cache_create(buf, sizeof (pvscsi_cmd_t), 0,
+ pvscsi_ccache_constructor, pvscsi_ccache_destructor, NULL,
+ (void *)pvs, NULL, 0);
+ if (pvs->cmd_cache == NULL) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to create a cache for SCSI commands");
+ goto fail;
+ }
+
+ if ((pvscsi_setup_io(pvs)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to setup I/O region");
+ goto free_cache;
+ }
+
+ pvscsi_reset_hba(pvs);
+
+ if ((pvscsi_allocate_rings(pvs)) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to allocate DMA rings");
+ goto free_io;
+ }
+
+ pvscsi_setup_rings(pvs);
+
+ if (pvscsi_setup_isr(pvs) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to setup ISR");
+ goto free_rings;
+ }
+
+ if (pvscsi_setup_sg(pvs) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to setup S/G");
+ goto free_intr;
+ }
+
+ if (pvscsi_hba_setup(pvs) != 0) {
+ dev_err(pvs->dip, CE_WARN, "!failed to setup HBA");
+ goto free_sg;
+ }
+
+ if ((pvs->comp_tq = ddi_taskq_create(pvs->dip, "comp_tq",
+ MIN(UINT16_MAX, ncpus), TASKQ_DEFAULTPRI, 0)) == NULL) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to create completion taskq");
+ goto free_sg;
+ }
+
+ if ((pvs->msg_tq = ddi_taskq_create(pvs->dip, "msg_tq",
+ 1, TASKQ_DEFAULTPRI, 0)) == NULL) {
+ dev_err(pvs->dip, CE_WARN,
+ "!failed to create message taskq");
+ goto free_comp_tq;
+ }
+
+ if (pvscsi_enable_intrs(pvs) != DDI_SUCCESS) {
+ dev_err(pvs->dip, CE_WARN, "!failed to enable interrupts");
+ goto free_msg_tq;
+ }
+
+ /* Launch watchdog thread */
+ pvs->wd_thread = thread_create(NULL, 0, pvscsi_wd_thread, pvs, 0, &p0,
+ TS_RUN, minclsyspri);
+
+ return (DDI_SUCCESS);
+
+free_msg_tq:
+ ddi_taskq_destroy(pvs->msg_tq);
+free_comp_tq:
+ ddi_taskq_destroy(pvs->comp_tq);
+free_sg:
+ pvscsi_free_sg(pvs);
+free_intr:
+ pvscsi_free_intr_resources(pvs);
+free_rings:
+ pvscsi_reset_hba(pvs);
+ pvscsi_free_rings(pvs);
+free_io:
+ pvscsi_free_io(pvs);
+free_cache:
+ kmem_cache_destroy(pvs->cmd_cache);
+fail:
+ ddi_soft_state_free(pvscsi_sstate, instance);
+
+ return (DDI_FAILURE);
+}
+
+static int
+pvscsi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance;
+ pvscsi_softc_t *pvs;
+
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+ default:
+ return (DDI_FAILURE);
+ }
+
+ instance = ddi_get_instance(dip);
+ if ((pvs = ddi_get_soft_state(pvscsi_sstate, instance)) == NULL) {
+ cmn_err(CE_WARN, "!failed to get soft state for instance %d",
+ instance);
+ return (DDI_FAILURE);
+ }
+
+ pvscsi_reset_hba(pvs);
+ pvscsi_free_intr_resources(pvs);
+
+ /* Shutdown message taskq */
+ ddi_taskq_wait(pvs->msg_tq);
+ ddi_taskq_destroy(pvs->msg_tq);
+
+ /* Shutdown completion taskq */
+ ddi_taskq_wait(pvs->comp_tq);
+ ddi_taskq_destroy(pvs->comp_tq);
+
+ /* Shutdown watchdog thread */
+ mutex_enter(&pvs->mutex);
+ pvs->flags |= PVSCSI_DRIVER_SHUTDOWN;
+ cv_signal(&pvs->wd_condvar);
+ cv_wait(&pvs->syncvar, &pvs->mutex);
+ mutex_exit(&pvs->mutex);
+
+ pvscsi_free_sg(pvs);
+ pvscsi_free_rings(pvs);
+ pvscsi_free_io(pvs);
+
+ kmem_cache_destroy(pvs->cmd_cache);
+
+ mutex_destroy(&pvs->mutex);
+ mutex_destroy(&pvs->intr_mutex);
+ mutex_destroy(&pvs->rx_mutex);
+
+ cv_destroy(&pvs->syncvar);
+ cv_destroy(&pvs->wd_condvar);
+ cv_destroy(&pvs->quiescevar);
+
+ ddi_soft_state_free(pvscsi_sstate, instance);
+ ddi_prop_remove_all(dip);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+pvscsi_ioctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *credp,
+ int *rval)
+{
+ int ret;
+
+ if (ddi_get_soft_state(pvscsi_sstate, getminor(dev)) == NULL) {
+ cmn_err(CE_WARN, "!invalid device instance: %d", getminor(dev));
+ return (ENXIO);
+ }
+
+ /* Try to handle command in a common way */
+ if ((ret = scsi_hba_ioctl(dev, cmd, data, mode, credp, rval)) != ENOTTY)
+ return (ret);
+
+ cmn_err(CE_WARN, "!unsupported IOCTL command: 0x%X", cmd);
+
+ return (ENXIO);
+}
+
+static int
+pvscsi_quiesce(dev_info_t *devi)
+{
+ scsi_hba_tran_t *tran;
+ pvscsi_softc_t *pvs;
+
+ if ((tran = ddi_get_driver_private(devi)) == NULL)
+ return (DDI_SUCCESS);
+
+ if ((pvs = tran->tran_hba_private) == NULL)
+ return (DDI_SUCCESS);
+
+ /* Mask all interrupts from device */
+ pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK, 0);
+
+ /* Reset the HBA */
+ pvscsi_reset_hba(pvs);
+
+ return (DDI_SUCCESS);
+}
+
+/* module */
+
+static struct cb_ops pvscsi_cb_ops = {
+ .cb_open = scsi_hba_open,
+ .cb_close = scsi_hba_close,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = pvscsi_ioctl,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_str = NULL,
+ .cb_flag = D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
+static struct dev_ops pvscsi_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+ .devo_getinfo = ddi_no_info,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_attach = pvscsi_attach,
+ .devo_detach = pvscsi_detach,
+ .devo_reset = nodev,
+ .devo_cb_ops = &pvscsi_cb_ops,
+ .devo_bus_ops = NULL,
+ .devo_power = NULL,
+ .devo_quiesce = pvscsi_quiesce
+};
+
+#define PVSCSI_IDENT "VMware PVSCSI"
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ PVSCSI_IDENT,
+ &pvscsi_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ int ret;
+
+ if ((ret = ddi_soft_state_init(&pvscsi_sstate,
+ sizeof (struct pvscsi_softc), PVSCSI_INITIAL_SSTATE_ITEMS)) != 0) {
+ cmn_err(CE_WARN, "!ddi_soft_state_init() failed");
+ return (ret);
+ }
+
+ if ((ret = scsi_hba_init(&modlinkage)) != 0) {
+ cmn_err(CE_WARN, "!scsi_hba_init() failed");
+ ddi_soft_state_fini(&pvscsi_sstate);
+ return (ret);
+ }
+
+ if ((ret = mod_install(&modlinkage)) != 0) {
+ cmn_err(CE_WARN, "!mod_install() failed");
+ ddi_soft_state_fini(&pvscsi_sstate);
+ scsi_hba_fini(&modlinkage);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ if ((ret = mod_remove(&modlinkage)) == 0) {
+ ddi_soft_state_fini(&pvscsi_sstate);
+ scsi_hba_fini(&modlinkage);
+ }
+
+ return (ret);
+}
diff --git a/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.h b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.h
new file mode 100644
index 0000000000..cbf507a9b8
--- /dev/null
+++ b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PVSCSI_H_
+#define _PVSCSI_H_
+
+#include <sys/types.h>
+
+#define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
+
+#define MASK(n) ((1 << (n)) - 1) /* make an n-bit mask */
+
+#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0
+
+/*
+ * host adapter status/error codes
+ */
+enum HostBusAdapterStatus {
+ BTSTAT_SUCCESS = 0x00, /* CCB complete normally with */
+ /* no errors */
+ BTSTAT_LINKED_COMMAND_COMPLETED = 0x0a,
+ BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b,
+ BTSTAT_DATA_UNDERRUN = 0x0c,
+ BTSTAT_SELTIMEO = 0x11, /* SCSI selection timeout */
+ BTSTAT_DATARUN = 0x12, /* data overrun/underrun */
+ BTSTAT_BUSFREE = 0x13, /* unexpected bus free */
+ BTSTAT_INVPHASE = 0x14, /* invalid bus phase or sequence */
+ /* requested by target */
+ BTSTAT_LUNMISMATCH = 0x17, /* linked CCB has different LUN from */
+ /* first CCB */
+ BTSTAT_INVPARAM = 0x1a, /* invalid parameter in CCB */
+ /* or segment list */
+ BTSTAT_SENSFAILED = 0x1b, /* auto request sense failed */
+ BTSTAT_TAGREJECT = 0x1c, /* SCSI II tagged queueing message */
+ /* rejected by target */
+ BTSTAT_BADMSG = 0x1d, /* unsupported message received by */
+ /* the host adapter */
+ BTSTAT_HAHARDWARE = 0x20, /* host adapter hardware failed */
+ BTSTAT_NORESPONSE = 0x21, /* target did not respond to */
+ /* SCSI ATN, sent a SCSI RST */
+ BTSTAT_SENTRST = 0x22, /* host adapter asserted a SCSI RST */
+ BTSTAT_RECVRST = 0x23, /* other SCSI devices asserted */
+ /* a SCSI RST */
+ BTSTAT_DISCONNECT = 0x24, /* target device reconnected */
+ /* improperly (w/o tag) */
+ BTSTAT_BUSRESET = 0x25, /* host adapter issued */
+ /* BUS device reset */
+ BTSTAT_ABORTQUEUE = 0x26, /* abort queue generated */
+ BTSTAT_HASOFTWARE = 0x27, /* host adapter software error */
+ BTSTAT_HATIMEOUT = 0x30, /* host adapter hardware */
+ /* timeout error */
+ BTSTAT_SCSIPARITY = 0x34, /* SCSI parity error detected */
+};
+
+/*
+ * SCSI device status values.
+ */
+enum ScsiDeviceStatus {
+ SDSTAT_GOOD = 0x00, /* No errors. */
+ SDSTAT_CHECK = 0x02, /* Check condition. */
+};
+
+/*
+ * Register offsets.
+ *
+ * These registers are accessible both via i/o space and mm i/o.
+ */
+
+enum PVSCSIRegOffset {
+ PVSCSI_REG_OFFSET_COMMAND = 0x0,
+ PVSCSI_REG_OFFSET_COMMAND_DATA = 0x4,
+ PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x8,
+ PVSCSI_REG_OFFSET_LAST_STS_0 = 0x100,
+ PVSCSI_REG_OFFSET_LAST_STS_1 = 0x104,
+ PVSCSI_REG_OFFSET_LAST_STS_2 = 0x108,
+ PVSCSI_REG_OFFSET_LAST_STS_3 = 0x10c,
+ PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c,
+ PVSCSI_REG_OFFSET_INTR_MASK = 0x2010,
+ PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014,
+ PVSCSI_REG_OFFSET_DEBUG = 0x3018,
+ PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018,
+};
+
+/*
+ * Virtual h/w commands.
+ */
+
+enum PVSCSICommands {
+ PVSCSI_CMD_FIRST = 0, /* has to be first */
+
+ PVSCSI_CMD_ADAPTER_RESET = 1,
+ PVSCSI_CMD_ISSUE_SCSI = 2,
+ PVSCSI_CMD_SETUP_RINGS = 3,
+ PVSCSI_CMD_RESET_BUS = 4,
+ PVSCSI_CMD_RESET_DEVICE = 5,
+ PVSCSI_CMD_ABORT_CMD = 6,
+ PVSCSI_CMD_CONFIG = 7,
+ PVSCSI_CMD_SETUP_MSG_RING = 8,
+ PVSCSI_CMD_DEVICE_UNPLUG = 9,
+ PVSCSI_CMD_SETUP_REQCALLTHRESHOLD = 10,
+
+ PVSCSI_CMD_LAST = 11 /* has to be last */
+};
+
+/*
+ * Command descriptor for PVSCSI_CMD_RESET_DEVICE --
+ */
+#pragma pack(1)
+struct PVSCSICmdDescResetDevice {
+ uint32_t target;
+ uint8_t lun[8];
+};
+#pragma pack()
+
+/*
+ * Command descriptor for PVSCSI_CMD_CONFIG --
+ */
+#pragma pack(1)
+struct PVSCSICmdDescConfigCmd {
+ uint64_t cmpAddr;
+ uint64_t configPageAddress;
+ uint32_t configPageNum;
+ uint32_t _pad;
+};
+#pragma pack()
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_REQCALLTHRESHOLD --
+ */
+#pragma pack(1)
+struct PVSCSICmdDescSetupReqCall {
+ uint32_t enable;
+};
+#pragma pack()
+
+enum PVSCSIConfigPageType {
+ PVSCSI_CONFIG_PAGE_CONTROLLER = 0x1958,
+ PVSCSI_CONFIG_PAGE_PHY = 0x1959,
+ PVSCSI_CONFIG_PAGE_DEVICE = 0x195a,
+};
+
+enum PVSCSIConfigPageAddressType {
+ PVSCSI_CONFIG_CONTROLLER_ADDRESS = 0x2120,
+ PVSCSI_CONFIG_BUSTARGET_ADDRESS = 0x2121,
+ PVSCSI_CONFIG_PHY_ADDRESS = 0x2122,
+};
+
+/*
+ * Command descriptor for PVSCSI_CMD_ABORT_CMD --
+ *
+ * - currently does not support specifying the LUN.
+ * - _pad should be 0.
+ */
+
+struct PVSCSICmdDescAbortCmd {
+ uint64_t context;
+ uint32_t target;
+ uint32_t _pad;
+} __packed;
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_RINGS --
+ *
+ * Notes:
+ * - reqRingNumPages and cmpRingNumPages need to be power of two.
+ * - reqRingNumPages and cmpRingNumPages need to be different from 0,
+ * - reqRingNumPages and cmpRingNumPages need to be inferior to
+ * PVSCSI_SETUP_RINGS_MAX_NUM_PAGES.
+ */
+#define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32
+
+#pragma pack(1)
+struct PVSCSICmdDescSetupRings {
+ uint32_t reqRingNumPages;
+ uint32_t cmpRingNumPages;
+ uint64_t ringsStatePPN;
+ uint64_t reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+ uint64_t cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES];
+};
+#pragma pack()
+
+/*
+ * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING --
+ *
+ * Notes:
+ * - this command was not supported in the initial revision of the h/w
+ * interface. Before using it, you need to check that it is supported by
+ * writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then
+ * immediately after read the 'command status' register:
+ * * a value of -1 means that the cmd is NOT supported,
+ * * a value != -1 means that the cmd IS supported.
+ * If it's supported the 'command status' register should return:
+ * sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t).
+ * - this command should be issued _after_ the usual SETUP_RINGS so that the
+ * RingsState page is already setup. If not, the command is a nop.
+ * - numPages needs to be a power of two,
+ * - numPages needs to be different from 0,
+ * - _pad should be zero.
+ */
+#define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 16
+
+#pragma pack(1)
+struct PVSCSICmdDescSetupMsgRing {
+ uint32_t numPages;
+ uint32_t _pad;
+ uint64_t ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES];
+};
+#pragma pack()
+
+enum PVSCSIMsgType {
+ PVSCSI_MSG_DEV_ADDED = 0,
+ PVSCSI_MSG_DEV_REMOVED = 1,
+ PVSCSI_MSG_LAST = 2,
+};
+
+/*
+ * Msg descriptor.
+ *
+ * sizeof(struct PVSCSIRingMsgDesc) == 128.
+ *
+ * - type is of type enum PVSCSIMsgType.
+ * - the content of args depend on the type of event being delivered.
+ */
+#pragma pack(1)
+struct PVSCSIRingMsgDesc {
+ uint32_t type;
+ uint32_t args[31];
+};
+#pragma pack()
+
+#pragma pack(1)
+struct PVSCSIMsgDescDevStatusChanged {
+ uint32_t type; /* PVSCSI_MSG_DEV _ADDED / _REMOVED */
+ uint32_t bus;
+ uint32_t target;
+ uint8_t lun[8];
+ uint32_t pad[27];
+};
+#pragma pack()
+
+/*
+ * Rings state.
+ *
+ * - the fields:
+ * . msgProdIdx,
+ * . msgConsIdx,
+ * . msgNumEntriesLog2,
+ * .. are only used once the SETUP_MSG_RING cmd has been issued.
+ * - '_pad' helps to ensure that the msg related fields are on their own
+ * cache-line.
+ */
+#pragma pack(1)
+struct PVSCSIRingsState {
+ uint32_t reqProdIdx;
+ uint32_t reqConsIdx;
+ uint32_t reqNumEntriesLog2;
+
+ uint32_t cmpProdIdx;
+ uint32_t cmpConsIdx;
+ uint32_t cmpNumEntriesLog2;
+
+ uint32_t reqCallThreshold;
+
+ uint8_t _pad[100];
+
+ uint32_t msgProdIdx;
+ uint32_t msgConsIdx;
+ uint32_t msgNumEntriesLog2;
+};
+#pragma pack()
+
+/*
+ * Request descriptor.
+ *
+ * sizeof(RingReqDesc) = 128
+ *
+ * - context: is a unique identifier of a command. It could normally be any
+ * 64bit value, however we currently store it in the serialNumber variable
+ * of struct SCSI_Command, so we have the following restrictions due to the
+ * way this field is handled in the vmkernel storage stack:
+ * * this value can't be 0,
+ * * the upper 32bit need to be 0 since serialNumber is as a uint32_t.
+ * Currently tracked as PR 292060.
+ * - dataLen: contains the total number of bytes that need to be transferred.
+ * - dataAddr:
+ * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first
+ * s/g table segment, each s/g segment is entirely contained on a single
+ * page of physical memory,
+ * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of
+ * the buffer used for the DMA transfer,
+ * - flags:
+ * * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above,
+ * * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved,
+ * * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory,
+ * * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device,
+ * * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than
+ * 16bytes. To be specified.
+ * - vcpuHint: vcpuId of the processor that will be most likely waiting for the
+ * completion of the i/o. For guest OSes that use lowest priority message
+ * delivery mode (such as windows), we use this "hint" to deliver the
+ * completion action to the proper vcpu. For now, we can use the vcpuId of
+ * the processor that initiated the i/o as a likely candidate for the vcpu
+ * that will be waiting for the completion..
+ * - bus should be 0: we currently only support bus 0 for now.
+ * - unused should be zero'd.
+ */
+#define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0)
+#define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1)
+#define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2)
+#define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3)
+#define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4)
+
+#pragma pack(1)
+struct PVSCSIRingReqDesc {
+ uint64_t context;
+ uint64_t dataAddr;
+ uint64_t dataLen;
+ uint64_t senseAddr;
+ uint32_t senseLen;
+ uint32_t flags;
+ uint8_t cdb[16];
+ uint8_t cdbLen;
+ uint8_t lun[8];
+ uint8_t tag;
+ uint8_t bus;
+ uint8_t target;
+ uint8_t vcpuHint;
+ uint8_t unused[59];
+};
+#pragma pack()
+
+/*
+ * Scatter-gather list management.
+ *
+ * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the
+ * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g
+ * table segment.
+ *
+ * - each segment of the s/g table contain a succession of struct
+ * PVSCSISGElement.
+ * - each segment is entirely contained on a single physical page of memory.
+ * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in
+ * PVSCSISGElement.flags and in this case:
+ * * addr is the PA of the next s/g segment,
+ * * length is undefined, assumed to be 0.
+ */
+#pragma pack(1)
+struct PVSCSISGElement {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t flags;
+};
+#pragma pack()
+
+/*
+ * Completion descriptor.
+ *
+ * sizeof(RingCmpDesc) = 32
+ *
+ * - context: identifier of the command. The same thing that was specified
+ * under "context" as part of struct RingReqDesc at initiation time,
+ * - dataLen: number of bytes transferred for the actual i/o operation,
+ * - senseLen: number of bytes written into the sense buffer,
+ * - hostStatus: adapter status,
+ * - scsiStatus: device status,
+ * - _pad should be zero.
+ */
+#pragma pack(1)
+struct PVSCSIRingCmpDesc {
+ uint64_t context;
+ uint64_t dataLen;
+ uint32_t senseLen;
+ uint16_t hostStatus;
+ uint16_t scsiStatus;
+ uint32_t _pad[2];
+};
+#pragma pack()
+
+#pragma pack(1)
+struct PVSCSIConfigPageHeader {
+ uint32_t pageNum;
+ uint16_t numDwords;
+ uint16_t hostStatus;
+ uint16_t scsiStatus;
+ uint16_t reserved[3];
+};
+#pragma pack()
+
+#pragma pack(1)
+struct PVSCSIConfigPageController {
+ struct PVSCSIConfigPageHeader header;
+ uint64_t nodeWWN; /* Device name as defined in the SAS spec. */
+ uint16_t manufacturer[64];
+ uint16_t serialNumber[64];
+ uint16_t opromVersion[32];
+ uint16_t hwVersion[32];
+ uint16_t firmwareVersion[32];
+ uint32_t numPhys;
+ uint8_t useConsecutivePhyWWNs;
+ uint8_t reserved[3];
+};
+#pragma pack()
+
+/*
+ * Interrupt status / IRQ bits.
+ */
+
+#define PVSCSI_INTR_CMPL_0 (1 << 0)
+#define PVSCSI_INTR_CMPL_1 (1 << 1)
+#define PVSCSI_INTR_CMPL_MASK MASK(2)
+
+#define PVSCSI_INTR_MSG_0 (1 << 2)
+#define PVSCSI_INTR_MSG_1 (1 << 3)
+#define PVSCSI_INTR_MSG_MASK (MASK(2) << 2)
+
+#define PVSCSI_INTR_ALL_SUPPORTED MASK(4)
+
+/*
+ * Number of MSI-X vectors supported.
+ */
+#define PVSCSI_MAX_INTRS 24
+
+/*
+ * Enumeration of supported MSI-X vectors
+ */
+#define PVSCSI_VECTOR_COMPLETION 0
+
+/*
+ * Misc constants for the rings.
+ */
+
+#define PVSCSI_MAX_NUM_PAGES_REQ_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_CMP_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES
+#define PVSCSI_MAX_NUM_PAGES_MSG_RING PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES
+
+#define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \
+ (PAGE_SIZE / sizeof (struct PVSCSIRingReqDesc))
+
+#define PVSCSI_MAX_REQ_QUEUE_DEPTH \
+ (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE)
+
+#define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES 1
+#define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1
+#define PVSCSI_MEM_SPACE_MISC_NUM_PAGES 2
+#define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES 2
+#define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES 2
+
+enum PVSCSIMemSpace {
+ PVSCSI_MEM_SPACE_COMMAND_PAGE = 0,
+ PVSCSI_MEM_SPACE_INTR_STATUS_PAGE = 1,
+ PVSCSI_MEM_SPACE_MISC_PAGE = 2,
+ PVSCSI_MEM_SPACE_KICK_IO_PAGE = 4,
+ PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE = 6,
+ PVSCSI_MEM_SPACE_MSIX_PBA_PAGE = 7,
+};
+
+#define PVSCSI_MEM_SPACE_NUM_PAGES \
+ (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES +\
+ PVSCSI_MEM_SPACE_MISC_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES + \
+ PVSCSI_MEM_SPACE_MSIX_NUM_PAGES)
+
+#define PVSCSI_MEM_SPACE_SIZE (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE)
+
+#endif /* _PVSCSI_H_ */
diff --git a/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi_var.h b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi_var.h
new file mode 100644
index 0000000000..f675376556
--- /dev/null
+++ b/usr/src/uts/intel/io/scsi/adapters/pvscsi/pvscsi_var.h
@@ -0,0 +1,236 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc.
+ */
+
+#ifndef _PVSCSI_VAR_H_
+#define _PVSCSI_VAR_H_
+
+typedef struct pvscsi_dma_buf {
+ ddi_dma_handle_t dma_handle;
+ caddr_t addr;
+ uint64_t pa;
+ size_t real_length;
+ ddi_acc_handle_t acc_handle;
+} pvscsi_dma_buf_t;
+
+#define PVSCSI_TGT_PRIV_SIZE 2
+
+#define PVSCSI_FLAG_CDB_EXT 0x0001
+#define PVSCSI_FLAG_SCB_EXT 0x0002
+#define PVSCSI_FLAG_PRIV_EXT 0x0004
+#define PVSCSI_FLAG_TAG 0x0008
+#define PVSCSI_FLAG_IO_READ 0x0010
+#define PVSCSI_FLAG_IO_IOPB 0x0040
+#define PVSCSI_FLAG_DONE 0x0080
+#define PVSCSI_FLAG_DMA_VALID 0x0100
+#define PVSCSI_FLAG_XARQ 0x0200
+#define PVSCSI_FLAG_HW_STATUS 0x0400
+#define PVSCSI_FLAG_TIMED_OUT 0x0800
+#define PVSCSI_FLAG_ABORTED 0x1000
+#define PVSCSI_FLAG_RESET_BUS 0x2000
+#define PVSCSI_FLAG_RESET_DEV 0x4000
+#define PVSCSI_FLAG_TRANSPORT 0x8000
+
+/* Flags that must remain during SCSI packet retransmission */
+#define PVSCSI_FLAGS_PERSISTENT \
+ (PVSCSI_FLAG_CDB_EXT |\
+ PVSCSI_FLAG_SCB_EXT |\
+ PVSCSI_FLAG_PRIV_EXT |\
+ PVSCSI_FLAG_TAG |\
+ PVSCSI_FLAG_IO_READ |\
+ PVSCSI_FLAG_IO_IOPB |\
+ PVSCSI_FLAG_DMA_VALID |\
+ PVSCSI_FLAG_XARQ)
+
+#define PVSCSI_FLAGS_RESET \
+ (PVSCSI_FLAG_RESET_BUS |\
+ PVSCSI_FLAG_RESET_DEV)
+
+#define PVSCSI_FLAGS_NON_HW_COMPLETION \
+ (PVSCSI_FLAG_TIMED_OUT |\
+ PVSCSI_FLAG_ABORTED |\
+ PVSCSI_FLAGS_RESET)
+
+#define PVSCSI_FLAGS_COMPLETION \
+ (PVSCSI_FLAG_HW_STATUS |\
+ PVSCSI_FLAGS_NON_HW_COMPLETION)
+
+#define PVSCSI_FLAGS_EXT \
+ (PVSCSI_FLAG_CDB_EXT |\
+ PVSCSI_FLAG_SCB_EXT |\
+ PVSCSI_FLAG_PRIV_EXT)
+
+typedef struct pvscsi_cmd_ctx {
+ pvscsi_dma_buf_t dma_buf;
+ struct pvscsi_cmd *cmd;
+ list_node_t list;
+} pvscsi_cmd_ctx_t;
+
+typedef struct pvscsi_cmp_desc_stat {
+ uchar_t scsi_status;
+ uint32_t host_status;
+ uint64_t data_len;
+} pvscsi_cmp_desc_stat_t;
+
+#define PVSCSI_MAX_IO_PAGES 256
+#define PVSCSI_MAX_IO_SIZE (PVSCSI_MAX_IO_PAGES * PAGE_SIZE)
+#define PVSCSI_MAX_SG_SIZE (PVSCSI_MAX_IO_PAGES + 1)
+
+typedef struct pvscsi_cmd {
+ struct scsi_pkt *pkt;
+ uint8_t cmd_cdb[SCSI_CDB_SIZE];
+ struct scsi_arq_status cmd_scb;
+ uint64_t tgt_priv[PVSCSI_TGT_PRIV_SIZE];
+ size_t tgtlen;
+ size_t cmdlen;
+ size_t statuslen;
+ uint8_t tag;
+ int flags;
+ ulong_t dma_count;
+ pvscsi_cmp_desc_stat_t cmp_stat;
+ pvscsi_cmd_ctx_t *ctx;
+ ddi_dma_handle_t cmd_dmahdl;
+ ddi_dma_cookie_t cmd_dmac;
+ uint_t cmd_dmaccount;
+ uint_t cmd_winindex;
+ uint_t cmd_nwin;
+ off_t cmd_dma_offset;
+ size_t cmd_dma_len;
+ uint_t cmd_dma_count;
+ uint_t cmd_total_dma_count;
+ int cmd_target;
+ list_node_t cmd_queue_node;
+ clock_t timeout_lbolt;
+ struct pvscsi_softc *cmd_pvs;
+ struct pvscsi_cmd *next_cmd;
+ struct pvscsi_cmd *tail_cmd;
+ struct buf *arqbuf;
+ ddi_dma_cookie_t arqc;
+ ddi_dma_handle_t arqhdl;
+ int cmd_rqslen;
+ struct scsi_pkt cached_pkt;
+ ddi_dma_cookie_t cached_cookies[PVSCSI_MAX_SG_SIZE];
+} pvscsi_cmd_t;
+
+#define AP2PRIV(ap) ((ap)->a_hba_tran->tran_hba_private)
+#define CMD2PKT(cmd) ((struct scsi_pkt *)((cmd)->pkt))
+#define PKT2CMD(pkt) ((pvscsi_cmd_t *)((pkt)->pkt_ha_private))
+#define SDEV2PRIV(sd) ((sd)->sd_address.a_hba_tran->tran_hba_private)
+#define TRAN2PRIV(tran) ((pvscsi_softc_t *)(tran)->tran_hba_private)
+
+#define CMD_CTX_SGLIST_VA(cmd_ctx) \
+ ((struct PVSCSISGElement *) \
+ (((pvscsi_cmd_ctx_t *)(cmd_ctx))->dma_buf.addr))
+
+#define CMD_CTX_SGLIST_PA(cmd_ctx) \
+ ((((pvscsi_cmd_ctx_t *)(cmd_ctx))->dma_buf.pa))
+
+typedef struct pvscsi_msg {
+ struct pvscsi_softc *msg_pvs;
+ int type;
+ int target;
+} pvscsi_msg_t;
+
+/* Driver-wide flags */
+#define PVSCSI_DRIVER_SHUTDOWN 0x01
+#define PVSCSI_HBA_QUIESCED 0x02
+#define PVSCSI_HBA_QUIESCE_PENDING 0x04
+#define PVSCSI_HBA_AUTO_REQUEST_SENSE 0x08
+
+#define HBA_IS_QUIESCED(pvs) (((pvs)->flags & PVSCSI_HBA_QUIESCED) != 0)
+#define HBA_QUIESCE_PENDING(pvs) \
+ (((pvs)->flags & PVSCSI_HBA_QUIESCE_PENDING) != 0 && \
+ ((pvs)->cmd_queue_len == 0))
+
+typedef struct pvscsi_softc {
+ dev_info_t *dip;
+ int instance;
+ scsi_hba_tran_t *tran;
+ ddi_dma_attr_t hba_dma_attr;
+ ddi_dma_attr_t io_dma_attr;
+ ddi_dma_attr_t ring_dma_attr;
+ pvscsi_dma_buf_t rings_state_buf;
+ pvscsi_dma_buf_t req_ring_buf;
+ uint_t req_pages;
+ uint_t req_depth;
+ pvscsi_dma_buf_t cmp_ring_buf;
+ uint_t cmp_pages;
+ pvscsi_dma_buf_t msg_ring_buf;
+ uint_t msg_pages;
+ ddi_acc_handle_t pci_config_handle;
+ ddi_acc_handle_t mmio_handle;
+ caddr_t mmio_base;
+ int intr_type;
+ int intr_size;
+ int intr_cnt;
+ int intr_pri;
+ int flags;
+ ddi_intr_handle_t *intr_htable;
+ pvscsi_cmd_ctx_t *cmd_ctx;
+ list_t cmd_ctx_pool;
+ list_t cmd_queue;
+ int cmd_queue_len;
+ kcondvar_t wd_condvar;
+ kmutex_t mutex;
+ kmutex_t rx_mutex;
+ kmutex_t tx_mutex;
+ kmutex_t intr_mutex;
+ struct kmem_cache *cmd_cache;
+ list_t devnodes;
+ kcondvar_t syncvar;
+ kcondvar_t quiescevar;
+ kthread_t *wd_thread;
+ int intr_lock_counter;
+ int num_pollers;
+ ddi_taskq_t *comp_tq;
+ ddi_taskq_t *msg_tq;
+} pvscsi_softc_t;
+
+typedef struct pvscsi_device {
+ list_node_t list;
+ int target;
+ dev_info_t *pdip;
+ dev_info_t *parent;
+} pvscsi_device_t;
+
+#define REQ_RING(pvs) \
+ ((struct PVSCSIRingReqDesc *) \
+ (((pvscsi_softc_t *)(pvs))->req_ring_buf.addr))
+
+#define CMP_RING(pvs) \
+ ((struct PVSCSIRingCmpDesc *) \
+ (((pvscsi_softc_t *)(pvs))->cmp_ring_buf.addr))
+
+#define MSG_RING(pvs) \
+ ((struct PVSCSIRingMsgDesc *) \
+ (((pvscsi_softc_t *)(pvs))->msg_ring_buf.addr))
+
+#define RINGS_STATE(pvs) \
+ ((struct PVSCSIRingsState *)(((pvscsi_softc_t *)\
+ (pvs))->rings_state_buf.addr))
+
+#define PVSCSI_INITIAL_SSTATE_ITEMS 16
+
+#define SENSE_BUFFER_SIZE SENSE_LENGTH
+#define USECS_TO_WAIT 1000
+
+#define PVSCSI_MAXTGTS 16
+
+#define PAGE_SIZE 4096
+#define PAGE_SHIFT 12
+
+#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING 8
+#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING 1
+
+#endif /* _PVSCSI_VAR_H_ */
diff --git a/usr/src/uts/intel/pvscsi/Makefile b/usr/src/uts/intel/pvscsi/Makefile
new file mode 100644
index 0000000000..5ea943cfaa
--- /dev/null
+++ b/usr/src/uts/intel/pvscsi/Makefile
@@ -0,0 +1,51 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Nexenta Systems, Inc.
+#
+
+UTSBASE= $(SRC)/uts
+
+MODULE= pvscsi
+OBJECTS= $(PVSCSI_OBJS:%=$(OBJS_DIR)/%)
+LINTS= $(PVSCSI_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE= $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET= $(BINARY)
+LINT_TARGET= $(MODULE).lint
+INSTALL_TARGET= $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -N misc/scsi
+
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+install: $(INSTALL_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ