summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--usr/contrib/freebsd/dev/nvme/nvme.h1506
-rw-r--r--usr/src/cmd/bhyve/Makefile16
-rw-r--r--usr/src/cmd/bhyve/acpi.c43
-rw-r--r--usr/src/cmd/bhyve/ahci.h548
-rw-r--r--usr/src/cmd/bhyve/atkbdc.c3
-rw-r--r--usr/src/cmd/bhyve/bhyvegc.c2
-rw-r--r--usr/src/cmd/bhyve/bhyvegc.h2
-rw-r--r--usr/src/cmd/bhyve/bhyverun.c97
-rw-r--r--usr/src/cmd/bhyve/block_if.c4
-rw-r--r--usr/src/cmd/bhyve/block_if.h2
-rw-r--r--usr/src/cmd/bhyve/bootrom.c2
-rw-r--r--usr/src/cmd/bhyve/bootrom.h4
-rw-r--r--usr/src/cmd/bhyve/console.c2
-rw-r--r--usr/src/cmd/bhyve/console.h18
-rw-r--r--usr/src/cmd/bhyve/consport.c16
-rw-r--r--usr/src/cmd/bhyve/dbgport.c16
-rw-r--r--usr/src/cmd/bhyve/fwctl.c4
-rw-r--r--usr/src/cmd/bhyve/fwctl.h2
-rw-r--r--usr/src/cmd/bhyve/inout.c30
-rw-r--r--usr/src/cmd/bhyve/iov.c141
-rw-r--r--usr/src/cmd/bhyve/iov.h43
-rw-r--r--usr/src/cmd/bhyve/mem.c49
-rw-r--r--usr/src/cmd/bhyve/mem.h2
-rw-r--r--usr/src/cmd/bhyve/mevent.c4
-rw-r--r--usr/src/cmd/bhyve/mevent_test.c38
-rw-r--r--usr/src/cmd/bhyve/pci_e82545.c16
-rw-r--r--usr/src/cmd/bhyve/pci_emul.c23
-rw-r--r--usr/src/cmd/bhyve/pci_emul.h1
-rw-r--r--usr/src/cmd/bhyve/pci_fbuf.c2
-rw-r--r--usr/src/cmd/bhyve/pci_irq.c2
-rw-r--r--usr/src/cmd/bhyve/pci_irq.h2
-rw-r--r--usr/src/cmd/bhyve/pci_lpc.c10
-rw-r--r--usr/src/cmd/bhyve/pci_lpc.h1
-rw-r--r--usr/src/cmd/bhyve/pci_nvme.c1873
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_block.c16
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_console.c17
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_net.c33
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_rnd.c2
-rw-r--r--usr/src/cmd/bhyve/pci_virtio_scsi.c718
-rw-r--r--usr/src/cmd/bhyve/pci_xhci.c20
-rw-r--r--usr/src/cmd/bhyve/pci_xhci.h2
-rw-r--r--usr/src/cmd/bhyve/ps2kbd.c2
-rw-r--r--usr/src/cmd/bhyve/ps2kbd.h2
-rw-r--r--usr/src/cmd/bhyve/ps2mouse.c2
-rw-r--r--usr/src/cmd/bhyve/ps2mouse.h2
-rw-r--r--usr/src/cmd/bhyve/rfb.c24
-rw-r--r--usr/src/cmd/bhyve/rfb.h2
-rw-r--r--usr/src/cmd/bhyve/rtc.c2
-rw-r--r--usr/src/cmd/bhyve/sockstream.c4
-rw-r--r--usr/src/cmd/bhyve/sockstream.h2
-rw-r--r--usr/src/cmd/bhyve/task_switch.c2
-rw-r--r--usr/src/cmd/bhyve/uart_emul.c4
-rw-r--r--usr/src/cmd/bhyve/usb_emul.c2
-rw-r--r--usr/src/cmd/bhyve/usb_emul.h2
-rw-r--r--usr/src/cmd/bhyve/usb_mouse.c22
-rw-r--r--usr/src/cmd/bhyve/vga.c2
-rw-r--r--usr/src/cmd/bhyve/vga.h78
-rw-r--r--usr/src/cmd/bhyve/virtio.c2
-rw-r--r--usr/src/cmd/bhyve/virtio.h31
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c8
-rw-r--r--usr/src/compat/freebsd/amd64/machine/atomic.h35
-rw-r--r--usr/src/compat/freebsd/amd64/machine/reg.h23
-rw-r--r--usr/src/compat/freebsd/sys/endian.h11
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c37
-rw-r--r--usr/src/uts/i86pc/io/vmm/README.sync28
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c1461
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h431
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c735
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/npt.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/npt.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c7
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm_msr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm_msr.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm_softc.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/vmcb.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c72
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpic.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpit.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c7
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.h2
-rw-r--r--usr/src/uts/i86pc/sys/vmm.h2
88 files changed, 7788 insertions, 620 deletions
diff --git a/usr/contrib/freebsd/dev/nvme/nvme.h b/usr/contrib/freebsd/dev/nvme/nvme.h
new file mode 100644
index 0000000000..73d4e2d2db
--- /dev/null
+++ b/usr/contrib/freebsd/dev/nvme/nvme.h
@@ -0,0 +1,1506 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 2012-2013 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __NVME_H__
+#define __NVME_H__
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#endif
+
+#include <sys/param.h>
+#include <sys/endian.h>
+
+#define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command) /* ioctl request codes; every code here uses the same group letter */
+#define NVME_RESET_CONTROLLER _IO('n', 1)
+
+#define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test)
+#define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test)
+
+/*
+ * Macros to deal with NVMe revisions as laid out in the VS register (major in bits 31:16, minor in bits 15:8)
+ */
+#define NVME_REV(x, y) (((x) << 16) | ((y) << 8))
+#define NVME_MAJOR(r) (((r) >> 16) & 0xffff)
+#define NVME_MINOR(r) (((r) >> 8) & 0xff)
+
+/*
+ * Used to mark a command as applying to all namespaces, or to retrieve global
+ * log pages.
+ */
+#define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF)
+
+/* Cap nvme transfers at 1MB (or MAXPHYS if that is smaller); the driver explodes with larger sizes */
+#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
+
+/* Register field definitions: each field has a *_SHIFT (bit offset) and a *_MASK (applied after shifting, as in NVME_CSTS_GET_SHST) */
+#define NVME_CAP_LO_REG_MQES_SHIFT (0)
+#define NVME_CAP_LO_REG_MQES_MASK (0xFFFF)
+#define NVME_CAP_LO_REG_CQR_SHIFT (16)
+#define NVME_CAP_LO_REG_CQR_MASK (0x1)
+#define NVME_CAP_LO_REG_AMS_SHIFT (17)
+#define NVME_CAP_LO_REG_AMS_MASK (0x3)
+#define NVME_CAP_LO_REG_TO_SHIFT (24)
+#define NVME_CAP_LO_REG_TO_MASK (0xFF)
+
+#define NVME_CAP_HI_REG_DSTRD_SHIFT (0)
+#define NVME_CAP_HI_REG_DSTRD_MASK (0xF)
+#define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5)
+#define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1)
+#define NVME_CAP_HI_REG_MPSMIN_SHIFT (16)
+#define NVME_CAP_HI_REG_MPSMIN_MASK (0xF)
+#define NVME_CAP_HI_REG_MPSMAX_SHIFT (20)
+#define NVME_CAP_HI_REG_MPSMAX_MASK (0xF)
+
+#define NVME_CC_REG_EN_SHIFT (0)
+#define NVME_CC_REG_EN_MASK (0x1)
+#define NVME_CC_REG_CSS_SHIFT (4)
+#define NVME_CC_REG_CSS_MASK (0x7)
+#define NVME_CC_REG_MPS_SHIFT (7)
+#define NVME_CC_REG_MPS_MASK (0xF)
+#define NVME_CC_REG_AMS_SHIFT (11)
+#define NVME_CC_REG_AMS_MASK (0x7)
+#define NVME_CC_REG_SHN_SHIFT (14)
+#define NVME_CC_REG_SHN_MASK (0x3)
+#define NVME_CC_REG_IOSQES_SHIFT (16)
+#define NVME_CC_REG_IOSQES_MASK (0xF)
+#define NVME_CC_REG_IOCQES_SHIFT (20)
+#define NVME_CC_REG_IOCQES_MASK (0xF)
+
+#define NVME_CSTS_REG_RDY_SHIFT (0)
+#define NVME_CSTS_REG_RDY_MASK (0x1)
+#define NVME_CSTS_REG_CFS_SHIFT (1)
+#define NVME_CSTS_REG_CFS_MASK (0x1)
+#define NVME_CSTS_REG_SHST_SHIFT (2)
+#define NVME_CSTS_REG_SHST_MASK (0x3)
+
+#define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK) /* extracts the SHST field from a CSTS value */
+
+#define NVME_AQA_REG_ASQS_SHIFT (0)
+#define NVME_AQA_REG_ASQS_MASK (0xFFF)
+#define NVME_AQA_REG_ACQS_SHIFT (16)
+#define NVME_AQA_REG_ACQS_MASK (0xFFF)
+
+/* Command, status and power-state field definitions (same *_SHIFT / *_MASK convention: shift first, then mask) */
+
+#define NVME_CMD_FUSE_SHIFT (8)
+#define NVME_CMD_FUSE_MASK (0x3)
+
+#define NVME_STATUS_P_SHIFT (0)
+#define NVME_STATUS_P_MASK (0x1)
+#define NVME_STATUS_SC_SHIFT (1)
+#define NVME_STATUS_SC_MASK (0xFF)
+#define NVME_STATUS_SCT_SHIFT (9)
+#define NVME_STATUS_SCT_MASK (0x7)
+#define NVME_STATUS_M_SHIFT (14)
+#define NVME_STATUS_M_MASK (0x1)
+#define NVME_STATUS_DNR_SHIFT (15)
+#define NVME_STATUS_DNR_MASK (0x1)
+
+#define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK) /* each GET macro extracts one field of a status value */
+#define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)
+#define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK)
+#define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK)
+#define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK)
+
+#define NVME_PWR_ST_MPS_SHIFT (0)
+#define NVME_PWR_ST_MPS_MASK (0x1)
+#define NVME_PWR_ST_NOPS_SHIFT (1)
+#define NVME_PWR_ST_NOPS_MASK (0x1)
+#define NVME_PWR_ST_RRT_SHIFT (0)
+#define NVME_PWR_ST_RRT_MASK (0x1F)
+#define NVME_PWR_ST_RRL_SHIFT (0)
+#define NVME_PWR_ST_RRL_MASK (0x1F)
+#define NVME_PWR_ST_RWT_SHIFT (0)
+#define NVME_PWR_ST_RWT_MASK (0x1F)
+#define NVME_PWR_ST_RWL_SHIFT (0)
+#define NVME_PWR_ST_RWL_MASK (0x1F)
+#define NVME_PWR_ST_IPS_SHIFT (6)
+#define NVME_PWR_ST_IPS_MASK (0x3)
+#define NVME_PWR_ST_APW_SHIFT (0)
+#define NVME_PWR_ST_APW_MASK (0x7)
+#define NVME_PWR_ST_APS_SHIFT (6)
+#define NVME_PWR_ST_APS_MASK (0x3)
+
+/** Controller Multi-path I/O and Namespace Sharing Capabilities */
+/* More than one port */
+#define NVME_CTRLR_DATA_MIC_MPORTS_SHIFT (0)
+#define NVME_CTRLR_DATA_MIC_MPORTS_MASK (0x1)
+/* More than one controller */
+#define NVME_CTRLR_DATA_MIC_MCTRLRS_SHIFT (1)
+#define NVME_CTRLR_DATA_MIC_MCTRLRS_MASK (0x1)
+/* SR-IOV Virtual Function */
+#define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT (2)
+#define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK (0x1)
+
+/** OACS - optional admin command support */
+/* supports security send/receive commands */
+#define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT (0)
+#define NVME_CTRLR_DATA_OACS_SECURITY_MASK (0x1)
+/* supports format nvm command */
+#define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT (1)
+#define NVME_CTRLR_DATA_OACS_FORMAT_MASK (0x1)
+/* supports firmware activate/download commands */
+#define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT (2)
+#define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK (0x1)
+/* supports namespace management commands */
+#define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT (3)
+#define NVME_CTRLR_DATA_OACS_NSMGMT_MASK (0x1)
+/* supports Device Self-test command */
+#define NVME_CTRLR_DATA_OACS_SELFTEST_SHIFT (4)
+#define NVME_CTRLR_DATA_OACS_SELFTEST_MASK (0x1)
+/* supports Directives */
+#define NVME_CTRLR_DATA_OACS_DIRECTIVES_SHIFT (5)
+#define NVME_CTRLR_DATA_OACS_DIRECTIVES_MASK (0x1)
+/* supports NVMe-MI Send/Receive */
+#define NVME_CTRLR_DATA_OACS_NVMEMI_SHIFT (6)
+#define NVME_CTRLR_DATA_OACS_NVMEMI_MASK (0x1)
+/* supports Virtualization Management */
+#define NVME_CTRLR_DATA_OACS_VM_SHIFT (7)
+#define NVME_CTRLR_DATA_OACS_VM_MASK (0x1)
+/* supports Doorbell Buffer Config */
+#define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT (8)
+#define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK (0x1)
+
+/** firmware updates */
+/* first slot is read-only */
+#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT (0)
+#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK (0x1)
+/* number of firmware slots */
+#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1)
+#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7)
+
+/** log page attributes */
+/* per namespace smart/health log page */
+#define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT (0)
+#define NVME_CTRLR_DATA_LPA_NS_SMART_MASK (0x1)
+
+/** AVSCC - admin vendor specific command configuration */
+/* admin vendor specific commands use spec format */
+#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT (0)
+#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK (0x1)
+
+/** Autonomous Power State Transition Attributes */
+/* Autonomous Power State Transitions supported */
+#define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0)
+#define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1)
+
+/** submission queue entry size */
+#define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0)
+#define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF)
+#define NVME_CTRLR_DATA_SQES_MAX_SHIFT (4)
+#define NVME_CTRLR_DATA_SQES_MAX_MASK (0xF)
+
+/** completion queue entry size */
+#define NVME_CTRLR_DATA_CQES_MIN_SHIFT (0)
+#define NVME_CTRLR_DATA_CQES_MIN_MASK (0xF)
+#define NVME_CTRLR_DATA_CQES_MAX_SHIFT (4)
+#define NVME_CTRLR_DATA_CQES_MAX_MASK (0xF)
+
+/** optional nvm command support */
+#define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT (0)
+#define NVME_CTRLR_DATA_ONCS_COMPARE_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT (1)
+#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_DSM_SHIFT (2)
+#define NVME_CTRLR_DATA_ONCS_DSM_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_WRZERO_SHIFT (3)
+#define NVME_CTRLR_DATA_ONCS_WRZERO_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_SAVEFEAT_SHIFT (4)
+#define NVME_CTRLR_DATA_ONCS_SAVEFEAT_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_RESERV_SHIFT (5)
+#define NVME_CTRLR_DATA_ONCS_RESERV_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT (6)
+#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK (0x1)
+
+/** Fused Operation Support */
+#define NVME_CTRLR_DATA_FUSES_CNW_SHIFT (0)
+#define NVME_CTRLR_DATA_FUSES_CNW_MASK (0x1)
+
+/** Format NVM Attributes */
+#define NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT (0)
+#define NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK (0x1)
+#define NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT (1)
+#define NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK (0x1)
+#define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT (2)
+#define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK (0x1)
+
+/** volatile write cache */
+#define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0)
+#define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1)
+
+/** namespace features */
+/* thin provisioning */
+#define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT (0)
+#define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK (0x1)
+/* NAWUN, NAWUPF, and NACWU fields are valid */
+#define NVME_NS_DATA_NSFEAT_NA_FIELDS_SHIFT (1)
+#define NVME_NS_DATA_NSFEAT_NA_FIELDS_MASK (0x1)
+/* Deallocated or Unwritten Logical Block errors supported */
+#define NVME_NS_DATA_NSFEAT_DEALLOC_SHIFT (2)
+#define NVME_NS_DATA_NSFEAT_DEALLOC_MASK (0x1)
+/* NGUID and EUI64 fields are not reusable */
+#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT (3)
+#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK (0x1)
+
+/** formatted lba size */
+#define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0)
+#define NVME_NS_DATA_FLBAS_FORMAT_MASK (0xF)
+#define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT (4)
+#define NVME_NS_DATA_FLBAS_EXTENDED_MASK (0x1)
+
+/** metadata capabilities */
+/* metadata can be transferred as part of data prp list */
+#define NVME_NS_DATA_MC_EXTENDED_SHIFT (0)
+#define NVME_NS_DATA_MC_EXTENDED_MASK (0x1)
+/* metadata can be transferred with separate metadata pointer */
+#define NVME_NS_DATA_MC_POINTER_SHIFT (1)
+#define NVME_NS_DATA_MC_POINTER_MASK (0x1)
+
+/** end-to-end data protection capabilities */
+/* protection information type 1 */
+#define NVME_NS_DATA_DPC_PIT1_SHIFT (0)
+#define NVME_NS_DATA_DPC_PIT1_MASK (0x1)
+/* protection information type 2 */
+#define NVME_NS_DATA_DPC_PIT2_SHIFT (1)
+#define NVME_NS_DATA_DPC_PIT2_MASK (0x1)
+/* protection information type 3 */
+#define NVME_NS_DATA_DPC_PIT3_SHIFT (2)
+#define NVME_NS_DATA_DPC_PIT3_MASK (0x1)
+/* first eight bytes of metadata */
+#define NVME_NS_DATA_DPC_MD_START_SHIFT (3)
+#define NVME_NS_DATA_DPC_MD_START_MASK (0x1)
+/* last eight bytes of metadata */
+#define NVME_NS_DATA_DPC_MD_END_SHIFT (4)
+#define NVME_NS_DATA_DPC_MD_END_MASK (0x1)
+
+/** end-to-end data protection type settings */
+/* protection information type */
+#define NVME_NS_DATA_DPS_PIT_SHIFT (0)
+#define NVME_NS_DATA_DPS_PIT_MASK (0x7)
+/* 1 == protection info transferred at start of metadata */
+/* 0 == protection info transferred at end of metadata */
+#define NVME_NS_DATA_DPS_MD_START_SHIFT (3)
+#define NVME_NS_DATA_DPS_MD_START_MASK (0x1)
+
+/** Namespace Multi-path I/O and Namespace Sharing Capabilities */
+/* the namespace may be attached to two or more controllers */
+#define NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT (0)
+#define NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK (0x1)
+
+/** Reservation Capabilities */
+/* Persist Through Power Loss */
+#define NVME_NS_DATA_RESCAP_PTPL_SHIFT (0)
+#define NVME_NS_DATA_RESCAP_PTPL_MASK (0x1)
+/* supports the Write Exclusive */
+#define NVME_NS_DATA_RESCAP_WR_EX_SHIFT (1)
+#define NVME_NS_DATA_RESCAP_WR_EX_MASK (0x1)
+/* supports the Exclusive Access */
+#define NVME_NS_DATA_RESCAP_EX_AC_SHIFT (2)
+#define NVME_NS_DATA_RESCAP_EX_AC_MASK (0x1)
+/* supports the Write Exclusive - Registrants Only */
+#define NVME_NS_DATA_RESCAP_WR_EX_RO_SHIFT (3)
+#define NVME_NS_DATA_RESCAP_WR_EX_RO_MASK (0x1)
+/* supports the Exclusive Access - Registrants Only */
+#define NVME_NS_DATA_RESCAP_EX_AC_RO_SHIFT (4)
+#define NVME_NS_DATA_RESCAP_EX_AC_RO_MASK (0x1)
+/* supports the Write Exclusive - All Registrants */
+#define NVME_NS_DATA_RESCAP_WR_EX_AR_SHIFT (5)
+#define NVME_NS_DATA_RESCAP_WR_EX_AR_MASK (0x1)
+/* supports the Exclusive Access - All Registrants */
+#define NVME_NS_DATA_RESCAP_EX_AC_AR_SHIFT (6)
+#define NVME_NS_DATA_RESCAP_EX_AC_AR_MASK (0x1)
+/* Ignore Existing Key is used as defined in revision 1.3 or later */
+#define NVME_NS_DATA_RESCAP_IEKEY13_SHIFT (7)
+#define NVME_NS_DATA_RESCAP_IEKEY13_MASK (0x1)
+
+/** Format Progress Indicator */
+/* percentage of the Format NVM command that remains to be completed */
+#define NVME_NS_DATA_FPI_PERC_SHIFT (0)
+#define NVME_NS_DATA_FPI_PERC_MASK (0x7f)
+/* namespace supports the Format Progress Indicator */
+#define NVME_NS_DATA_FPI_SUPP_SHIFT (7)
+#define NVME_NS_DATA_FPI_SUPP_MASK (0x1)
+
+/** lba format support */
+/* metadata size */
+#define NVME_NS_DATA_LBAF_MS_SHIFT (0)
+#define NVME_NS_DATA_LBAF_MS_MASK (0xFFFF)
+/* lba data size */
+#define NVME_NS_DATA_LBAF_LBADS_SHIFT (16)
+#define NVME_NS_DATA_LBAF_LBADS_MASK (0xFF)
+/* relative performance */
+#define NVME_NS_DATA_LBAF_RP_SHIFT (24)
+#define NVME_NS_DATA_LBAF_RP_MASK (0x3)
+
+enum nvme_critical_warning_state { /* one bit per warning; values can be OR-ed together */
+ NVME_CRIT_WARN_ST_AVAILABLE_SPARE = 0x1,
+ NVME_CRIT_WARN_ST_TEMPERATURE = 0x2,
+ NVME_CRIT_WARN_ST_DEVICE_RELIABILITY = 0x4,
+ NVME_CRIT_WARN_ST_READ_ONLY = 0x8,
+ NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10,
+};
+#define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0) /* bits 5-7: the bits of a byte not named by the enum */
+
+/* slot for current FW: the low three bits of the firmware page AFI field */
+#define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0)
+#define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7)
+
+/* CC register SHN field values (field located by NVME_CC_REG_SHN_SHIFT / NVME_CC_REG_SHN_MASK) */
+enum shn_value {
+ NVME_SHN_NORMAL = 0x1,
+ NVME_SHN_ABRUPT = 0x2,
+};
+
+/* CSTS register SHST field values, i.e. the possible results of NVME_CSTS_GET_SHST() */
+enum shst_value {
+ NVME_SHST_NORMAL = 0x0,
+ NVME_SHST_OCCURRING = 0x1,
+ NVME_SHST_COMPLETE = 0x2,
+};
+
+struct nvme_registers
+{
+ /** controller capabilities, as two 32-bit halves (NVME_CAP_LO_REG_* / NVME_CAP_HI_REG_* fields) */
+ uint32_t cap_lo;
+ uint32_t cap_hi;
+
+ uint32_t vs; /* version; decode with NVME_MAJOR() / NVME_MINOR() */
+ uint32_t intms; /* interrupt mask set */
+ uint32_t intmc; /* interrupt mask clear */
+
+ /** controller configuration (NVME_CC_REG_* fields) */
+ uint32_t cc;
+
+ uint32_t reserved1;
+
+ /** controller status (NVME_CSTS_REG_* fields) */
+ uint32_t csts;
+
+ uint32_t reserved2;
+
+ /** admin queue attributes (NVME_AQA_REG_* fields) */
+ uint32_t aqa;
+
+ uint64_t asq; /* admin submission queue base addr */
+ uint64_t acq; /* admin completion queue base addr */
+ uint32_t reserved3[0x3f2]; /* pads offsets 0x38-0xfff so that doorbell[] starts at 0x1000 */
+
+ struct {
+ uint32_t sq_tdbl; /* submission queue tail doorbell */
+ uint32_t cq_hdbl; /* completion queue head doorbell */
+ } doorbell[1] __packed; /* only the first doorbell pair is declared here */
+} __packed;
+
+_Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers");
+
+struct nvme_command /* packed 16-dword (64-byte) command layout; size enforced by the assert below */
+{
+ /* dword 0 */
+ uint8_t opc; /* opcode */
+ uint8_t fuse; /* fused operation */
+ uint16_t cid; /* command identifier */
+
+ /* dword 1 */
+ uint32_t nsid; /* namespace identifier */
+
+ /* dword 2-3 (reserved) */
+ uint32_t rsvd2;
+ uint32_t rsvd3;
+
+ /* dword 4-5 */
+ uint64_t mptr; /* metadata pointer */
+
+ /* dword 6-7 */
+ uint64_t prp1; /* prp entry 1 */
+
+ /* dword 8-9 */
+ uint64_t prp2; /* prp entry 2 */
+
+ /* dword 10-15 */
+ uint32_t cdw10; /* command-specific */
+ uint32_t cdw11; /* command-specific */
+ uint32_t cdw12; /* command-specific */
+ uint32_t cdw13; /* command-specific */
+ uint32_t cdw14; /* command-specific */
+ uint32_t cdw15; /* command-specific */
+} __packed;
+
+_Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command");
+
+struct nvme_completion { /* packed 4-dword (16-byte) completion layout; size enforced by the assert below */
+
+ /* dword 0 */
+ uint32_t cdw0; /* command-specific */
+
+ /* dword 1 (reserved) */
+ uint32_t rsvd1;
+
+ /* dword 2 */
+ uint16_t sqhd; /* submission queue head pointer */
+ uint16_t sqid; /* submission queue identifier */
+
+ /* dword 3 */
+ uint16_t cid; /* command identifier */
+ uint16_t status; /* NOTE(review): presumably decoded with the NVME_STATUS_GET_*() macros - confirm */
+} __packed;
+
+_Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion");
+
+struct nvme_dsm_range { /* packed 16-byte range descriptor; size enforced by the assert below */
+ uint32_t attributes;
+ uint32_t length;
+ uint64_t starting_lba;
+} __packed;
+
+/* Largest DSM Trim that can be done */
+#define NVME_MAX_DSM_TRIM 4096
+
+_Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_range");
+
+/* status code types: values of the SCT status field (see NVME_STATUS_GET_SCT) */
+enum nvme_status_code_type {
+ NVME_SCT_GENERIC = 0x0,
+ NVME_SCT_COMMAND_SPECIFIC = 0x1,
+ NVME_SCT_MEDIA_ERROR = 0x2,
+ /* 0x3-0x6 - reserved */
+ NVME_SCT_VENDOR_SPECIFIC = 0x7,
+};
+
+/* generic command status codes (presumably the SC values paired with NVME_SCT_GENERIC) */
+enum nvme_generic_command_status_code {
+ NVME_SC_SUCCESS = 0x00,
+ NVME_SC_INVALID_OPCODE = 0x01,
+ NVME_SC_INVALID_FIELD = 0x02,
+ NVME_SC_COMMAND_ID_CONFLICT = 0x03,
+ NVME_SC_DATA_TRANSFER_ERROR = 0x04,
+ NVME_SC_ABORTED_POWER_LOSS = 0x05,
+ NVME_SC_INTERNAL_DEVICE_ERROR = 0x06,
+ NVME_SC_ABORTED_BY_REQUEST = 0x07,
+ NVME_SC_ABORTED_SQ_DELETION = 0x08,
+ NVME_SC_ABORTED_FAILED_FUSED = 0x09,
+ NVME_SC_ABORTED_MISSING_FUSED = 0x0a,
+ NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b,
+ NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c,
+ NVME_SC_INVALID_SGL_SEGMENT_DESCR = 0x0d,
+ NVME_SC_INVALID_NUMBER_OF_SGL_DESCR = 0x0e,
+ NVME_SC_DATA_SGL_LENGTH_INVALID = 0x0f,
+ NVME_SC_METADATA_SGL_LENGTH_INVALID = 0x10,
+ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID = 0x11,
+ NVME_SC_INVALID_USE_OF_CMB = 0x12,
+ NVME_SC_PRP_OFFET_INVALID = 0x13, /* NOTE(review): OFFET looks like a typo for OFFSET; renaming would break users */
+ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED = 0x14,
+ NVME_SC_OPERATION_DENIED = 0x15,
+ NVME_SC_SGL_OFFSET_INVALID = 0x16,
+ /* 0x17 - reserved */
+ NVME_SC_HOST_ID_INCONSISTENT_FORMAT = 0x18,
+ NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED = 0x19,
+ NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID = 0x1a,
+ NVME_SC_ABORTED_DUE_TO_PREEMPT = 0x1b,
+ NVME_SC_SANITIZE_FAILED = 0x1c,
+ NVME_SC_SANITIZE_IN_PROGRESS = 0x1d,
+ NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID = 0x1e,
+ NVME_SC_NOT_SUPPORTED_IN_CMB = 0x1f, /* 0x20-0x7f are not defined in this enum */
+
+ NVME_SC_LBA_OUT_OF_RANGE = 0x80,
+ NVME_SC_CAPACITY_EXCEEDED = 0x81,
+ NVME_SC_NAMESPACE_NOT_READY = 0x82,
+ NVME_SC_RESERVATION_CONFLICT = 0x83,
+ NVME_SC_FORMAT_IN_PROGRESS = 0x84,
+};
+
+/* command specific status codes (presumably the SC values paired with NVME_SCT_COMMAND_SPECIFIC) */
+enum nvme_command_specific_status_code {
+ NVME_SC_COMPLETION_QUEUE_INVALID = 0x00,
+ NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01,
+ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED = 0x02,
+ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03,
+ /* 0x04 - reserved */
+ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
+ NVME_SC_INVALID_FIRMWARE_SLOT = 0x06,
+ NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07,
+ NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08,
+ NVME_SC_INVALID_LOG_PAGE = 0x09,
+ NVME_SC_INVALID_FORMAT = 0x0a,
+ NVME_SC_FIRMWARE_REQUIRES_RESET = 0x0b,
+ NVME_SC_INVALID_QUEUE_DELETION = 0x0c,
+ NVME_SC_FEATURE_NOT_SAVEABLE = 0x0d,
+ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x0e,
+ NVME_SC_FEATURE_NOT_NS_SPECIFIC = 0x0f,
+ NVME_SC_FW_ACT_REQUIRES_NVMS_RESET = 0x10,
+ NVME_SC_FW_ACT_REQUIRES_RESET = 0x11,
+ NVME_SC_FW_ACT_REQUIRES_TIME = 0x12,
+ NVME_SC_FW_ACT_PROHIBITED = 0x13,
+ NVME_SC_OVERLAPPING_RANGE = 0x14,
+ NVME_SC_NS_INSUFFICIENT_CAPACITY = 0x15,
+ NVME_SC_NS_ID_UNAVAILABLE = 0x16,
+ /* 0x17 - reserved */
+ NVME_SC_NS_ALREADY_ATTACHED = 0x18,
+ NVME_SC_NS_IS_PRIVATE = 0x19,
+ NVME_SC_NS_NOT_ATTACHED = 0x1a,
+ NVME_SC_THIN_PROV_NOT_SUPPORTED = 0x1b,
+ NVME_SC_CTRLR_LIST_INVALID = 0x1c,
+ NVME_SC_SELT_TEST_IN_PROGRESS = 0x1d, /* NOTE(review): SELT looks like a typo for SELF; renaming would break users */
+ NVME_SC_BOOT_PART_WRITE_PROHIB = 0x1e,
+ NVME_SC_INVALID_CTRLR_ID = 0x1f,
+ NVME_SC_INVALID_SEC_CTRLR_STATE = 0x20,
+ NVME_SC_INVALID_NUM_OF_CTRLR_RESRC = 0x21,
+ NVME_SC_INVALID_RESOURCE_ID = 0x22, /* 0x23-0x7f are not defined in this enum */
+
+ NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
+ NVME_SC_INVALID_PROTECTION_INFO = 0x81,
+ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE = 0x82,
+};
+
+/* media error status codes (presumably the SC values paired with NVME_SCT_MEDIA_ERROR); all defined values start at 0x80 */
+enum nvme_media_error_status_code {
+ NVME_SC_WRITE_FAULTS = 0x80,
+ NVME_SC_UNRECOVERED_READ_ERROR = 0x81,
+ NVME_SC_GUARD_CHECK_ERROR = 0x82,
+ NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83,
+ NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84,
+ NVME_SC_COMPARE_FAILURE = 0x85,
+ NVME_SC_ACCESS_DENIED = 0x86,
+ NVME_SC_DEALLOCATED_OR_UNWRITTEN = 0x87,
+};
+
+/* admin opcodes; values overlap numerically with enum nvme_nvm_opcode, so the two sets are distinct namespaces */
+enum nvme_admin_opcode {
+ NVME_OPC_DELETE_IO_SQ = 0x00,
+ NVME_OPC_CREATE_IO_SQ = 0x01,
+ NVME_OPC_GET_LOG_PAGE = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_DELETE_IO_CQ = 0x04,
+ NVME_OPC_CREATE_IO_CQ = 0x05,
+ NVME_OPC_IDENTIFY = 0x06,
+ /* 0x07 - reserved */
+ NVME_OPC_ABORT = 0x08,
+ NVME_OPC_SET_FEATURES = 0x09,
+ NVME_OPC_GET_FEATURES = 0x0a,
+ /* 0x0b - reserved */
+ NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c,
+ NVME_OPC_NAMESPACE_MANAGEMENT = 0x0d,
+ /* 0x0e-0x0f - reserved */
+ NVME_OPC_FIRMWARE_ACTIVATE = 0x10,
+ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11, /* 0x12-0x13 are not defined in this enum */
+ NVME_OPC_DEVICE_SELF_TEST = 0x14,
+ NVME_OPC_NAMESPACE_ATTACHMENT = 0x15, /* 0x16-0x17 are not defined in this enum */
+ NVME_OPC_KEEP_ALIVE = 0x18,
+ NVME_OPC_DIRECTIVE_SEND = 0x19,
+ NVME_OPC_DIRECTIVE_RECEIVE = 0x1a, /* 0x1b is not defined in this enum */
+ NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c,
+ NVME_OPC_NVME_MI_SEND = 0x1d,
+ NVME_OPC_NVME_MI_RECEIVE = 0x1e,
+ NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c,
+
+ NVME_OPC_FORMAT_NVM = 0x80,
+ NVME_OPC_SECURITY_SEND = 0x81,
+ NVME_OPC_SECURITY_RECEIVE = 0x82, /* 0x83 is not defined in this enum */
+ NVME_OPC_SANITIZE = 0x84,
+};
+
+/* nvme nvm opcodes; values overlap numerically with enum nvme_admin_opcode */
+enum nvme_nvm_opcode {
+ NVME_OPC_FLUSH = 0x00,
+ NVME_OPC_WRITE = 0x01,
+ NVME_OPC_READ = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
+ NVME_OPC_COMPARE = 0x05,
+ /* 0x06-0x07 - reserved */
+ NVME_OPC_WRITE_ZEROES = 0x08,
+ /* NOTE(review): attribute flags for the next opcode are presumably enum nvme_dsm_attribute - confirm */
+ NVME_OPC_DATASET_MANAGEMENT = 0x09,
+ /* 0x0a-0x0c - reserved */
+ NVME_OPC_RESERVATION_REGISTER = 0x0d,
+ NVME_OPC_RESERVATION_REPORT = 0x0e,
+ /* 0x0f-0x10 - reserved */
+ NVME_OPC_RESERVATION_ACQUIRE = 0x11,
+ /* 0x12-0x14 - reserved */
+ NVME_OPC_RESERVATION_RELEASE = 0x15,
+};
+
+enum nvme_feature { /* feature identifiers; presumably used with NVME_OPC_SET_FEATURES / NVME_OPC_GET_FEATURES - confirm */
+ /* 0x00 - reserved */
+ NVME_FEAT_ARBITRATION = 0x01,
+ NVME_FEAT_POWER_MANAGEMENT = 0x02,
+ NVME_FEAT_LBA_RANGE_TYPE = 0x03,
+ NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04,
+ NVME_FEAT_ERROR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06,
+ NVME_FEAT_NUMBER_OF_QUEUES = 0x07,
+ NVME_FEAT_INTERRUPT_COALESCING = 0x08,
+ NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
+ NVME_FEAT_WRITE_ATOMICITY = 0x0A,
+ NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B,
+ NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
+ NVME_FEAT_HOST_MEMORY_BUFFER = 0x0D,
+ NVME_FEAT_TIMESTAMP = 0x0E,
+ NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F,
+ NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT = 0x10,
+ NVME_FEAT_NON_OP_POWER_STATE_CONFIG = 0x11,
+ /* 0x12-0x77 - reserved */
+ /* 0x78-0x7f - NVMe Management Interface */
+ NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
+ /* 0x81-0xBF - command set specific (reserved) */
+ /* 0xC0-0xFF - vendor specific */
+};
+
+enum nvme_dsm_attribute { /* one bit per attribute; values can be OR-ed together */
+ NVME_DSM_ATTR_INTEGRAL_READ = 0x1,
+ NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2,
+ NVME_DSM_ATTR_DEALLOCATE = 0x4,
+};
+
+enum nvme_activate_action { /* mutually exclusive action codes (sequential values, not bit flags) */
+ NVME_AA_REPLACE_NO_ACTIVATE = 0x0,
+ NVME_AA_REPLACE_ACTIVATE = 0x1,
+ NVME_AA_ACTIVATE = 0x2,
+};
+
+struct nvme_power_state {
+ /** packed 32-byte power state descriptor; sub-byte fields are located by the NVME_PWR_ST_* shift/mask macros */
+ uint16_t mp; /* Maximum Power */
+ uint8_t ps_rsvd1;
+ uint8_t mps_nops; /* Max Power Scale, Non-Operational State (NVME_PWR_ST_MPS_*, NVME_PWR_ST_NOPS_*) */
+
+ uint32_t enlat; /* Entry Latency */
+ uint32_t exlat; /* Exit Latency */
+
+ uint8_t rrt; /* Relative Read Throughput (NVME_PWR_ST_RRT_*) */
+ uint8_t rrl; /* Relative Read Latency (NVME_PWR_ST_RRL_*) */
+ uint8_t rwt; /* Relative Write Throughput (NVME_PWR_ST_RWT_*) */
+ uint8_t rwl; /* Relative Write Latency (NVME_PWR_ST_RWL_*) */
+
+ uint16_t idlp; /* Idle Power */
+ uint8_t ips; /* Idle Power Scale (NVME_PWR_ST_IPS_*) */
+ uint8_t ps_rsvd8;
+
+ uint16_t actp; /* Active Power */
+ uint8_t apw_aps; /* Active Power Workload, Active Power Scale (NVME_PWR_ST_APW_*, NVME_PWR_ST_APS_*) */
+ uint8_t ps_rsvd10[9]; /* pads the descriptor out to 32 bytes */
+} __packed;
+
+_Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state");
+
+#define NVME_SERIAL_NUMBER_LENGTH 20 /* byte lengths of the sn, mn and fr arrays below */
+#define NVME_MODEL_NUMBER_LENGTH 40
+#define NVME_FIRMWARE_REVISION_LENGTH 8
+
+struct nvme_controller_data { /* packed 4096-byte layout; size enforced by the assert below */
+
+ /* bytes 0-255: controller capabilities and features */
+
+ /** pci vendor id */
+ uint16_t vid;
+
+ /** pci subsystem vendor id */
+ uint16_t ssvid;
+
+ /** serial number */
+ uint8_t sn[NVME_SERIAL_NUMBER_LENGTH];
+
+ /** model number */
+ uint8_t mn[NVME_MODEL_NUMBER_LENGTH];
+
+ /** firmware revision */
+ uint8_t fr[NVME_FIRMWARE_REVISION_LENGTH];
+
+ /** recommended arbitration burst */
+ uint8_t rab;
+
+ /** ieee oui identifier */
+ uint8_t ieee[3];
+
+ /** multi-interface capabilities (NVME_CTRLR_DATA_MIC_* bits) */
+ uint8_t mic;
+
+ /** maximum data transfer size */
+ uint8_t mdts;
+
+ /** Controller ID */
+ uint16_t ctrlr_id;
+
+ /** Version */
+ uint32_t ver;
+
+ /** RTD3 Resume Latency */
+ uint32_t rtd3r;
+
+ /** RTD3 Enter Latency */
+ uint32_t rtd3e;
+
+ /** Optional Asynchronous Events Supported */
+ uint32_t oaes; /* bitfield really */
+
+ /** Controller Attributes */
+ uint32_t ctratt; /* bitfield really */
+
+ uint8_t reserved1[12]; /* bytes 100-111 */
+
+ /** FRU Globally Unique Identifier */
+ uint8_t fguid[16];
+
+ uint8_t reserved2[128]; /* bytes 128-255 */
+
+ /* bytes 256-511: admin command set attributes */
+
+ /** optional admin command support (NVME_CTRLR_DATA_OACS_* bits) */
+ uint16_t oacs;
+
+ /** abort command limit */
+ uint8_t acl;
+
+ /** asynchronous event request limit */
+ uint8_t aerl;
+
+ /** firmware updates (NVME_CTRLR_DATA_FRMW_* fields) */
+ uint8_t frmw;
+
+ /** log page attributes (NVME_CTRLR_DATA_LPA_* bits) */
+ uint8_t lpa;
+
+ /** error log page entries */
+ uint8_t elpe;
+
+ /** number of power states supported */
+ uint8_t npss;
+
+ /** admin vendor specific command configuration (NVME_CTRLR_DATA_AVSCC_* bits) */
+ uint8_t avscc;
+
+ /** Autonomous Power State Transition Attributes (NVME_CTRLR_DATA_APSTA_* bits) */
+ uint8_t apsta;
+
+ /** Warning Composite Temperature Threshold */
+ uint16_t wctemp;
+
+ /** Critical Composite Temperature Threshold */
+ uint16_t cctemp;
+
+ /** Maximum Time for Firmware Activation */
+ uint16_t mtfa;
+
+ /** Host Memory Buffer Preferred Size */
+ uint32_t hmpre;
+
+ /** Host Memory Buffer Minimum Size */
+ uint32_t hmmin;
+
+ /** Namespace capabilities */
+ struct {
+ /* if nsmgmt, report tnvmcap and unvmcap */
+ uint8_t tnvmcap[16];
+ uint8_t unvmcap[16];
+ } __packed untncap;
+
+ /** Replay Protected Memory Block Support */
+ uint32_t rpmbs; /* Really a bitfield */
+
+ /** Extended Device Self-test Time */
+ uint16_t edstt;
+
+ /** Device Self-test Options */
+ uint8_t dsto; /* Really a bitfield */
+
+ /** Firmware Update Granularity */
+ uint8_t fwug;
+
+ /** Keep Alive Support */
+ uint16_t kas;
+
+ /** Host Controlled Thermal Management Attributes */
+ uint16_t hctma; /* Really a bitfield */
+
+ /** Minimum Thermal Management Temperature */
+ uint16_t mntmt;
+
+ /** Maximum Thermal Management Temperature */
+ uint16_t mxtmt;
+
+ /** Sanitize Capabilities */
+ uint32_t sanicap; /* Really a bitfield */
+
+ uint8_t reserved3[180]; /* bytes 332-511 */
+ /* bytes 512-703: nvm command set attributes */
+
+ /** submission queue entry size (NVME_CTRLR_DATA_SQES_* fields) */
+ uint8_t sqes;
+
+ /** completion queue entry size (NVME_CTRLR_DATA_CQES_* fields) */
+ uint8_t cqes;
+
+ /** Maximum Outstanding Commands */
+ uint16_t maxcmd;
+
+ /** number of namespaces */
+ uint32_t nn;
+
+ /** optional nvm command support (NVME_CTRLR_DATA_ONCS_* bits) */
+ uint16_t oncs;
+
+ /** fused operation support (NVME_CTRLR_DATA_FUSES_* bits) */
+ uint16_t fuses;
+
+ /** format nvm attributes (NVME_CTRLR_DATA_FNA_* bits) */
+ uint8_t fna;
+
+ /** volatile write cache (NVME_CTRLR_DATA_VWC_* bits) */
+ uint8_t vwc;
+
+ /** Atomic Write Unit Normal */
+ uint16_t awun;
+
+ /** Atomic Write Unit Power Fail */
+ uint16_t awupf;
+
+ /** NVM Vendor Specific Command Configuration */
+ uint8_t nvscc;
+ uint8_t reserved5;
+
+ /** Atomic Compare & Write Unit */
+ uint16_t acwu;
+ uint16_t reserved6;
+
+ /** SGL Support */
+ uint32_t sgls;
+
+ /* bytes 540-767: Reserved */
+ uint8_t reserved7[228];
+
+ /** NVM Subsystem NVMe Qualified Name (bytes 768-1023) */
+ uint8_t subnqn[256];
+
+ /* bytes 1024-1791: Reserved */
+ uint8_t reserved8[768];
+
+ /* bytes 1792-2047: NVMe over Fabrics specification */
+ uint8_t reserved9[256];
+
+ /* bytes 2048-3071: power state descriptors (32 entries of 32 bytes each) */
+ struct nvme_power_state power_state[32];
+
+ /* bytes 3072-4095: vendor specific */
+ uint8_t vs[1024];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data");
+
+struct nvme_namespace_data {
+
+ /** namespace size */
+ uint64_t nsze;
+
+ /** namespace capacity */
+ uint64_t ncap;
+
+ /** namespace utilization */
+ uint64_t nuse;
+
+ /** namespace features */
+ uint8_t nsfeat;
+
+ /** number of lba formats */
+ uint8_t nlbaf;
+
+ /** formatted lba size */
+ uint8_t flbas;
+
+ /** metadata capabilities */
+ uint8_t mc;
+
+ /** end-to-end data protection capabilities */
+ uint8_t dpc;
+
+ /** end-to-end data protection type settings */
+ uint8_t dps;
+
+ /** Namespace Multi-path I/O and Namespace Sharing Capabilities */
+ uint8_t nmic;
+
+ /** Reservation Capabilities */
+ uint8_t rescap;
+
+ /** Format Progress Indicator */
+ uint8_t fpi;
+
+ /** Deallocate Logical Block Features */
+ uint8_t dlfeat;
+
+ /** Namespace Atomic Write Unit Normal */
+ uint16_t nawun;
+
+ /** Namespace Atomic Write Unit Power Fail */
+ uint16_t nawupf;
+
+ /** Namespace Atomic Compare & Write Unit */
+ uint16_t nacwu;
+
+ /** Namespace Atomic Boundary Size Normal */
+ uint16_t nabsn;
+
+ /** Namespace Atomic Boundary Offset */
+ uint16_t nabo;
+
+ /** Namespace Atomic Boundary Size Power Fail */
+ uint16_t nabspf;
+
+ /** Namespace Optimal IO Boundary */
+ uint16_t noiob;
+
+ /** NVM Capacity */
+ uint8_t nvmcap[16];
+
+ /* bytes 64-103: Reserved */
+ uint8_t reserved5[40];
+
+ /** Namespace Globally Unique Identifier */
+ uint8_t nguid[16];
+
+ /** IEEE Extended Unique Identifier */
+ uint8_t eui64[8];
+
+ /** lba format support */
+ uint32_t lbaf[16];
+
+ uint8_t reserved6[192];
+
+ uint8_t vendor_specific[3712];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namespace_data");
+
+enum nvme_log_page {
+
+ /* 0x00 - reserved */
+ NVME_LOG_ERROR = 0x01,
+ NVME_LOG_HEALTH_INFORMATION = 0x02,
+ NVME_LOG_FIRMWARE_SLOT = 0x03,
+ NVME_LOG_CHANGED_NAMESPACE = 0x04,
+ NVME_LOG_COMMAND_EFFECT = 0x05,
+ /* 0x06-0x7F - reserved */
+ /* 0x80-0xBF - I/O command set specific */
+ NVME_LOG_RES_NOTIFICATION = 0x80,
+ /* 0xC0-0xFF - vendor specific */
+
+ /*
+ * The following are Intel Specific log pages, but they seem
+ * to be widely implemented.
+ */
+ INTEL_LOG_READ_LAT_LOG = 0xc1,
+ INTEL_LOG_WRITE_LAT_LOG = 0xc2,
+ INTEL_LOG_TEMP_STATS = 0xc5,
+ INTEL_LOG_ADD_SMART = 0xca,
+ INTEL_LOG_DRIVE_MKT_NAME = 0xdd,
+
+ /*
+ * HGST log page, with lots of sub pages (same value as INTEL_LOG_READ_LAT_LOG).
+ */
+ HGST_INFO_LOG = 0xc1,
+};
+
+struct nvme_error_information_entry {
+
+ uint64_t error_count;
+ uint16_t sqid;
+ uint16_t cid;
+ uint16_t status;
+ uint16_t error_location;
+ uint64_t lba;
+ uint32_t nsid;
+ uint8_t vendor_specific;
+ uint8_t reserved[35];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");
+
+struct nvme_health_information_page {
+
+ uint8_t critical_warning;
+ uint16_t temperature;
+ uint8_t available_spare;
+ uint8_t available_spare_threshold;
+ uint8_t percentage_used;
+
+ uint8_t reserved[26];
+
+ /*
+ * Note that the following are 128-bit values, but are
+ * defined as an array of 2 64-bit values.
+ */
+ /* Data Units Read is always in 512-byte units. */
+ uint64_t data_units_read[2];
+ /* Data Units Written is always in 512-byte units. */
+ uint64_t data_units_written[2];
+ /* For NVM command set, this includes Compare commands. */
+ uint64_t host_read_commands[2];
+ uint64_t host_write_commands[2];
+ /* Controller Busy Time is reported in minutes. */
+ uint64_t controller_busy_time[2];
+ uint64_t power_cycles[2];
+ uint64_t power_on_hours[2];
+ uint64_t unsafe_shutdowns[2];
+ uint64_t media_errors[2];
+ uint64_t num_error_info_log_entries[2];
+ uint32_t warning_temp_time;
+ uint32_t error_temp_time;
+ uint16_t temp_sensor[8];
+
+ uint8_t reserved2[296];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");
+
+struct nvme_firmware_page {
+
+ uint8_t afi;
+ uint8_t reserved[7];
+ uint64_t revision[7]; /* revisions for 7 slots */
+ uint8_t reserved2[448];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page");
+
+struct nvme_ns_list {
+ uint32_t ns[1024];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list");
+
+struct intel_log_temp_stats
+{
+ uint64_t current;
+ uint64_t overtemp_flag_last;
+ uint64_t overtemp_flag_life;
+ uint64_t max_temp;
+ uint64_t min_temp;
+ uint64_t _rsvd[5];
+ uint64_t max_oper_temp;
+ uint64_t min_oper_temp;
+ uint64_t est_offset;
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");
+
+#define NVME_TEST_MAX_THREADS 128
+
+struct nvme_io_test {
+
+ enum nvme_nvm_opcode opc;
+ uint32_t size;
+ uint32_t time; /* in seconds */
+ uint32_t num_threads;
+ uint32_t flags;
+ uint64_t io_completed[NVME_TEST_MAX_THREADS];
+};
+
+enum nvme_io_test_flags {
+
+ /*
+ * Specifies whether dev_refthread/dev_relthread should be
+ * called during NVME_BIO_TEST. Ignored for other test
+ * types.
+ */
+ NVME_TEST_FLAG_REFTHREAD = 0x1,
+};
+
+struct nvme_pt_command {
+
+ /*
+ * cmd is used to specify a passthrough command to a controller or
+ * namespace.
+ *
+ * The following fields from cmd may be specified by the caller:
+ * * opc (opcode)
+ * * nsid (namespace id) - for admin commands only
+ * * cdw10-cdw15
+ *
+ * Remaining fields must be set to 0 by the caller.
+ */
+ struct nvme_command cmd;
+
+ /*
+ * cpl returns completion status for the passthrough command
+ * specified by cmd.
+ *
+ * The following fields will be filled out by the driver, for
+ * consumption by the caller:
+ * * cdw0
+ * * status (except for phase)
+ *
+ * Remaining fields will be set to 0 by the driver.
+ */
+ struct nvme_completion cpl;
+
+ /* buf is the data buffer associated with this passthrough command. */
+ void * buf;
+
+ /*
+ * len is the length of the data buffer associated with this
+ * passthrough command.
+ */
+ uint32_t len;
+
+ /*
+ * is_read = 1 if the passthrough command will read data into the
+ * supplied buffer from the controller.
+ *
+ * is_read = 0 if the passthrough command will write data from the
+ * supplied buffer to the controller.
+ */
+ uint32_t is_read;
+
+ /*
+ * driver_lock is used by the driver only. It must be set to 0
+ * by the caller.
+ */
+ struct mtx * driver_lock;
+};
+
+#define nvme_completion_is_error(cpl) \
+ (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
+
+void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
+
+#ifdef _KERNEL
+
+struct bio;
+
+struct nvme_namespace;
+struct nvme_controller;
+struct nvme_consumer;
+
+typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
+
+typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
+typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
+typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
+ uint32_t, void *, uint32_t);
+typedef void (*nvme_cons_fail_fn_t)(void *);
+
+enum nvme_namespace_flags {
+ NVME_NS_DEALLOCATE_SUPPORTED = 0x1,
+ NVME_NS_FLUSH_SUPPORTED = 0x2,
+};
+
+int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
+ struct nvme_pt_command *pt,
+ uint32_t nsid, int is_user_buffer,
+ int is_admin_cmd);
+
+/* Admin functions */
+void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
+ uint8_t log_page, uint32_t nsid,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/* NVM I/O functions */
+int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
+ uint8_t num_ranges, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
+ size_t len);
+
+/* Registration functions */
+struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn,
+ nvme_cons_ctrlr_fn_t ctrlr_fn,
+ nvme_cons_async_fn_t async_fn,
+ nvme_cons_fail_fn_t fail_fn);
+void nvme_unregister_consumer(struct nvme_consumer *consumer);
+
+/* Controller helper functions */
+device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
+const struct nvme_controller_data *
+ nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
+
+/* Namespace helper functions */
+uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
+const char * nvme_ns_get_serial_number(struct nvme_namespace *ns);
+const char * nvme_ns_get_model_number(struct nvme_namespace *ns);
+const struct nvme_namespace_data *
+ nvme_ns_get_data(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns);
+
+int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn);
+
+/*
+ * Command building helper functions -- shared with CAM.
+ * These functions assume the allocator zeroes out the cmd structure;
+ * CAM's xpt_get_ccb and the request allocator for nvme both
+ * do zeroed allocations.
+ */
+static inline
+void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
+{
+
+ cmd->opc = NVME_OPC_FLUSH;
+ cmd->nsid = htole32(nsid);
+}
+
+static inline
+void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ cmd->opc = rwcmd;
+ cmd->nsid = htole32(nsid);
+ cmd->cdw10 = htole32(lba & 0xffffffffu);
+ cmd->cdw11 = htole32(lba >> 32);
+ cmd->cdw12 = htole32(count-1);
+}
+
+static inline
+void nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint32_t num_ranges)
+{
+ cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
+ cmd->nsid = htole32(nsid);
+ cmd->cdw10 = htole32(num_ranges - 1);
+ cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE);
+}
+
+extern int nvme_use_nvd;
+
+#endif /* _KERNEL */
+
+/* Endianness conversion functions for NVMe structs */
+static inline
+void nvme_completion_swapbytes(struct nvme_completion *s)
+{
+
+ s->cdw0 = le32toh(s->cdw0);
+ /* omit rsvd1 */
+ s->sqhd = le16toh(s->sqhd);
+ s->sqid = le16toh(s->sqid);
+ /* omit cid */
+ s->status = le16toh(s->status);
+}
+
+static inline
+void nvme_power_state_swapbytes(struct nvme_power_state *s)
+{
+
+ s->mp = le16toh(s->mp);
+ s->enlat = le32toh(s->enlat);
+ s->exlat = le32toh(s->exlat);
+ s->idlp = le16toh(s->idlp);
+ s->actp = le16toh(s->actp);
+}
+
+static inline
+void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
+{
+ int i;
+
+ s->vid = le16toh(s->vid);
+ s->ssvid = le16toh(s->ssvid);
+ s->ctrlr_id = le16toh(s->ctrlr_id);
+ s->ver = le32toh(s->ver);
+ s->rtd3r = le32toh(s->rtd3r);
+ s->rtd3e = le32toh(s->rtd3e);
+ s->oaes = le32toh(s->oaes);
+ s->ctratt = le32toh(s->ctratt);
+ s->oacs = le16toh(s->oacs);
+ s->wctemp = le16toh(s->wctemp);
+ s->cctemp = le16toh(s->cctemp);
+ s->mtfa = le16toh(s->mtfa);
+ s->hmpre = le32toh(s->hmpre);
+ s->hmmin = le32toh(s->hmmin);
+ s->rpmbs = le32toh(s->rpmbs);
+ s->edstt = le16toh(s->edstt);
+ s->kas = le16toh(s->kas);
+ s->hctma = le16toh(s->hctma);
+ s->mntmt = le16toh(s->mntmt);
+ s->mxtmt = le16toh(s->mxtmt);
+ s->sanicap = le32toh(s->sanicap);
+ s->maxcmd = le16toh(s->maxcmd);
+ s->nn = le32toh(s->nn);
+ s->oncs = le16toh(s->oncs);
+ s->fuses = le16toh(s->fuses);
+ s->awun = le16toh(s->awun);
+ s->awupf = le16toh(s->awupf);
+ s->acwu = le16toh(s->acwu);
+ s->sgls = le32toh(s->sgls);
+ for (i = 0; i < 32; i++)
+ nvme_power_state_swapbytes(&s->power_state[i]);
+}
+
+static inline
+void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s)
+{
+ int i;
+
+ s->nsze = le64toh(s->nsze);
+ s->ncap = le64toh(s->ncap);
+ s->nuse = le64toh(s->nuse);
+ s->nawun = le16toh(s->nawun);
+ s->nawupf = le16toh(s->nawupf);
+ s->nacwu = le16toh(s->nacwu);
+ s->nabsn = le16toh(s->nabsn);
+ s->nabo = le16toh(s->nabo);
+ s->nabspf = le16toh(s->nabspf);
+ s->noiob = le16toh(s->noiob);
+ for (i = 0; i < 16; i++)
+ s->lbaf[i] = le32toh(s->lbaf[i]);
+}
+
+static inline
+void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s)
+{
+
+ s->error_count = le64toh(s->error_count);
+ s->sqid = le16toh(s->sqid);
+ s->cid = le16toh(s->cid);
+ s->status = le16toh(s->status);
+ s->error_location = le16toh(s->error_location);
+ s->lba = le64toh(s->lba);
+ s->nsid = le32toh(s->nsid);
+}
+
+static inline
+void nvme_le128toh(void *p)
+{
+ /*
+ * Upstream, this uses the following comparison:
+ * #if _BYTE_ORDER != _LITTLE_ENDIAN
+ *
+ * Rather than keep this file in compat with only that little bit
+ * changed, we'll just float a little patch here for now.
+ */
+#ifndef _LITTLE_ENDIAN
+ /* Swap 16 bytes in place */
+ char *tmp = (char*)p;
+ char b;
+ int i;
+ for (i = 0; i < 8; i++) {
+ b = tmp[i];
+ tmp[i] = tmp[15-i];
+ tmp[15-i] = b;
+ }
+#else
+ (void)p;
+#endif
+}
+
+static inline
+void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s)
+{
+ int i;
+
+ s->temperature = le16toh(s->temperature);
+ nvme_le128toh((void *)s->data_units_read);
+ nvme_le128toh((void *)s->data_units_written);
+ nvme_le128toh((void *)s->host_read_commands);
+ nvme_le128toh((void *)s->host_write_commands);
+ nvme_le128toh((void *)s->controller_busy_time);
+ nvme_le128toh((void *)s->power_cycles);
+ nvme_le128toh((void *)s->power_on_hours);
+ nvme_le128toh((void *)s->unsafe_shutdowns);
+ nvme_le128toh((void *)s->media_errors);
+ nvme_le128toh((void *)s->num_error_info_log_entries);
+ s->warning_temp_time = le32toh(s->warning_temp_time);
+ s->error_temp_time = le32toh(s->error_temp_time);
+ for (i = 0; i < 8; i++)
+ s->temp_sensor[i] = le16toh(s->temp_sensor[i]);
+}
+
+
+static inline
+void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ s->revision[i] = le64toh(s->revision[i]);
+}
+
+static inline
+void nvme_ns_list_swapbytes(struct nvme_ns_list *s)
+{
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ s->ns[i] = le32toh(s->ns[i]);
+}
+
+static inline
+void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s)
+{
+
+ s->current = le64toh(s->current);
+ s->overtemp_flag_last = le64toh(s->overtemp_flag_last);
+ s->overtemp_flag_life = le64toh(s->overtemp_flag_life);
+ s->max_temp = le64toh(s->max_temp);
+ s->min_temp = le64toh(s->min_temp);
+ /* omit _rsvd[] */
+ s->max_oper_temp = le64toh(s->max_oper_temp);
+ s->min_oper_temp = le64toh(s->min_oper_temp);
+ s->est_offset = le64toh(s->est_offset);
+}
+
+#endif /* __NVME_H__ */
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index a8c772c7f8..0ad066e6d4 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -51,6 +51,7 @@ SRCS = acpi.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
+ pci_nvme.c \
pci_passthru.c \
pci_uart.c \
pci_virtio_block.c \
@@ -76,8 +77,16 @@ SRCS = acpi.c \
vmm_instruction_emul.c \
xmsr.c \
spinup_ap.c \
+ iov.c \
bhyve_sol_glue.c
+# The virtio-scsi driver appears to include a slew of materials from FreeBSD's
+# native SCSI implementation. We will omit that complexity for now.
+ #ctl_util.c \
+ #ctl_scsi_all.c \
+ #pci_virtio_scsi.c \
+
+
OBJS = $(SRCS:.c=.o)
CLOBBERFILES = $(ROOTUSRSBINPROG) $(ZHYVE)
@@ -109,6 +118,13 @@ CPPFLAGS = -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
# Disable the crypto code until it is wired up
CPPFLAGS += -DNO_OPENSSL
+pci_nvme.o := CERRWARN += -_gcc=-Wno-pointer-sign
+
+# Force c99 for everything
+CSTD= $(CSTD_GNU99)
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
$(PROG) := LDLIBS += -lsocket -lnsl -ldlpi -ldladm -lmd -luuid -lvmmapi -lz
$(ZHYVE_PROG) := LDLIBS += -lnvpair
$(MEVENT_TEST_PROG) := LDLIBS += -lsocket
diff --git a/usr/src/cmd/bhyve/acpi.c b/usr/src/cmd/bhyve/acpi.c
index 518ff34d69..309ba98a11 100644
--- a/usr/src/cmd/bhyve/acpi.c
+++ b/usr/src/cmd/bhyve/acpi.c
@@ -118,18 +118,14 @@ struct basl_fio {
};
#define EFPRINTF(...) \
- err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit;
+ if (fprintf(__VA_ARGS__) < 0) goto err_exit;
#define EFFLUSH(x) \
- err = fflush(x); if (err != 0) goto err_exit;
+ if (fflush(x) != 0) goto err_exit;
static int
basl_fwrite_rsdp(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve RSDP template\n");
EFPRINTF(fp, " */\n");
@@ -156,10 +152,6 @@ err_exit:
static int
basl_fwrite_rsdt(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve RSDT template\n");
EFPRINTF(fp, " */\n");
@@ -196,10 +188,6 @@ err_exit:
static int
basl_fwrite_xsdt(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve XSDT template\n");
EFPRINTF(fp, " */\n");
@@ -236,11 +224,8 @@ err_exit:
static int
basl_fwrite_madt(FILE *fp)
{
- int err;
int i;
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve MADT template\n");
EFPRINTF(fp, " */\n");
@@ -326,10 +311,6 @@ err_exit:
static int
basl_fwrite_fadt(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve FADT template\n");
EFPRINTF(fp, " */\n");
@@ -547,10 +528,6 @@ err_exit:
static int
basl_fwrite_hpet(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve HPET template\n");
EFPRINTF(fp, " */\n");
@@ -596,8 +573,6 @@ err_exit:
static int
basl_fwrite_mcfg(FILE *fp)
{
- int err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve MCFG template\n");
EFPRINTF(fp, " */\n");
@@ -629,10 +604,6 @@ err_exit:
static int
basl_fwrite_facs(FILE *fp)
{
- int err;
-
- err = 0;
-
EFPRINTF(fp, "/*\n");
EFPRINTF(fp, " * bhyve FACS template\n");
EFPRINTF(fp, " */\n");
@@ -666,7 +637,6 @@ void
dsdt_line(const char *fmt, ...)
{
va_list ap;
- int err = 0;
if (dsdt_error != 0)
return;
@@ -675,8 +645,10 @@ dsdt_line(const char *fmt, ...)
if (dsdt_indent_level != 0)
EFPRINTF(dsdt_fp, "%*c", dsdt_indent_level * 2, ' ');
va_start(ap, fmt);
- if (vfprintf(dsdt_fp, fmt, ap) < 0)
+ if (vfprintf(dsdt_fp, fmt, ap) < 0) {
+ va_end(ap);
goto err_exit;
+ }
va_end(ap);
}
EFPRINTF(dsdt_fp, "\n");
@@ -735,9 +707,6 @@ dsdt_fixed_mem32(uint32_t base, uint32_t length)
static int
basl_fwrite_dsdt(FILE *fp)
{
- int err;
-
- err = 0;
dsdt_fp = fp;
dsdt_error = 0;
dsdt_indent_level = 0;
@@ -916,7 +885,7 @@ basl_make_templates(void)
int len;
err = 0;
-
+
/*
*
*/
diff --git a/usr/src/cmd/bhyve/ahci.h b/usr/src/cmd/bhyve/ahci.h
index 1fd3bff99c..691d4bd438 100644
--- a/usr/src/cmd/bhyve/ahci.h
+++ b/usr/src/cmd/bhyve/ahci.h
@@ -33,292 +33,292 @@
#define _AHCI_H_
/* ATA register defines */
-#define ATA_DATA 0 /* (RW) data */
-
-#define ATA_FEATURE 1 /* (W) feature */
-#define ATA_F_DMA 0x01 /* enable DMA */
-#define ATA_F_OVL 0x02 /* enable overlap */
-
-#define ATA_COUNT 2 /* (W) sector count */
-
-#define ATA_SECTOR 3 /* (RW) sector # */
-#define ATA_CYL_LSB 4 /* (RW) cylinder# LSB */
-#define ATA_CYL_MSB 5 /* (RW) cylinder# MSB */
-#define ATA_DRIVE 6 /* (W) Sector/Drive/Head */
-#define ATA_D_LBA 0x40 /* use LBA addressing */
-#define ATA_D_IBM 0xa0 /* 512 byte sectors, ECC */
-
-#define ATA_COMMAND 7 /* (W) command */
-
-#define ATA_ERROR 8 /* (R) error */
-#define ATA_E_ILI 0x01 /* illegal length */
-#define ATA_E_NM 0x02 /* no media */
-#define ATA_E_ABORT 0x04 /* command aborted */
-#define ATA_E_MCR 0x08 /* media change request */
-#define ATA_E_IDNF 0x10 /* ID not found */
-#define ATA_E_MC 0x20 /* media changed */
-#define ATA_E_UNC 0x40 /* uncorrectable data */
-#define ATA_E_ICRC 0x80 /* UDMA crc error */
-#define ATA_E_ATAPI_SENSE_MASK 0xf0 /* ATAPI sense key mask */
-
-#define ATA_IREASON 9 /* (R) interrupt reason */
-#define ATA_I_CMD 0x01 /* cmd (1) | data (0) */
-#define ATA_I_IN 0x02 /* read (1) | write (0) */
-#define ATA_I_RELEASE 0x04 /* released bus (1) */
-#define ATA_I_TAGMASK 0xf8 /* tag mask */
-
-#define ATA_STATUS 10 /* (R) status */
-#define ATA_ALTSTAT 11 /* (R) alternate status */
-#define ATA_S_ERROR 0x01 /* error */
-#define ATA_S_INDEX 0x02 /* index */
-#define ATA_S_CORR 0x04 /* data corrected */
-#define ATA_S_DRQ 0x08 /* data request */
-#define ATA_S_DSC 0x10 /* drive seek completed */
-#define ATA_S_SERVICE 0x10 /* drive needs service */
-#define ATA_S_DWF 0x20 /* drive write fault */
-#define ATA_S_DMA 0x20 /* DMA ready */
-#define ATA_S_READY 0x40 /* drive ready */
-#define ATA_S_BUSY 0x80 /* busy */
-
-#define ATA_CONTROL 12 /* (W) control */
-#define ATA_A_IDS 0x02 /* disable interrupts */
-#define ATA_A_RESET 0x04 /* RESET controller */
-#define ATA_A_4BIT 0x08 /* 4 head bits */
-#define ATA_A_HOB 0x80 /* High Order Byte enable */
+#define ATA_DATA 0 /* (RW) data */
+
+#define ATA_FEATURE 1 /* (W) feature */
+#define ATA_F_DMA 0x01 /* enable DMA */
+#define ATA_F_OVL 0x02 /* enable overlap */
+
+#define ATA_COUNT 2 /* (W) sector count */
+
+#define ATA_SECTOR 3 /* (RW) sector # */
+#define ATA_CYL_LSB 4 /* (RW) cylinder# LSB */
+#define ATA_CYL_MSB 5 /* (RW) cylinder# MSB */
+#define ATA_DRIVE 6 /* (W) Sector/Drive/Head */
+#define ATA_D_LBA 0x40 /* use LBA addressing */
+#define ATA_D_IBM 0xa0 /* 512 byte sectors, ECC */
+
+#define ATA_COMMAND 7 /* (W) command */
+
+#define ATA_ERROR 8 /* (R) error */
+#define ATA_E_ILI 0x01 /* illegal length */
+#define ATA_E_NM 0x02 /* no media */
+#define ATA_E_ABORT 0x04 /* command aborted */
+#define ATA_E_MCR 0x08 /* media change request */
+#define ATA_E_IDNF 0x10 /* ID not found */
+#define ATA_E_MC 0x20 /* media changed */
+#define ATA_E_UNC 0x40 /* uncorrectable data */
+#define ATA_E_ICRC 0x80 /* UDMA crc error */
+#define ATA_E_ATAPI_SENSE_MASK 0xf0 /* ATAPI sense key mask */
+
+#define ATA_IREASON 9 /* (R) interrupt reason */
+#define ATA_I_CMD 0x01 /* cmd (1) | data (0) */
+#define ATA_I_IN 0x02 /* read (1) | write (0) */
+#define ATA_I_RELEASE 0x04 /* released bus (1) */
+#define ATA_I_TAGMASK 0xf8 /* tag mask */
+
+#define ATA_STATUS 10 /* (R) status */
+#define ATA_ALTSTAT 11 /* (R) alternate status */
+#define ATA_S_ERROR 0x01 /* error */
+#define ATA_S_INDEX 0x02 /* index */
+#define ATA_S_CORR 0x04 /* data corrected */
+#define ATA_S_DRQ 0x08 /* data request */
+#define ATA_S_DSC 0x10 /* drive seek completed */
+#define ATA_S_SERVICE 0x10 /* drive needs service */
+#define ATA_S_DWF 0x20 /* drive write fault */
+#define ATA_S_DMA 0x20 /* DMA ready */
+#define ATA_S_READY 0x40 /* drive ready */
+#define ATA_S_BUSY 0x80 /* busy */
+
+#define ATA_CONTROL 12 /* (W) control */
+#define ATA_A_IDS 0x02 /* disable interrupts */
+#define ATA_A_RESET 0x04 /* RESET controller */
+#define ATA_A_4BIT 0x08 /* 4 head bits */
+#define ATA_A_HOB 0x80 /* High Order Byte enable */
/* SATA register defines */
-#define ATA_SSTATUS 13
-#define ATA_SS_DET_MASK 0x0000000f
-#define ATA_SS_DET_NO_DEVICE 0x00000000
-#define ATA_SS_DET_DEV_PRESENT 0x00000001
-#define ATA_SS_DET_PHY_ONLINE 0x00000003
-#define ATA_SS_DET_PHY_OFFLINE 0x00000004
-
-#define ATA_SS_SPD_MASK 0x000000f0
-#define ATA_SS_SPD_NO_SPEED 0x00000000
-#define ATA_SS_SPD_GEN1 0x00000010
-#define ATA_SS_SPD_GEN2 0x00000020
-#define ATA_SS_SPD_GEN3 0x00000030
-
-#define ATA_SS_IPM_MASK 0x00000f00
-#define ATA_SS_IPM_NO_DEVICE 0x00000000
-#define ATA_SS_IPM_ACTIVE 0x00000100
-#define ATA_SS_IPM_PARTIAL 0x00000200
-#define ATA_SS_IPM_SLUMBER 0x00000600
-#define ATA_SS_IPM_DEVSLEEP 0x00000800
-
-#define ATA_SERROR 14
-#define ATA_SE_DATA_CORRECTED 0x00000001
-#define ATA_SE_COMM_CORRECTED 0x00000002
-#define ATA_SE_DATA_ERR 0x00000100
-#define ATA_SE_COMM_ERR 0x00000200
-#define ATA_SE_PROT_ERR 0x00000400
-#define ATA_SE_HOST_ERR 0x00000800
-#define ATA_SE_PHY_CHANGED 0x00010000
-#define ATA_SE_PHY_IERROR 0x00020000
-#define ATA_SE_COMM_WAKE 0x00040000
-#define ATA_SE_DECODE_ERR 0x00080000
-#define ATA_SE_PARITY_ERR 0x00100000
-#define ATA_SE_CRC_ERR 0x00200000
-#define ATA_SE_HANDSHAKE_ERR 0x00400000
-#define ATA_SE_LINKSEQ_ERR 0x00800000
-#define ATA_SE_TRANSPORT_ERR 0x01000000
-#define ATA_SE_UNKNOWN_FIS 0x02000000
-#define ATA_SE_EXCHANGED 0x04000000
-
-#define ATA_SCONTROL 15
-#define ATA_SC_DET_MASK 0x0000000f
-#define ATA_SC_DET_IDLE 0x00000000
-#define ATA_SC_DET_RESET 0x00000001
-#define ATA_SC_DET_DISABLE 0x00000004
-
-#define ATA_SC_SPD_MASK 0x000000f0
-#define ATA_SC_SPD_NO_SPEED 0x00000000
-#define ATA_SC_SPD_SPEED_GEN1 0x00000010
-#define ATA_SC_SPD_SPEED_GEN2 0x00000020
-#define ATA_SC_SPD_SPEED_GEN3 0x00000030
-
-#define ATA_SC_IPM_MASK 0x00000f00
-#define ATA_SC_IPM_NONE 0x00000000
-#define ATA_SC_IPM_DIS_PARTIAL 0x00000100
-#define ATA_SC_IPM_DIS_SLUMBER 0x00000200
-#define ATA_SC_IPM_DIS_DEVSLEEP 0x00000400
-
-#define ATA_SACTIVE 16
-
-#define AHCI_MAX_PORTS 32
-#define AHCI_MAX_SLOTS 32
-#define AHCI_MAX_IRQS 16
+#define ATA_SSTATUS 13
+#define ATA_SS_DET_MASK 0x0000000f
+#define ATA_SS_DET_NO_DEVICE 0x00000000
+#define ATA_SS_DET_DEV_PRESENT 0x00000001
+#define ATA_SS_DET_PHY_ONLINE 0x00000003
+#define ATA_SS_DET_PHY_OFFLINE 0x00000004
+
+#define ATA_SS_SPD_MASK 0x000000f0
+#define ATA_SS_SPD_NO_SPEED 0x00000000
+#define ATA_SS_SPD_GEN1 0x00000010
+#define ATA_SS_SPD_GEN2 0x00000020
+#define ATA_SS_SPD_GEN3 0x00000030
+
+#define ATA_SS_IPM_MASK 0x00000f00
+#define ATA_SS_IPM_NO_DEVICE 0x00000000
+#define ATA_SS_IPM_ACTIVE 0x00000100
+#define ATA_SS_IPM_PARTIAL 0x00000200
+#define ATA_SS_IPM_SLUMBER 0x00000600
+#define ATA_SS_IPM_DEVSLEEP 0x00000800
+
+#define ATA_SERROR 14
+#define ATA_SE_DATA_CORRECTED 0x00000001
+#define ATA_SE_COMM_CORRECTED 0x00000002
+#define ATA_SE_DATA_ERR 0x00000100
+#define ATA_SE_COMM_ERR 0x00000200
+#define ATA_SE_PROT_ERR 0x00000400
+#define ATA_SE_HOST_ERR 0x00000800
+#define ATA_SE_PHY_CHANGED 0x00010000
+#define ATA_SE_PHY_IERROR 0x00020000
+#define ATA_SE_COMM_WAKE 0x00040000
+#define ATA_SE_DECODE_ERR 0x00080000
+#define ATA_SE_PARITY_ERR 0x00100000
+#define ATA_SE_CRC_ERR 0x00200000
+#define ATA_SE_HANDSHAKE_ERR 0x00400000
+#define ATA_SE_LINKSEQ_ERR 0x00800000
+#define ATA_SE_TRANSPORT_ERR 0x01000000
+#define ATA_SE_UNKNOWN_FIS 0x02000000
+#define ATA_SE_EXCHANGED 0x04000000
+
+#define ATA_SCONTROL 15
+#define ATA_SC_DET_MASK 0x0000000f
+#define ATA_SC_DET_IDLE 0x00000000
+#define ATA_SC_DET_RESET 0x00000001
+#define ATA_SC_DET_DISABLE 0x00000004
+
+#define ATA_SC_SPD_MASK 0x000000f0
+#define ATA_SC_SPD_NO_SPEED 0x00000000
+#define ATA_SC_SPD_SPEED_GEN1 0x00000010
+#define ATA_SC_SPD_SPEED_GEN2 0x00000020
+#define ATA_SC_SPD_SPEED_GEN3 0x00000030
+
+#define ATA_SC_IPM_MASK 0x00000f00
+#define ATA_SC_IPM_NONE 0x00000000
+#define ATA_SC_IPM_DIS_PARTIAL 0x00000100
+#define ATA_SC_IPM_DIS_SLUMBER 0x00000200
+#define ATA_SC_IPM_DIS_DEVSLEEP 0x00000400
+
+#define ATA_SACTIVE 16
+
+#define AHCI_MAX_PORTS 32
+#define AHCI_MAX_SLOTS 32
+#define AHCI_MAX_IRQS 16
/* SATA AHCI v1.0 register defines */
-#define AHCI_CAP 0x00
-#define AHCI_CAP_NPMASK 0x0000001f
-#define AHCI_CAP_SXS 0x00000020
-#define AHCI_CAP_EMS 0x00000040
-#define AHCI_CAP_CCCS 0x00000080
-#define AHCI_CAP_NCS 0x00001F00
-#define AHCI_CAP_NCS_SHIFT 8
-#define AHCI_CAP_PSC 0x00002000
-#define AHCI_CAP_SSC 0x00004000
-#define AHCI_CAP_PMD 0x00008000
-#define AHCI_CAP_FBSS 0x00010000
-#define AHCI_CAP_SPM 0x00020000
-#define AHCI_CAP_SAM 0x00080000
-#define AHCI_CAP_ISS 0x00F00000
-#define AHCI_CAP_ISS_SHIFT 20
-#define AHCI_CAP_SCLO 0x01000000
-#define AHCI_CAP_SAL 0x02000000
-#define AHCI_CAP_SALP 0x04000000
-#define AHCI_CAP_SSS 0x08000000
-#define AHCI_CAP_SMPS 0x10000000
-#define AHCI_CAP_SSNTF 0x20000000
-#define AHCI_CAP_SNCQ 0x40000000
-#define AHCI_CAP_64BIT 0x80000000
-
-#define AHCI_GHC 0x04
-#define AHCI_GHC_AE 0x80000000
-#define AHCI_GHC_MRSM 0x00000004
-#define AHCI_GHC_IE 0x00000002
-#define AHCI_GHC_HR 0x00000001
-
-#define AHCI_IS 0x08
-#define AHCI_PI 0x0c
-#define AHCI_VS 0x10
-
-#define AHCI_CCCC 0x14
-#define AHCI_CCCC_TV_MASK 0xffff0000
-#define AHCI_CCCC_TV_SHIFT 16
-#define AHCI_CCCC_CC_MASK 0x0000ff00
-#define AHCI_CCCC_CC_SHIFT 8
-#define AHCI_CCCC_INT_MASK 0x000000f8
-#define AHCI_CCCC_INT_SHIFT 3
-#define AHCI_CCCC_EN 0x00000001
-#define AHCI_CCCP 0x18
-
-#define AHCI_EM_LOC 0x1C
-#define AHCI_EM_CTL 0x20
-#define AHCI_EM_MR 0x00000001
-#define AHCI_EM_TM 0x00000100
-#define AHCI_EM_RST 0x00000200
-#define AHCI_EM_LED 0x00010000
-#define AHCI_EM_SAFTE 0x00020000
-#define AHCI_EM_SES2 0x00040000
-#define AHCI_EM_SGPIO 0x00080000
-#define AHCI_EM_SMB 0x01000000
-#define AHCI_EM_XMT 0x02000000
-#define AHCI_EM_ALHD 0x04000000
-#define AHCI_EM_PM 0x08000000
-
-#define AHCI_CAP2 0x24
-#define AHCI_CAP2_BOH 0x00000001
-#define AHCI_CAP2_NVMP 0x00000002
-#define AHCI_CAP2_APST 0x00000004
-#define AHCI_CAP2_SDS 0x00000008
-#define AHCI_CAP2_SADM 0x00000010
-#define AHCI_CAP2_DESO 0x00000020
-
-#define AHCI_OFFSET 0x100
-#define AHCI_STEP 0x80
-
-#define AHCI_P_CLB 0x00
-#define AHCI_P_CLBU 0x04
-#define AHCI_P_FB 0x08
-#define AHCI_P_FBU 0x0c
-#define AHCI_P_IS 0x10
-#define AHCI_P_IE 0x14
-#define AHCI_P_IX_DHR 0x00000001
-#define AHCI_P_IX_PS 0x00000002
-#define AHCI_P_IX_DS 0x00000004
-#define AHCI_P_IX_SDB 0x00000008
-#define AHCI_P_IX_UF 0x00000010
-#define AHCI_P_IX_DP 0x00000020
-#define AHCI_P_IX_PC 0x00000040
-#define AHCI_P_IX_MP 0x00000080
-
-#define AHCI_P_IX_PRC 0x00400000
-#define AHCI_P_IX_IPM 0x00800000
-#define AHCI_P_IX_OF 0x01000000
-#define AHCI_P_IX_INF 0x04000000
-#define AHCI_P_IX_IF 0x08000000
-#define AHCI_P_IX_HBD 0x10000000
-#define AHCI_P_IX_HBF 0x20000000
-#define AHCI_P_IX_TFE 0x40000000
-#define AHCI_P_IX_CPD 0x80000000
-
-#define AHCI_P_CMD 0x18
-#define AHCI_P_CMD_ST 0x00000001
-#define AHCI_P_CMD_SUD 0x00000002
-#define AHCI_P_CMD_POD 0x00000004
-#define AHCI_P_CMD_CLO 0x00000008
-#define AHCI_P_CMD_FRE 0x00000010
-#define AHCI_P_CMD_CCS_MASK 0x00001f00
-#define AHCI_P_CMD_CCS_SHIFT 8
-#define AHCI_P_CMD_ISS 0x00002000
-#define AHCI_P_CMD_FR 0x00004000
-#define AHCI_P_CMD_CR 0x00008000
-#define AHCI_P_CMD_CPS 0x00010000
-#define AHCI_P_CMD_PMA 0x00020000
-#define AHCI_P_CMD_HPCP 0x00040000
-#define AHCI_P_CMD_MPSP 0x00080000
-#define AHCI_P_CMD_CPD 0x00100000
-#define AHCI_P_CMD_ESP 0x00200000
-#define AHCI_P_CMD_FBSCP 0x00400000
-#define AHCI_P_CMD_APSTE 0x00800000
-#define AHCI_P_CMD_ATAPI 0x01000000
-#define AHCI_P_CMD_DLAE 0x02000000
-#define AHCI_P_CMD_ALPE 0x04000000
-#define AHCI_P_CMD_ASP 0x08000000
-#define AHCI_P_CMD_ICC_MASK 0xf0000000
-#define AHCI_P_CMD_NOOP 0x00000000
-#define AHCI_P_CMD_ACTIVE 0x10000000
-#define AHCI_P_CMD_PARTIAL 0x20000000
-#define AHCI_P_CMD_SLUMBER 0x60000000
-#define AHCI_P_CMD_DEVSLEEP 0x80000000
-
-#define AHCI_P_TFD 0x20
-#define AHCI_P_SIG 0x24
-#define AHCI_P_SSTS 0x28
-#define AHCI_P_SCTL 0x2c
-#define AHCI_P_SERR 0x30
-#define AHCI_P_SACT 0x34
-#define AHCI_P_CI 0x38
-#define AHCI_P_SNTF 0x3C
-#define AHCI_P_FBS 0x40
-#define AHCI_P_FBS_EN 0x00000001
-#define AHCI_P_FBS_DEC 0x00000002
-#define AHCI_P_FBS_SDE 0x00000004
-#define AHCI_P_FBS_DEV 0x00000f00
-#define AHCI_P_FBS_DEV_SHIFT 8
-#define AHCI_P_FBS_ADO 0x0000f000
-#define AHCI_P_FBS_ADO_SHIFT 12
-#define AHCI_P_FBS_DWE 0x000f0000
-#define AHCI_P_FBS_DWE_SHIFT 16
-#define AHCI_P_DEVSLP 0x44
-#define AHCI_P_DEVSLP_ADSE 0x00000001
-#define AHCI_P_DEVSLP_DSP 0x00000002
-#define AHCI_P_DEVSLP_DETO 0x000003fc
-#define AHCI_P_DEVSLP_DETO_SHIFT 2
-#define AHCI_P_DEVSLP_MDAT 0x00007c00
-#define AHCI_P_DEVSLP_MDAT_SHIFT 10
-#define AHCI_P_DEVSLP_DITO 0x01ff8000
-#define AHCI_P_DEVSLP_DITO_SHIFT 15
-#define AHCI_P_DEVSLP_DM 0x0e000000
-#define AHCI_P_DEVSLP_DM_SHIFT 25
+#define AHCI_CAP 0x00
+#define AHCI_CAP_NPMASK 0x0000001f
+#define AHCI_CAP_SXS 0x00000020
+#define AHCI_CAP_EMS 0x00000040
+#define AHCI_CAP_CCCS 0x00000080
+#define AHCI_CAP_NCS 0x00001F00
+#define AHCI_CAP_NCS_SHIFT 8
+#define AHCI_CAP_PSC 0x00002000
+#define AHCI_CAP_SSC 0x00004000
+#define AHCI_CAP_PMD 0x00008000
+#define AHCI_CAP_FBSS 0x00010000
+#define AHCI_CAP_SPM 0x00020000
+#define AHCI_CAP_SAM 0x00080000
+#define AHCI_CAP_ISS 0x00F00000
+#define AHCI_CAP_ISS_SHIFT 20
+#define AHCI_CAP_SCLO 0x01000000
+#define AHCI_CAP_SAL 0x02000000
+#define AHCI_CAP_SALP 0x04000000
+#define AHCI_CAP_SSS 0x08000000
+#define AHCI_CAP_SMPS 0x10000000
+#define AHCI_CAP_SSNTF 0x20000000
+#define AHCI_CAP_SNCQ 0x40000000
+#define AHCI_CAP_64BIT 0x80000000
+
+#define AHCI_GHC 0x04
+#define AHCI_GHC_AE 0x80000000
+#define AHCI_GHC_MRSM 0x00000004
+#define AHCI_GHC_IE 0x00000002
+#define AHCI_GHC_HR 0x00000001
+
+#define AHCI_IS 0x08
+#define AHCI_PI 0x0c
+#define AHCI_VS 0x10
+
+#define AHCI_CCCC 0x14
+#define AHCI_CCCC_TV_MASK 0xffff0000
+#define AHCI_CCCC_TV_SHIFT 16
+#define AHCI_CCCC_CC_MASK 0x0000ff00
+#define AHCI_CCCC_CC_SHIFT 8
+#define AHCI_CCCC_INT_MASK 0x000000f8
+#define AHCI_CCCC_INT_SHIFT 3
+#define AHCI_CCCC_EN 0x00000001
+#define AHCI_CCCP 0x18
+
+#define AHCI_EM_LOC 0x1C
+#define AHCI_EM_CTL 0x20
+#define AHCI_EM_MR 0x00000001
+#define AHCI_EM_TM 0x00000100
+#define AHCI_EM_RST 0x00000200
+#define AHCI_EM_LED 0x00010000
+#define AHCI_EM_SAFTE 0x00020000
+#define AHCI_EM_SES2 0x00040000
+#define AHCI_EM_SGPIO 0x00080000
+#define AHCI_EM_SMB 0x01000000
+#define AHCI_EM_XMT 0x02000000
+#define AHCI_EM_ALHD 0x04000000
+#define AHCI_EM_PM 0x08000000
+
+#define AHCI_CAP2 0x24
+#define AHCI_CAP2_BOH 0x00000001
+#define AHCI_CAP2_NVMP 0x00000002
+#define AHCI_CAP2_APST 0x00000004
+#define AHCI_CAP2_SDS 0x00000008
+#define AHCI_CAP2_SADM 0x00000010
+#define AHCI_CAP2_DESO 0x00000020
+
+#define AHCI_OFFSET 0x100
+#define AHCI_STEP 0x80
+
+#define AHCI_P_CLB 0x00
+#define AHCI_P_CLBU 0x04
+#define AHCI_P_FB 0x08
+#define AHCI_P_FBU 0x0c
+#define AHCI_P_IS 0x10
+#define AHCI_P_IE 0x14
+#define AHCI_P_IX_DHR 0x00000001
+#define AHCI_P_IX_PS 0x00000002
+#define AHCI_P_IX_DS 0x00000004
+#define AHCI_P_IX_SDB 0x00000008
+#define AHCI_P_IX_UF 0x00000010
+#define AHCI_P_IX_DP 0x00000020
+#define AHCI_P_IX_PC 0x00000040
+#define AHCI_P_IX_MP 0x00000080
+
+#define AHCI_P_IX_PRC 0x00400000
+#define AHCI_P_IX_IPM 0x00800000
+#define AHCI_P_IX_OF 0x01000000
+#define AHCI_P_IX_INF 0x04000000
+#define AHCI_P_IX_IF 0x08000000
+#define AHCI_P_IX_HBD 0x10000000
+#define AHCI_P_IX_HBF 0x20000000
+#define AHCI_P_IX_TFE 0x40000000
+#define AHCI_P_IX_CPD 0x80000000
+
+#define AHCI_P_CMD 0x18
+#define AHCI_P_CMD_ST 0x00000001
+#define AHCI_P_CMD_SUD 0x00000002
+#define AHCI_P_CMD_POD 0x00000004
+#define AHCI_P_CMD_CLO 0x00000008
+#define AHCI_P_CMD_FRE 0x00000010
+#define AHCI_P_CMD_CCS_MASK 0x00001f00
+#define AHCI_P_CMD_CCS_SHIFT 8
+#define AHCI_P_CMD_ISS 0x00002000
+#define AHCI_P_CMD_FR 0x00004000
+#define AHCI_P_CMD_CR 0x00008000
+#define AHCI_P_CMD_CPS 0x00010000
+#define AHCI_P_CMD_PMA 0x00020000
+#define AHCI_P_CMD_HPCP 0x00040000
+#define AHCI_P_CMD_MPSP 0x00080000
+#define AHCI_P_CMD_CPD 0x00100000
+#define AHCI_P_CMD_ESP 0x00200000
+#define AHCI_P_CMD_FBSCP 0x00400000
+#define AHCI_P_CMD_APSTE 0x00800000
+#define AHCI_P_CMD_ATAPI 0x01000000
+#define AHCI_P_CMD_DLAE 0x02000000
+#define AHCI_P_CMD_ALPE 0x04000000
+#define AHCI_P_CMD_ASP 0x08000000
+#define AHCI_P_CMD_ICC_MASK 0xf0000000
+#define AHCI_P_CMD_NOOP 0x00000000
+#define AHCI_P_CMD_ACTIVE 0x10000000
+#define AHCI_P_CMD_PARTIAL 0x20000000
+#define AHCI_P_CMD_SLUMBER 0x60000000
+#define AHCI_P_CMD_DEVSLEEP 0x80000000
+
+#define AHCI_P_TFD 0x20
+#define AHCI_P_SIG 0x24
+#define AHCI_P_SSTS 0x28
+#define AHCI_P_SCTL 0x2c
+#define AHCI_P_SERR 0x30
+#define AHCI_P_SACT 0x34
+#define AHCI_P_CI 0x38
+#define AHCI_P_SNTF 0x3C
+#define AHCI_P_FBS 0x40
+#define AHCI_P_FBS_EN 0x00000001
+#define AHCI_P_FBS_DEC 0x00000002
+#define AHCI_P_FBS_SDE 0x00000004
+#define AHCI_P_FBS_DEV 0x00000f00
+#define AHCI_P_FBS_DEV_SHIFT 8
+#define AHCI_P_FBS_ADO 0x0000f000
+#define AHCI_P_FBS_ADO_SHIFT 12
+#define AHCI_P_FBS_DWE 0x000f0000
+#define AHCI_P_FBS_DWE_SHIFT 16
+#define AHCI_P_DEVSLP 0x44
+#define AHCI_P_DEVSLP_ADSE 0x00000001
+#define AHCI_P_DEVSLP_DSP 0x00000002
+#define AHCI_P_DEVSLP_DETO 0x000003fc
+#define AHCI_P_DEVSLP_DETO_SHIFT 2
+#define AHCI_P_DEVSLP_MDAT 0x00007c00
+#define AHCI_P_DEVSLP_MDAT_SHIFT 10
+#define AHCI_P_DEVSLP_DITO 0x01ff8000
+#define AHCI_P_DEVSLP_DITO_SHIFT 15
+#define AHCI_P_DEVSLP_DM 0x0e000000
+#define AHCI_P_DEVSLP_DM_SHIFT 25
/* Just to be sure, if building as module. */
#if MAXPHYS < 512 * 1024
#undef MAXPHYS
-#define MAXPHYS 512 * 1024
+#define MAXPHYS 512 * 1024
#endif
/* Pessimistic prognosis on number of required S/G entries */
-#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8))
+#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8))
/* Command list. 32 commands. First, 1Kbyte aligned. */
-#define AHCI_CL_OFFSET 0
-#define AHCI_CL_SIZE 32
+#define AHCI_CL_OFFSET 0
+#define AHCI_CL_SIZE 32
/* Command tables. Up to 32 commands, Each, 128byte aligned. */
-#define AHCI_CT_OFFSET (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS)
-#define AHCI_CT_SIZE (128 + AHCI_SG_ENTRIES * 16)
+#define AHCI_CT_OFFSET (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS)
+#define AHCI_CT_SIZE (128 + AHCI_SG_ENTRIES * 16)
/* Total main work area. */
-#define AHCI_WORK_SIZE (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots)
+#define AHCI_WORK_SIZE (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots)
#endif /* _AHCI_H_ */
diff --git a/usr/src/cmd/bhyve/atkbdc.c b/usr/src/cmd/bhyve/atkbdc.c
index 8e71b0507c..1c1838c2e8 100644
--- a/usr/src/cmd/bhyve/atkbdc.c
+++ b/usr/src/cmd/bhyve/atkbdc.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 Nahanni Systems Inc.
* All rights reserved.
@@ -45,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <pthread_np.h>
#include "acpi.h"
+#include "atkbdc.h"
#include "inout.h"
#include "pci_emul.h"
#include "pci_irq.h"
diff --git a/usr/src/cmd/bhyve/bhyvegc.c b/usr/src/cmd/bhyve/bhyvegc.c
index 11cc2b1fb4..4bd49ded79 100644
--- a/usr/src/cmd/bhyve/bhyvegc.c
+++ b/usr/src/cmd/bhyve/bhyvegc.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/bhyvegc.h b/usr/src/cmd/bhyve/bhyvegc.h
index fa2ab68d9e..11323586df 100644
--- a/usr/src/cmd/bhyve/bhyvegc.h
+++ b/usr/src/cmd/bhyve/bhyvegc.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index b12fba0800..317d640a2c 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -145,14 +145,14 @@ static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
static struct vm_exit vmexit[VM_MAXCPU];
struct bhyvestats {
- uint64_t vmexit_bogus;
+ uint64_t vmexit_bogus;
uint64_t vmexit_reqidle;
- uint64_t vmexit_hlt;
- uint64_t vmexit_pause;
- uint64_t vmexit_mtrap;
- uint64_t vmexit_inst_emul;
- uint64_t cpu_switch_rotate;
- uint64_t cpu_switch_direct;
+ uint64_t vmexit_hlt;
+ uint64_t vmexit_pause;
+ uint64_t vmexit_mtrap;
+ uint64_t vmexit_inst_emul;
+ uint64_t cpu_switch_rotate;
+ uint64_t cpu_switch_direct;
} stats;
struct mt_vmm_info {
@@ -180,7 +180,7 @@ usage(int code)
#endif
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
- " -c: number of cpus and/or topology specification"
+ " -c: number of cpus and/or topology specification\n"
" -C: include guest memory in core file\n"
" -e: exit on unhandled I/O access\n"
" -g: gdb port\n"
@@ -228,6 +228,8 @@ topology_parse(const char *opt)
c = 1, n = 1, s = 1, t = 1;
ns = false, scts = false;
str = strdup(opt);
+ if (str == NULL)
+ goto out;
while ((cp = strsep(&str, ",")) != NULL) {
if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
@@ -253,11 +255,14 @@ topology_parse(const char *opt)
} else if (cp[0] == '\0')
continue;
else
- return (-1);
+ goto out;
/* Any trailing garbage causes an error */
if (cp[chk] != '\0')
- return (-1);
+ goto out;
}
+ free(str);
+ str = NULL;
+
/*
* Range check 1 <= n <= UINT16_MAX all values
*/
@@ -283,6 +288,10 @@ topology_parse(const char *opt)
cores = c;
threads = t;
return(0);
+
+out:
+ free(str);
+ return (-1);
}
#ifndef WITHOUT_CAPSICUM
@@ -462,7 +471,7 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
if (!CPU_ISSET(vcpu, &cpumask)) {
fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
- exit(1);
+ exit(4);
}
CPU_CLR_ATOMIC(vcpu, &cpumask);
@@ -478,7 +487,7 @@ vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
* put guest-driven debug here
*/
#endif
- return (VMEXIT_CONTINUE);
+ return (VMEXIT_CONTINUE);
}
static int
@@ -808,7 +817,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
exitcode);
- exit(1);
+ exit(4);
}
rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
@@ -819,7 +828,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
case VMEXIT_ABORT:
abort();
default:
- exit(1);
+ exit(4);
}
}
fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
@@ -851,7 +860,7 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
if (err < 0) {
fprintf(stderr, "VM exit on HLT not supported\n");
- exit(1);
+ exit(4);
}
vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
if (cpu == BSP)
@@ -866,7 +875,7 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
if (err < 0) {
fprintf(stderr,
"SMP mux requested, no pause support\n");
- exit(1);
+ exit(4);
}
vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
if (cpu == BSP)
@@ -880,7 +889,7 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
if (err) {
fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
- exit(1);
+ exit(4);
}
#ifdef __FreeBSD__
@@ -918,7 +927,7 @@ do_open(const char *vmname)
}
} else {
perror("vm_create");
- exit(1);
+ exit(4);
}
} else {
if (!romboot) {
@@ -927,14 +936,14 @@ do_open(const char *vmname)
* bootrom must be configured to boot it.
*/
fprintf(stderr, "virtual machine cannot be booted\n");
- exit(1);
+ exit(4);
}
}
ctx = vm_open(vmname);
if (ctx == NULL) {
perror("vm_open");
- exit(1);
+ exit(4);
}
#ifndef WITHOUT_CAPSICUM
@@ -956,7 +965,7 @@ do_open(const char *vmname)
error = vm_reinit(ctx);
if (error) {
perror("vm_reinit");
- exit(1);
+ exit(4);
}
}
error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
@@ -1062,14 +1071,20 @@ main(int argc, char *argv[])
gdb_port = atoi(optarg);
break;
case 'l':
- if (lpc_device_parse(optarg) != 0) {
+ if (strncmp(optarg, "help", strlen(optarg)) == 0) {
+ lpc_print_supported_devices();
+ exit(0);
+ } else if (lpc_device_parse(optarg) != 0) {
errx(EX_USAGE, "invalid lpc device "
"configuration '%s'", optarg);
}
break;
case 's':
- if (pci_parse_slot(optarg) != 0)
- exit(1);
+ if (strncmp(optarg, "help", strlen(optarg)) == 0) {
+ pci_print_supported_devices();
+ exit(0);
+ } else if (pci_parse_slot(optarg) != 0)
+ exit(4);
else
break;
case 'S':
@@ -1135,7 +1150,7 @@ main(int argc, char *argv[])
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
guest_ncpus, max_vcpus);
- exit(1);
+ exit(4);
}
fbsdrun_set_capabilities(ctx, BSP);
@@ -1157,13 +1172,13 @@ main(int argc, char *argv[])
#endif
if (err) {
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
- exit(1);
+ exit(4);
}
error = init_msr();
if (error) {
fprintf(stderr, "init_msr error %d", error);
- exit(1);
+ exit(4);
}
init_mem();
@@ -1178,8 +1193,10 @@ main(int argc, char *argv[])
/*
* Exit if a device emulation finds an error in its initilization
*/
- if (init_pci(ctx) != 0)
- exit(1);
+ if (init_pci(ctx) != 0) {
+ perror("device emulation initialization error");
+ exit(4);
+ }
if (dbg_port != 0)
init_dbgport(dbg_port);
@@ -1196,7 +1213,7 @@ main(int argc, char *argv[])
if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
fprintf(stderr, "ROM boot failed: unrestricted guest "
"capability not available\n");
- exit(1);
+ exit(4);
}
error = vcpu_reset(ctx, BSP);
assert(error == 0);
@@ -1210,8 +1227,10 @@ main(int argc, char *argv[])
*/
if (mptgen) {
error = mptable_build(ctx, guest_ncpus);
- if (error)
- exit(1);
+ if (error) {
+ perror("error to build the guest tables");
+ exit(4);
+ }
}
error = smbios_build(ctx);
@@ -1225,21 +1244,21 @@ main(int argc, char *argv[])
if (lpc_bootrom())
fwctl_init();
+ /*
+ * Change the proc title to include the VM name.
+ */
+ setproctitle("%s", vmname);
+
#ifndef WITHOUT_CAPSICUM
caph_cache_catpages();
if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
- if (cap_enter() == -1 && errno != ENOSYS)
+ if (caph_enter() == -1)
errx(EX_OSERR, "cap_enter() failed");
#endif
- /*
- * Change the proc title to include the VM name.
- */
- setproctitle("%s", vmname);
-
#ifndef __FreeBSD__
/*
* If applicable, wait for bhyveconsole
@@ -1269,5 +1288,5 @@ main(int argc, char *argv[])
*/
mevent_dispatch();
- exit(1);
+ exit(4);
}
diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c
index 53d8507f8e..81a305493e 100644
--- a/usr/src/cmd/bhyve/block_if.c
+++ b/usr/src/cmd/bhyve/block_if.c
@@ -117,8 +117,8 @@ struct blockif_ctxt {
int bc_psectoff;
int bc_closing;
pthread_t bc_btid[BLOCKIF_NUMTHR];
- pthread_mutex_t bc_mtx;
- pthread_cond_t bc_cond;
+ pthread_mutex_t bc_mtx;
+ pthread_cond_t bc_cond;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
diff --git a/usr/src/cmd/bhyve/block_if.h b/usr/src/cmd/bhyve/block_if.h
index 265048d90f..d01e5c9213 100644
--- a/usr/src/cmd/bhyve/block_if.h
+++ b/usr/src/cmd/bhyve/block_if.h
@@ -53,12 +53,12 @@
#endif
struct blockif_req {
- struct iovec br_iov[BLOCKIF_IOV_MAX];
int br_iovcnt;
off_t br_offset;
ssize_t br_resid;
void (*br_callback)(struct blockif_req *req, int err);
void *br_param;
+ struct iovec br_iov[BLOCKIF_IOV_MAX];
};
struct blockif_ctxt;
diff --git a/usr/src/cmd/bhyve/bootrom.c b/usr/src/cmd/bhyve/bootrom.c
index 5e4e0e93a2..b8c63828c8 100644
--- a/usr/src/cmd/bhyve/bootrom.c
+++ b/usr/src/cmd/bhyve/bootrom.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/bootrom.h b/usr/src/cmd/bhyve/bootrom.h
index af150d3255..7fb12181dd 100644
--- a/usr/src/cmd/bhyve/bootrom.h
+++ b/usr/src/cmd/bhyve/bootrom.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
* All rights reserved.
*
@@ -33,6 +35,6 @@
struct vmctx;
-int bootrom_init(struct vmctx *ctx, const char *romfile);
+int bootrom_init(struct vmctx *ctx, const char *romfile);
#endif
diff --git a/usr/src/cmd/bhyve/console.c b/usr/src/cmd/bhyve/console.c
index ebb9c921bf..2567f69959 100644
--- a/usr/src/cmd/bhyve/console.c
+++ b/usr/src/cmd/bhyve/console.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/console.h b/usr/src/cmd/bhyve/console.h
index 47193938a6..0d0a854866 100644
--- a/usr/src/cmd/bhyve/console.h
+++ b/usr/src/cmd/bhyve/console.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
@@ -35,19 +37,19 @@ typedef void (*fb_render_func_t)(struct bhyvegc *gc, void *arg);
typedef void (*kbd_event_func_t)(int down, uint32_t keysym, void *arg);
typedef void (*ptr_event_func_t)(uint8_t mask, int x, int y, void *arg);
-void console_init(int w, int h, void *fbaddr);
+void console_init(int w, int h, void *fbaddr);
-void console_set_fbaddr(void *fbaddr);
+void console_set_fbaddr(void *fbaddr);
struct bhyvegc_image *console_get_image(void);
-void console_fb_register(fb_render_func_t render_cb, void *arg);
-void console_refresh(void);
+void console_fb_register(fb_render_func_t render_cb, void *arg);
+void console_refresh(void);
-void console_kbd_register(kbd_event_func_t event_cb, void *arg, int pri);
-void console_key_event(int down, uint32_t keysym);
+void console_kbd_register(kbd_event_func_t event_cb, void *arg, int pri);
+void console_key_event(int down, uint32_t keysym);
-void console_ptr_register(ptr_event_func_t event_cb, void *arg, int pri);
-void console_ptr_event(uint8_t button, int x, int y);
+void console_ptr_register(ptr_event_func_t event_cb, void *arg, int pri);
+void console_ptr_event(uint8_t button, int x, int y);
#endif /* _CONSOLE_H_ */
diff --git a/usr/src/cmd/bhyve/consport.c b/usr/src/cmd/bhyve/consport.c
index 7996e4ffab..f630cec1f3 100644
--- a/usr/src/cmd/bhyve/consport.c
+++ b/usr/src/cmd/bhyve/consport.c
@@ -78,14 +78,14 @@ ttyopen(void)
static bool
tty_char_available(void)
{
- fd_set rfds;
- struct timeval tv;
-
- FD_ZERO(&rfds);
- FD_SET(STDIN_FILENO, &rfds);
- tv.tv_sec = 0;
- tv.tv_usec = 0;
- if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
+ fd_set rfds;
+ struct timeval tv;
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
return (true);
} else {
return (false);
diff --git a/usr/src/cmd/bhyve/dbgport.c b/usr/src/cmd/bhyve/dbgport.c
index d6c5f9383e..6b3d26336f 100644
--- a/usr/src/cmd/bhyve/dbgport.c
+++ b/usr/src/cmd/bhyve/dbgport.c
@@ -139,8 +139,8 @@ init_dbgport(int sport)
conn_fd = -1;
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- exit(1);
+ perror("cannot create socket");
+ exit(4);
}
#ifdef __FreeBSD__
@@ -153,18 +153,18 @@ init_dbgport(int sport)
reuse = 1;
if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
sizeof(reuse)) < 0) {
- perror("setsockopt");
- exit(1);
+ perror("cannot set socket options");
+ exit(4);
}
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
- perror("bind");
- exit(1);
+ perror("cannot bind socket");
+ exit(4);
}
if (listen(listen_fd, 1) < 0) {
- perror("listen");
- exit(1);
+ perror("cannot listen socket");
+ exit(4);
}
#ifndef WITHOUT_CAPSICUM
diff --git a/usr/src/cmd/bhyve/fwctl.c b/usr/src/cmd/bhyve/fwctl.c
index 9e90c1ade6..00d6ef8681 100644
--- a/usr/src/cmd/bhyve/fwctl.c
+++ b/usr/src/cmd/bhyve/fwctl.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
* All rights reserved.
*
@@ -373,7 +375,7 @@ fwctl_request(uint32_t value)
/* Verify size */
if (value < 12) {
printf("msg size error");
- exit(1);
+ exit(4);
}
rinfo.req_size = value;
rinfo.req_count = 1;
diff --git a/usr/src/cmd/bhyve/fwctl.h b/usr/src/cmd/bhyve/fwctl.h
index f5f8d131ab..6dad244811 100644
--- a/usr/src/cmd/bhyve/fwctl.h
+++ b/usr/src/cmd/bhyve/fwctl.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/inout.c b/usr/src/cmd/bhyve/inout.c
index 693c4fdbac..b460ee2988 100644
--- a/usr/src/cmd/bhyve/inout.c
+++ b/usr/src/cmd/bhyve/inout.c
@@ -68,21 +68,21 @@ static int
default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
{
- if (in) {
- switch (bytes) {
- case 4:
- *eax = 0xffffffff;
- break;
- case 2:
- *eax = 0xffff;
- break;
- case 1:
- *eax = 0xff;
- break;
- }
- }
-
- return (0);
+ if (in) {
+ switch (bytes) {
+ case 4:
+ *eax = 0xffffffff;
+ break;
+ case 2:
+ *eax = 0xffff;
+ break;
+ case 1:
+ *eax = 0xff;
+ break;
+ }
+ }
+
+ return (0);
}
static void
diff --git a/usr/src/cmd/bhyve/iov.c b/usr/src/cmd/bhyve/iov.c
new file mode 100644
index 0000000000..c564bd8ae5
--- /dev/null
+++ b/usr/src/cmd/bhyve/iov.c
@@ -0,0 +1,141 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include "iov.h"
+
+/*
+ * Build in iov2 a view of iov1 that starts `seek' bytes into the data
+ * described by iov1.  Entries wholly covered by the seek are dropped, the
+ * first surviving entry is advanced past the bytes still to be skipped, and
+ * every later entry is copied unchanged.  The number of entries written is
+ * stored in *niov2 (zero when the seek reaches or passes the end of iov1).
+ * The caller must provide room in iov2 for up to niov1 entries.
+ */
+void
+seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2,
+    size_t seek)
+{
+	size_t first, out, skip;
+
+	/* Locate the first entry that the seek does not consume entirely. */
+	skip = seek;
+	for (first = 0; first < niov1; first++) {
+		if (skip < iov1[first].iov_len)
+			break;
+		skip -= iov1[first].iov_len;
+	}
+
+	/* Only the first copied entry is shortened; the rest are verbatim. */
+	for (out = 0; first + out < niov1; out++) {
+		iov2[out].iov_base = (char *)iov1[first + out].iov_base + skip;
+		iov2[out].iov_len = iov1[first + out].iov_len - skip;
+		skip = 0;
+	}
+
+	*niov2 = out;
+}
+
+/*
+ * Return the sum of the iov_len fields of the first niov entries of iov.
+ */
+size_t
+count_iov(struct iovec *iov, size_t niov)
+{
+	size_t sum = 0;
+
+	while (niov > 0) {
+		sum += iov->iov_len;
+		iov++;
+		niov--;
+	}
+
+	return (sum);
+}
+
+/*
+ * Shorten, in place, the data described by iov to at most `length' bytes.
+ * The first entry that does not fit entirely within what is left of
+ * `length' has its iov_len cut down and the count of entries up to and
+ * including it is returned.  Note that when `length' ends exactly on an
+ * entry boundary the following entry is kept with iov_len set to zero.
+ * If every entry fits, nothing is modified and niov is returned.
+ */
+size_t
+truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t idx, budget;
+
+	budget = length;
+	for (idx = 0; idx < niov; idx++) {
+		if (budget < iov[idx].iov_len) {
+			iov[idx].iov_len = budget;
+			return (idx + 1);
+		}
+		budget -= iov[idx].iov_len;
+	}
+
+	return (niov);
+}
+
+/*
+ * Copy the data described by the first niov entries of iov, back to back,
+ * into a single heap buffer.
+ *
+ * *buf must be NULL or a pointer that may be passed to realloc(); on success
+ * it is replaced by a buffer holding the concatenated data, which the caller
+ * owns and must free().
+ *
+ * Returns the number of bytes copied, or -1 if the buffer could not be
+ * allocated.  On failure, and when there are no bytes to copy (return value
+ * 0), *buf is left exactly as the caller passed it in, so an existing
+ * allocation is never lost.
+ */
+ssize_t
+iov_to_buf(struct iovec *iov, size_t niov, void **buf)
+{
+	size_t i, off, total;
+	char *dst;
+
+	/* Size the buffer once instead of growing it per element. */
+	total = 0;
+	for (i = 0; i < niov; i++)
+		total += iov[i].iov_len;
+
+	/* Avoid realloc(p, 0), whose result is implementation-defined. */
+	if (total == 0)
+		return (0);
+
+	/* Keep the old pointer until realloc() is known to have succeeded. */
+	dst = realloc(*buf, total);
+	if (dst == NULL)
+		return (-1);
+	*buf = dst;
+
+	off = 0;
+	for (i = 0; i < niov; i++) {
+		if (iov[i].iov_len == 0)
+			continue;
+		memcpy(dst + off, iov[i].iov_base, iov[i].iov_len);
+		off += iov[i].iov_len;
+	}
+
+	return ((ssize_t)total);
+}
+
+/*
+ * Scatter the contents of buf into the memory described by iov, starting
+ * `seek' bytes into that memory.
+ *
+ * At most buflen bytes are read from buf, and no more than the space left
+ * in iov after the seek is written.  The iovec array itself is not modified
+ * and no memory is allocated.
+ *
+ * Returns the number of bytes copied.
+ */
+ssize_t
+buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov,
+    size_t seek)
+{
+	const char *src = buf;
+	size_t i, len, skip;
+	size_t off = 0;
+
+	skip = seek;
+	for (i = 0; i < niov && off < buflen; i++) {
+		/* Entries wholly covered by the seek receive no data. */
+		if (skip >= iov[i].iov_len) {
+			skip -= iov[i].iov_len;
+			continue;
+		}
+
+		/* Never read past the end of buf. */
+		len = MIN(iov[i].iov_len - skip, buflen - off);
+		memcpy((char *)iov[i].iov_base + skip, src + off, len);
+		off += len;
+
+		/* Only the first entry written to is entered part-way. */
+		skip = 0;
+	}
+
+	return ((ssize_t)off);
+}
+
diff --git a/usr/src/cmd/bhyve/iov.h b/usr/src/cmd/bhyve/iov.h
new file mode 100644
index 0000000000..87fa4c1dcf
--- /dev/null
+++ b/usr/src/cmd/bhyve/iov.h
@@ -0,0 +1,43 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IOV_H_
+#define _IOV_H_
+
+/*
+ * Helpers for working with struct iovec scatter/gather arrays.
+ *
+ * seek_iov:     fill iov2 with the part of iov1 that lies `seek' bytes in;
+ *               the number of entries written is stored in *niov2.
+ * truncate_iov: shorten iov in place to at most `length' bytes and return
+ *               the resulting number of entries.
+ * count_iov:    return the sum of the iov_len fields.
+ * iov_to_buf:   concatenate the iovec data into *buf, which is resized with
+ *               realloc(); returns the total length, or -1 if allocation
+ *               fails.
+ * buf_to_iov:   copy bytes from buf into the memory described by iov,
+ *               starting `seek' bytes in; returns the number of bytes copied.
+ */
+void seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+ size_t *niov2, size_t seek);
+size_t truncate_iov(struct iovec *iov, size_t niov, size_t length);
+size_t count_iov(struct iovec *iov, size_t niov);
+ssize_t iov_to_buf(struct iovec *iov, size_t niov, void **buf);
+ssize_t buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov,
+ size_t seek);
+
+#endif /* _IOV_H_ */
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
index 105d37cf56..85e56af10b 100644
--- a/usr/src/cmd/bhyve/mem.c
+++ b/usr/src/cmd/bhyve/mem.c
@@ -38,15 +38,16 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
-#include <sys/tree.h>
#include <sys/errno.h>
+#include <sys/tree.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
-#include <stdio.h>
-#include <stdlib.h>
#include <assert.h>
+#include <err.h>
#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
#include "mem.h"
@@ -123,6 +124,7 @@ mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
+ int perror;
struct mmio_rb_range *np;
pthread_rwlock_rdlock(&mmio_rwlock);
@@ -130,7 +132,8 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
np->mr_param.name);
}
- pthread_rwlock_unlock(&mmio_rwlock);
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
}
#endif
@@ -166,7 +169,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
void *arg)
{
struct mmio_rb_range *entry;
- int err, immutable;
+ int err, perror, immutable;
pthread_rwlock_rdlock(&mmio_rwlock);
/*
@@ -184,7 +187,8 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
/* Update the per-vCPU cache */
mmio_hint[vcpu] = entry;
} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
- pthread_rwlock_unlock(&mmio_rwlock);
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
return (ESRCH);
}
}
@@ -203,13 +207,18 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
* config space window as 'immutable' the deadlock can be avoided.
*/
immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
- if (immutable)
- pthread_rwlock_unlock(&mmio_rwlock);
+ if (immutable) {
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
+ }
err = cb(ctx, vcpu, paddr, &entry->mr_param, arg);
- if (!immutable)
- pthread_rwlock_unlock(&mmio_rwlock);
+ if (!immutable) {
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
+ }
+
return (err);
}
@@ -272,24 +281,27 @@ static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
struct mmio_rb_range *entry, *mrp;
- int err;
+ int err, perror;
err = 0;
mrp = malloc(sizeof(struct mmio_rb_range));
-
- if (mrp != NULL) {
+ if (mrp == NULL) {
+ warn("%s: couldn't allocate memory for mrp\n",
+ __func__);
+ err = ENOMEM;
+ } else {
mrp->mr_param = *memp;
mrp->mr_base = memp->base;
mrp->mr_end = memp->base + memp->size - 1;
pthread_rwlock_wrlock(&mmio_rwlock);
if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
err = mmio_rb_add(rbt, mrp);
- pthread_rwlock_unlock(&mmio_rwlock);
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
if (err)
free(mrp);
- } else
- err = ENOMEM;
+ }
return (err);
}
@@ -313,7 +325,7 @@ unregister_mem(struct mem_range *memp)
{
struct mem_range *mr;
struct mmio_rb_range *entry = NULL;
- int err, i;
+ int err, perror, i;
pthread_rwlock_wrlock(&mmio_rwlock);
err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
@@ -330,7 +342,8 @@ unregister_mem(struct mem_range *memp)
mmio_hint[i] = NULL;
}
}
- pthread_rwlock_unlock(&mmio_rwlock);
+ perror = pthread_rwlock_unlock(&mmio_rwlock);
+ assert(perror == 0);
if (entry)
free(entry);
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
index f386d67749..596c0b0cf3 100644
--- a/usr/src/cmd/bhyve/mem.h
+++ b/usr/src/cmd/bhyve/mem.h
@@ -55,7 +55,7 @@ struct mem_range {
void init_mem(void);
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
struct vm_guest_paging *paging);
-
+
int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval,
int size);
int register_mem(struct mem_range *memp);
diff --git a/usr/src/cmd/bhyve/mevent.c b/usr/src/cmd/bhyve/mevent.c
index edd5cf14cb..4ad33a9f01 100644
--- a/usr/src/cmd/bhyve/mevent.c
+++ b/usr/src/cmd/bhyve/mevent.c
@@ -82,7 +82,7 @@ static int mevent_timid = 43;
static int mevent_pipefd[2];
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
-struct mevent {
+struct mevent {
void (*me_func)(int, enum ev_type, void *);
#define me_msecs me_fd
int me_fd;
@@ -101,7 +101,7 @@ struct mevent {
struct sigevent me_sigev;
boolean_t me_auto_requeue;
#endif
- LIST_ENTRY(mevent) me_list;
+ LIST_ENTRY(mevent) me_list;
};
static LIST_HEAD(listhead, mevent) global_head, change_head;
diff --git a/usr/src/cmd/bhyve/mevent_test.c b/usr/src/cmd/bhyve/mevent_test.c
index 22e3561fed..4da3adb5ae 100644
--- a/usr/src/cmd/bhyve/mevent_test.c
+++ b/usr/src/cmd/bhyve/mevent_test.c
@@ -164,7 +164,7 @@ echoer(void *param)
mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
if (mev == NULL) {
printf("Could not allocate echoer event\n");
- exit(1);
+ exit(4);
}
while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
@@ -219,27 +219,27 @@ acceptor(void *param)
int news;
int s;
- if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- exit(1);
- }
+ if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ perror("cannot create socket");
+ exit(4);
+ }
#ifdef __FreeBSD__
- sin.sin_len = sizeof(sin);
+ sin.sin_len = sizeof(sin);
#endif
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = htons(TEST_PORT);
-
- if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
- perror("bind");
- exit(1);
- }
-
- if (listen(s, 1) < 0) {
- perror("listen");
- exit(1);
- }
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(TEST_PORT);
+
+ if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+ perror("cannot bind socket");
+ exit(4);
+ }
+
+ if (listen(s, 1) < 0) {
+ perror("cannot listen socket");
+ exit(4);
+ }
(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c
index 121c0fc773..3f5a6ef0c5 100644
--- a/usr/src/cmd/bhyve/pci_e82545.c
+++ b/usr/src/cmd/bhyve/pci_e82545.c
@@ -1,4 +1,6 @@
/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
* Copyright (c) 2013 Jeremiah Lott, Avere Systems
@@ -345,8 +347,8 @@ struct e82545_softc {
#define E82545_NVM_MODE_OPADDR 0x0
#define E82545_NVM_MODE_DATAIN 0x1
#define E82545_NVM_MODE_DATAOUT 0x2
- /* EEPROM data */
- uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE];
+ /* EEPROM data */
+ uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE];
};
static void e82545_reset(struct e82545_softc *sc, int dev);
@@ -1495,7 +1497,7 @@ e82545_rx_disable(struct e82545_softc *sc)
static void
e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval)
{
- struct eth_uni *eu;
+ struct eth_uni *eu;
int idx;
idx = reg >> 1;
@@ -1521,7 +1523,7 @@ e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval)
static uint32_t
e82545_read_ra(struct e82545_softc *sc, int reg)
{
- struct eth_uni *eu;
+ struct eth_uni *eu;
uint32_t retval;
int idx;
@@ -1765,12 +1767,12 @@ e82545_read_register(struct e82545_softc *sc, uint32_t offset)
{
uint32_t retval;
int ridx;
-
+
if (offset & 0x3) {
DPRINTF("Unaligned register read offset:0x%x\r\n", offset);
return 0;
}
-
+
DPRINTF("Register read: 0x%x\r\n", offset);
switch (offset) {
@@ -2247,7 +2249,7 @@ e82545_open_tap(struct e82545_softc *sc, char *opts)
sc->esc_tapfd = open(tbuf, O_RDWR);
if (sc->esc_tapfd == -1) {
DPRINTF("unable to open tap device %s\n", opts);
- exit(1);
+ exit(4);
}
/*
diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c
index 8af6a37498..03db632e37 100644
--- a/usr/src/cmd/bhyve/pci_emul.c
+++ b/usr/src/cmd/bhyve/pci_emul.c
@@ -250,6 +250,17 @@ done:
return (error);
}
+/*
+ * Print the pe_emu name of every entry in pci_devemu_set to stdout,
+ * one name per line.
+ */
+void
+pci_print_supported_devices()
+{
+	struct pci_devemu **iter;
+
+	SET_FOREACH(iter, pci_devemu_set)
+		printf("%s\n", (*iter)->pe_emu);
+}
+
static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{
@@ -882,7 +893,7 @@ msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
{
uint16_t msgctrl, rwmask;
int off;
-
+
off = offset - capoff;
/* Message Control Register */
if (off == 2 && bytes == 2) {
@@ -895,8 +906,8 @@ msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
pci_lintr_update(pi);
- }
-
+ }
+
CFGWRITE(pi, offset, val, bytes);
}
@@ -1355,11 +1366,11 @@ pci_bus_write_dsdt(int bus)
dsdt_line("Name (PPRT, Package ()");
dsdt_line("{");
pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
- dsdt_line("})");
+ dsdt_line("})");
dsdt_line("Name (APRT, Package ()");
dsdt_line("{");
pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
- dsdt_line("})");
+ dsdt_line("})");
dsdt_line("Method (_PRT, 0, NotSerialized)");
dsdt_line("{");
dsdt_line(" If (PICM)");
@@ -1750,7 +1761,7 @@ pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
* interrupt.
*/
pci_lintr_update(pi);
-}
+}
static void
pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
diff --git a/usr/src/cmd/bhyve/pci_emul.h b/usr/src/cmd/bhyve/pci_emul.h
index 0a1dd39f57..0053caed99 100644
--- a/usr/src/cmd/bhyve/pci_emul.h
+++ b/usr/src/cmd/bhyve/pci_emul.h
@@ -241,6 +241,7 @@ int pci_msix_table_bar(struct pci_devinst *pi);
int pci_msix_pba_bar(struct pci_devinst *pi);
int pci_msi_maxmsgnum(struct pci_devinst *pi);
int pci_parse_slot(char *opt);
+void pci_print_supported_devices();
void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum);
int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
diff --git a/usr/src/cmd/bhyve/pci_fbuf.c b/usr/src/cmd/bhyve/pci_fbuf.c
index 8478f6e531..5a04c41e54 100644
--- a/usr/src/cmd/bhyve/pci_fbuf.c
+++ b/usr/src/cmd/bhyve/pci_fbuf.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Nahanni Systems, Inc.
* Copyright 2018 Joyent, Inc.
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/pci_irq.c b/usr/src/cmd/bhyve/pci_irq.c
index 4ae9ff3582..4ecb3eddb0 100644
--- a/usr/src/cmd/bhyve/pci_irq.c
+++ b/usr/src/cmd/bhyve/pci_irq.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/pci_irq.h b/usr/src/cmd/bhyve/pci_irq.h
index aa1a6c356b..1ae56efc8f 100644
--- a/usr/src/cmd/bhyve/pci_irq.h
+++ b/usr/src/cmd/bhyve/pci_irq.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/pci_lpc.c b/usr/src/cmd/bhyve/pci_lpc.c
index 70bfed96f6..b7ddb772a1 100644
--- a/usr/src/cmd/bhyve/pci_lpc.c
+++ b/usr/src/cmd/bhyve/pci_lpc.c
@@ -118,6 +118,16 @@ done:
return (error);
}
+/*
+ * Print "bootrom" followed by each of the LPC_UART_NUM entries of
+ * lpc_uart_names to stdout, one name per line.
+ */
+void
+lpc_print_supported_devices()
+{
+	size_t idx = 0;
+
+	printf("bootrom\n");
+	while (idx < LPC_UART_NUM) {
+		printf("%s\n", lpc_uart_names[idx]);
+		idx++;
+	}
+}
+
const char *
lpc_bootrom(void)
{
diff --git a/usr/src/cmd/bhyve/pci_lpc.h b/usr/src/cmd/bhyve/pci_lpc.h
index 8cab52f372..9041f79c50 100644
--- a/usr/src/cmd/bhyve/pci_lpc.h
+++ b/usr/src/cmd/bhyve/pci_lpc.h
@@ -68,6 +68,7 @@ struct lpc_sysres {
#define SYSRES_MEM(base, length) LPC_SYSRES(LPC_SYSRES_MEM, base, length)
int lpc_device_parse(const char *opt);
+void lpc_print_supported_devices();
char *lpc_pirq_name(int pin);
void lpc_pirq_routed(void);
const char *lpc_bootrom(void);
diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c
new file mode 100644
index 0000000000..571f916a9d
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_nvme.c
@@ -0,0 +1,1873 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017 Shunsuke Mie
+ * Copyright (c) 2018 Leon Dang
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * bhyve PCIe-NVMe device emulation.
+ *
+ * options:
+ * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z
+ *
+ * accepted devpath:
+ * /dev/blockdev
+ * /path/to/image
+ * ram=size_in_MiB
+ *
+ * maxq = max number of queues
+ * qsz = max elements in each queue
+ * ioslots = max number of concurrent io requests
+ * sectsz = sector size (defaults to blockif sector size)
+ * ser = serial number (20-chars max)
+ *
+ */
+
+/* TODO:
+ - create async event for smart and log
+ - intr coalesce
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <machine/atomic.h>
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <dev/nvme/nvme.h>
+
+#include "bhyverun.h"
+#include "block_if.h"
+#include "pci_emul.h"
+
+
+/* Set to non-zero to enable the DPRINTF() trace output. */
+static int nvme_debug = 0;
+/*
+ * Both macros take a parenthesized printf argument list, e.g.
+ * DPRINTF(("%s\r\n", __func__));
+ *
+ * DPRINTF is wrapped in do/while (0) so that it expands to exactly one
+ * statement; a bare "if" expansion would capture a following "else" when
+ * the macro is used as the body of an unbraced if.
+ */
+#define	DPRINTF(params) do { if (nvme_debug) printf params; } while (0)
+#define	WPRINTF(params) printf params
+
+/* defaults; can be overridden */
+/* BAR number; presumably the BAR holding the MSI-X table - TODO confirm */
+#define NVME_MSIX_BAR 4
+
+/* presumably the default for the "ioslots" option - TODO confirm */
+#define NVME_IOSLOTS 8
+
+/* presumably the defaults for the "maxq" and "qsz" options - TODO confirm */
+#define NVME_QUEUES 16
+#define NVME_MAX_QENTRIES 2048
+
+/* number of 64-bit entries that fit in one page */
+#define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t))
+/* total iovec capacity of one pci_nvme_ioreq (see its iovpadding member) */
+#define NVME_MAX_BLOCKIOVS 512
+
+/* helpers */
+
+/* byte offset of the doorbell member within struct nvme_registers */
+#define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell)
+
+/*
+ * Byte offsets of the controller registers.  The per-register notes expand
+ * the abbreviations using the NVMe register names; where each offset is
+ * consumed is not visible in this part of the file.
+ */
+enum nvme_controller_register_offsets {
+ NVME_CR_CAP_LOW = 0x00, /* controller capabilities, low dword */
+ NVME_CR_CAP_HI = 0x04, /* controller capabilities, high dword */
+ NVME_CR_VS = 0x08, /* version */
+ NVME_CR_INTMS = 0x0c, /* interrupt mask set */
+ NVME_CR_INTMC = 0x10, /* interrupt mask clear */
+ NVME_CR_CC = 0x14, /* controller configuration */
+ NVME_CR_CSTS = 0x1c, /* controller status */
+ NVME_CR_NSSR = 0x20, /* NVM subsystem reset */
+ NVME_CR_AQA = 0x24, /* admin queue attributes */
+ NVME_CR_ASQ_LOW = 0x28, /* admin submission queue base, low dword */
+ NVME_CR_ASQ_HI = 0x2c, /* admin submission queue base, high dword */
+ NVME_CR_ACQ_LOW = 0x30, /* admin completion queue base, low dword */
+ NVME_CR_ACQ_HI = 0x34, /* admin completion queue base, high dword */
+};
+
+/*
+ * Bits of command dword 11 as used by the Create I/O Queue handlers:
+ * PC is tested to reject non-contiguous (list-based) queues, IEN is the
+ * interrupt-enable bit copied into intr_en for completion queues, and IV
+ * covers the upper 16 bits that carry the interrupt vector.
+ */
+enum nvme_cmd_cdw11 {
+ NVME_CMD_CDW11_PC = 0x0001,
+ NVME_CMD_CDW11_IEN = 0x0002,
+ NVME_CMD_CDW11_IV = 0xFFFF0000,
+};
+
+/* Flag bits stored in nvme_completion_queue.intr_en */
+#define NVME_CQ_INTEN 0x01
+#define NVME_CQ_INTCOAL 0x02
+
+/* Emulation state for one completion queue (index 0 is the admin queue). */
+struct nvme_completion_queue {
+ struct nvme_completion *qbase; /* host mapping from vm_map_gpa(), NULL when deleted/reset */
+ uint32_t size; /* number of entries */
+ uint16_t tail; /* nvme progress */
+ uint16_t head; /* guest progress */
+ uint16_t intr_vec; /* interrupt vector taken from CDW11[31:16] at creation */
+ uint32_t intr_en; /* NVME_CQ_* flag bits */
+ pthread_mutex_t mtx; /* initialized when the queue array is first allocated */
+};
+
+/* Emulation state for one submission queue (index 0 is the admin queue). */
+struct nvme_submission_queue {
+ struct nvme_command *qbase; /* host mapping from vm_map_gpa(), NULL when deleted/reset */
+ uint32_t size; /* number of entries */
+ uint16_t head; /* nvme progress */
+ uint16_t tail; /* guest progress */
+ uint16_t cqid; /* completion queue id */
+ int busy; /* queue is being processed */
+ int qpriority; /* CDW11[2:1] of the Create I/O SQ command */
+};
+
+/* Kind of backing store behind the emulated namespace. */
+enum nvme_storage_type {
+ NVME_STOR_BLOCKIF = 0, /* presumably a block device or image file - TODO confirm */
+ NVME_STOR_RAM = 1, /* presumably the "ram=size_in_MiB" devpath - TODO confirm */
+};
+
+/* Description of the backing store for the namespace. */
+struct pci_nvme_blockstore {
+ enum nvme_storage_type type;
+ void *ctx; /* backend handle; exact type depends on 'type' - TODO confirm */
+ uint64_t size; /* divided by sectsz to obtain the namespace size in blocks */
+ uint32_t sectsz; /* sector size */
+ uint32_t sectsz_bits; /* reported as LBADS, i.e. sector size = 2^sectsz_bits */
+};
+
+/*
+ * State for one in-flight I/O request.  Requests are chained through
+ * 'next' (see the ioreqs_free list in struct pci_nvme_softc).
+ */
+struct pci_nvme_ioreq {
+ struct pci_nvme_softc *sc;
+ struct pci_nvme_ioreq *next;
+ struct nvme_submission_queue *nvme_sq;
+ uint16_t sqid;
+
+ /* command information */
+ uint16_t opc;
+ uint16_t cid;
+ uint32_t nsid;
+
+ /* NOTE(review): presumably the previously added guest range - confirm */
+ uint64_t prev_gpaddr;
+ size_t prev_size;
+
+ /*
+ * lock if all iovs consumed (big IO);
+ * complete transaction before continuing
+ */
+ pthread_mutex_t mtx;
+ pthread_cond_t cv;
+
+ struct blockif_req io_req;
+
+ /* pad to fit up to 512 page descriptors from guest IO request */
+ /* must stay directly after io_req so the extra iovecs follow it */
+ struct iovec iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX];
+};
+
+/* Per-device state of one emulated NVMe controller. */
+struct pci_nvme_softc {
+ struct pci_devinst *nsc_pi; /* owning PCI device instance */
+
+ pthread_mutex_t mtx; /* held around pci_nvme_reset_locked() */
+
+ struct nvme_registers regs; /* emulated controller registers */
+
+ /* data returned by the Identify command (CNS 0x00 and 0x01) */
+ struct nvme_namespace_data nsdata;
+ struct nvme_controller_data ctrldata;
+
+ struct pci_nvme_blockstore nvstore;
+
+ uint16_t max_qentries; /* max entries per queue */
+ uint32_t max_queues; /* queue arrays hold max_queues + 1 entries */
+ uint32_t num_cqueues; /* reset to max_queues; changed by Set Features */
+ uint32_t num_squeues; /* reset to max_queues; changed by Set Features */
+
+ struct pci_nvme_ioreq *ioreqs;
+ struct pci_nvme_ioreq *ioreqs_free; /* free list of ioreqs */
+ uint32_t pending_ios;
+ uint32_t ioslots;
+ sem_t iosemlock;
+
+ /* status and guest memory mapped queues */
+ struct nvme_completion_queue *compl_queues;
+ struct nvme_submission_queue *submit_queues;
+
+ /* controller features */
+ uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */
+ uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */
+ uint32_t async_ev_config; /* 0x0B: async event config */
+};
+
+
+static void pci_nvme_io_partial(struct blockif_req *br, int err);
+
+/* Controller Configuration utils */
+/*
+ * Each NVME_CC_GET_* macro extracts one field of the CC register value:
+ * the value is shifted right first and then masked (">>" binds tighter
+ * than "&").
+ */
+#define NVME_CC_GET_EN(cc) \
+ ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK)
+#define NVME_CC_GET_CSS(cc) \
+ ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK)
+#define NVME_CC_GET_SHN(cc) \
+ ((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK)
+#define NVME_CC_GET_IOSQES(cc) \
+ ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK)
+#define NVME_CC_GET_IOCQES(cc) \
+ ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK)
+
+/*
+ * In-place bit mask covering the EN, IOSQES and IOCQES fields.
+ * NOTE(review): presumably the fields a guest may always write - confirm
+ * against the CC register write handler.
+ */
+#define NVME_CC_WRITE_MASK \
+ ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \
+ (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \
+ (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT))
+
+/*
+ * In-place bit mask covering the CSS, MPS and AMS fields.
+ * NOTE(review): "NEN" presumably means writable only while the controller
+ * is not enabled - confirm against the CC register write handler.
+ */
+#define NVME_CC_NEN_WRITE_MASK \
+ ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \
+ (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \
+ (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT))
+
+/* Controller Status utils */
+#define NVME_CSTS_GET_RDY(sts) \
+ ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK)
+
+/* CSTS value with only the RDY bit set */
+#define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT)
+
+/* Completion Queue status word utils */
+/* phase bit of the status word */
+#define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT)
+/* in-place mask of the status code type (SCT) and status code (SC) fields */
+#define NVME_STATUS_MASK \
+ ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
+ (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))
+
+/*
+ * Copy at most dst_size bytes of the string src into dst and fill the
+ * remainder of dst with the pad character.  The result is a fixed-width
+ * field and is NOT NUL-terminated.
+ */
+static __inline void
+cpywithpad(char *dst, int dst_size, const char *src, char pad)
+{
+	int copied;
+	char *rest;
+
+	copied = strnlen(src, dst_size);
+	rest = dst + copied;
+
+	memcpy(dst, src, copied);
+	memset(rest, pad, dst_size - copied);
+}
+
+/*
+ * Replace the status code type and status code fields of *status with
+ * 'type' and 'code'; all other bits of *status are left untouched.
+ */
+static __inline void
+pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code)
+{
+	uint16_t sct_bits, sc_bits;
+
+	sct_bits = (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT;
+	sc_bits = (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT;
+
+	*status = (*status & ~NVME_STATUS_MASK) | sct_bits | sc_bits;
+}
+
+/*
+ * Convenience wrapper around pci_nvme_status_tc(): store 'code' in *status
+ * with the generic status code type (NVME_SCT_GENERIC).
+ */
+static __inline void
+pci_nvme_status_genc(uint16_t *status, uint16_t code)
+{
+	const uint16_t sct = NVME_SCT_GENERIC;
+
+	pci_nvme_status_tc(status, sct, code);
+}
+
+/*
+ * Set the phase bit of *status to the inverse of 'prev': the bit is set
+ * when prev is zero and cleared when prev is non-zero.
+ */
+static __inline void
+pci_nvme_toggle_phase(uint16_t *status, int prev)
+{
+
+	if (!prev)
+		*status |= NVME_STATUS_P;
+	else
+		*status &= ~NVME_STATUS_P;
+}
+
+/*
+ * Fill in the Identify Controller data (sc->ctrldata) that is handed to
+ * the guest.  Only the fields assigned here are touched.
+ */
+static void
+pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
+{
+	/* FreeBSD OUI */
+	static const uint8_t oui[3] = { 0x58, 0x9c, 0xfc };
+	struct nvme_controller_data *ctrl = &sc->ctrldata;
+
+	/* PCI vendor and subsystem vendor ids */
+	ctrl->vid = 0xFB5D;
+	ctrl->ssvid = 0x0000;
+
+	/* Model number and firmware revision are space-padded fields */
+	cpywithpad((char *)ctrl->mn, sizeof(ctrl->mn), "bhyve-NVMe", ' ');
+	cpywithpad((char *)ctrl->fr, sizeof(ctrl->fr), "1.0", ' ');
+
+	/* Num of submission commands that we can handle at a time (2^rab) */
+	ctrl->rab = 4;
+
+	ctrl->ieee[0] = oui[0];
+	ctrl->ieee[1] = oui[1];
+	ctrl->ieee[2] = oui[2];
+
+	ctrl->mic = 0;
+
+	/* max data transfer size (2^mdts * CAP.MPSMIN) */
+	ctrl->mdts = 9;
+
+	ctrl->ver = 0x00010300;
+
+	ctrl->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT;
+	ctrl->acl = 2;
+	ctrl->aerl = 4;
+
+	/* TODO: support some simple things like SMART */
+	ctrl->lpa = 0;
+	/* max error log page entries */
+	ctrl->elpe = 0;
+	/* number of power states support */
+	ctrl->npss = 1;
+
+	/* Warning Composite Temperature Threshold */
+	ctrl->wctemp = 0x0157;
+
+	/* Queue entry sizes as powers of two: min and max are identical */
+	ctrl->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) |
+	    (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT);
+	ctrl->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) |
+	    (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
+
+	/* number of namespaces */
+	ctrl->nn = 1;
+
+	ctrl->fna = 0x03;
+
+	ctrl->power_state[0].mp = 10;
+}
+
+/*
+ * Fill in the Identify Namespace data (sc->nsdata) from the backing store
+ * description in sc->nvstore.  The namespace size, capacity and
+ * utilization are all reported as the full store size in sectors.
+ */
+static void
+pci_nvme_init_nsdata(struct pci_nvme_softc *sc)
+{
+	struct nvme_namespace_data *nd;
+
+	nd = &sc->nsdata;
+
+	nd->nsze = sc->nvstore.size / sc->nvstore.sectsz;
+	nd->ncap = nd->nsze;
+	nd->nuse = nd->nsze;
+
+	/* Get LBA and backstore information from backing store */
+	/*
+	 * NLBAF is a 0's based value: 0 advertises exactly one LBA format,
+	 * which matches the single lbaf[0] entry populated below.
+	 */
+	nd->nlbaf = 0;
+	/* LBA data-sz = 2^lbads */
+	nd->lbaf[0] = sc->nvstore.sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT;
+
+	/* Format 0 is the one in use */
+	nd->flbas = 0;
+}
+
+/*
+ * Reset the controller registers and queue bookkeeping.  The name implies
+ * the caller holds sc->mtx (pci_nvme_reset() does).
+ *
+ * On the first call the submission and completion queue arrays are
+ * allocated (max_queues + 1 entries each, index 0 being the admin queue);
+ * on later calls the existing arrays are cleared in place.
+ */
+static void
+pci_nvme_reset_locked(struct pci_nvme_softc *sc)
+{
+	DPRINTF(("%s\r\n", __func__));
+
+	sc->regs.cap_lo = (sc->max_qentries & NVME_CAP_LO_REG_MQES_MASK) |
+	    (1 << NVME_CAP_LO_REG_CQR_SHIFT) |
+	    (60 << NVME_CAP_LO_REG_TO_SHIFT);
+
+	sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT;
+
+	sc->regs.vs = 0x00010300;	/* NVMe v1.3 */
+
+	sc->regs.cc = 0;
+	sc->regs.csts = 0;
+
+	sc->num_cqueues = sc->num_squeues = sc->max_queues;
+
+	if (sc->submit_queues == NULL) {
+		sc->submit_queues = calloc(sc->max_queues + 1,
+		    sizeof(struct nvme_submission_queue));
+	} else {
+		for (int qid = 0; qid <= sc->max_queues; qid++) {
+			struct nvme_submission_queue *sq;
+			struct nvme_completion_queue *cq;
+
+			sq = &sc->submit_queues[qid];
+			cq = &sc->compl_queues[qid];
+
+			/*
+			 * The Admin Submission Queue is at index 0.
+			 * It must not be changed at reset otherwise the
+			 * emulation will be out of sync with the guest.
+			 */
+			if (qid != 0) {
+				sq->qbase = NULL;
+				sq->size = 0;
+				sq->cqid = 0;
+
+				cq->qbase = NULL;
+				cq->size = 0;
+			}
+
+			/* Progress counters restart for every queue */
+			sq->tail = 0;
+			sq->head = 0;
+			sq->busy = 0;
+
+			cq->tail = 0;
+			cq->head = 0;
+		}
+	}
+
+	if (sc->compl_queues == NULL) {
+		sc->compl_queues = calloc(sc->max_queues + 1,
+		    sizeof(struct nvme_completion_queue));
+
+		/* num_cqueues was set to max_queues above */
+		for (int qid = 0; qid <= sc->num_cqueues; qid++)
+			pthread_mutex_init(&sc->compl_queues[qid].mtx, NULL);
+	}
+}
+
+/*
+ * Locked wrapper: take sc->mtx, run pci_nvme_reset_locked() and release
+ * the mutex again.
+ */
+static void
+pci_nvme_reset(struct pci_nvme_softc *sc)
+{
+	pthread_mutex_t *lock = &sc->mtx;
+
+	pthread_mutex_lock(lock);
+	pci_nvme_reset_locked(sc);
+	pthread_mutex_unlock(lock);
+}
+
+/*
+ * Set up the admin queue pair (index 0) from the guest-programmed AQA,
+ * ASQ and ACQ registers: decode the 0's based queue sizes and map the
+ * guest memory of both queues into the host with vm_map_gpa().
+ */
+static void
+pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc)
+{
+	struct nvme_submission_queue *adm_sq = &sc->submit_queues[0];
+	struct nvme_completion_queue *adm_cq = &sc->compl_queues[0];
+	uint16_t acqs, asqs;
+
+	DPRINTF(("%s\r\n", __func__));
+
+	/* Admin submission queue */
+	asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1;
+	adm_sq->size = asqs;
+	adm_sq->qbase = vm_map_gpa(ctx, sc->regs.asq,
+	    sizeof(struct nvme_command) * asqs);
+
+	DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p\r\n",
+	    __func__, sc->regs.asq, sc->submit_queues[0].qbase));
+
+	/* Admin completion queue */
+	acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) &
+	    NVME_AQA_REG_ACQS_MASK) + 1;
+	adm_cq->size = acqs;
+	adm_cq->qbase = vm_map_gpa(ctx, sc->regs.acq,
+	    sizeof(struct nvme_completion) * acqs);
+
+	DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p\r\n",
+	    __func__, sc->regs.acq, sc->compl_queues[0].qbase));
+}
+
+/*
+ * Admin command: Delete I/O Submission Queue.
+ *
+ * The queue id is taken from CDW10[15:0].  Id 0 (the admin queue) and ids
+ * beyond the number of submission queues are rejected with "invalid queue
+ * identifier"; otherwise the queue's host mapping is dropped.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	uint16_t qid = command->cdw10 & 0xffff;
+
+	DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid));
+	/* Submission queue ids are bounded by num_squeues, not num_cqueues */
+	if (qid == 0 || qid > sc->num_squeues) {
+		WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n",
+		    __func__, qid, sc->num_squeues));
+		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
+		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
+		return (1);
+	}
+
+	sc->submit_queues[qid].qbase = NULL;
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	return (1);
+}
+
+/*
+ * Admin command: Create I/O Submission Queue.
+ *
+ * Only physically contiguous queues (CDW11.PC set) are supported.  The
+ * queue id comes from CDW10[15:0], the 0's based size from CDW10[31:16],
+ * the completion queue id from CDW11[31:16] and the priority from
+ * CDW11[2:1].  Queue id 0 is the admin queue and is never accepted here,
+ * so a guest cannot redirect the admin queue through this command.
+ *
+ * The queue state is only updated once the guest memory has been mapped
+ * successfully.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	struct nvme_submission_queue *nsq;
+	uint32_t qsize;
+	uint16_t qid;
+	void *qbase;
+
+	if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) {
+		/*
+		 * Guest sent non-cont submission queue request.
+		 * This setting is unsupported by this emulation.
+		 */
+		WPRINTF(("%s unsupported non-contig (list-based) "
+		    "create i/o submission queue\r\n", __func__));
+
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	qid = command->cdw10 & 0xffff;
+	if (qid == 0 || qid > sc->num_squeues) {
+		WPRINTF(("%s invalid queue index %u (num_squeues %u)\r\n",
+		    __func__, qid, sc->num_squeues));
+		pci_nvme_status_tc(&compl->status,
+		    NVME_SCT_COMMAND_SPECIFIC,
+		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
+		return (1);
+	}
+
+	qsize = ((command->cdw10 >> 16) & 0xffff) + 1;
+	qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
+	    sizeof(struct nvme_command) * (size_t)qsize);
+	if (qbase == NULL) {
+		/* The guest address range is not backed by guest memory */
+		WPRINTF(("%s unable to map guest memory for sq %u\r\n",
+		    __func__, qid));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	nsq = &sc->submit_queues[qid];
+	nsq->size = qsize;
+	nsq->qbase = qbase;
+	nsq->cqid = (command->cdw11 >> 16) & 0xffff;
+	nsq->qpriority = (command->cdw11 >> 1) & 0x03;
+
+	DPRINTF(("%s sq %u size %u gaddr %p cqid %u\r\n", __func__,
+	    qid, nsq->size, nsq->qbase, nsq->cqid));
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+
+	DPRINTF(("%s completed creating IOSQ qid %u\r\n",
+	    __func__, qid));
+	return (1);
+}
+
+/*
+ * Admin command: Delete I/O Completion Queue.
+ *
+ * The queue id is CDW10[15:0].  Id 0 and ids above num_cqueues complete
+ * with "invalid queue identifier"; otherwise the queue's host mapping is
+ * cleared.  Always returns 1.
+ */
+static int
+nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	uint16_t qid = command->cdw10 & 0xffff;
+
+	DPRINTF(("%s DELETE_IO_CQ %u\r\n", __func__, qid));
+
+	if (qid != 0 && qid <= sc->num_cqueues) {
+		sc->compl_queues[qid].qbase = NULL;
+		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	} else {
+		WPRINTF(("%s queue index %u / num_cqueues %u\r\n",
+		    __func__, qid, sc->num_cqueues));
+		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
+		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
+	}
+
+	return (1);
+}
+
+/*
+ * Admin command: Create I/O Completion Queue.
+ *
+ * Only physically contiguous queues (CDW11.PC set) are supported.  The
+ * queue id comes from CDW10[15:0], the 0's based size from CDW10[31:16],
+ * the interrupt-enable bit from CDW11.IEN and the interrupt vector from
+ * CDW11[31:16].  Queue id 0 is the admin queue and is never accepted
+ * here.
+ *
+ * The mapping length is based on the completion entry size; the queue
+ * state is only updated once the guest memory has been mapped.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	struct nvme_completion_queue *ncq;
+	uint32_t qsize;
+	uint16_t qid;
+	void *qbase;
+
+	if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) {
+		/*
+		 * Non-contig completion queue unsupported.
+		 */
+		WPRINTF(("%s unsupported non-contig (list-based) "
+		    "create i/o completion queue\r\n",
+		    __func__));
+
+		/* 0x12 = Invalid Use of Controller Memory Buffer */
+		pci_nvme_status_genc(&compl->status, 0x12);
+		return (1);
+	}
+
+	qid = command->cdw10 & 0xffff;
+	if (qid == 0 || qid > sc->num_cqueues) {
+		WPRINTF(("%s invalid queue index %u (num_cqueues %u)\r\n",
+		    __func__, qid, sc->num_cqueues));
+		pci_nvme_status_tc(&compl->status,
+		    NVME_SCT_COMMAND_SPECIFIC,
+		    NVME_SC_INVALID_QUEUE_IDENTIFIER);
+		return (1);
+	}
+
+	qsize = ((command->cdw10 >> 16) & 0xffff) + 1;
+	/* Entries of this queue are completions, not commands */
+	qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
+	    sizeof(struct nvme_completion) * (size_t)qsize);
+	if (qbase == NULL) {
+		/* The guest address range is not backed by guest memory */
+		WPRINTF(("%s unable to map guest memory for cq %u\r\n",
+		    __func__, qid));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	ncq = &sc->compl_queues[qid];
+	ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1;
+	ncq->intr_vec = (command->cdw11 >> 16) & 0xffff;
+	ncq->size = qsize;
+	ncq->qbase = qbase;
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	return (1);
+}
+
+/*
+ * Admin command: Get Log Page.
+ *
+ * Log pages 0x01 (error information), 0x02 (SMART/health) and 0x03
+ * (firmware slot) are supported and returned as all zeroes; at most one
+ * page of the guest buffer at PRP1 is written.  Any other page id
+ * completes with "invalid log page".  If the guest buffer cannot be
+ * mapped the command completes with "invalid field" instead of writing
+ * through a NULL pointer.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	/*
+	 * NOTE(review): the length field in CDW10 counts dwords, so a
+	 * multiplier of 2 looks too small - confirm against the NVMe spec.
+	 */
+	uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2;
+	uint8_t logpage = command->cdw10 & 0xFF;
+	void *data;
+
+	DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize));
+
+	switch (logpage) {
+	case 0x01: /* Error information */
+	case 0x02: /* SMART/Health information */
+		   /* TODO: present some smart info */
+	case 0x03: /* Firmware slot information */
+		data = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
+		    PAGE_SIZE);
+		if (data == NULL) {
+			WPRINTF(("%s unable to map guest buffer for log "
+			    "page %x\r\n", __func__, logpage));
+			pci_nvme_status_genc(&compl->status,
+			    NVME_SC_INVALID_FIELD);
+			break;
+		}
+
+		memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize);
+		pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+		break;
+	default:
+		WPRINTF(("%s get log page %x command not supported\r\n",
+		    __func__, logpage));
+
+		pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
+		    NVME_SC_INVALID_LOG_PAGE);
+		break;
+	}
+
+	return (1);
+}
+
+/*
+ * Admin command: Identify.
+ *
+ * CDW10[7:0] (CNS) selects what is written to the guest buffer at PRP1:
+ *   0x00  Identify Namespace data (sc->nsdata)
+ *   0x01  Identify Controller data (sc->ctrldata)
+ *   0x02  active namespace id list: the single id 1, zero-filled
+ * CNS 0x11 completes with "invalid namespace or format"; every other
+ * value completes with "invalid field".  If the guest buffer cannot be
+ * mapped the command completes with "invalid field" instead of writing
+ * through a NULL pointer.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	const void *src;
+	size_t len;
+	void *dest;
+
+	DPRINTF(("%s identify 0x%x nsid 0x%x\r\n", __func__,
+	    command->cdw10 & 0xFF, command->nsid));
+
+	switch (command->cdw10 & 0xFF) {
+	case 0x00: /* return Identify Namespace data structure */
+		src = &sc->nsdata;
+		len = sizeof(sc->nsdata);
+		break;
+	case 0x01: /* return Identify Controller data structure */
+		src = &sc->ctrldata;
+		len = sizeof(sc->ctrldata);
+		break;
+	case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */
+		src = NULL;
+		len = sizeof(uint32_t) * 1024;
+		break;
+	case 0x11:
+		pci_nvme_status_genc(&compl->status,
+		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return (1);
+	case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */
+	case 0x10:
+	case 0x12:
+	case 0x13:
+	case 0x14:
+	case 0x15:
+	default:
+		DPRINTF(("%s unsupported identify command requested 0x%x\r\n",
+		    __func__, command->cdw10 & 0xFF));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, len);
+	if (dest == NULL) {
+		WPRINTF(("%s unable to map guest buffer\r\n", __func__));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	if (src != NULL) {
+		memcpy(dest, src, len);
+	} else {
+		/* Unused entries of the id list must read as zero */
+		memset(dest, 0, len);
+		((uint32_t *)dest)[0] = 1;
+	}
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	return (1);
+}
+
+/*
+ * Admin command: Set Features.
+ *
+ * The feature id is CDW10[7:0] and its argument is CDW11.  Most features
+ * are only traced.  The ones that change emulation state are:
+ *   - number of queues: stores the requested submission/completion queue
+ *     counts (0 or anything above max_queues is replaced by max_queues)
+ *     and echoes the result in completion dword 0
+ *   - interrupt coalescing: stores aggregation time and threshold
+ *   - interrupt vector configuration: sets or clears NVME_CQ_INTCOAL on
+ *     every completion queue that uses the given vector
+ *   - async event configuration: stores CDW11
+ * Unknown feature ids complete with "invalid field".  Always returns 1.
+ */
+static int
+nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	int fid = command->cdw10 & 0xFF;
+	uint32_t vec;
+
+	DPRINTF(("%s feature 0x%x\r\n", __func__, fid));
+	compl->cdw0 = 0;
+
+	switch (fid) {
+	case NVME_FEAT_ARBITRATION:
+		DPRINTF(("  arbitration 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_POWER_MANAGEMENT:
+		DPRINTF(("  power management 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_LBA_RANGE_TYPE:
+		DPRINTF(("  lba range 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_TEMPERATURE_THRESHOLD:
+		DPRINTF(("  temperature threshold 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_ERROR_RECOVERY:
+		DPRINTF(("  error recovery 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_VOLATILE_WRITE_CACHE:
+		DPRINTF(("  volatile write cache 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_NUMBER_OF_QUEUES:
+		sc->num_squeues = command->cdw11 & 0xFFFF;
+		sc->num_cqueues = (command->cdw11 >> 16) & 0xFFFF;
+		DPRINTF(("  number of queues (submit %u, completion %u)\r\n",
+		    sc->num_squeues, sc->num_cqueues));
+
+		/* Fall back to the maximum for out-of-range requests */
+		if (sc->num_squeues > sc->max_queues || sc->num_squeues == 0)
+			sc->num_squeues = sc->max_queues;
+		if (sc->num_cqueues > sc->max_queues || sc->num_cqueues == 0)
+			sc->num_cqueues = sc->max_queues;
+
+		compl->cdw0 = ((sc->num_cqueues & 0xFFFF) << 16) |
+		    (sc->num_squeues & 0xFFFF);
+
+		break;
+	case NVME_FEAT_INTERRUPT_COALESCING:
+		DPRINTF(("  interrupt coalescing 0x%x\r\n", command->cdw11));
+
+		/* in uS */
+		sc->intr_coales_aggr_time = ((command->cdw11 >> 8) & 0xFF)*100;
+
+		sc->intr_coales_aggr_thresh = command->cdw11 & 0xFF;
+		break;
+	case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
+		vec = command->cdw11 & 0xFFFF;
+
+		DPRINTF(("  interrupt vector configuration 0x%x\r\n",
+		    command->cdw11));
+
+		for (uint32_t qi = 0; qi <= sc->num_cqueues; qi++) {
+			struct nvme_completion_queue *cq;
+
+			cq = &sc->compl_queues[qi];
+			if (cq->intr_vec != vec)
+				continue;
+
+			/* CDW11 bit 16 selects the NVME_CQ_INTCOAL state */
+			if (command->cdw11 & (1 << 16))
+				cq->intr_en |= NVME_CQ_INTCOAL;
+			else
+				cq->intr_en &= ~NVME_CQ_INTCOAL;
+		}
+		break;
+	case NVME_FEAT_WRITE_ATOMICITY:
+		DPRINTF(("  write atomicity 0x%x\r\n", command->cdw11));
+		break;
+	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+		DPRINTF(("  async event configuration 0x%x\r\n",
+		    command->cdw11));
+		sc->async_ev_config = command->cdw11;
+		break;
+	case NVME_FEAT_SOFTWARE_PROGRESS_MARKER:
+		DPRINTF(("  software progress marker 0x%x\r\n",
+		    command->cdw11));
+		break;
+	case 0x0C:
+		DPRINTF(("  autonomous power state transition 0x%x\r\n",
+		    command->cdw11));
+		break;
+	default:
+		WPRINTF(("%s invalid feature\r\n", __func__));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	return (1);
+}
+
+/*
+ * Admin command: Get Features.
+ *
+ * The feature id is CDW10[7:0]; the current value is returned in
+ * completion dword 0.  Features with a real value here are:
+ *   - temperature threshold: 0xFFFF for the over-temperature threshold,
+ *     0 for the under-temperature threshold (CDW11[21:20] selects)
+ *   - number of queues: the stored submission/completion queue counts,
+ *     or max_queues where a count is still 0
+ *   - async event configuration: the value stored by Set Features
+ * All other known features report 0.  Unknown feature ids complete with
+ * "invalid field".  This command never modifies controller state.
+ *
+ * Always returns 1; pci_nvme_handle_admin_cmd() ORs the value into its
+ * do_intr flag.
+ */
+static int
+nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	int feature = command->cdw10 & 0xFF;
+
+	DPRINTF(("%s feature 0x%x\r\n", __func__, feature));
+
+	compl->cdw0 = 0;
+
+	switch (feature) {
+	case NVME_FEAT_ARBITRATION:
+		DPRINTF(("  arbitration\r\n"));
+		break;
+	case NVME_FEAT_POWER_MANAGEMENT:
+		DPRINTF(("  power management\r\n"));
+		break;
+	case NVME_FEAT_LBA_RANGE_TYPE:
+		DPRINTF(("  lba range\r\n"));
+		break;
+	case NVME_FEAT_TEMPERATURE_THRESHOLD:
+		DPRINTF(("  temperature threshold\r\n"));
+		switch ((command->cdw11 >> 20) & 0x3) {
+		case 0:
+			/* Over temp threshold */
+			compl->cdw0 = 0xFFFF;
+			break;
+		case 1:
+			/* Under temp threshold */
+			compl->cdw0 = 0;
+			break;
+		default:
+			WPRINTF(("  invalid threshold type select\r\n"));
+			pci_nvme_status_genc(&compl->status,
+			    NVME_SC_INVALID_FIELD);
+			return (1);
+		}
+		break;
+	case NVME_FEAT_ERROR_RECOVERY:
+		DPRINTF(("  error recovery\r\n"));
+		break;
+	case NVME_FEAT_VOLATILE_WRITE_CACHE:
+		DPRINTF(("  volatile write cache\r\n"));
+		break;
+	case NVME_FEAT_NUMBER_OF_QUEUES:
+		if (sc->num_squeues == 0)
+			compl->cdw0 |= sc->max_queues & 0xFFFF;
+		else
+			compl->cdw0 |= sc->num_squeues & 0xFFFF;
+
+		if (sc->num_cqueues == 0)
+			compl->cdw0 |= (sc->max_queues & 0xFFFF) << 16;
+		else
+			compl->cdw0 |= (sc->num_cqueues & 0xFFFF) << 16;
+
+		DPRINTF(("  number of queues (submit %u, completion %u)\r\n",
+		    compl->cdw0 & 0xFFFF,
+		    (compl->cdw0 >> 16) & 0xFFFF));
+
+		break;
+	case NVME_FEAT_INTERRUPT_COALESCING:
+		DPRINTF(("  interrupt coalescing\r\n"));
+		break;
+	case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION:
+		DPRINTF(("  interrupt vector configuration\r\n"));
+		break;
+	case NVME_FEAT_WRITE_ATOMICITY:
+		DPRINTF(("  write atomicity\r\n"));
+		break;
+	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+		DPRINTF(("  async event configuration\r\n"));
+		/*
+		 * Report the stored configuration.  A Get must not
+		 * overwrite it with CDW11 of the Get command itself.
+		 */
+		compl->cdw0 = sc->async_ev_config;
+		break;
+	case NVME_FEAT_SOFTWARE_PROGRESS_MARKER:
+		DPRINTF(("  software progress marker\r\n"));
+		break;
+	case 0x0C:
+		DPRINTF(("  autonomous power state transition\r\n"));
+		break;
+	default:
+		WPRINTF(("%s invalid feature 0x%x\r\n", __func__, feature));
+		pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
+		return (1);
+	}
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	return (1);
+}
+
+/*
+ * Admin command: Abort.
+ *
+ * CDW10[15:0] names the submission queue and CDW10[31:16] the command id
+ * to abort.  Nothing is looked up or aborted yet: the command always
+ * completes successfully with completion dword 0 set to 1.
+ * Always returns 1.
+ */
+static int
+nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command,
+	struct nvme_completion* compl)
+{
+	DPRINTF(("%s submission queue %u, command ID 0x%x\r\n", __func__,
+	    command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF));
+
+	/* TODO: search for the command ID and abort it */
+
+	pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
+	compl->cdw0 = 1;
+
+	return (1);
+}
+
+#ifdef __FreeBSD__
+/*
+ * Admin command: Asynchronous Event Request.
+ *
+ * No events are generated yet, so the completion status is set to
+ * "async event request limit exceeded".  Returns 0, unlike the other
+ * admin handlers, which return 1.
+ */
+static int
+nvme_opc_async_event_req(struct pci_nvme_softc* sc,
+	struct nvme_command* command, struct nvme_completion* compl)
+{
+	uint16_t *status = &compl->status;
+
+	DPRINTF(("%s async event request 0x%x\r\n", __func__, command->cdw11));
+
+	/*
+	 * TODO: raise events when they happen based on the Set Features cmd.
+	 * These events happen async, so only set completion successful if
+	 * there is an event reflective of the request to get event.
+	 */
+	pci_nvme_status_tc(status, NVME_SCT_COMMAND_SPECIFIC,
+	    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
+
+	return (0);
+}
+#else
+/* This is kept behind an ifdef while it's unused to appease the compiler. */
+#endif /* __FreeBSD__ */
+
+/*
+ * Drain the admin submission queue (queue 0).
+ *
+ * Every command between the cached head and the current tail is dispatched
+ * to its nvme_opc_*() handler, which fills a temporary completion.  For all
+ * opcodes except ASYNC_EVENT_REQUEST the temporary completion is then
+ * copied into the next slot of admin completion queue 0 with the phase bit
+ * toggled.  An MSI-X vector 0 interrupt is raised at the end if any handler
+ * asked for one.
+ *
+ * The queue's busy flag is used as a try-lock: when it is already set this
+ * call returns without doing any work.
+ *
+ * sc    - controller state
+ * value - doorbell value written by the guest (only logged here)
+ */
+static void
+pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value)
+{
+	struct nvme_completion compl;
+	struct nvme_command *cmd;
+	struct nvme_submission_queue *sq;
+	struct nvme_completion_queue *cq;
+	int do_intr = 0;
+	uint16_t sqhead;
+
+	DPRINTF(("%s index %u\r\n", __func__, (uint32_t)value));
+
+	sq = &sc->submit_queues[0];
+
+	sqhead = atomic_load_acq_short(&sq->head);
+
+	if (atomic_testandset_int(&sq->busy, 1)) {
+		DPRINTF(("%s SQ busy, head %u, tail %u\r\n",
+		    __func__, sqhead, sq->tail));
+		return;
+	}
+
+	DPRINTF(("sqhead %u, tail %u\r\n", sqhead, sq->tail));
+
+	while (sqhead != atomic_load_acq_short(&sq->tail)) {
+		cmd = &(sq->qbase)[sqhead];
+
+		/*
+		 * Start every command from a clean completion so a handler
+		 * that does not set cdw0 reports 0 rather than a leftover
+		 * value from the previous command.
+		 */
+		compl.cdw0 = 0;
+		compl.status = 0;
+
+		switch (cmd->opc) {
+		case NVME_OPC_DELETE_IO_SQ:
+			DPRINTF(("%s command DELETE_IO_SQ\r\n", __func__));
+			do_intr |= nvme_opc_delete_io_sq(sc, cmd, &compl);
+			break;
+		case NVME_OPC_CREATE_IO_SQ:
+			DPRINTF(("%s command CREATE_IO_SQ\r\n", __func__));
+			do_intr |= nvme_opc_create_io_sq(sc, cmd, &compl);
+			break;
+		case NVME_OPC_DELETE_IO_CQ:
+			DPRINTF(("%s command DELETE_IO_CQ\r\n", __func__));
+			do_intr |= nvme_opc_delete_io_cq(sc, cmd, &compl);
+			break;
+		case NVME_OPC_CREATE_IO_CQ:
+			DPRINTF(("%s command CREATE_IO_CQ\r\n", __func__));
+			do_intr |= nvme_opc_create_io_cq(sc, cmd, &compl);
+			break;
+		case NVME_OPC_GET_LOG_PAGE:
+			DPRINTF(("%s command GET_LOG_PAGE\r\n", __func__));
+			do_intr |= nvme_opc_get_log_page(sc, cmd, &compl);
+			break;
+		case NVME_OPC_IDENTIFY:
+			DPRINTF(("%s command IDENTIFY\r\n", __func__));
+			do_intr |= nvme_opc_identify(sc, cmd, &compl);
+			break;
+		case NVME_OPC_ABORT:
+			DPRINTF(("%s command ABORT\r\n", __func__));
+			do_intr |= nvme_opc_abort(sc, cmd, &compl);
+			break;
+		case NVME_OPC_SET_FEATURES:
+			DPRINTF(("%s command SET_FEATURES\r\n", __func__));
+			do_intr |= nvme_opc_set_features(sc, cmd, &compl);
+			break;
+		case NVME_OPC_GET_FEATURES:
+			DPRINTF(("%s command GET_FEATURES\r\n", __func__));
+			do_intr |= nvme_opc_get_features(sc, cmd, &compl);
+			break;
+		case NVME_OPC_ASYNC_EVENT_REQUEST:
+			DPRINTF(("%s command ASYNC_EVENT_REQ\r\n", __func__));
+			/* XXX dont care, unhandled for now
+			do_intr |= nvme_opc_async_event_req(sc, cmd, &compl);
+			*/
+			break;
+		default:
+			WPRINTF(("0x%x command is not implemented\r\n",
+			    cmd->opc));
+			/*
+			 * Report the failure to the guest instead of
+			 * completing an unknown command as successful, and
+			 * make sure the guest is notified of the completion.
+			 */
+			pci_nvme_status_genc(&compl.status,
+			    NVME_SC_INVALID_OPCODE);
+			do_intr |= 1;
+		}
+
+		/* for now skip async event generation */
+		if (cmd->opc != NVME_OPC_ASYNC_EVENT_REQUEST) {
+			struct nvme_completion *cp;
+			int phase;
+
+			cq = &sc->compl_queues[0];
+
+			cp = &(cq->qbase)[cq->tail];
+			/* command-specific result produced by the handler */
+			cp->cdw0 = compl.cdw0;
+			cp->sqid = 0;
+			cp->sqhd = sqhead;
+			cp->cid = cmd->cid;
+
+			/*
+			 * Read the slot's previous phase before the status
+			 * word is overwritten, then flip it in the new one.
+			 */
+			phase = NVME_STATUS_GET_P(cp->status);
+			cp->status = compl.status;
+			pci_nvme_toggle_phase(&cp->status, phase);
+
+			cq->tail = (cq->tail + 1) % cq->size;
+		}
+		sqhead = (sqhead + 1) % sq->size;
+	}
+
+	DPRINTF(("setting sqhead %u\r\n", sqhead));
+	atomic_store_short(&sq->head, sqhead);
+	atomic_store_int(&sq->busy, 0);
+
+	if (do_intr)
+		pci_generate_msix(sc->nsc_pi, 0);
+
+}
+
+/*
+ * Add one guest-physical extent to an I/O.
+ *
+ * With a request (req != NULL) the extent is appended to the request's
+ * iovec array; an extent that directly follows the previous one in guest
+ * physical memory is merged into the last iovec.  When the array is full
+ * the accumulated iovecs are submitted as a partial request and this call
+ * waits on the request's condition variable before starting over at
+ * iovec 0.
+ * NOTE(review): the wait has no predicate and pci_nvme_io_partial() signals
+ * without taking req->mtx, so a lost or spurious wakeup looks possible;
+ * fixing that needs a flag in struct pci_nvme_ioreq - confirm.
+ *
+ * Without a request (req == NULL) the backing store is the RAM buffer in
+ * sc->nvstore.ctx and the data is copied immediately.
+ *
+ * sc       - controller state
+ * req      - block request being built, or NULL for the RAM store
+ * gpaddr   - guest physical address of the extent
+ * size     - length of the extent in bytes
+ * do_write - non-zero for guest -> store, zero for store -> guest
+ * lba      - byte offset into the backing store
+ *
+ * Returns 0 on success, -1 when a RAM-store access is out of range or the
+ * guest address cannot be mapped.
+ */
+static int
+pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req,
+	uint64_t gpaddr, size_t size, int do_write, uint64_t lba)
+{
+	int iovidx;
+
+	if (req != NULL) {
+		/*
+		 * concatenate contig block-iovs to minimize number of iovs
+		 *
+		 * A merge needs an existing iovec: a fresh request has
+		 * prev_gpaddr == prev_size == 0, so a guest address of 0
+		 * would otherwise match and index br_iov[-1].
+		 */
+		if (req->io_req.br_iovcnt > 0 &&
+		    (req->prev_gpaddr + req->prev_size) == gpaddr) {
+			iovidx = req->io_req.br_iovcnt - 1;
+
+			req->prev_size += size;
+			req->io_req.br_resid += size;
+
+			/* validate/map the whole merged extent */
+			req->io_req.br_iov[iovidx].iov_base =
+			    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
+			    req->prev_gpaddr, req->prev_size);
+
+			req->io_req.br_iov[iovidx].iov_len = req->prev_size;
+		} else {
+			pthread_mutex_lock(&req->mtx);
+
+			iovidx = req->io_req.br_iovcnt;
+			if (iovidx == NVME_MAX_BLOCKIOVS) {
+				int err = 0;
+
+				DPRINTF(("large I/O, doing partial req\r\n"));
+
+				iovidx = 0;
+				req->io_req.br_iovcnt = 0;
+
+				req->io_req.br_callback = pci_nvme_io_partial;
+
+				if (!do_write)
+					err = blockif_read(sc->nvstore.ctx,
+					    &req->io_req);
+				else
+#ifdef __FreeBSD__
+					err = blockif_write(sc->nvstore.ctx,
+					    &req->io_req);
+#else
+					err = blockif_write(sc->nvstore.ctx,
+					    &req->io_req, B_FALSE);
+				/*
+				 * XXX: Is a follow-up needed for proper sync
+				 * detection here or later flush behavior?
+				 */
+#endif
+
+				/* wait until req completes before cont */
+				if (err == 0)
+					pthread_cond_wait(&req->cv, &req->mtx);
+			}
+			if (iovidx == 0) {
+				/* first iovec of a (partial) request */
+				req->io_req.br_offset = lba;
+				req->io_req.br_resid = 0;
+				req->io_req.br_param = req;
+			}
+
+			req->io_req.br_iov[iovidx].iov_base =
+			    paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
+			    gpaddr, size);
+
+			req->io_req.br_iov[iovidx].iov_len = size;
+
+			req->prev_gpaddr = gpaddr;
+			req->prev_size = size;
+			req->io_req.br_resid += size;
+
+			req->io_req.br_iovcnt++;
+
+			pthread_mutex_unlock(&req->mtx);
+		}
+	} else {
+		/* RAM buffer: read/write directly */
+		void *p = sc->nvstore.ctx;
+		void *gptr;
+
+		/* written so that lba + size cannot wrap around */
+		if (lba > sc->nvstore.size ||
+		    size > (sc->nvstore.size - lba)) {
+			WPRINTF(("%s write would overflow RAM\r\n", __func__));
+			return (-1);
+		}
+
+		p = (void *)((uintptr_t)p + (uintptr_t)lba);
+		gptr = paddr_guest2host(sc->nsc_pi->pi_vmctx, gpaddr, size);
+		if (gptr == NULL) {
+			/* guest handed us an address outside its memory */
+			WPRINTF(("%s invalid guest address 0x%lx\r\n",
+			    __func__, gpaddr));
+			return (-1);
+		}
+		if (do_write)
+			memcpy(p, gptr, size);
+		else
+			memcpy(gptr, p, size);
+	}
+	return (0);
+}
+
+/*
+ * Post a completion entry for an I/O command.
+ *
+ * The entry is written at the tail of the completion queue that the
+ * submission queue is bound to (sq->cqid), under that queue's mutex, and
+ * the tail is advanced.  The phase bit of the slot is toggled relative to
+ * its previous contents.
+ *
+ * sc          - controller state
+ * sq          - submission queue the command came from
+ * sqid        - index of that submission queue
+ * cid         - command identifier to echo back
+ * cdw0        - command-specific result for DW0 of the entry
+ * status      - status word (phase bit is filled in here)
+ * ignore_busy - when zero, the interrupt is suppressed while the
+ *               submission queue's busy flag is set
+ *
+ * An MSI-X interrupt on the queue's vector is generated only when the
+ * completion queue has interrupts enabled.
+ */
+static void
+pci_nvme_set_completion(struct pci_nvme_softc *sc,
+	struct nvme_submission_queue *sq, int sqid, uint16_t cid,
+	uint32_t cdw0, uint16_t status, int ignore_busy)
+{
+	struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
+	struct nvme_completion *compl;
+	int do_intr = 0;
+	int phase;
+
+	DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x\r\n",
+	    __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status),
+	    NVME_STATUS_GET_SC(status)));
+
+	pthread_mutex_lock(&cq->mtx);
+
+	assert(cq->qbase != NULL);
+
+	compl = &cq->qbase[cq->tail];
+
+	/*
+	 * Store the caller's result; without this the slot keeps whatever
+	 * value it held from an earlier use.
+	 */
+	compl->cdw0 = cdw0;
+	compl->sqhd = atomic_load_acq_short(&sq->head);
+	compl->sqid = sqid;
+	compl->cid = cid;
+
+	/* toggle phase: read the old bit before overwriting the status */
+	phase = NVME_STATUS_GET_P(compl->status);
+	compl->status = status;
+	pci_nvme_toggle_phase(&compl->status, phase);
+
+	cq->tail = (cq->tail + 1) % cq->size;
+
+	if (cq->intr_en & NVME_CQ_INTEN)
+		do_intr = 1;
+
+	pthread_mutex_unlock(&cq->mtx);
+
+	if (ignore_busy || !atomic_load_acq_int(&sq->busy))
+		if (do_intr)
+			pci_generate_msix(sc->nsc_pi, cq->intr_vec);
+}
+
+/*
+ * Return an I/O request slot to the controller's free list.
+ *
+ * The slot's back-pointers are cleared, it is pushed onto the head of
+ * sc->ioreqs_free and the pending I/O count is decremented, all but the
+ * clearing under sc->mtx.  If that was the last pending I/O and the
+ * controller is enabled but not yet flagged ready, CSTS.RDY is set.
+ * Finally the slot semaphore is posted so a waiter in
+ * pci_nvme_get_ioreq() can proceed.
+ */
+static void
+pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req)
+{
+	req->sqid = 0;
+	req->nvme_sq = NULL;
+	req->sc = NULL;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	req->next = sc->ioreqs_free;
+	sc->ioreqs_free = req;
+
+	sc->pending_ios--;
+	if (sc->pending_ios == 0) {
+		/*
+		 * when no more IO pending, can set to ready if device
+		 * reset/enabled
+		 */
+		if (NVME_CC_GET_EN(sc->regs.cc) &&
+		    !(NVME_CSTS_GET_RDY(sc->regs.csts)))
+			sc->regs.csts |= NVME_CSTS_RDY;
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+
+	sem_post(&sc->iosemlock);
+}
+
+/*
+ * Take an I/O request slot from the controller's free list.
+ *
+ * Waits on the slot semaphore first, so the free list is expected to be
+ * non-empty once sc->mtx is held (asserted).  The slot is unlinked, bound
+ * to the controller, counted as a pending I/O, and its block request and
+ * extent-merging state are reset before it is returned.
+ */
+static struct pci_nvme_ioreq *
+pci_nvme_get_ioreq(struct pci_nvme_softc *sc)
+{
+	struct pci_nvme_ioreq *slot;
+
+	sem_wait(&sc->iosemlock);
+
+	pthread_mutex_lock(&sc->mtx);
+	slot = sc->ioreqs_free;
+	assert(slot != NULL);
+
+	/* pop the head of the free list */
+	sc->ioreqs_free = slot->next;
+	slot->next = NULL;
+	slot->sc = sc;
+	sc->pending_ios++;
+	pthread_mutex_unlock(&sc->mtx);
+
+	/* no merge candidate yet */
+	slot->prev_gpaddr = 0;
+	slot->prev_size = 0;
+
+	slot->io_req.br_iovcnt = 0;
+	slot->io_req.br_offset = 0;
+	slot->io_req.br_resid = 0;
+	slot->io_req.br_param = slot;
+
+	return slot;
+}
+
+/*
+ * Block-layer completion callback for a finished I/O request.
+ *
+ * Maps the error (any non-zero value) to a generic data-transfer-error
+ * status, or success otherwise, posts the completion for the originating
+ * submission queue and hands the request slot back to the free list.
+ */
+static void
+pci_nvme_io_done(struct blockif_req *br, int err)
+{
+	struct pci_nvme_ioreq *req = br->br_param;
+	struct nvme_submission_queue *sq = req->nvme_sq;
+	uint16_t status = 0;
+	uint16_t code;
+
+	DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err)));
+
+	/* TODO return correct error */
+	if (err)
+		code = NVME_SC_DATA_TRANSFER_ERROR;
+	else
+		code = NVME_SC_SUCCESS;
+	pci_nvme_status_genc(&status, code);
+
+	pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0);
+	pci_nvme_release_ioreq(req->sc, req);
+}
+
+/*
+ * Block-layer completion callback for a partial submission of a large
+ * I/O: logs the result and signals the request's condition variable,
+ * which pci_nvme_append_iov_req() waits on.
+ */
+static void
+pci_nvme_io_partial(struct blockif_req *br, int err)
+{
+	struct pci_nvme_ioreq *waiter = br->br_param;
+
+	DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err)));
+
+	pthread_cond_signal(&waiter->cv);
+}
+
+
+
+/*
+ * Drain I/O submission queue 'idx'.
+ *
+ * Each command between the queue head and tail is decoded:
+ *  - FLUSH and opcode 0x08 (write zeroes, not implemented) are completed
+ *    immediately with success;
+ *  - anything else is treated as a data transfer whose buffers are
+ *    described by PRP1/PRP2 (PRP2 is either the second page or a pointer
+ *    to a PRP list, chained through its last entry).
+ *
+ * For the RAM store the data is copied while the PRPs are walked and the
+ * completion is posted right away.  For the blockif store the extents are
+ * gathered into an I/O request that is handed to blockif_read() /
+ * blockif_write(); pci_nvme_io_done() posts the completion later.
+ *
+ * The queue's busy flag is used as a try-lock; the new head is published
+ * and the flag cleared once the queue is drained.
+ */
+static void
+pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
+{
+	struct nvme_submission_queue *sq;
+	uint16_t status = 0;
+	uint16_t sqhead;
+	int err;
+
+	/* handle all submissions up to sq->tail index */
+	sq = &sc->submit_queues[idx];
+
+	if (atomic_testandset_int(&sq->busy, 1)) {
+		DPRINTF(("%s sqid %u busy\r\n", __func__, idx));
+		return;
+	}
+
+	sqhead = atomic_load_acq_short(&sq->head);
+
+	DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p\r\n",
+	    idx, sqhead, sq->tail, sq->qbase));
+
+	while (sqhead != atomic_load_acq_short(&sq->tail)) {
+		struct nvme_command *cmd;
+		struct pci_nvme_ioreq *req = NULL;
+		uint64_t lba;
+		uint64_t nblocks, bytes, size, cpsz;
+
+		/* TODO: support scatter gather list handling */
+
+		cmd = &sq->qbase[sqhead];
+		sqhead = (sqhead + 1) % sq->size;
+
+		lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
+
+		if (cmd->opc == NVME_OPC_FLUSH) {
+			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
+			    status, 1);
+
+			continue;
+		} else if (cmd->opc == 0x08) {
+			/* TODO: write zeroes */
+			WPRINTF(("%s write zeroes lba 0x%lx blocks %u\r\n",
+			    __func__, lba, cmd->cdw12 & 0xFFFF));
+			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
+			    status, 1);
+
+			continue;
+		}
+
+		/* CDW12 bits 15:0 hold the block count minus one */
+		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+		bytes = nblocks * sc->nvstore.sectsz;
+
+		if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
+			req = pci_nvme_get_ioreq(sc);
+			req->nvme_sq = sq;
+			req->sqid = idx;
+		}
+
+		DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu "
+		    "(%lu-bytes)\r\n",
+		    sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size,
+		    cmd->opc == NVME_OPC_WRITE ?
+		    "WRITE" : "READ",
+		    lba, nblocks, bytes));
+
+		cmd->prp1 &= ~(0x03UL);
+		cmd->prp2 &= ~(0x03UL);
+
+		DPRINTF((" prp1 0x%lx prp2 0x%lx\r\n", cmd->prp1, cmd->prp2));
+
+		size = bytes;
+		/* from here on 'lba' is a byte offset into the store */
+		lba *= sc->nvstore.sectsz;
+
+		/*
+		 * PRP1 may start mid-page: the first extent only runs to
+		 * the end of that page (or to the end of the transfer).
+		 */
+		cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE);
+
+		if (cpsz > bytes)
+			cpsz = bytes;
+
+		if (req != NULL) {
+			req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) |
+			    cmd->cdw10;
+			req->opc = cmd->opc;
+			req->cid = cmd->cid;
+			req->nsid = cmd->nsid;
+		}
+
+		err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz,
+		    cmd->opc == NVME_OPC_WRITE, lba);
+		lba += cpsz;
+		size -= cpsz;
+
+		/* do not let a later extent hide a failure of the first */
+		if (err || size == 0)
+			goto iodone;
+
+		if (size <= PAGE_SIZE) {
+			/* prp2 is second (and final) page in transfer */
+
+			err = pci_nvme_append_iov_req(sc, req, cmd->prp2,
+			    size,
+			    cmd->opc == NVME_OPC_WRITE,
+			    lba);
+		} else {
+			uint64_t *prp_list;
+			int i;
+
+			/* prp2 is pointer to a physical region page list */
+			prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx,
+			    cmd->prp2, PAGE_SIZE);
+			if (prp_list == NULL) {
+				WPRINTF(("%s invalid PRP2 0x%lx\r\n",
+				    __func__, cmd->prp2));
+				err = 1;
+				goto iodone;
+			}
+
+			i = 0;
+			while (size != 0) {
+				cpsz = MIN(size, PAGE_SIZE);
+
+				/*
+				 * Move to linked physical region page list
+				 * in last item.
+				 */
+				if (i == (NVME_PRP2_ITEMS-1) &&
+				    size > PAGE_SIZE) {
+					/*
+					 * The link comes from the guest, so
+					 * fail the command rather than
+					 * assert on a bad value.
+					 */
+					if ((prp_list[i] & (PAGE_SIZE-1)) != 0) {
+						WPRINTF(("%s unaligned PRP "
+						    "list 0x%lx\r\n",
+						    __func__, prp_list[i]));
+						err = 1;
+						break;
+					}
+					prp_list = paddr_guest2host(
+					    sc->nsc_pi->pi_vmctx,
+					    prp_list[i], PAGE_SIZE);
+					if (prp_list == NULL) {
+						WPRINTF(("%s invalid PRP "
+						    "list link\r\n",
+						    __func__));
+						err = 1;
+						break;
+					}
+					i = 0;
+				}
+				if (prp_list[i] == 0) {
+					WPRINTF(("PRP2[%d] = 0 !!!\r\n", i));
+					err = 1;
+					break;
+				}
+
+				err = pci_nvme_append_iov_req(sc, req,
+				    prp_list[i], cpsz,
+				    cmd->opc == NVME_OPC_WRITE, lba);
+				if (err)
+					break;
+
+				lba += cpsz;
+				size -= cpsz;
+				i++;
+			}
+		}
+
+iodone:
+		if (sc->nvstore.type == NVME_STOR_RAM) {
+			/* RAM store: data already copied, complete now */
+			uint16_t code, ram_status = 0;
+
+			code = err ? NVME_SC_LBA_OUT_OF_RANGE :
+			    NVME_SC_SUCCESS;
+			pci_nvme_status_genc(&ram_status, code);
+
+			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
+			    ram_status, 1);
+
+			continue;
+		}
+
+
+		if (err)
+			goto do_error;
+
+		req->io_req.br_callback = pci_nvme_io_done;
+
+		err = 0;
+		switch (cmd->opc) {
+		case NVME_OPC_READ:
+			err = blockif_read(sc->nvstore.ctx, &req->io_req);
+			break;
+		case NVME_OPC_WRITE:
+#ifdef __FreeBSD__
+			err = blockif_write(sc->nvstore.ctx, &req->io_req);
+#else
+			/* XXX: Should this be sync? */
+			err = blockif_write(sc->nvstore.ctx, &req->io_req,
+			    B_FALSE);
+#endif
+			break;
+		default:
+			WPRINTF(("%s unhandled io command 0x%x\r\n",
+			    __func__, cmd->opc));
+			err = 1;
+		}
+
+do_error:
+		if (err) {
+			/* the request never reached the block layer */
+			uint16_t err_status = 0;
+
+			pci_nvme_status_genc(&err_status,
+			    NVME_SC_DATA_TRANSFER_ERROR);
+
+			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
+			    err_status, 1);
+			pci_nvme_release_ioreq(sc, req);
+		}
+	}
+
+	atomic_store_short(&sq->head, sqhead);
+	atomic_store_int(&sq->busy, 0);
+}
+
+/*
+ * Handle a guest write to a queue doorbell.
+ *
+ * ctx   - VM context (unused here)
+ * sc    - controller state
+ * idx   - queue index derived from the doorbell offset
+ * is_sq - non-zero for a submission-queue tail doorbell, zero for a
+ *         completion-queue head doorbell
+ * value - value written; the low 16 bits are the new tail/head
+ *
+ * A submission doorbell records the new tail and processes the queue
+ * (queue 0 is the admin queue).  A completion doorbell only records the
+ * new head.  Indexes beyond the current number of queues are rejected
+ * before any queue state is touched.
+ */
+static void
+pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc,
+	uint64_t idx, int is_sq, uint64_t value)
+{
+	DPRINTF(("nvme doorbell %lu, %s, val 0x%lx\r\n",
+	    idx, is_sq ? "SQ" : "CQ", value & 0xFFFF));
+
+	if (is_sq) {
+		/* validate before writing the tail of a bogus queue */
+		if (idx > sc->num_squeues) {
+			WPRINTF(("%s SQ index %lu overflow from "
+			    "guest (max %u)\r\n",
+			    __func__, idx, sc->num_squeues));
+			return;
+		}
+
+		atomic_store_short(&sc->submit_queues[idx].tail,
+		    (uint16_t)value);
+
+		if (idx == 0) {
+			pci_nvme_handle_admin_cmd(sc, value);
+		} else {
+			/* submission queue; handle new entries in SQ */
+			pci_nvme_handle_io_cmd(sc, (uint16_t)idx);
+		}
+	} else {
+		if (idx > sc->num_cqueues) {
+			WPRINTF(("%s queue index %lu overflow from "
+			    "guest (max %u)\r\n",
+			    __func__, idx, sc->num_cqueues));
+			return;
+		}
+
+		sc->compl_queues[idx].head = (uint16_t)value;
+	}
+}
+
+/*
+ * Debug helper: log which BAR 0 controller register is being accessed.
+ *
+ * func    - name of the calling function, printed as a prefix
+ * offset  - register offset within BAR 0
+ * iswrite - non-zero for a write access, zero for a read
+ */
+static void
+pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite)
+{
+	const char *dir;
+
+	if (iswrite)
+		dir = "WRITE";
+	else
+		dir = "READ";
+
+	switch (offset) {
+	case NVME_CR_CAP_LOW:
+		DPRINTF(("%s %s NVME_CR_CAP_LOW\r\n", func, dir));
+		return;
+	case NVME_CR_CAP_HI:
+		DPRINTF(("%s %s NVME_CR_CAP_HI\r\n", func, dir));
+		return;
+	case NVME_CR_VS:
+		DPRINTF(("%s %s NVME_CR_VS\r\n", func, dir));
+		return;
+	case NVME_CR_INTMS:
+		DPRINTF(("%s %s NVME_CR_INTMS\r\n", func, dir));
+		return;
+	case NVME_CR_INTMC:
+		DPRINTF(("%s %s NVME_CR_INTMC\r\n", func, dir));
+		return;
+	case NVME_CR_CC:
+		DPRINTF(("%s %s NVME_CR_CC\r\n", func, dir));
+		return;
+	case NVME_CR_CSTS:
+		DPRINTF(("%s %s NVME_CR_CSTS\r\n", func, dir));
+		return;
+	case NVME_CR_NSSR:
+		DPRINTF(("%s %s NVME_CR_NSSR\r\n", func, dir));
+		return;
+	case NVME_CR_AQA:
+		DPRINTF(("%s %s NVME_CR_AQA\r\n", func, dir));
+		return;
+	case NVME_CR_ASQ_LOW:
+		DPRINTF(("%s %s NVME_CR_ASQ_LOW\r\n", func, dir));
+		return;
+	case NVME_CR_ASQ_HI:
+		DPRINTF(("%s %s NVME_CR_ASQ_HI\r\n", func, dir));
+		return;
+	case NVME_CR_ACQ_LOW:
+		DPRINTF(("%s %s NVME_CR_ACQ_LOW\r\n", func, dir));
+		return;
+	case NVME_CR_ACQ_HI:
+		DPRINTF(("%s %s NVME_CR_ACQ_HI\r\n", func, dir));
+		return;
+	default:
+		DPRINTF(("unknown nvme bar-0 offset 0x%lx\r\n", offset));
+		return;
+	}
+}
+
+/*
+ * Handle a guest write to BAR 0.
+ *
+ * Offsets at or above NVME_DOORBELL_OFFSET are doorbells: each queue owns
+ * 8 bytes, the first 4 being the submission tail doorbell and the last 4
+ * the completion head doorbell.  Everything below is the controller
+ * register file, which only accepts 4-byte writes and is updated under
+ * sc->mtx.
+ *
+ * ctx    - VM context, passed on to the doorbell and controller-init paths
+ * sc     - controller state
+ * offset - byte offset within BAR 0
+ * size   - access width in bytes
+ * value  - value written by the guest
+ */
+static void
+pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc,
+	uint64_t offset, int size, uint64_t value)
+{
+	uint32_t ccreg;
+
+	if (offset >= NVME_DOORBELL_OFFSET) {
+		uint64_t belloffset = offset - NVME_DOORBELL_OFFSET;
+		uint64_t idx = belloffset / 8; /* door bell size = 2*int */
+		int is_sq = (belloffset % 8) < 4;
+
+		if (belloffset > ((sc->max_queues+1) * 8 - 4)) {
+			WPRINTF(("guest attempted an overflow write offset "
+			    "0x%lx, val 0x%lx in %s\r\n",
+			    offset, value, __func__));
+			return;
+		}
+
+		pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value);
+		return;
+	}
+
+	DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx\r\n",
+	    offset, size, value));
+
+	if (size != 4) {
+		WPRINTF(("guest wrote invalid size %d (offset 0x%lx, "
+		    "val 0x%lx) to bar0 in %s\r\n",
+		    size, offset, value, __func__));
+		/* TODO: shutdown device */
+		return;
+	}
+
+	pci_nvme_bar0_reg_dumps(__func__, offset, 1);
+
+	pthread_mutex_lock(&sc->mtx);
+
+	switch (offset) {
+	case NVME_CR_CAP_LOW:
+	case NVME_CR_CAP_HI:
+		/* readonly */
+		break;
+	case NVME_CR_VS:
+		/* readonly */
+		break;
+	case NVME_CR_INTMS:
+		/* MSI-X, so ignore */
+		break;
+	case NVME_CR_INTMC:
+		/* MSI-X, so ignore */
+		break;
+	case NVME_CR_CC:
+		ccreg = (uint32_t)value;
+
+		DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u "
+		    "iocqes %u\r\n",
+		    __func__,
+		    NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg),
+		    NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg),
+		    NVME_CC_GET_IOCQES(ccreg)));
+
+		if (NVME_CC_GET_SHN(ccreg)) {
+			/* perform shutdown - flush out data to backend */
+			sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK <<
+			    NVME_CSTS_REG_SHST_SHIFT);
+			sc->regs.csts |= NVME_SHST_COMPLETE <<
+			    NVME_CSTS_REG_SHST_SHIFT;
+		}
+		if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) {
+			if (NVME_CC_GET_EN(ccreg) == 0)
+				/* transition 1->0 causes controller reset */
+				pci_nvme_reset_locked(sc);
+			else
+				pci_nvme_init_controller(ctx, sc);
+		}
+
+		/* Insert the iocqes, iosqes and en bits from the write */
+		sc->regs.cc &= ~NVME_CC_WRITE_MASK;
+		sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK;
+		if (NVME_CC_GET_EN(ccreg) == 0) {
+			/* Insert the ams, mps and css bit fields */
+			sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK;
+			sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK;
+			sc->regs.csts &= ~NVME_CSTS_RDY;
+		} else if (sc->pending_ios == 0) {
+			/*
+			 * With I/O still pending, RDY is set later by
+			 * pci_nvme_release_ioreq() once the count hits 0.
+			 */
+			sc->regs.csts |= NVME_CSTS_RDY;
+		}
+		break;
+	case NVME_CR_CSTS:
+		break;
+	case NVME_CR_NSSR:
+		/* ignore writes; don't support subsystem reset */
+		break;
+	case NVME_CR_AQA:
+		sc->regs.aqa = (uint32_t)value;
+		break;
+	case NVME_CR_ASQ_LOW:
+		/* keep the high half, low 12 bits of the base are dropped */
+		sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) |
+		    (0xFFFFF000 & value);
+		break;
+	case NVME_CR_ASQ_HI:
+		sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) |
+		    (value << 32);
+		break;
+	case NVME_CR_ACQ_LOW:
+		sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) |
+		    (0xFFFFF000 & value);
+		break;
+	case NVME_CR_ACQ_HI:
+		sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) |
+		    (value << 32);
+		break;
+	default:
+		DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d\r\n",
+		    __func__, offset, value, size));
+	}
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * BAR write entry point registered in pci_de_nvme.
+ *
+ * Writes to the MSI-X table or PBA BAR are forwarded to the generic MSI-X
+ * emulation; writes to BAR 0 go to the NVMe register/doorbell handler.
+ * Writes to any other BAR are only logged.
+ */
+static void
+pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct pci_nvme_softc* sc = pi->pi_arg;
+
+	if (baridx == pci_msix_table_bar(pi) ||
+	    baridx == pci_msix_pba_bar(pi)) {
+		DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, "
+		    " value 0x%lx\r\n", baridx, offset, size, value));
+
+		pci_emul_msix_twrite(pi, offset, size, value);
+		return;
+	}
+
+	if (baridx == 0) {
+		pci_nvme_write_bar_0(ctx, sc, offset, size, value);
+		return;
+	}
+
+	DPRINTF(("%s unknown baridx %d, val 0x%lx\r\n",
+	    __func__, baridx, value));
+}
+
+/*
+ * Handle a guest read from BAR 0.
+ *
+ * Offsets below NVME_DOORBELL_OFFSET are served by copying 'size' bytes
+ * out of the register structure under sc->mtx; reads at or beyond the
+ * doorbell area return 0 and log a warning.  For 1, 2 and 4 byte accesses
+ * the result is masked down to the access width.
+ */
+static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc,
+	uint64_t offset, int size)
+{
+	uint64_t value;
+
+	pci_nvme_bar0_reg_dumps(__func__, offset, 0);
+
+	if (offset >= NVME_DOORBELL_OFFSET) {
+		value = 0;
+		WPRINTF(("pci_nvme: read invalid offset %ld\r\n", offset));
+	} else {
+		const uint8_t *regbase = (const uint8_t *)&(sc->regs);
+
+		pthread_mutex_lock(&sc->mtx);
+		memcpy(&value, regbase + offset, size);
+		pthread_mutex_unlock(&sc->mtx);
+	}
+
+	/* drop the bytes that were not part of the access */
+	if (size == 1)
+		value &= 0xFF;
+	else if (size == 2)
+		value &= 0xFFFF;
+	else if (size == 4)
+		value &= 0xFFFFFFFF;
+
+	DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x\r\n",
+	    offset, size, (uint32_t)value));
+
+	return (value);
+}
+
+
+
+/*
+ * BAR read entry point registered in pci_de_nvme.
+ *
+ * Reads of the MSI-X table or PBA BAR are forwarded to the generic MSI-X
+ * emulation; reads of BAR 0 go to the NVMe register handler.  Reads of
+ * any other BAR are logged and return 0.
+ */
+static uint64_t
+pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	uint64_t offset, int size)
+{
+	struct pci_nvme_softc* sc = pi->pi_arg;
+
+	if (baridx == pci_msix_table_bar(pi) ||
+	    baridx == pci_msix_pba_bar(pi)) {
+		DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d\r\n",
+		    baridx, offset, size));
+
+		return pci_emul_msix_tread(pi, offset, size);
+	}
+
+	if (baridx == 0)
+		return pci_nvme_read_bar_0(sc, offset, size);
+
+	DPRINTF(("unknown bar %d, 0x%lx\r\n", baridx, offset));
+	return (0);
+}
+
+
+/*
+ * Parse the comma-separated device option string into the controller
+ * state.
+ *
+ * Recognised key=value options: maxq, qsz, ioslots, sectsz, ser, and
+ * ram (size in megabytes, selects the RAM backing store).  Any other
+ * token is accepted only as the very first option and is then opened as a
+ * blockif backing device.
+ *
+ * sc   - controller state to fill in (nsc_pi must already be set)
+ * opts - option string; may be NULL (presumably when the device was
+ *        configured without options - confirm against the caller)
+ *
+ * Returns 0 on success, -1 on any invalid or missing option.
+ */
+static int
+pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts)
+{
+	char bident[sizeof("XX:X:X")];
+	char *uopt, *xopts, *config, *lasts;
+	uint32_t sectsz;
+	int optidx;
+
+	sc->max_queues = NVME_QUEUES;
+	sc->max_qentries = NVME_MAX_QENTRIES;
+	sc->ioslots = NVME_IOSLOTS;
+	sc->num_squeues = sc->max_queues;
+	sc->num_cqueues = sc->max_queues;
+	sectsz = 0;
+
+	if (opts == NULL) {
+		fprintf(stderr, "backing store not specified\n");
+		return (-1);
+	}
+
+	/* work on a copy: tokenising writes NULs into the string */
+	uopt = strdup(opts);
+	if (uopt == NULL) {
+		perror("Unable to copy device options");
+		return (-1);
+	}
+	optidx = 0;
+	snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn),
+	    "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
+	for (xopts = strtok_r(uopt, ",", &lasts);
+	    xopts != NULL;
+	    xopts = strtok_r(NULL, ",", &lasts)) {
+		int needs_value;
+
+		/* split "key=value"; config stays NULL for a bare token */
+		if ((config = strchr(xopts, '=')) != NULL)
+			*config++ = '\0';
+
+		needs_value = !strcmp("maxq", xopts) ||
+		    !strcmp("qsz", xopts) ||
+		    !strcmp("ioslots", xopts) ||
+		    !strcmp("sectsz", xopts) ||
+		    !strcmp("ser", xopts) ||
+		    !strcmp("ram", xopts);
+		if (needs_value && config == NULL) {
+			/* would otherwise hand NULL to atoi()/strtoull() */
+			fprintf(stderr, "Option %s requires a value\n", xopts);
+			free(uopt);
+			return (-1);
+		}
+
+		if (!strcmp("maxq", xopts)) {
+			sc->max_queues = atoi(config);
+		} else if (!strcmp("qsz", xopts)) {
+			sc->max_qentries = atoi(config);
+		} else if (!strcmp("ioslots", xopts)) {
+			sc->ioslots = atoi(config);
+		} else if (!strcmp("sectsz", xopts)) {
+			sectsz = atoi(config);
+		} else if (!strcmp("ser", xopts)) {
+			/*
+			 * This field indicates the Product Serial Number in
+			 * 7-bit ASCII, unused bytes should be space characters.
+			 * Ref: NVMe v1.3c.
+			 */
+			cpywithpad((char *)sc->ctrldata.sn,
+			    sizeof(sc->ctrldata.sn), config, ' ');
+		} else if (!strcmp("ram", xopts)) {
+			uint64_t sz = strtoull(config, NULL, 10);
+
+			sc->nvstore.type = NVME_STOR_RAM;
+			sc->nvstore.size = sz * 1024 * 1024;
+			sc->nvstore.ctx = calloc(1, sc->nvstore.size);
+			sc->nvstore.sectsz = 4096;
+			sc->nvstore.sectsz_bits = 12;
+			if (sc->nvstore.ctx == NULL) {
+				perror("Unable to allocate RAM");
+				free(uopt);
+				return (-1);
+			}
+		} else if (optidx == 0) {
+			/* first token that is not a known key: backing path */
+			snprintf(bident, sizeof(bident), "%d:%d",
+			    sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
+			sc->nvstore.ctx = blockif_open(xopts, bident);
+			if (sc->nvstore.ctx == NULL) {
+				perror("Could not open backing file");
+				free(uopt);
+				return (-1);
+			}
+			sc->nvstore.type = NVME_STOR_BLOCKIF;
+			sc->nvstore.size = blockif_size(sc->nvstore.ctx);
+		} else {
+			fprintf(stderr, "Invalid option %s\n", xopts);
+			free(uopt);
+			return (-1);
+		}
+
+		optidx++;
+	}
+	free(uopt);
+
+	if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) {
+		fprintf(stderr, "backing store not specified\n");
+		return (-1);
+	}
+	/* an explicit, supported sectsz wins over the store's default */
+	if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
+		sc->nvstore.sectsz = sectsz;
+	else if (sc->nvstore.type != NVME_STOR_RAM)
+		sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
+	/* sectsz_bits = log2(sectsz), starting from 512-byte sectors */
+	for (sc->nvstore.sectsz_bits = 9;
+	    (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
+	    sc->nvstore.sectsz_bits++);
+
+	if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES)
+		sc->max_queues = NVME_QUEUES;
+
+	if (sc->max_qentries <= 0) {
+		fprintf(stderr, "Invalid qsz option\n");
+		return (-1);
+	}
+	if (sc->ioslots <= 0) {
+		fprintf(stderr, "Invalid ioslots option\n");
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Device-model init entry point registered in pci_de_nvme.
+ *
+ * Allocates the controller state, parses the option string, builds the
+ * free list of I/O request slots, programs the PCI config-space identity,
+ * allocates BAR 0 (registers plus doorbells) and the MSI-X capability,
+ * and finally resets the controller and initialises its identify data.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int
+pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct pci_nvme_softc *sc;
+	uint32_t pci_membar_sz;
+	int error;
+
+	error = 0;
+
+	sc = calloc(1, sizeof(struct pci_nvme_softc));
+	if (sc == NULL) {
+		perror("Unable to allocate NVMe controller state");
+		return (-1);
+	}
+	pi->pi_arg = sc;
+	sc->nsc_pi = pi;
+
+	error = pci_nvme_parse_opts(sc, opts);
+	if (error < 0)
+		goto done;
+	else
+		error = 0;
+
+	sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq));
+	if (sc->ioreqs == NULL) {
+		perror("Unable to allocate NVMe I/O request slots");
+		error = -1;
+		goto done;
+	}
+	/* chain the slots into a singly linked free list */
+	for (int i = 0; i < sc->ioslots; i++) {
+		if (i < (sc->ioslots-1))
+			sc->ioreqs[i].next = &sc->ioreqs[i+1];
+		pthread_mutex_init(&sc->ioreqs[i].mtx, NULL);
+		pthread_cond_init(&sc->ioreqs[i].cv, NULL);
+	}
+	sc->ioreqs_free = sc->ioreqs;
+	sc->intr_coales_aggr_thresh = 1;
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM);
+	pci_set_cfgdata8(pi, PCIR_PROGIF,
+	    PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0);
+
+	/* allocate size of nvme registers + doorbell space for all queues */
+	pci_membar_sz = sizeof(struct nvme_registers) +
+	    2*sizeof(uint32_t)*(sc->max_queues);
+
+	DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz));
+
+	error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz);
+	if (error) {
+		WPRINTF(("%s pci alloc mem bar failed\r\n", __func__));
+		goto done;
+	}
+
+	error = pci_emul_add_msixcap(pi, sc->max_queues, NVME_MSIX_BAR);
+	if (error) {
+		WPRINTF(("%s pci add msixcap failed\r\n", __func__));
+		goto done;
+	}
+
+	pthread_mutex_init(&sc->mtx, NULL);
+	/* one semaphore count per I/O request slot */
+	sem_init(&sc->iosemlock, 0, sc->ioslots);
+
+	pci_nvme_reset(sc);
+	pci_nvme_init_ctrldata(sc);
+	pci_nvme_init_nsdata(sc);
+
+	pci_lintr_request(pi);
+
+done:
+	return (error);
+}
+
+
+/*
+ * Device-model descriptor for the emulated NVMe controller: binds the
+ * "nvme" emulation name to the init and BAR access handlers above and
+ * registers it with PCI_EMUL_SET.
+ */
+struct pci_devemu pci_de_nvme = {
+	.pe_emu =	"nvme",
+	.pe_init =	pci_nvme_init,
+	.pe_barread =	pci_nvme_read,
+	.pe_barwrite =	pci_nvme_write,
+};
+PCI_EMUL_SET(pci_de_nvme);
diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c
index d2f6ac7785..d272a96d71 100644
--- a/usr/src/cmd/bhyve/pci_virtio_block.c
+++ b/usr/src/cmd/bhyve/pci_virtio_block.c
@@ -127,9 +127,9 @@ struct virtio_blk_hdr {
#define VBH_OP_WRITE 1
#define VBH_OP_FLUSH 4
#define VBH_OP_FLUSH_OUT 5
-#define VBH_OP_IDENT 8
+#define VBH_OP_IDENT 8
#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
- uint32_t vbh_type;
+ uint32_t vbh_type;
uint32_t vbh_ioprio;
uint64_t vbh_sector;
} __packed;
@@ -143,8 +143,8 @@ static int pci_vtblk_debug;
struct pci_vtblk_ioreq {
struct blockif_req io_req;
- struct pci_vtblk_softc *io_sc;
- uint8_t *io_status;
+ struct pci_vtblk_softc *io_sc;
+ uint8_t *io_status;
uint16_t io_idx;
};
@@ -169,7 +169,7 @@ static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
static struct virtio_consts vtblk_vi_consts = {
"vtblk", /* our name */
1, /* we support 1 virtqueue */
- sizeof(struct vtblk_config), /* config reg size */
+ sizeof(struct vtblk_config), /* config reg size */
pci_vtblk_reset, /* reset */
pci_vtblk_notify, /* device-wide qnotify */
pci_vtblk_cfgread, /* read PCI config */
@@ -275,7 +275,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
}
io->io_req.br_resid = iolen;
- DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
+ DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
writeop ? "write" : "read/ident", iolen, i - 1,
io->io_req.br_offset));
@@ -340,7 +340,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
*/
snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
bctxt = blockif_open(opts, bident);
- if (bctxt == NULL) {
+ if (bctxt == NULL) {
perror("Could not open backing file");
return (1);
}
@@ -374,7 +374,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
*/
MD5Init(&mdctx);
MD5Update(&mdctx, opts, strlen(opts));
- MD5Final(digest, &mdctx);
+ MD5Final(digest, &mdctx);
sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
diff --git a/usr/src/cmd/bhyve/pci_virtio_console.c b/usr/src/cmd/bhyve/pci_virtio_console.c
index c4ee10d53a..e1448780f1 100644
--- a/usr/src/cmd/bhyve/pci_virtio_console.c
+++ b/usr/src/cmd/bhyve/pci_virtio_console.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2016 iXsystems Inc.
* All rights reserved.
*
@@ -316,7 +318,7 @@ pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name,
sun.sun_family = AF_UNIX;
sun.sun_len = sizeof(struct sockaddr_un);
strcpy(pathcopy, path);
- strncpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path));
+ strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path));
free(pathcopy);
if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) {
@@ -326,7 +328,7 @@ pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name,
#else /* __FreeBSD__ */
/* Do a simple bind rather than the FreeBSD bindat() */
addr.sun_family = AF_UNIX;
- (void) strncpy(addr.sun_path, path, sizeof (addr.sun_path));
+ (void) strlcpy(addr.sun_path, path, sizeof (addr.sun_path));
if (bind(fd, (struct sockaddr *)&addr, sizeof (addr)) < 0) {
error = -1;
goto out;
@@ -594,22 +596,15 @@ pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq)
struct pci_vtcon_softc *sc;
struct pci_vtcon_port *port;
struct iovec iov[1];
-#ifdef __FreeBSD__
uint16_t idx, n;
-#else
- uint16_t idx;
-#endif
uint16_t flags[8];
sc = vsc;
port = pci_vtcon_vq_to_port(sc, vq);
while (vq_has_descs(vq)) {
-#ifdef __FreeBSD__
n = vq_getchain(vq, &idx, iov, 1, flags);
-#else
- vq_getchain(vq, &idx, iov, 1, flags);
-#endif
+ assert(n >= 1);
if (port != NULL)
port->vsp_cb(port, port->vsp_arg, iov, 1);
@@ -681,7 +676,7 @@ pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
while ((opt = strsep(&opts, ",")) != NULL) {
portname = strsep(&opt, "=");
- portpath = strdup(opt);
+ portpath = opt;
/* create port */
if (pci_vtcon_sock_add(sc, portname, portpath) < 0) {
diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c
index a3fe72474b..f5eadf4a2c 100644
--- a/usr/src/cmd/bhyve/pci_virtio_net.c
+++ b/usr/src/cmd/bhyve/pci_virtio_net.c
@@ -822,24 +822,24 @@ pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
static int
pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
{
- struct ether_addr *ea;
- char *tmpstr;
- char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+ struct ether_addr *ea;
+ char *tmpstr;
+ char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
- tmpstr = strsep(&mac_str,"=");
-
- if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
- ea = ether_aton(mac_str);
+ tmpstr = strsep(&mac_str,"=");
- if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
- memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+ if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
+ ea = ether_aton(mac_str);
+
+ if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+ memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
fprintf(stderr, "Invalid MAC %s\n", mac_str);
- return (EINVAL);
- } else
- memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
- }
+ return (EINVAL);
+ } else
+ memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+ }
- return (0);
+ return (0);
}
#endif /* __FreeBSD__ */
@@ -1104,8 +1104,9 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pthread_mutex_init(&sc->tx_mtx, NULL);
pthread_cond_init(&sc->tx_cond, NULL);
pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
- snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot);
- pthread_set_name_np(sc->tx_tid, tname);
+ snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
+ pi->pi_func);
+ pthread_set_name_np(sc->tx_tid, tname);
return (0);
}
diff --git a/usr/src/cmd/bhyve/pci_virtio_rnd.c b/usr/src/cmd/bhyve/pci_virtio_rnd.c
index 4ce749053c..44bc55e003 100644
--- a/usr/src/cmd/bhyve/pci_virtio_rnd.c
+++ b/usr/src/cmd/bhyve/pci_virtio_rnd.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Nahanni Systems Inc.
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c
new file mode 100644
index 0000000000..aa906bb854
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c
@@ -0,0 +1,718 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
+ * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_backend.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_util.h>
+#include <cam/ctl/ctl_scsi_all.h>
+#include <camlib.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+#include "iov.h"
+
+#define VTSCSI_RINGSZ 64
+#define VTSCSI_REQUESTQ 1
+#define VTSCSI_THR_PER_Q 16
+#define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2)
+#define VTSCSI_MAXSEG 64
+
+#define VTSCSI_IN_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size)
+
+#define VTSCSI_OUT_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size)
+
+#define VIRTIO_SCSI_MAX_CHANNEL 0
+#define VIRTIO_SCSI_MAX_TARGET 0
+#define VIRTIO_SCSI_MAX_LUN 16383
+
+#define VIRTIO_SCSI_F_INOUT (1 << 0)
+#define VIRTIO_SCSI_F_HOTPLUG (1 << 1)
+#define VIRTIO_SCSI_F_CHANGE (1 << 2)
+
+static int pci_vtscsi_debug = 0;
+#define DPRINTF(params) if (pci_vtscsi_debug) printf params
+#define WPRINTF(params) printf params
+
+struct pci_vtscsi_config { /* device config image: filled by pci_vtscsi_reset(), copied out by pci_vtscsi_cfgread() */
+ uint32_t num_queues; /* set to VTSCSI_REQUESTQ on reset */
+ uint32_t seg_max; /* set to VTSCSI_MAXSEG on reset */
+ uint32_t max_sectors;
+ uint32_t cmd_per_lun;
+ uint32_t event_info_size; /* set to sizeof(struct pci_vtscsi_event) on reset */
+ uint32_t sense_size; /* size of the sense area counted by VTSCSI_OUT_HEADER_LEN() */
+ uint32_t cdb_size; /* size of the cdb area counted by VTSCSI_IN_HEADER_LEN() */
+ uint16_t max_channel;
+ uint16_t max_target;
+ uint32_t max_lun;
+} __attribute__((packed)); /* packed: cfgread copies raw bytes at a caller-supplied offset */
+
+struct pci_vtscsi_queue { /* per request-queue state, set up by pci_vtscsi_init_queue() */
+ struct pci_vtscsi_softc * vsq_sc; /* owning device */
+ struct vqueue_info * vsq_vq; /* the virtqueue this state belongs to */
+ int vsq_ctl_fd; /* fd on /dev/cam/ctl used for CTL_IO request ioctls */
+ pthread_mutex_t vsq_mtx; /* held while touching vsq_requests / waiting on vsq_cv */
+ pthread_mutex_t vsq_qmtx; /* held by workers around vq_relchain()/vq_endchains() */
+ pthread_cond_t vsq_cv; /* signalled when a request is appended to vsq_requests */
+ STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; /* FIFO of requests awaiting a worker */
+ LIST_HEAD(, pci_vtscsi_worker) vsq_workers; /* workers started for this queue */
+};
+
+struct pci_vtscsi_worker { /* one worker thread running pci_vtscsi_proc() */
+ struct pci_vtscsi_queue * vsw_queue; /* queue this worker serves */
+ pthread_t vsw_thread;
+ bool vsw_exiting; /* worker leaves its loop when true; NOTE(review): no code in this file sets it */
+ LIST_ENTRY(pci_vtscsi_worker) vsw_link; /* linkage on vsq_workers */
+};
+
+struct pci_vtscsi_request { /* one descriptor chain queued by pci_vtscsi_requestq_notify() */
+ struct pci_vtscsi_queue * vsr_queue;
+ struct iovec vsr_iov_in[VTSCSI_MAXSEG]; /* leading descriptors without VRING_DESC_F_WRITE */
+ int vsr_niov_in;
+ struct iovec vsr_iov_out[VTSCSI_MAXSEG]; /* remaining descriptors, from the first writable one on */
+ int vsr_niov_out;
+ uint32_t vsr_idx; /* chain index handed back to vq_relchain() on completion */
+ STAILQ_ENTRY(pci_vtscsi_request) vsr_link; /* linkage on vsq_requests */
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtscsi_softc {
+ struct virtio_softc vss_vs; /* generic virtio state, linked in pci_vtscsi_init() */
+ struct vqueue_info vss_vq[VTSCSI_MAXQ]; /* [0] control, [1] event, [2..] request queues */
+ struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; /* worker state for request queue i is vss_vq[i + 2] */
+ pthread_mutex_t vss_mtx; /* installed as vss_vs.vs_mtx */
+ int vss_iid; /* from the "iid" device option; passed to ctl_scsi_alloc_io() */
+ int vss_ctl_fd; /* fd on /dev/cam/ctl used for task-management ioctls */
+ uint32_t vss_features; /* negotiated feature bits (low 32 bits) */
+ struct pci_vtscsi_config vss_config; /* device config space contents */
+};
+
+#define VIRTIO_SCSI_T_TMF 0 /* control request type: task management function */
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 /* the values below are TMF subtypes, see pci_vtscsi_tmf_handle() */
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
+
+struct pci_vtscsi_ctrl_tmf { /* control-queue TMF request as laid out in guest memory */
+ uint32_t type; /* VIRTIO_SCSI_T_TMF */
+ uint32_t subtype; /* one of VIRTIO_SCSI_T_TMF_* */
+ uint8_t lun[8]; /* lun[1] is used as target port, lun[2..3] decoded by pci_vtscsi_get_lun() */
+ uint64_t id; /* low 32 bits become the CTL tag number */
+ uint8_t response; /* written by the device; last byte of the request */
+} __attribute__((packed));
+
+#define VIRTIO_SCSI_T_AN_QUERY 1 /* control request type: asynchronous notification query */
+#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 /* NOTE(review): the EVT_ASYNC_* bits are not referenced in this file */
+#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4
+#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8
+#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16
+#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32
+#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64
+
+struct pci_vtscsi_ctrl_an { /* control-queue AN request; handled by the pci_vtscsi_an_handle() stub */
+ uint32_t type; /* VIRTIO_SCSI_T_AN_QUERY */
+ uint8_t lun[8];
+ uint32_t event_requested;
+ uint32_t event_actual;
+ uint8_t response;
+} __attribute__((packed));
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_OK 0 /* stored in cmd_wr->response when the CTL_IO ioctl succeeds */
+#define VIRTIO_SCSI_S_OVERRUN 1
+#define VIRTIO_SCSI_S_ABORTED 2
+#define VIRTIO_SCSI_S_BAD_TARGET 3
+#define VIRTIO_SCSI_S_RESET 4
+#define VIRTIO_SCSI_S_BUSY 5
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
+#define VIRTIO_SCSI_S_FAILURE 9 /* stored in cmd_wr->response when the CTL_IO ioctl fails */
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
+
+/* task_attr */
+#define VIRTIO_SCSI_S_SIMPLE 0 /* NOTE(review): task_attr values are not consulted; requests always use CTL_TAG_SIMPLE */
+#define VIRTIO_SCSI_S_ORDERED 1
+#define VIRTIO_SCSI_S_HEAD 2
+#define VIRTIO_SCSI_S_ACA 3
+
+struct pci_vtscsi_event { /* only its size is used here (event_info_size); no events are generated in this file */
+ uint32_t event;
+ uint8_t lun[8];
+ uint32_t reason;
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_rd { /* request header read from the guest, see VTSCSI_IN_HEADER_LEN() */
+ uint8_t lun[8]; /* lun[1] is used as target port, lun[2..3] decoded by pci_vtscsi_get_lun() */
+ uint64_t id; /* low 32 bits become the CTL tag number */
+ uint8_t task_attr;
+ uint8_t prio;
+ uint8_t crn;
+ uint8_t cdb[]; /* vss_config.cdb_size bytes, copied into io->scsiio.cdb */
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_wr { /* response header written to the guest, see VTSCSI_OUT_HEADER_LEN() */
+ uint32_t sense_len; /* number of valid bytes in sense[] */
+ uint32_t residual; /* copied from io->scsiio.residual */
+ uint16_t status_qualifier;
+ uint8_t status; /* copied from io->scsiio.scsi_status */
+ uint8_t response; /* VIRTIO_SCSI_S_* */
+ uint8_t sense[]; /* vss_config.sense_size bytes of room */
+} __attribute__((packed));
+
+static void *pci_vtscsi_proc(void *); /* worker thread entry point */
+static void pci_vtscsi_reset(void *); /* virtio callbacks referenced by vtscsi_vi_consts */
+static void pci_vtscsi_neg_features(void *, uint64_t);
+static int pci_vtscsi_cfgread(void *, int, int, uint32_t *);
+static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t);
+static inline int pci_vtscsi_get_lun(uint8_t *);
+static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); /* control-queue dispatch */
+static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_tmf *);
+static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_an *);
+static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *,
+ int, struct iovec *, int); /* executes one SCSI request; called from worker threads */
+static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); /* per-queue vq_notify handlers */
+static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *);
+static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *);
+static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_queue *, int);
+static int pci_vtscsi_init(struct vmctx *, struct pci_devinst *, char *); /* pe_init hook of pci_de_vscsi */
+
+static struct virtio_consts vtscsi_vi_consts = { /* handed to vi_softc_linkup() in pci_vtscsi_init() */
+ "vtscsi", /* our name */
+ VTSCSI_MAXQ, /* we support 2+n virtqueues: control, event, n request */
+ sizeof(struct pci_vtscsi_config), /* config reg size */
+ pci_vtscsi_reset, /* reset */
+ NULL, /* device-wide qnotify: none, each queue sets its own vq_notify */
+ pci_vtscsi_cfgread, /* read virtio config */
+ pci_vtscsi_cfgwrite, /* write virtio config (no-op) */
+ pci_vtscsi_neg_features, /* apply negotiated features */
+ 0, /* our capabilities */
+};
+
+/*
+ * Worker thread body.  arg is the struct pci_vtscsi_worker for this thread.
+ *
+ * Sleeps on the queue's condition variable until a request is queued or the
+ * worker is told to exit, then takes the oldest request, runs it through
+ * pci_vtscsi_request_handle() with vsq_mtx dropped, and returns the chain to
+ * the guest under vsq_qmtx.  Always returns NULL.
+ */
+static void *
+pci_vtscsi_proc(void *arg)
+{
+	struct pci_vtscsi_worker *self = arg;
+	struct pci_vtscsi_queue *queue = self->vsw_queue;
+	struct pci_vtscsi_request *next;
+	int nbytes;
+
+	pthread_mutex_lock(&queue->vsq_mtx);
+	while (true) {
+		/* vsq_mtx is held at the top of every iteration. */
+		while (!self->vsw_exiting &&
+		    STAILQ_EMPTY(&queue->vsq_requests))
+			pthread_cond_wait(&queue->vsq_cv, &queue->vsq_mtx);
+
+		if (self->vsw_exiting)
+			break;
+
+		next = STAILQ_FIRST(&queue->vsq_requests);
+		STAILQ_REMOVE_HEAD(&queue->vsq_requests, vsr_link);
+		pthread_mutex_unlock(&queue->vsq_mtx);
+
+		/* The request itself is processed without any lock held. */
+		nbytes = pci_vtscsi_request_handle(queue, next->vsr_iov_in,
+		    next->vsr_niov_in, next->vsr_iov_out, next->vsr_niov_out);
+
+		pthread_mutex_lock(&queue->vsq_qmtx);
+		vq_relchain(queue->vsq_vq, next->vsr_idx, nbytes);
+		vq_endchains(queue->vsq_vq, 0);
+		pthread_mutex_unlock(&queue->vsq_qmtx);
+
+		DPRINTF(("virtio-scsi: request <idx=%d> completed\n",
+		    next->vsr_idx));
+		free(next);
+
+		pthread_mutex_lock(&queue->vsq_mtx);
+	}
+	pthread_mutex_unlock(&queue->vsq_mtx);
+
+	return (NULL);
+}
+
+/*
+ * Virtio reset callback.  vsc is the device softc.
+ *
+ * Resets the generic virtio state and then rewrites every field of the
+ * device configuration structure with its default value.
+ */
+static void
+pci_vtscsi_reset(void *vsc)
+{
+	struct pci_vtscsi_softc *sc = vsc;
+	struct pci_vtscsi_config defaults;
+
+	DPRINTF(("vtscsi: device reset requested\n"));
+	vi_reset_dev(&sc->vss_vs);
+
+	/* initialize config structure */
+	defaults.num_queues = VTSCSI_REQUESTQ;
+	defaults.seg_max = VTSCSI_MAXSEG;
+	defaults.max_sectors = 2;
+	defaults.cmd_per_lun = 1;
+	defaults.event_info_size = sizeof(struct pci_vtscsi_event);
+	defaults.sense_size = 96;
+	defaults.cdb_size = 32;
+	defaults.max_channel = VIRTIO_SCSI_MAX_CHANNEL;
+	defaults.max_target = VIRTIO_SCSI_MAX_TARGET;
+	defaults.max_lun = VIRTIO_SCSI_MAX_LUN;
+
+	/* The struct is packed, so the ten fields above cover every byte. */
+	sc->vss_config = defaults;
+}
+
+/*
+ * Virtio callback invoked with the feature bits agreed with the guest.
+ * The bits are only recorded in the softc.
+ */
+static void
+pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features)
+{
+	struct pci_vtscsi_softc *sc;
+
+	sc = vsc;
+	/* vss_features is 32 bits wide, so only the low half is retained. */
+	sc->vss_features = (uint32_t)negotiated_features;
+}
+
+/*
+ * Virtio config-space read callback: copies size bytes starting at byte
+ * offset within the device configuration structure into *retval.
+ * No range checking is done here.  Always returns 0.
+ */
+static int
+pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vtscsi_softc *sc = vsc;
+	const uint8_t *cfg_bytes = (uint8_t *)&sc->vss_config;
+
+	memcpy(retval, cfg_bytes + offset, size);
+
+	return (0);
+}
+
+static int
+pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val)
+{
+ /* Config-space write callback: the write is discarded, no argument is used. */
+ return (0); /* always reports success */
+}
+
+/*
+ * Extract the logical unit number from an 8-byte virtio-scsi LUN field:
+ * bytes 2 and 3 are combined big-endian and the low 14 bits are returned.
+ */
+static inline int
+pci_vtscsi_get_lun(uint8_t *lun)
+{
+	int hi = lun[2];
+	int lo = lun[3];
+	int combined = (hi << 8) | lo;
+
+	return (combined & 0x3fff);
+}
+
+/*
+ * Dispatch one control-queue request.
+ *
+ * buf/bufsize describe a linear copy of the guest's request.  The first
+ * 32-bit word selects the request type.  The buffer is guest controlled, so
+ * its length is validated before it is interpreted: a request that is too
+ * short to hold the type word, or whose size does not match the structure
+ * for its type, is ignored.  The size must match exactly because the caller
+ * writes back the trailing bytes of the buffer, which is where the response
+ * field of each structure lives.
+ *
+ * Returns the number of response bytes at the end of buf that must be copied
+ * back to the guest (0 when the request was ignored or is of unknown type).
+ */
+static int
+pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf,
+    size_t bufsize)
+{
+	uint32_t type;
+
+	if (buf == NULL || bufsize < sizeof(type)) {
+		WPRINTF(("virtio-scsi: ignoring truncated control request\n"));
+		return (0);
+	}
+
+	/* memcpy avoids assuming the buffer is suitably aligned. */
+	memcpy(&type, buf, sizeof(type));
+
+	switch (type) {
+	case VIRTIO_SCSI_T_TMF:
+		if (bufsize != sizeof(struct pci_vtscsi_ctrl_tmf)) {
+			WPRINTF(("virtio-scsi: ignoring TMF request with "
+			    "bad size %zu\n", bufsize));
+			return (0);
+		}
+		return (pci_vtscsi_tmf_handle(sc, buf));
+
+	case VIRTIO_SCSI_T_AN_QUERY:
+		if (bufsize != sizeof(struct pci_vtscsi_ctrl_an)) {
+			WPRINTF(("virtio-scsi: ignoring AN request with "
+			    "bad size %zu\n", bufsize));
+			return (0);
+		}
+		return (pci_vtscsi_an_handle(sc, buf));
+
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Execute a task management function on behalf of the guest.
+ *
+ * The virtio subtype is translated to the matching CTL task action and
+ * submitted through the device's control descriptor with the CTL_IO ioctl.
+ * The outcome is stored in tmf->response:
+ *  - the CTL task status when the ioctl succeeds,
+ *  - VIRTIO_SCSI_S_FUNCTION_REJECTED for a subtype with no translation
+ *    (previously such requests were silently submitted as action 0),
+ *  - VIRTIO_SCSI_S_FAILURE when the io cannot be allocated or the ioctl
+ *    fails (previously the zeroed status was reported in that case).
+ *
+ * Always returns 1: the single response byte at the end of *tmf must be
+ * copied back to the guest.
+ */
+static int
+pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc,
+    struct pci_vtscsi_ctrl_tmf *tmf)
+{
+	union ctl_io *io;
+	int err;
+
+	io = ctl_scsi_alloc_io(sc->vss_iid);
+	if (io == NULL) {
+		WPRINTF(("virtio-scsi: cannot allocate ctl_io for TMF\n"));
+		tmf->response = VIRTIO_SCSI_S_FAILURE;
+		return (1);
+	}
+	ctl_scsi_zero_io(io);
+
+	io->io_hdr.io_type = CTL_IO_TASK;
+	io->io_hdr.nexus.targ_port = tmf->lun[1];
+	io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun);
+	io->taskio.tag_type = CTL_TAG_SIMPLE;
+	io->taskio.tag_num = (uint32_t)tmf->id;
+
+	switch (tmf->subtype) {
+	case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+		io->taskio.task_action = CTL_TASK_ABORT_TASK;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+		io->taskio.task_action = CTL_TASK_ABORT_TASK_SET;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
+		io->taskio.task_action = CTL_TASK_CLEAR_ACA;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+		io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+		io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+		io->taskio.task_action = CTL_TASK_LUN_RESET;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+		io->taskio.task_action = CTL_TASK_QUERY_TASK;
+		break;
+
+	case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
+		io->taskio.task_action = CTL_TASK_QUERY_TASK_SET;
+		break;
+
+	default:
+		/* Unknown subtype: refuse rather than run an arbitrary action. */
+		WPRINTF(("virtio-scsi: unsupported TMF subtype %u\n",
+		    (unsigned int)tmf->subtype));
+		tmf->response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+		ctl_scsi_free_io(io);
+		return (1);
+	}
+
+	if (pci_vtscsi_debug) {
+		struct sbuf *sb = sbuf_new_auto();
+		ctl_io_sbuf(io, sb);
+		sbuf_finish(sb);
+		DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+		sbuf_delete(sb);
+	}
+
+	err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
+	if (err != 0) {
+		WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno)));
+		tmf->response = VIRTIO_SCSI_S_FAILURE;
+	} else {
+		tmf->response = io->taskio.task_status;
+	}
+
+	ctl_scsi_free_io(io);
+	return (1);
+}
+
+static int
+pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_ctrl_an *an)
+{
+ /* Asynchronous-notification handler stub: neither argument is examined or modified. */
+ return (0); /* 0 = no response bytes for the caller to copy back */
+}
+
+/*
+ * Execute one SCSI request taken from a request queue.
+ *
+ * iov_in/niov_in are the device-readable descriptors (request header
+ * followed by any data-out payload); iov_out/niov_out are the
+ * device-writable descriptors (response header followed by any data-in
+ * buffer).  The header portions are split off, the command is submitted to
+ * CTL with the CTL_IO ioctl on the queue's descriptor, and the response
+ * header is written back into iov_out.
+ *
+ * Returns the number of bytes made available to the guest: the response
+ * header length plus the amount of data CTL reported as filled.  If not even
+ * a response header can be allocated, nothing is written and 0 is returned.
+ *
+ * Fixes relative to the previous version:
+ *  - ext_data_filled is read before the ctl_io is freed (use-after-free);
+ *  - the response header is zero-allocated, so the failure path no longer
+ *    copies uninitialized heap bytes to the guest;
+ *  - allocation failures and a request shorter than its header are reported
+ *    to the guest as VIRTIO_SCSI_S_FAILURE instead of being dereferenced.
+ */
+static int
+pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
+    int niov_in, struct iovec *iov_out, int niov_out)
+{
+	struct pci_vtscsi_softc *sc = q->vsq_sc;
+	struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL;
+	struct pci_vtscsi_req_cmd_wr *cmd_wr = NULL;
+	struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG];
+	union ctl_io *io = NULL;
+	size_t data_niov_in, data_niov_out;
+	void *ext_data_ptr = NULL;
+	uint32_t ext_data_len = 0, ext_sg_entries = 0;
+	ssize_t hdrlen;
+	int err, nbytes = 0;
+
+	/* Describe the payload that follows each header... */
+	seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in,
+	    VTSCSI_IN_HEADER_LEN(sc));
+	seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out,
+	    VTSCSI_OUT_HEADER_LEN(sc));
+
+	/* ...then cut the original vectors down to the headers. */
+	truncate_iov(iov_in, niov_in, VTSCSI_IN_HEADER_LEN(sc));
+	truncate_iov(iov_out, niov_out, VTSCSI_OUT_HEADER_LEN(sc));
+
+	cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc));
+	if (cmd_wr == NULL) {
+		WPRINTF(("virtio-scsi: cannot allocate response header\n"));
+		goto out;
+	}
+
+	hdrlen = iov_to_buf(iov_in, niov_in, (void **)&cmd_rd);
+	if (cmd_rd == NULL || hdrlen < 0 ||
+	    (size_t)hdrlen < VTSCSI_IN_HEADER_LEN(sc)) {
+		WPRINTF(("virtio-scsi: short or unreadable request header\n"));
+		cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+		goto respond;
+	}
+
+	io = ctl_scsi_alloc_io(sc->vss_iid);
+	if (io == NULL) {
+		WPRINTF(("virtio-scsi: cannot allocate ctl_io\n"));
+		cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+		goto respond;
+	}
+	ctl_scsi_zero_io(io);
+
+	io->io_hdr.nexus.targ_port = cmd_rd->lun[1];
+	io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun);
+
+	io->io_hdr.io_type = CTL_IO_SCSI;
+
+	/* A request carries data in at most one direction; data-out wins. */
+	if (data_niov_in > 0) {
+		ext_data_ptr = (void *)data_iov_in;
+		ext_sg_entries = data_niov_in;
+		ext_data_len = count_iov(data_iov_in, data_niov_in);
+		io->io_hdr.flags |= CTL_FLAG_DATA_OUT;
+	} else if (data_niov_out > 0) {
+		ext_data_ptr = (void *)data_iov_out;
+		ext_sg_entries = data_niov_out;
+		ext_data_len = count_iov(data_iov_out, data_niov_out);
+		io->io_hdr.flags |= CTL_FLAG_DATA_IN;
+	}
+
+	io->scsiio.sense_len = sc->vss_config.sense_size;
+	io->scsiio.tag_num = (uint32_t)cmd_rd->id;
+	io->scsiio.tag_type = CTL_TAG_SIMPLE;
+	io->scsiio.ext_sg_entries = ext_sg_entries;
+	io->scsiio.ext_data_ptr = ext_data_ptr;
+	io->scsiio.ext_data_len = ext_data_len;
+	io->scsiio.ext_data_filled = 0;
+	io->scsiio.cdb_len = sc->vss_config.cdb_size;
+	memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size);
+
+	if (pci_vtscsi_debug) {
+		struct sbuf *sb = sbuf_new_auto();
+		ctl_io_sbuf(io, sb);
+		sbuf_finish(sb);
+		DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+		sbuf_delete(sb);
+	}
+
+	err = ioctl(q->vsq_ctl_fd, CTL_IO, io);
+	if (err != 0) {
+		WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno)));
+		cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+	} else {
+		cmd_wr->sense_len = MIN(io->scsiio.sense_len,
+		    sc->vss_config.sense_size);
+		cmd_wr->residual = io->scsiio.residual;
+		cmd_wr->status = io->scsiio.scsi_status;
+		cmd_wr->response = VIRTIO_SCSI_S_OK;
+		memcpy(&cmd_wr->sense, &io->scsiio.sense_data,
+		    cmd_wr->sense_len);
+	}
+
+respond:
+	buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0);
+
+	/* Compute the length while io is still valid. */
+	nbytes = (int)VTSCSI_OUT_HEADER_LEN(sc);
+	if (io != NULL)
+		nbytes += io->scsiio.ext_data_filled;
+
+out:
+	free(cmd_rd);
+	free(cmd_wr);
+	if (io != NULL)
+		ctl_scsi_free_io(io);
+	return (nbytes);
+}
+
+/*
+ * Notification handler for the control queue (virtqueue 0).
+ *
+ * Each available descriptor chain is linearized into a scratch buffer and
+ * passed to pci_vtscsi_control_handle(), which returns how many bytes at the
+ * END of that buffer form the response.  Those bytes are copied back to the
+ * same position in the guest's chain and the chain is released.
+ *
+ * Fixes relative to the previous version:
+ *  - the response is written at offset (bufsize - iolen) of the chain; it
+ *    used to be written at offset iolen, i.e. into the request fields;
+ *  - the scratch buffer is freed before returning;
+ *  - the results of vq_getchain() and iov_to_buf() are checked;
+ *  - no arithmetic on a void pointer.
+ */
+static void
+pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtscsi_softc *sc = vsc;
+	struct iovec iov[VTSCSI_MAXSEG];
+	uint16_t idx;
+	void *buf = NULL;
+	ssize_t bufsize;
+	int iolen, n;
+
+	while (vq_has_descs(vq)) {
+		n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL);
+		if (n < 1 || n > VTSCSI_MAXSEG) {
+			/* No usable chain (and no valid idx); stop here. */
+			WPRINTF(("virtio-scsi: bad control descriptor "
+			    "chain (%d)\n", n));
+			break;
+		}
+
+		/* iov_to_buf() reuses/resizes buf across iterations. */
+		bufsize = iov_to_buf(iov, n, &buf);
+		if (bufsize < 0 || buf == NULL) {
+			WPRINTF(("virtio-scsi: cannot copy control "
+			    "request\n"));
+			vq_relchain(vq, idx, 0);
+			continue;
+		}
+
+		iolen = pci_vtscsi_control_handle(sc, buf, bufsize);
+		if (iolen < 0 || iolen > bufsize)
+			iolen = 0;
+		if (iolen > 0) {
+			buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen,
+			    iov, n, bufsize - iolen);
+		}
+
+		/*
+		 * Release this chain and handle more
+		 */
+		vq_relchain(vq, idx, iolen);
+	}
+	vq_endchains(vq, 1);	/* Generate interrupt if appropriate. */
+	free(buf);
+}
+
+static void
+pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq)
+{
+ /* Event-queue notify handler: no descriptors are consumed and vsc is unused. */
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; /* presumably tells the guest not to notify for this queue again -- confirm against virtio.h */
+}
+
+/*
+ * Notification handler for a request queue (virtqueue 2 and up).
+ *
+ * Every available descriptor chain is split into its device-readable prefix
+ * and device-writable remainder, wrapped in a pci_vtscsi_request and queued
+ * for the worker threads; one worker is woken per request.
+ *
+ * Fixes relative to the previous version:
+ *  - the vq_getchain() result is kept in an int and validated; it used to
+ *    be stored in a uint16_t and used unchecked as a loop and memcpy bound;
+ *  - a failed allocation no longer dereferences NULL: the request is
+ *    completed immediately with VIRTIO_SCSI_S_FAILURE instead.
+ */
+static void
+pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtscsi_softc *sc = vsc;
+	struct pci_vtscsi_queue *q;
+	struct pci_vtscsi_request *req;
+	struct iovec iov[VTSCSI_MAXSEG];
+	uint16_t flags[VTSCSI_MAXSEG];
+	uint16_t idx;
+	int n, readable, writable;
+
+	/* Queues 0 and 1 are control/event, so request queue i is vq i + 2. */
+	q = &sc->vss_queues[vq->vq_num - 2];
+
+	while (vq_has_descs(vq)) {
+		n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags);
+		if (n < 1 || n > VTSCSI_MAXSEG) {
+			/* No usable chain (and no valid idx); stop here. */
+			WPRINTF(("virtio-scsi: bad request descriptor "
+			    "chain (%d)\n", n));
+			break;
+		}
+
+		/* Count readable descriptors */
+		for (readable = 0; readable < n; readable++) {
+			if (flags[readable] & VRING_DESC_F_WRITE)
+				break;
+		}
+		writable = n - readable;
+
+		req = calloc(1, sizeof(*req));
+		if (req == NULL) {
+			struct pci_vtscsi_req_cmd_wr fail;
+
+			WPRINTF(("virtio-scsi: out of memory, failing "
+			    "request <idx=%d>\n", idx));
+			memset(&fail, 0, sizeof(fail));
+			fail.response = VIRTIO_SCSI_S_FAILURE;
+			if (writable > 0) {
+				buf_to_iov(&fail, sizeof(fail),
+				    iov + readable, writable, 0);
+			}
+
+			/* Same locking as the workers use for completion. */
+			pthread_mutex_lock(&q->vsq_qmtx);
+			vq_relchain(vq, idx,
+			    writable > 0 ? sizeof(fail) : 0);
+			vq_endchains(vq, 0);
+			pthread_mutex_unlock(&q->vsq_qmtx);
+			continue;
+		}
+
+		req->vsr_idx = idx;
+		req->vsr_queue = q;
+		req->vsr_niov_in = readable;
+		req->vsr_niov_out = writable;
+		memcpy(req->vsr_iov_in, iov,
+		    req->vsr_niov_in * sizeof(struct iovec));
+		memcpy(req->vsr_iov_out, iov + readable,
+		    req->vsr_niov_out * sizeof(struct iovec));
+
+		pthread_mutex_lock(&q->vsq_mtx);
+		STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);
+		pthread_cond_signal(&q->vsq_cv);
+		pthread_mutex_unlock(&q->vsq_mtx);
+
+		DPRINTF(("virtio-scsi: request <idx=%d> enqueued\n", idx));
+	}
+}
+
+/*
+ * Set up request queue number num of device sc: open a private descriptor
+ * on /dev/cam/ctl, initialize the queue's locks and lists, and start
+ * VTSCSI_THR_PER_Q worker threads running pci_vtscsi_proc().
+ *
+ * Returns 0 on success and -1 on failure.  When a worker cannot be created
+ * the workers already started are left running (they may be using the
+ * queue), so the descriptor and locks are not torn down.
+ *
+ * Fixes relative to the previous version:
+ *  - the thread name is formatted with snprintf() into a larger buffer;
+ *    "virtio-scsi:0-10" needs 17 bytes and overflowed the old 16-byte
+ *    array through sprintf();
+ *  - calloc() and pthread_create() results are checked.
+ */
+static int
+pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc,
+    struct pci_vtscsi_queue *queue, int num)
+{
+	struct pci_vtscsi_worker *worker;
+	char threadname[32];
+	int err, i;
+
+	queue->vsq_sc = sc;
+	queue->vsq_ctl_fd = open("/dev/cam/ctl", O_RDWR);
+	queue->vsq_vq = &sc->vss_vq[num + 2];
+
+	if (queue->vsq_ctl_fd < 0) {
+		WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno)));
+		return (-1);
+	}
+
+	pthread_mutex_init(&queue->vsq_mtx, NULL);
+	pthread_mutex_init(&queue->vsq_qmtx, NULL);
+	pthread_cond_init(&queue->vsq_cv, NULL);
+	STAILQ_INIT(&queue->vsq_requests);
+	LIST_INIT(&queue->vsq_workers);
+
+	for (i = 0; i < VTSCSI_THR_PER_Q; i++) {
+		worker = calloc(1, sizeof(*worker));
+		if (worker == NULL) {
+			WPRINTF(("virtio-scsi: cannot allocate worker "
+			    "%d-%d\n", num, i));
+			return (-1);
+		}
+		worker->vsw_queue = queue;
+
+		err = pthread_create(&worker->vsw_thread, NULL,
+		    &pci_vtscsi_proc, (void *)worker);
+		if (err != 0) {
+			/* pthread_create() returns the error number. */
+			WPRINTF(("virtio-scsi: cannot start worker %d-%d: "
+			    "%s\n", num, i, strerror(err)));
+			free(worker);
+			return (-1);
+		}
+
+		snprintf(threadname, sizeof(threadname), "virtio-scsi:%d-%d",
+		    num, i);
+		pthread_set_name_np(worker->vsw_thread, threadname);
+		LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link);
+	}
+
+	return (0);
+}
+
+/*
+ * Device-model init hook (pe_init) for "virtio-scsi".
+ *
+ * Allocates the softc, opens the control descriptor on /dev/cam/ctl, parses
+ * the comma-separated option string (only "iid=<number>" is recognized),
+ * links the device into the virtio framework, sets up the control, event
+ * and request queues, and programs the PCI config space.
+ *
+ * Returns 0 on success and 1 on failure.
+ *
+ * Fixes relative to the previous version:
+ *  - an "iid" option without "=value" no longer passes NULL to strtoul();
+ *  - the softc allocation is checked, and freed if the open fails;
+ *  - a failure to set up a request queue aborts initialization instead of
+ *    leaving a queue with uninitialized locks in service;
+ *  - vss_mtx is explicitly initialized before being handed to virtio.
+ */
+static int
+pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct pci_vtscsi_softc *sc;
+	char *optname, *opt;
+	int i;
+
+	sc = calloc(1, sizeof(*sc));
+	if (sc == NULL) {
+		WPRINTF(("virtio-scsi: cannot allocate softc\n"));
+		return (1);
+	}
+
+	sc->vss_ctl_fd = open("/dev/cam/ctl", O_RDWR);
+	if (sc->vss_ctl_fd < 0) {
+		WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno)));
+		free(sc);
+		return (1);
+	}
+
+	while ((opt = strsep(&opts, ",")) != NULL) {
+		optname = strsep(&opt, "=");
+		/* opt is NULL here when the option had no "=value" part. */
+		if (optname != NULL && opt != NULL &&
+		    strcmp(optname, "iid") == 0)
+			sc->vss_iid = strtoul(opt, NULL, 10);
+	}
+
+	pthread_mutex_init(&sc->vss_mtx, NULL);
+	vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq);
+	sc->vss_vs.vs_mtx = &sc->vss_mtx;
+
+	/* controlq */
+	sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ;
+	sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify;
+
+	/* eventq */
+	sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ;
+	sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify;
+
+	/* request queues */
+	for (i = 2; i < VTSCSI_MAXQ; i++) {
+		sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ;
+		sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify;
+		/*
+		 * sc is not freed on failure: workers of queues that were
+		 * already set up hold references to it.
+		 */
+		if (pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2],
+		    i - 2) != 0)
+			return (1);
+	}
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vss_vs, 0);
+
+	return (0);
+}
+
+
+struct pci_devemu pci_de_vscsi = { /* device-model descriptor registered through PCI_EMUL_SET() below */
+ .pe_emu = "virtio-scsi", /* emulation name */
+ .pe_init = pci_vtscsi_init,
+ .pe_barwrite = vi_pci_write, /* BAR accesses are delegated to the vi_pci_* routines */
+ .pe_barread = vi_pci_read
+};
+PCI_EMUL_SET(pci_de_vscsi);
diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c
index 1cb2246486..be87453bf1 100644
--- a/usr/src/cmd/bhyve/pci_xhci.c
+++ b/usr/src/cmd/bhyve/pci_xhci.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Leon Dang <ldang@nahannisys.com>
* Copyright 2018 Joyent, Inc.
* All rights reserved.
@@ -2227,12 +2229,12 @@ pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
sc = pi->pi_arg;
- assert(baridx == 0);
+ assert(baridx == 0);
- pthread_mutex_lock(&sc->mtx);
+ pthread_mutex_lock(&sc->mtx);
if (offset < XHCI_CAPLEN) /* read only registers */
- WPRINTF(("pci_xhci: write RO-CAPs offset %ld\r\n", offset));
+ WPRINTF(("pci_xhci: write RO-CAPs offset %ld\r\n", offset));
else if (offset < sc->dboff)
pci_xhci_hostop_write(sc, offset, value);
else if (offset < sc->rtsoff)
@@ -2240,9 +2242,9 @@ pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
else if (offset < sc->regsend)
pci_xhci_rtsregs_write(sc, offset, value);
else
- WPRINTF(("pci_xhci: write invalid offset %ld\r\n", offset));
+ WPRINTF(("pci_xhci: write invalid offset %ld\r\n", offset));
- pthread_mutex_unlock(&sc->mtx);
+ pthread_mutex_unlock(&sc->mtx);
}
static uint64_t
@@ -2450,9 +2452,9 @@ pci_xhci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
sc = pi->pi_arg;
- assert(baridx == 0);
+ assert(baridx == 0);
- pthread_mutex_lock(&sc->mtx);
+ pthread_mutex_lock(&sc->mtx);
if (offset < XHCI_CAPLEN)
value = pci_xhci_hostcap_read(sc, offset);
else if (offset < sc->dboff)
@@ -2465,10 +2467,10 @@ pci_xhci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
value = pci_xhci_xecp_read(sc, offset);
else {
value = 0;
- WPRINTF(("pci_xhci: read invalid offset %ld\r\n", offset));
+ WPRINTF(("pci_xhci: read invalid offset %ld\r\n", offset));
}
- pthread_mutex_unlock(&sc->mtx);
+ pthread_mutex_unlock(&sc->mtx);
switch (size) {
case 1:
diff --git a/usr/src/cmd/bhyve/pci_xhci.h b/usr/src/cmd/bhyve/pci_xhci.h
index d5f05af5d0..7502f9396a 100644
--- a/usr/src/cmd/bhyve/pci_xhci.h
+++ b/usr/src/cmd/bhyve/pci_xhci.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Leon Dang <ldang@nahannisys.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/ps2kbd.c b/usr/src/cmd/bhyve/ps2kbd.c
index ec3bb9814c..ae82957ffa 100644
--- a/usr/src/cmd/bhyve/ps2kbd.c
+++ b/usr/src/cmd/bhyve/ps2kbd.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 Nahanni Systems Inc.
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/ps2kbd.h b/usr/src/cmd/bhyve/ps2kbd.h
index 34c31b1ea8..17be6d0466 100644
--- a/usr/src/cmd/bhyve/ps2kbd.h
+++ b/usr/src/cmd/bhyve/ps2kbd.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/ps2mouse.c b/usr/src/cmd/bhyve/ps2mouse.c
index cea7210e2a..b2e08262b1 100644
--- a/usr/src/cmd/bhyve/ps2mouse.c
+++ b/usr/src/cmd/bhyve/ps2mouse.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 Nahanni Systems Inc.
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/ps2mouse.h b/usr/src/cmd/bhyve/ps2mouse.h
index 10d5698a30..59430b01e2 100644
--- a/usr/src/cmd/bhyve/ps2mouse.h
+++ b/usr/src/cmd/bhyve/ps2mouse.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c
index e8c74766fe..f761646fc7 100644
--- a/usr/src/cmd/bhyve/rfb.c
+++ b/usr/src/cmd/bhyve/rfb.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 Leon Dang
* Copyright 2018 Joyent, Inc.
@@ -77,11 +79,11 @@ static int rfb_debug = 0;
#define AUTH_LENGTH 16
#define PASSWD_LENGTH 8
-#define SECURITY_TYPE_NONE 1
-#define SECURITY_TYPE_VNC_AUTH 2
+#define SECURITY_TYPE_NONE 1
+#define SECURITY_TYPE_VNC_AUTH 2
-#define AUTH_FAILED_UNAUTH 1
-#define AUTH_FAILED_ERROR 2
+#define AUTH_FAILED_UNAUTH 1
+#define AUTH_FAILED_ERROR 2
struct rfb_softc {
int sfd;
@@ -143,12 +145,12 @@ struct rfb_pixfmt_msg {
#define RFB_ENCODING_ZLIB 6
#define RFB_ENCODING_RESIZE -223
-#define RFB_MAX_WIDTH 2000
-#define RFB_MAX_HEIGHT 1200
+#define RFB_MAX_WIDTH 2000
+#define RFB_MAX_HEIGHT 1200
#define RFB_ZLIB_BUFSZ RFB_MAX_WIDTH*RFB_MAX_HEIGHT*4
/* percentage changes to screen before sending the entire screen */
-#define RFB_SEND_ALL_THRESH 25
+#define RFB_SEND_ALL_THRESH 25
struct rfb_enc_msg {
uint8_t type;
@@ -309,7 +311,7 @@ rfb_send_rect(struct rfb_softc *rc, int cfd, struct bhyvegc_image *gc,
int x, int y, int w, int h)
{
struct rfb_srvr_updt_msg supdt_msg;
- struct rfb_srvr_rect_hdr srect_hdr;
+ struct rfb_srvr_rect_hdr srect_hdr;
unsigned long zlen;
ssize_t nwrite, total;
int err;
@@ -469,9 +471,9 @@ doraw:
return (nwrite);
}
-#define PIX_PER_CELL 32
+#define PIX_PER_CELL 32
#define PIXCELL_SHIFT 5
-#define PIXCELL_MASK 0x1F
+#define PIXCELL_MASK 0x1F
static int
rfb_send_screen(struct rfb_softc *rc, int cfd, int all)
@@ -717,7 +719,7 @@ rfb_wr_thr(void *arg)
tv.tv_usec = 10000;
err = select(cfd+1, &rfds, NULL, NULL, &tv);
- if (err < 0)
+ if (err < 0)
return (NULL);
/* Determine if its time to push screen; ~24hz */
diff --git a/usr/src/cmd/bhyve/rfb.h b/usr/src/cmd/bhyve/rfb.h
index 94d937e5b8..990e2075ac 100644
--- a/usr/src/cmd/bhyve/rfb.h
+++ b/usr/src/cmd/bhyve/rfb.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright 2018 Joyent, Inc.
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/rtc.c b/usr/src/cmd/bhyve/rtc.c
index 73b5610771..09ca3f61ae 100644
--- a/usr/src/cmd/bhyve/rtc.c
+++ b/usr/src/cmd/bhyve/rtc.c
@@ -51,7 +51,7 @@ __FBSDID("$FreeBSD$");
#define RTC_HMEM_SB 0x5c
#define RTC_HMEM_MSB 0x5d
-#define m_64KB (64*1024)
+#define m_64KB (64*1024)
#define m_16MB (16*1024*1024)
#define m_4GB (4ULL*1024*1024*1024)
diff --git a/usr/src/cmd/bhyve/sockstream.c b/usr/src/cmd/bhyve/sockstream.c
index 1789206ff3..b592bce9aa 100644
--- a/usr/src/cmd/bhyve/sockstream.c
+++ b/usr/src/cmd/bhyve/sockstream.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Nahanni Systems, Inc.
* All rights reserved.
*
@@ -82,5 +84,3 @@ stream_write(int fd, const void *buf, ssize_t nbytes)
}
return (len);
}
-
-
diff --git a/usr/src/cmd/bhyve/sockstream.h b/usr/src/cmd/bhyve/sockstream.h
index bb0b3b06eb..ecea849471 100644
--- a/usr/src/cmd/bhyve/sockstream.h
+++ b/usr/src/cmd/bhyve/sockstream.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Nahanni Systems, Inc.
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/task_switch.c b/usr/src/cmd/bhyve/task_switch.c
index 6138bcdef8..b5950a19d8 100644
--- a/usr/src/cmd/bhyve/task_switch.c
+++ b/usr/src/cmd/bhyve/task_switch.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Neel Natu <neel@freebsd.org>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c
index 40eefa069a..656a48f93c 100644
--- a/usr/src/cmd/bhyve/uart_emul.c
+++ b/usr/src/cmd/bhyve/uart_emul.c
@@ -81,7 +81,7 @@ __FBSDID("$FreeBSD$");
#define COM1_BASE 0x3F8
#define COM1_IRQ 4
#define COM2_BASE 0x2F8
-#define COM2_IRQ 3
+#define COM2_IRQ 3
#define DEFAULT_RCLK 1843200
#define DEFAULT_BAUD 9600
@@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$");
#define MSR_DELTA_MASK 0x0f
#ifndef REG_SCR
-#define REG_SCR com_scr
+#define REG_SCR com_scr
#endif
#define FIFOSZ 16
diff --git a/usr/src/cmd/bhyve/usb_emul.c b/usr/src/cmd/bhyve/usb_emul.c
index 3dc12a5c3c..6ecdd9530e 100644
--- a/usr/src/cmd/bhyve/usb_emul.c
+++ b/usr/src/cmd/bhyve/usb_emul.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Nahanni Systems Inc.
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/usb_emul.h b/usr/src/cmd/bhyve/usb_emul.h
index 083557f64f..e55a421b6f 100644
--- a/usr/src/cmd/bhyve/usb_emul.h
+++ b/usr/src/cmd/bhyve/usb_emul.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Leon Dang <ldang@nahannisys.com>
* Copyright 2018 Joyent, Inc.
* All rights reserved.
diff --git a/usr/src/cmd/bhyve/usb_mouse.c b/usr/src/cmd/bhyve/usb_mouse.c
index e9fc77ed8a..e613012071 100644
--- a/usr/src/cmd/bhyve/usb_mouse.c
+++ b/usr/src/cmd/bhyve/usb_mouse.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Leon Dang <ldang@nahannisys.com>
* All rights reserved.
*
@@ -220,16 +222,16 @@ struct umouse_bos_desc umouse_bosd = {
HSETW(.wTotalLength, sizeof(umouse_bosd)),
.bNumDeviceCaps = 1,
},
- .usbssd = {
- .bLength = sizeof(umouse_bosd.usbssd),
- .bDescriptorType = UDESC_DEVICE_CAPABILITY,
- .bDevCapabilityType = 3,
- .bmAttributes = 0,
- HSETW(.wSpeedsSupported, 0x08),
- .bFunctionalitySupport = 3,
- .bU1DevExitLat = 0xa, /* dummy - not used */
- .wU2DevExitLat = { 0x20, 0x00 },
- }
+ .usbssd = {
+ .bLength = sizeof(umouse_bosd.usbssd),
+ .bDescriptorType = UDESC_DEVICE_CAPABILITY,
+ .bDevCapabilityType = 3,
+ .bmAttributes = 0,
+ HSETW(.wSpeedsSupported, 0x08),
+ .bFunctionalitySupport = 3,
+ .bU1DevExitLat = 0xa, /* dummy - not used */
+ .wU2DevExitLat = { 0x20, 0x00 },
+ }
};
diff --git a/usr/src/cmd/bhyve/vga.c b/usr/src/cmd/bhyve/vga.c
index a5f68ec543..314ddeb1e8 100644
--- a/usr/src/cmd/bhyve/vga.c
+++ b/usr/src/cmd/bhyve/vga.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/cmd/bhyve/vga.h b/usr/src/cmd/bhyve/vga.h
index 4364f1b17a..36c6dc15fa 100644
--- a/usr/src/cmd/bhyve/vga.h
+++ b/usr/src/cmd/bhyve/vga.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
@@ -38,8 +40,8 @@
#define GEN_MISC_OUTPUT_PORT 0x3cc
#define GEN_INPUT_STS1_MONO_PORT 0x3ba
#define GEN_INPUT_STS1_COLOR_PORT 0x3da
-#define GEN_IS1_VR 0x08 /* Vertical retrace */
-#define GEN_IS1_DE 0x01 /* Display enable not */
+#define GEN_IS1_VR 0x08 /* Vertical retrace */
+#define GEN_IS1_DE 0x01 /* Display enable not */
/* Attribute controller registers. */
#define ATC_IDX_PORT 0x3c0
@@ -49,14 +51,14 @@
#define ATC_PALETTE0 0
#define ATC_PALETTE15 15
#define ATC_MODE_CONTROL 16
-#define ATC_MC_IPS 0x80 /* Internal palette size */
-#define ATC_MC_GA 0x01 /* Graphics/alphanumeric */
+#define ATC_MC_IPS 0x80 /* Internal palette size */
+#define ATC_MC_GA 0x01 /* Graphics/alphanumeric */
#define ATC_OVERSCAN_COLOR 17
#define ATC_COLOR_PLANE_ENABLE 18
#define ATC_HORIZ_PIXEL_PANNING 19
#define ATC_COLOR_SELECT 20
-#define ATC_CS_C67 0x0c /* Color select bits 6+7 */
-#define ATC_CS_C45 0x03 /* Color select bits 4+5 */
+#define ATC_CS_C67 0x0c /* Color select bits 6+7 */
+#define ATC_CS_C45 0x03 /* Color select bits 4+5 */
/* Sequencer registers. */
#define SEQ_IDX_PORT 0x3c4
@@ -66,22 +68,22 @@
#define SEQ_RESET_ASYNC 0x1
#define SEQ_RESET_SYNC 0x2
#define SEQ_CLOCKING_MODE 1
-#define SEQ_CM_SO 0x20 /* Screen off */
-#define SEQ_CM_89 0x01 /* 8/9 dot clock */
+#define SEQ_CM_SO 0x20 /* Screen off */
+#define SEQ_CM_89 0x01 /* 8/9 dot clock */
#define SEQ_MAP_MASK 2
#define SEQ_CHAR_MAP_SELECT 3
-#define SEQ_CMS_SAH 0x20 /* Char map A bit 2 */
-#define SEQ_CMS_SAH_SHIFT 5
-#define SEQ_CMS_SA 0x0c /* Char map A bits 0+1 */
-#define SEQ_CMS_SA_SHIFT 2
-#define SEQ_CMS_SBH 0x10 /* Char map B bit 2 */
-#define SEQ_CMS_SBH_SHIFT 4
-#define SEQ_CMS_SB 0x03 /* Char map B bits 0+1 */
-#define SEQ_CMS_SB_SHIFT 0
+#define SEQ_CMS_SAH 0x20 /* Char map A bit 2 */
+#define SEQ_CMS_SAH_SHIFT 5
+#define SEQ_CMS_SA 0x0c /* Char map A bits 0+1 */
+#define SEQ_CMS_SA_SHIFT 2
+#define SEQ_CMS_SBH 0x10 /* Char map B bit 2 */
+#define SEQ_CMS_SBH_SHIFT 4
+#define SEQ_CMS_SB 0x03 /* Char map B bits 0+1 */
+#define SEQ_CMS_SB_SHIFT 0
#define SEQ_MEMORY_MODE 4
-#define SEQ_MM_C4 0x08 /* Chain 4 */
-#define SEQ_MM_OE 0x04 /* Odd/even */
-#define SEQ_MM_EM 0x02 /* Extended memory */
+#define SEQ_MM_C4 0x08 /* Chain 4 */
+#define SEQ_MM_OE 0x04 /* Odd/even */
+#define SEQ_MM_EM 0x02 /* Extended memory */
/* Graphics controller registers. */
#define GC_IDX_PORT 0x3ce
@@ -93,13 +95,13 @@
#define GC_DATA_ROTATE 3
#define GC_READ_MAP_SELECT 4
#define GC_MODE 5
-#define GC_MODE_OE 0x10 /* Odd/even */
-#define GC_MODE_C4 0x04 /* Chain 4 */
+#define GC_MODE_OE 0x10 /* Odd/even */
+#define GC_MODE_C4 0x04 /* Chain 4 */
#define GC_MISCELLANEOUS 6
-#define GC_MISC_GM 0x01 /* Graphics/alphanumeric */
-#define GC_MISC_MM 0x0c /* memory map */
-#define GC_MISC_MM_SHIFT 2
+#define GC_MISC_GM 0x01 /* Graphics/alphanumeric */
+#define GC_MISC_MM 0x0c /* memory map */
+#define GC_MISC_MM_SHIFT 2
#define GC_COLOR_DONT_CARE 7
#define GC_BIT_MASK 8
@@ -117,36 +119,36 @@
#define CRTC_END_HORIZ_RETRACE 5
#define CRTC_VERT_TOTAL 6
#define CRTC_OVERFLOW 7
-#define CRTC_OF_VRS9 0x80 /* VRS bit 9 */
-#define CRTC_OF_VRS9_SHIFT 7
-#define CRTC_OF_VDE9 0x40 /* VDE bit 9 */
-#define CRTC_OF_VDE9_SHIFT 6
-#define CRTC_OF_VRS8 0x04 /* VRS bit 8 */
-#define CRTC_OF_VRS8_SHIFT 2
-#define CRTC_OF_VDE8 0x02 /* VDE bit 8 */
-#define CRTC_OF_VDE8_SHIFT 1
+#define CRTC_OF_VRS9 0x80 /* VRS bit 9 */
+#define CRTC_OF_VRS9_SHIFT 7
+#define CRTC_OF_VDE9 0x40 /* VDE bit 9 */
+#define CRTC_OF_VDE9_SHIFT 6
+#define CRTC_OF_VRS8 0x04 /* VRS bit 8 */
+#define CRTC_OF_VRS8_SHIFT 2
+#define CRTC_OF_VDE8 0x02 /* VDE bit 8 */
+#define CRTC_OF_VDE8_SHIFT 1
#define CRTC_PRESET_ROW_SCAN 8
#define CRTC_MAX_SCAN_LINE 9
-#define CRTC_MSL_MSL 0x1f
+#define CRTC_MSL_MSL 0x1f
#define CRTC_CURSOR_START 10
-#define CRTC_CS_CO 0x20 /* Cursor off */
-#define CRTC_CS_CS 0x1f /* Cursor start */
+#define CRTC_CS_CO 0x20 /* Cursor off */
+#define CRTC_CS_CS 0x1f /* Cursor start */
#define CRTC_CURSOR_END 11
-#define CRTC_CE_CE 0x1f /* Cursor end */
+#define CRTC_CE_CE 0x1f /* Cursor end */
#define CRTC_START_ADDR_HIGH 12
#define CRTC_START_ADDR_LOW 13
#define CRTC_CURSOR_LOC_HIGH 14
#define CRTC_CURSOR_LOC_LOW 15
#define CRTC_VERT_RETRACE_START 16
#define CRTC_VERT_RETRACE_END 17
-#define CRTC_VRE_MASK 0xf
+#define CRTC_VRE_MASK 0xf
#define CRTC_VERT_DISP_END 18
#define CRTC_OFFSET 19
#define CRTC_UNDERLINE_LOC 20
#define CRTC_START_VERT_BLANK 21
#define CRTC_END_VERT_BLANK 22
#define CRTC_MODE_CONTROL 23
-#define CRTC_MC_TE 0x80 /* Timing enable */
+#define CRTC_MC_TE 0x80 /* Timing enable */
#define CRTC_LINE_COMPARE 24
/* DAC registers */
diff --git a/usr/src/cmd/bhyve/virtio.c b/usr/src/cmd/bhyve/virtio.c
index fc0525c9ee..4c85000796 100644
--- a/usr/src/cmd/bhyve/virtio.c
+++ b/usr/src/cmd/bhyve/virtio.c
@@ -51,7 +51,7 @@ __FBSDID("$FreeBSD$");
* front of virtio-based device softc" constraint, let's use
* this to convert.
*/
-#define DEV_SOFTC(vs) ((void *)(vs))
+#define DEV_SOFTC(vs) ((void *)(vs))
/*
* Link a virtio_softc to its constants, the device softc, and
diff --git a/usr/src/cmd/bhyve/virtio.h b/usr/src/cmd/bhyve/virtio.h
index f59d823448..a2c3362ec2 100644
--- a/usr/src/cmd/bhyve/virtio.h
+++ b/usr/src/cmd/bhyve/virtio.h
@@ -188,7 +188,7 @@ struct vring_used {
/*
* PFN register shift amount
*/
-#define VRING_PFN 12
+#define VRING_PFN 12
/*
* Virtio device types
@@ -215,6 +215,7 @@ struct vring_used {
#define VIRTIO_DEV_BLOCK 0x1001
#define VIRTIO_DEV_CONSOLE 0x1003
#define VIRTIO_DEV_RANDOM 0x1005
+#define VIRTIO_DEV_SCSI 0x1008
/*
* PCI config space constants.
@@ -225,19 +226,19 @@ struct vring_used {
* If MSI-X is not enabled, those two registers disappear and
* the remaining configuration registers start at offset 20.
*/
-#define VTCFG_R_HOSTCAP 0
-#define VTCFG_R_GUESTCAP 4
-#define VTCFG_R_PFN 8
-#define VTCFG_R_QNUM 12
-#define VTCFG_R_QSEL 14
-#define VTCFG_R_QNOTIFY 16
-#define VTCFG_R_STATUS 18
-#define VTCFG_R_ISR 19
-#define VTCFG_R_CFGVEC 20
-#define VTCFG_R_QVEC 22
-#define VTCFG_R_CFG0 20 /* No MSI-X */
-#define VTCFG_R_CFG1 24 /* With MSI-X */
-#define VTCFG_R_MSIX 20
+#define VTCFG_R_HOSTCAP 0
+#define VTCFG_R_GUESTCAP 4
+#define VTCFG_R_PFN 8
+#define VTCFG_R_QNUM 12
+#define VTCFG_R_QSEL 14
+#define VTCFG_R_QNOTIFY 16
+#define VTCFG_R_STATUS 18
+#define VTCFG_R_ISR 19
+#define VTCFG_R_CFGVEC 20
+#define VTCFG_R_QVEC 22
+#define VTCFG_R_CFG0 20 /* No MSI-X */
+#define VTCFG_R_CFG1 24 /* With MSI-X */
+#define VTCFG_R_MSIX 20
/*
* Bits in VTCFG_R_STATUS. Guests need not actually set any of these,
@@ -256,7 +257,7 @@ struct vring_used {
#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */
#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */
-#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
/*
* Feature flags.
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 5f8932efa8..d7179d5874 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -868,7 +868,7 @@ get_all_registers(struct vmctx *ctx, int vcpu)
if (error == 0)
printf("rflags[%d]\t0x%016lx\n", vcpu, rflags);
}
-
+
return (error);
}
@@ -1135,7 +1135,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
vcpu, u64);
}
}
-
+
if (!error && (get_tpr_threshold || get_all)) {
uint64_t threshold;
error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD,
@@ -1153,7 +1153,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
vcpu, insterr);
}
}
-
+
if (!error && (get_exit_ctls || get_all)) {
error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl);
if (error == 0)
@@ -1201,7 +1201,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
if (error == 0)
printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
}
-
+
if (!error && (get_vmcs_link || get_all)) {
error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr);
if (error == 0)
diff --git a/usr/src/compat/freebsd/amd64/machine/atomic.h b/usr/src/compat/freebsd/amd64/machine/atomic.h
index 0b5998880e..6d8235d37c 100644
--- a/usr/src/compat/freebsd/amd64/machine/atomic.h
+++ b/usr/src/compat/freebsd/amd64/machine/atomic.h
@@ -18,6 +18,17 @@
#define _COMPAT_FREEBSD_AMD64_MACHINE_ATOMIC_H_
static __inline u_int
+atomic_load_acq_short(volatile u_short *p)
+{
+ u_short res;
+
+ res = *p;
+ __asm volatile("" : : : "memory");
+
+ return (res);
+}
+
+static __inline u_int
atomic_load_acq_int(volatile u_int *p)
{
u_int res;
@@ -96,6 +107,23 @@ atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
return (res);
}
+/*
+ * Atomically set bit (v & 0x1f) of *p with a locked "btsl" and return the
+ * previous state of that bit, as captured from the carry flag by "setc".
+ */
+static __inline int
+atomic_testandset_int(volatile u_int *p, u_int v)
+{
+ u_char res;
+
+ __asm __volatile(
+ " lock ; "
+ " btsl %2,%1 ; "
+ " setc %0 ; "
+ "# atomic_testandset_int"
+ : "=q" (res), /* 0 */
+ "+m" (*p) /* 1 */
+ : "Ir" (v & 0x1f) /* 2 */
+ : "cc");
+ return (res);
+}
+
/*
* Atomically add the value of v to the integer pointed to by p and return
* the previous value of *p.
@@ -188,6 +216,13 @@ atomic_swap_long(volatile u_long *p, u_long v)
return (v);
}
+
+#define atomic_store_short(p, v) \
+ (*(volatile u_short *)(p) = (u_short)(v))
+#define atomic_store_int(p, v) \
+ (*(volatile u_int *)(p) = (u_int)(v))
+
+
#define atomic_readandclear_int(p) atomic_swap_int(p, 0)
#define atomic_readandclear_long(p) atomic_swap_long(p, 0)
diff --git a/usr/src/compat/freebsd/amd64/machine/reg.h b/usr/src/compat/freebsd/amd64/machine/reg.h
new file mode 100644
index 0000000000..4a73463603
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/reg.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_REG_H_
+#define _COMPAT_FREEBSD_AMD64_MACHINE_REG_H_
+
+#define DBREG_DR6_RESERVED1 0xffff0ff0
+#define DBREG_DR7_RESERVED1 0x0400
+
+
+#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_REG_H_ */
diff --git a/usr/src/compat/freebsd/sys/endian.h b/usr/src/compat/freebsd/sys/endian.h
index a31bff55d6..24ea02d251 100644
--- a/usr/src/compat/freebsd/sys/endian.h
+++ b/usr/src/compat/freebsd/sys/endian.h
@@ -11,6 +11,7 @@
/*
* Copyright 2014 Pluribus Networks Inc.
+ * Copyright 2018 Joyent, Inc.
*/
#ifndef _COMPAT_FREEBSD_SYS_ENDIAN_H_
@@ -122,4 +123,14 @@ le64enc(void *pp, uint64_t u)
le32enc(p + 4, (uint32_t)(u >> 32));
}
+#ifdef _LITTLE_ENDIAN
+#define htole16(x) ((uint16_t)(x))
+#define htole32(x) ((uint32_t)(x))
+#define htole64(x) ((uint64_t)(x))
+
+#define le16toh(x) ((uint16_t)(x))
+#define le32toh(x) ((uint32_t)(x))
+#define le64toh(x) ((uint64_t)(x))
+#endif
+
#endif /* _COMPAT_FREEBSD_SYS_ENDIAN_H_ */
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index c34bb60de6..7d20a3b323 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -77,8 +77,11 @@ __FBSDID("$FreeBSD$");
#ifndef __FreeBSD__
/* shim to no-op for now */
-#define MAP_NOCORE 0
-#define MAP_ALIGNED_SUPER 0
+#define MAP_NOCORE 0
+#define MAP_ALIGNED_SUPER 0
+
+/* Rely on PROT_NONE for guard purposes */
+#define MAP_GUARD (MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)
#endif
/*
@@ -135,19 +138,19 @@ vm_do_ctl(int cmd, const char *name)
static int
vm_device_open(const char *name)
{
- int fd, len;
- char *vmfile;
+ int fd, len;
+ char *vmfile;
len = strlen("/dev/vmm/") + strlen(name) + 1;
vmfile = malloc(len);
assert(vmfile != NULL);
snprintf(vmfile, len, "/dev/vmm/%s", name);
- /* Open the device file */
- fd = open(vmfile, O_RDWR, 0);
+ /* Open the device file */
+ fd = open(vmfile, O_RDWR, 0);
free(vmfile);
- return (fd);
+ return (fd);
}
int
@@ -425,7 +428,7 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
size_t objsize, len;
vm_paddr_t gpa;
char *baseaddr, *ptr;
- int error, flags;
+ int error;
assert(vms == VM_MMAP_ALL);
@@ -454,16 +457,7 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
* and the adjoining guard regions.
*/
len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
- flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
-#ifndef __FreeBSD__
- /*
- * There is no need to reserve swap for the guest physical memory and
- * guard regions. Actual memory is allocated and mapped later through
- * vm_alloc_memseg() and setup_memory_segment().
- */
- flags |= MAP_NORESERVE;
-#endif
- ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
+ ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
if (ptr == MAP_FAILED)
return (-1);
@@ -607,8 +601,8 @@ vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
* adjoining guard regions.
*/
len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
- flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
- base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
+ base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
+ 0);
if (base == MAP_FAILED)
goto done;
@@ -997,7 +991,7 @@ vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
vmcap.cpuid = vcpu;
vmcap.captype = cap;
vmcap.capval = val;
-
+
return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
}
@@ -1801,4 +1795,3 @@ vm_get_ioctls(size_t *len)
return (NULL);
}
#endif /* __FreeBSD__ */
-
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync
index 667f34b9de..e9a2479b13 100644
--- a/usr/src/uts/i86pc/io/vmm/README.sync
+++ b/usr/src/uts/i86pc/io/vmm/README.sync
@@ -1,22 +1,18 @@
The bhyve kernel module and its associated userland consumers have been updated
to the latest upstream FreeBSD sources as of:
-commit 0fac2150fc0f1befa5803ca010ed63a6335847ad
-Author: grehan <grehan@FreeBSD.org>
-Date: Fri May 4 01:36:49 2018 +0000
+commit f81459bd8363602ed5e436f10288320419e80ccf
+Author: andrew <andrew@FreeBSD.org>
+Date: Thu Sep 27 11:16:19 2018 +0000
- Allow arbitrary numbers of columns for VNC server screen resolution.
+ Handle a guest executing a vm instruction by trapping and raising an
+ undefined instruction exception. Previously we would exit the guest,
+ however an unprivileged user could execute these.
- The prior code only allowed multiples of 32 for the
- numbers of columns. Remove this restriction to allow
- a forthcoming UEFI firmware update to allow arbitrary
- x,y resolutions.
+ Found with: syzkaller
+ Reviewed by: araujo, tychon (previous version)
+ Approved by: re (kib)
+ MFC after: 1 week
+ Differential Revision: https://reviews.freebsd.org/D17192
- (the code for handling rows already supported non mult-32 values)
-
- Reviewed by: Leon Dang (original author)
- MFC after: 3 weeks
- Differential Revision: https://reviews.freebsd.org/D15274
-
-
-Which corresponds to SVN revision: 333235
+Which corresponds to SVN revision: 338957
diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c
new file mode 100644
index 0000000000..f6b6e60363
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c
@@ -0,0 +1,1461 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/resource.h>
+#include <machine/vmm.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/pci_cfgreg.h>
+
+#include "pcib_if.h"
+
+#include "io/iommu.h"
+#include "amdvi_priv.h"
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL);
+
+#define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
+#define MOD_DEC(a, s, m) (((a) - (s)) % ((m) * (s)))
+
+/* Print RID or device ID in PCI string format. */
+#define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)
+
+static void amdvi_dump_cmds(struct amdvi_softc *softc);
+static void amdvi_print_dev_cap(struct amdvi_softc *softc);
+
+MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");
+
+extern device_t *ivhd_devs;
+
+extern int ivhd_count;
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
+ 0, NULL);
+
+/*
+ * User opt-in switch, exported read-only as sysctl node "enable" under
+ * _hw_vmm_amdvi.
+ * NOTE(review): the explicit tunable below is spelled "hw.vmm.amdvi_enable",
+ * unlike the "hw.vmm.amdvi.*" tunables used for the other knobs in this
+ * file - confirm whether that spelling is intentional before relying on it.
+ */
+static int amdvi_enable_user = 0;
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
+ &amdvi_enable_user, 0, NULL);
+TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);
+
+#ifdef AMDVI_ATS_ENABLE
+/* XXX: ATS is not tested. */
+/*
+ * NOTE(review): this tunable is named "hw.vmm.enable_iotlb" while the sysctl
+ * node is "iotlb_enabled" under _hw_vmm_amdvi - confirm the intended name.
+ */
+static int amdvi_enable_iotlb = 1;
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
+ &amdvi_enable_iotlb, 0, NULL);
+TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
+#endif
+
+static int amdvi_host_ptp = 1; /* Use page tables for host. */
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
+ &amdvi_host_ptp, 0, NULL);
+TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);
+
+/* Page table level used <= supported by h/w[v1=7]. */
+static int amdvi_ptp_level = 4;
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
+ &amdvi_ptp_level, 0, NULL);
+TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);
+
+/* Disable fault event reporting. */
+static int amdvi_disable_io_fault = 0;
+SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
+ &amdvi_disable_io_fault, 0, NULL);
+TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);
+
+static uint32_t amdvi_dom_id = 0; /* 0 is reserved for host. */
+SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
+ &amdvi_dom_id, 0, NULL);
+/*
+ * Device table entry.
+ * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
+ * = 256 * 2 * PAGE_SIZE.
+ */
+static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
+CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
+CTASSERT(sizeof(amdvi_dte) == 0x200000);
+
+static SLIST_HEAD (, amdvi_domain) dom_head;
+
+/*
+ * Read the 32-bit PCI configuration register at offset 'off' of the device
+ * identified by softc->pci_rid.
+ */
+static inline uint32_t
+amdvi_pci_read(struct amdvi_softc *softc, int off)
+{
+	const int bus = PCI_RID2BUS(softc->pci_rid);
+	const int slot = PCI_RID2SLOT(softc->pci_rid);
+	const int func = PCI_RID2FUNC(softc->pci_rid);
+
+	return (pci_cfgregread(bus, slot, func, off, 4));
+}
+
+#ifdef AMDVI_ATS_ENABLE
+/* XXX: Should be in pci.c */
+/*
+ * Check if device has ATS capability and it is enabled.
+ * If ATS is absent or disabled, return (-1), otherwise ATS
+ * queue length.
+ */
+static int
+amdvi_find_ats_qlen(uint16_t devid)
+{
+ device_t dev;
+ uint32_t off, cap;
+ int qlen = -1;
+
+ dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
+ PCI_RID2FUNC(devid));
+
+ /* No PCI device at this bus/slot/function: report ATS as unavailable. */
+ if (!dev) {
+ return (-1);
+ }
+#define PCIM_ATS_EN BIT(31)
+
+ if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
+ cap = pci_read_config(dev, off + 4, 4);
+ /* Low five bits carry the queue length; a raw 0 is treated as 32. */
+ qlen = (cap & 0x1F);
+ qlen = qlen ? qlen : 32;
+ printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
+ RID2PCI_STR(devid),
+ (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
+ qlen);
+ /* Only hand back the queue length when the enable bit is set. */
+ qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
+ }
+
+ return (qlen);
+}
+
+/*
+ * Report whether endpoint 'devid' has ATS (device IOTLB) enabled in its PCI
+ * configuration.  The IVHD device entries are consulted only to warn when
+ * their ATS setting disagrees with what the device itself reports.
+ */
+static inline bool
+amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
+{
+	struct ivhd_dev_cfg *entry;
+	bool acpi_ats, dev_ats;
+	int ats_qlen, idx;
+
+	ats_qlen = amdvi_find_ats_qlen(devid);
+	if (ats_qlen < 0)
+		return (false);
+
+	KASSERT(softc, ("softc is NULL"));
+
+	/* Take the ATS flag of the first IVHD range that covers devid. */
+	acpi_ats = false;
+	for (idx = 0; idx < softc->dev_cfg_cnt; idx++) {
+		entry = &softc->dev_cfg[idx];
+		if (devid >= entry->start_id && devid <= entry->end_id) {
+			acpi_ats = entry->enable_ats;
+			break;
+		}
+	}
+
+	/* ats_qlen is non-negative at this point, so this is always true. */
+	dev_ats = (ats_qlen >= 0);
+	if (dev_ats != acpi_ats)
+		device_printf(softc->dev,
+		    "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
+		    "ATS inv qlen = %d\n", RID2PCI_STR(devid), ats_qlen);
+
+	/* Ignore IVRS setting and respect PCI setting. */
+	return (dev_ats);
+}
+#endif
+
+/*
+ * Decide whether the IOMMU may use device IOTLBs and record the result in
+ * softc->iotlb.  Without AMDVI_ATS_ENABLE the answer is always "no".
+ */
+static inline void
+amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
+{
+#ifdef AMDVI_ATS_ENABLE
+	bool use_iotlb;
+
+	use_iotlb = (softc->ivhd_flag & IVHD_FLAG_IOTLB) != 0;
+
+	if ((softc->pci_cap & AMDVI_PCI_CAP_IOTLB) == 0) {
+		/* The PCI capability does not advertise IOTLB support. */
+		use_iotlb = false;
+	} else if (!use_iotlb) {
+		device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
+	} else if (!amdvi_enable_iotlb) {
+		device_printf(softc->dev, "IOTLB disabled by user.\n");
+		use_iotlb = false;
+	}
+
+	softc->iotlb = use_iotlb;
+#else
+	softc->iotlb = false;
+#endif
+}
+
+/*
+ * Allocate the command ring and program it into the IOMMU control block.
+ * Sets the ring length field to 8 (1 << 8 = 256 entries), stores the ring's
+ * physical page number as the base and zeroes the head and tail pointers.
+ * Panics if the allocation is not page aligned; otherwise returns 0.
+ */
+static int
+amdvi_init_cmd(struct amdvi_softc *softc)
+{
+ struct amdvi_ctrl *ctrl = softc->ctrl;
+
+ ctrl->cmd.len = 8; /* Use 256 command buffer entries. */
+ softc->cmd_max = 1 << ctrl->cmd.len;
+
+ softc->cmd = malloc(sizeof(struct amdvi_cmd) *
+ softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
+
+ if ((uintptr_t)softc->cmd & PAGE_MASK)
+ panic("AMDVi: Command buffer not aligned on page boundary.");
+
+ /* The base field takes a page frame number, not a byte address. */
+ ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
+ /*
+ * XXX: Reset the h/w pointers in case IOMMU is restarting,
+ * h/w doesn't clear these pointers based on empirical data.
+ */
+ ctrl->cmd_tail = 0;
+ ctrl->cmd_head = 0;
+
+ return (0);
+}
+
+/*
+ * Note: Update tail pointer after we have written the command since tail
+ * pointer update causes h/w to execute new commands, see section 3.3
+ * of AMD IOMMU spec ver 2.0.
+ */
+/*
+ * Return the command slot located at the current tail offset of the command
+ * ring.  The tail pointer itself is left untouched.
+ */
+static struct amdvi_cmd *
+amdvi_get_cmd_tail(struct amdvi_softc *softc)
+{
+	struct amdvi_ctrl *ctrl;
+	uint8_t *ring;
+
+	KASSERT(softc, ("softc is NULL"));
+	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
+
+	ctrl = softc->ctrl;
+	KASSERT(ctrl != NULL, ("ctrl is NULL"));
+
+	/* cmd_tail is a byte offset into the ring. */
+	ring = (uint8_t *)softc->cmd;
+	return ((struct amdvi_cmd *)(ring + ctrl->cmd_tail));
+}
+
+/*
+ * Update the command tail pointer which will start command execution.
+ *
+ * The tail advances by one command entry and wraps at the end of the ring
+ * (softc->cmd_max entries).  softc->total_cmd counts every queued command.
+ */
+static void
+amdvi_update_cmd_tail(struct amdvi_softc *softc)
+{
+	struct amdvi_ctrl *ctrl;
+	int size;
+
+	size = sizeof(struct amdvi_cmd);
+	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
+
+	ctrl = softc->ctrl;
+	KASSERT(ctrl != NULL, ("ctrl is NULL"));
+
+	ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
+	softc->total_cmd++;
+
+#ifdef AMDVI_DEBUG_CMD
+	/*
+	 * The format used to contain a stray "%s" with no matching argument,
+	 * which made printf interpret cmd_tail as a string pointer.
+	 */
+	device_printf(softc->dev, "cmd_tail: Tail:0x%x, Head:0x%x.\n",
+	    ctrl->cmd_tail,
+	    ctrl->cmd_head);
+#endif
+
+}
+
+/*
+ * Various commands supported by IOMMU.
+ */
+
+/*
+ * Completion wait command.
+ *
+ * Queues a COMPLETION_WAIT with the store option: the command carries the
+ * physical address of softc->cmp_data and the value 'data' that
+ * amdvi_cmp_wait() later polls for.
+ */
+static void
+amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
+{
+	struct amdvi_cmd *slot;
+	uint64_t store_pa;
+
+	slot = amdvi_get_cmd_tail(softc);
+	KASSERT(slot != NULL, ("Cmd is NULL"));
+
+	store_pa = vtophys(&softc->cmp_data);
+
+	slot->opcode = AMDVI_CMP_WAIT_OPCODE;
+	/* Low address bits (8-byte aligned) share word0 with the option flag. */
+	slot->word0 = (store_pa & 0xFFFFFFF8) | (AMDVI_CMP_WAIT_STORE);
+	slot->word1 = (store_pa >> 32) & 0xFFFFF;
+	slot->addr = data;
+
+	amdvi_update_cmd_tail(softc);
+}
+
+/* Queue a command that invalidates the device table entry of 'devid'. */
+static void
+amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
+{
+	struct amdvi_cmd *slot = amdvi_get_cmd_tail(softc);
+
+	KASSERT(slot != NULL, ("Cmd is NULL"));
+
+	slot->opcode = AMDVI_INVD_DTE_OPCODE;
+	slot->word0 = devid;
+
+	/* Publishing the new tail hands the command to the hardware. */
+	amdvi_update_cmd_tail(softc);
+#ifdef AMDVI_DEBUG_CMD
+	device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
+#endif
+}
+
+/*
+ * Queue an INVALIDATE_IOMMU_PAGES command for 'domain_id'.
+ *
+ * 'addr' is placed in the command's address field; 'pde' and 'page' OR in
+ * AMDVI_INVD_PAGE_PDE and AMDVI_INVD_PAGE_S respectively.  'guest_nested'
+ * is accepted but not used by this implementation.
+ */
+static void
+amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
+    uint64_t addr, bool guest_nested, bool pde, bool page)
+{
+	struct amdvi_cmd *slot;
+	uint64_t target;
+
+	slot = amdvi_get_cmd_tail(softc);
+	KASSERT(slot != NULL, ("Cmd is NULL"));
+
+	/* Build the address word, including the option bits, up front. */
+	target = addr;
+	if (pde)
+		target |= AMDVI_INVD_PAGE_PDE;
+	if (page)
+		target |= AMDVI_INVD_PAGE_S;
+
+	slot->opcode = AMDVI_INVD_PAGE_OPCODE;
+	slot->word1 = domain_id;
+	slot->addr = target;
+
+	amdvi_update_cmd_tail(softc);
+}
+
+#ifdef AMDVI_ATS_ENABLE
+/*
+ * Invalidate device IOTLB.
+ *
+ * Does nothing when softc->iotlb is false.  Otherwise queues an IOTLB
+ * invalidation for 'devid' covering AMDVI_INVD_IOTLB_ALL_ADDR, passing the
+ * device's ATS queue length in word1.  Panics if the device reports no
+ * usable ATS queue length.
+ */
+static void
+amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
+{
+ struct amdvi_cmd *cmd;
+ int qlen;
+
+ if (!softc->iotlb)
+ return;
+
+ qlen = amdvi_find_ats_qlen(devid);
+ if (qlen < 0) {
+ panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
+ qlen, RID2PCI_STR(devid));
+ }
+ cmd = amdvi_get_cmd_tail(softc);
+ KASSERT(cmd != NULL, ("Cmd is NULL"));
+
+#ifdef AMDVI_DEBUG_CMD
+ device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
+ " Qlen:%d\n", devid, qlen);
+#endif
+ cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
+ cmd->word0 = devid;
+ cmd->word1 = qlen;
+ cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
+ AMDVI_INVD_IOTLB_S;
+ amdvi_update_cmd_tail(softc);
+}
+#endif
+
+#ifdef notyet /* For Interrupt Remap. */
+/*
+ * Queue a command that invalidates the interrupt remapping state of 'devid'.
+ * Only compiled when "notyet" is defined.
+ */
+static void
+amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
+ uint16_t devid)
+{
+ struct amdvi_cmd *cmd;
+
+ cmd = amdvi_get_cmd_tail(softc);
+ KASSERT(cmd != NULL, ("Cmd is NULL"));
+ cmd->opcode = AMDVI_INVD_INTR_OPCODE;
+ cmd->word0 = devid;
+ amdvi_update_cmd_tail(softc);
+#ifdef AMDVI_DEBUG_CMD
+ device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
+#endif
+}
+#endif
+
+/*
+ * Invalidate domain using INVALIDATE_IOMMU_PAGES command.
+ *
+ * Issues one invalidation covering AMDVI_INVD_PAGE_ALL_ADDR with both the
+ * PDE and size options set.  The command slot is obtained (and asserted)
+ * by amdvi_cmd_inv_iommu_pages() itself, so no separate lookup is done here.
+ */
+static void
+amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
+{
+
+	/*
+	 * See section 3.3.3 of IOMMU spec rev 2.0, software note
+	 * for invalidating domain.
+	 */
+	amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
+	    false, true, true);
+
+#ifdef AMDVI_DEBUG_CMD
+	device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
+
+#endif
+}
+
+/*
+ * Queue a completion-wait command and poll for its result.
+ *
+ * Clears softc->cmp_data, asks the hardware to store a marker value there
+ * and polls up to 100 times, 1 ms apart.  Returns true if the marker was
+ * observed, false on timeout.
+ */
+static bool
+amdvi_cmp_wait(struct amdvi_softc *softc)
+{
+#ifdef AMDVI_DEBUG_CMD
+	struct amdvi_ctrl *ctrl = softc->ctrl;
+#endif
+	const uint64_t VERIFY = 0xA5A5;
+	volatile uint64_t *read;
+	int i;
+	bool status;
+
+	read = &softc->cmp_data;
+	*read = 0;
+	amdvi_cmd_cmp(softc, VERIFY);
+	/* Wait for h/w to update completion data. */
+	for (i = 0; i < 100 && (*read != VERIFY); i++) {
+		DELAY(1000);	/* 1 ms */
+	}
+	/* Re-read through the volatile pointer so the load is not cached. */
+	status = (*read == VERIFY);
+
+#ifdef AMDVI_DEBUG_CMD
+	/* The loop counter is 'i'; the old code named an undeclared 'loop'. */
+	if (status)
+		device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
+		    "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
+		    ctrl->cmd_head, i);
+#endif
+	return (status);
+}
+
+/*
+ * Wait for the IOMMU to drain its command queue.
+ *
+ * Returns immediately when the hardware is not enabled.  Otherwise makes up
+ * to ten completion-wait attempts; if none succeeds, logs the ring pointers
+ * and dumps the pending commands.
+ */
+static void
+amdvi_wait(struct amdvi_softc *softc)
+{
+	struct amdvi_ctrl *ctrl;
+	int attempt;
+
+	KASSERT(softc, ("softc is NULL"));
+
+	ctrl = softc->ctrl;
+	KASSERT(ctrl != NULL, ("ctrl is NULL"));
+
+	/* Don't wait if h/w is not enabled. */
+	if (!(ctrl->control & AMDVI_CTRL_EN))
+		return;
+
+	attempt = 0;
+	while (attempt < 10) {
+		if (amdvi_cmp_wait(softc))
+			return;
+		attempt++;
+	}
+
+	device_printf(softc->dev, "Error: completion failed"
+	    " tail:0x%x, head:0x%x.\n",
+	    ctrl->cmd_tail, ctrl->cmd_head);
+	amdvi_dump_cmds(softc);
+}
+
+/*
+ * Print the commands in the ring, starting one entry before the hardware
+ * head pointer and stopping at the tail (or after cmd_max entries).
+ */
+static void
+amdvi_dump_cmds(struct amdvi_softc *softc)
+{
+ struct amdvi_ctrl *ctrl;
+ struct amdvi_cmd *cmd;
+ int off, i;
+
+ ctrl = softc->ctrl;
+ device_printf(softc->dev, "Dump all the commands:\n");
+ /*
+ * If h/w is stuck in completion, it is the previous command,
+ * start dumping from previous command onward.
+ */
+ off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
+ softc->cmd_max);
+ for (i = 0; off != ctrl->cmd_tail &&
+ i < softc->cmd_max; i++) {
+ cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
+ printf(" [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
+ " 0x%x 0x%lx\n", i, off, cmd->opcode,
+ cmd->word0, cmd->word1, cmd->addr);
+ /* Advance by one entry, wrapping at the end of the ring. */
+ off = (off + sizeof(struct amdvi_cmd)) %
+ (softc->cmd_max * sizeof(struct amdvi_cmd));
+ }
+}
+
+/*
+ * Allocate the event log ring and program it into the IOMMU control block.
+ *
+ * Sets the log length field to 8 (1 << 8 entries), stores the ring's
+ * physical page number as the base and zeroes the head and tail pointers.
+ *
+ * Returns 0 on success, or ENOMEM when the allocated buffer is not page
+ * aligned and so cannot be described by a page frame number.  In that case
+ * the buffer stays recorded in softc->event and the hardware base is left
+ * unprogrammed.
+ */
+static int
+amdvi_init_event(struct amdvi_softc *softc)
+{
+	struct amdvi_ctrl *ctrl;
+
+	ctrl = softc->ctrl;
+	ctrl->event.len = 8;
+	softc->event_max = 1 << ctrl->event.len;
+	softc->event = malloc(sizeof(struct amdvi_event) *
+	    softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
+	if ((uintptr_t)softc->event & PAGE_MASK) {
+		device_printf(softc->dev,
+		    "Event buffer not aligned on page.\n");
+		/*
+		 * This used to be "return (false)", which is 0 and therefore
+		 * indistinguishable from success in an int-returning function.
+		 */
+		return (ENOMEM);
+	}
+	ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
+
+	/* Reset the pointers. */
+	ctrl->evt_head = 0;
+	ctrl->evt_tail = 0;
+
+	return (0);
+}
+
+/*
+ * Print the event flag bits selected by AMDVI_EVENT_FLAG_MASK, followed by
+ * a closing "]" and newline.  The kernel printf "%b" conversion prints the
+ * value in hex (the leading "\020" selects base 16) and then the name of
+ * every set bit, using the 1-based bit numbers that prefix each name.
+ */
+static inline void
+amdvi_decode_evt_flag(uint16_t flag)
+{
+
+ flag &= AMDVI_EVENT_FLAG_MASK;
+ printf(" 0x%b]\n", flag,
+ "\020"
+ "\001GN"
+ "\002NX"
+ "\003US"
+ "\004I"
+ "\005PR"
+ "\006RW"
+ "\007PE"
+ "\010RZ"
+ "\011TR"
+ );
+}
+
+/*
+ * Print the error type carried in an event flag word.
+ * See section 2.5.4 of AMD IOMMU spec ver 2.62.
+ *
+ * 'flag' is the raw 16-bit event flag.  The type field is extracted here
+ * with AMDVI_EVENT_FLAG_TYPE(), so callers pass the flag unmodified.  The
+ * parameter used to be uint8_t, which discarded the upper bits of the flag
+ * before the macro could look at them (the type is taken from bit 9 upward
+ * elsewhere in this file - NOTE(review): confirm against the macro
+ * definition in amdvi_priv.h).
+ */
+static inline void
+amdvi_decode_evt_flag_type(uint16_t flag)
+{
+
+	switch (AMDVI_EVENT_FLAG_TYPE(flag)) {
+	case 0:
+		printf("RSVD\n");
+		break;
+	case 1:
+		printf("Master Abort\n");
+		break;
+	case 2:
+		printf("Target Abort\n");
+		break;
+	case 3:
+		printf("Data Err\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Print an invalid device table entry event.  The label used to read
+ * "IO_PAGE_FAULT", making it indistinguishable from a real page fault.
+ */
+static void
+amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
+    uint16_t flag)
+{
+
+	printf("\t[INVALID_DTE EVT: devId:0x%x DomId:0x%x"
+	    " Addr:0x%lx",
+	    devid, domid, addr);
+	amdvi_decode_evt_flag(flag);
+}
+
+/* Print an I/O page fault event. */
+static void
+amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
+    uint16_t flag)
+{
+
+	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
+	    " Addr:0x%lx",
+	    devid, domid, addr);
+	amdvi_decode_evt_flag(flag);
+}
+
+/* Print a device table hardware error event, including its error type. */
+static void
+amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
+    uint64_t addr, uint16_t flag)
+{
+
+	printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
+	    " Addr:0x%lx", devid, domid, addr);
+	amdvi_decode_evt_flag(flag);
+	amdvi_decode_evt_flag_type(flag);
+}
+
+/* Print a page table hardware error event, including its error type. */
+static void
+amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
+    uint16_t flag)
+{
+
+	printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
+	    " Addr:0x%lx", devid, domid, addr);
+	amdvi_decode_evt_flag(flag);
+	/* Pass the raw flag: the helper extracts the type field itself. */
+	amdvi_decode_evt_flag_type(flag);
+}
+
+/*
+ * Print a human-readable description of one event log entry, dispatching
+ * on its opcode.  Unknown opcodes are reported as unsupported.
+ */
+static void
+amdvi_decode_evt(struct amdvi_event *evt)
+{
+ struct amdvi_cmd *cmd;
+
+ switch (evt->opcode) {
+ case AMDVI_EVENT_INVALID_DTE:
+ amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
+ evt->addr, evt->flag);
+ break;
+
+ case AMDVI_EVENT_PFAULT:
+ amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
+ evt->addr, evt->flag);
+ break;
+
+ case AMDVI_EVENT_DTE_HW_ERROR:
+ amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
+ evt->addr, evt->flag);
+ break;
+
+ case AMDVI_EVENT_PAGE_HW_ERROR:
+ amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
+ evt->addr, evt->flag);
+ break;
+
+ case AMDVI_EVENT_ILLEGAL_CMD:
+ /* FALL THROUGH */
+ case AMDVI_EVENT_CMD_HW_ERROR:
+ printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
+ "ILLEGAL CMD" : "CMD HW ERR");
+ /* evt->addr is treated as the physical address of the command. */
+ cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
+ printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
+ cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
+ break;
+
+ case AMDVI_EVENT_IOTLB_TIMEOUT:
+ printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n",
+ evt->devid, evt->addr);
+ break;
+
+ case AMDVI_EVENT_INVALID_DTE_REQ:
+ printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n",
+ evt->devid, evt->addr, evt->flag >> 9,
+ (evt->flag >> 8) & 1);
+ break;
+
+ case AMDVI_EVENT_INVALID_PPR_REQ:
+ case AMDVI_EVENT_COUNTER_ZERO:
+ printf("AMD-Vi: v2 events.\n");
+ break;
+
+ default:
+ printf("Unsupported AMD-Vi event:%d\n", evt->opcode);
+ }
+}
+
+/*
+ * Decode and print pending event log entries, starting at the current head.
+ * Stops at the first entry whose opcode is zero or after event_max entries,
+ * advancing the head pointer past every entry that was printed.
+ */
+static void
+amdvi_print_events(struct amdvi_softc *softc)
+{
+ struct amdvi_ctrl *ctrl;
+ struct amdvi_event *event;
+ int i, size;
+
+ ctrl = softc->ctrl;
+ size = sizeof(struct amdvi_event);
+ for (i = 0; i < softc->event_max; i++) {
+ /* evt_head is a byte offset; convert it to an entry index. */
+ event = &softc->event[ctrl->evt_head / size];
+ if (!event->opcode)
+ break;
+ device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
+ i, ctrl->evt_head, ctrl->evt_tail);
+ amdvi_decode_evt(event);
+ ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
+ softc->event_max);
+ }
+}
+
+/*
+ * Point the IOMMU at the statically allocated device table (amdvi_dte):
+ * the base is stored as a physical page number and the size field is set
+ * to 0x1FF.  Always returns 0.
+ */
+static int
+amdvi_init_dte(struct amdvi_softc *softc)
+{
+ struct amdvi_ctrl *ctrl;
+
+ ctrl = softc->ctrl;
+ ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
+ ctrl->dte.size = 0x1FF; /* 2MB device table. */
+
+ return (0);
+}
+
+/*
+ * Read the IOMMU capability header straight from PCI config space, since
+ * not every capability is reported through the ACPI IVHD flags or the EFR.
+ * The top byte of the header is cached in softc->pci_cap and printed.
+ * Always returns 0.
+ */
+static int
+amdvi_print_pci_cap(device_t dev)
+{
+	struct amdvi_softc *sc = device_get_softc(dev);
+	uint32_t offset, header;
+
+	/* Section 3.7.1 of IOMMU spec rev 2.0: capability header. */
+	offset = sc->cap_off;
+	header = amdvi_pci_read(sc, offset);
+
+	/* Capability type, bits [18:16], must be 3 for an IOMMU. */
+	KASSERT((((header >> 16) & 0x7) == 0x3),
+	    ("Not a IOMMU capability 0x%x@0x%x", header, offset));
+
+	sc->pci_cap = header >> 24;
+	device_printf(sc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
+	    header, offset, sc->pci_cap,
+	    "\20\1IOTLB\2HT\3NPCache\4EFR\5CapExt");
+
+	return (0);
+}
+
+/*
+ * Event log interrupt handler; arg is the amdvi_softc of the IOMMU.
+ *
+ * Logs the interrupt count and the command/event ring pointers, decodes the
+ * pending events, then writes the event overflow/interrupt status bits back
+ * to the status register.
+ */
+static void
+amdvi_event_intr(void *arg)
+{
+	struct amdvi_softc *sc = (struct amdvi_softc *)arg;
+	struct amdvi_ctrl *regs = sc->ctrl;
+
+	device_printf(sc->dev, "EVT INTR %ld Status:0x%x"
+	    " EVT Head:0x%x Tail:0x%x]\n", sc->event_intr_cnt++,
+	    regs->status, regs->evt_head, regs->evt_tail);
+	printf(" [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
+	    sc->total_cmd, regs->cmd_tail, regs->cmd_head);
+
+	amdvi_print_events(sc);
+	regs->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
+}
+
+/*
+ * Undo amdvi_alloc_intr_resources(): tear down the event interrupt handler,
+ * release and delete the IRQ resource, and hand the MSI vector back to the
+ * PCI bridge.
+ */
+static void
+amdvi_free_evt_intr_res(device_t dev)
+{
+	struct amdvi_softc *sc = device_get_softc(dev);
+
+	if (sc->event_tag != NULL)
+		bus_teardown_intr(dev, sc->event_res, sc->event_tag);
+
+	if (sc->event_res != NULL)
+		bus_release_resource(dev, SYS_RES_IRQ, sc->event_rid,
+		    sc->event_res);
+
+	/* These two run unconditionally, as in the allocation order. */
+	bus_delete_resource(dev, SYS_RES_IRQ, sc->event_rid);
+	PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
+	    dev, 1, &sc->event_irq);
+}
+
+/*
+ * Allocate one MSI vector for the IOMMU event log, attach
+ * amdvi_event_intr() to it and program the MSI address/data pair into the
+ * IOMMU's PCI function.
+ *
+ * Returns 0 on success or an errno value on failure.  The return type is
+ * int rather than bool: with bool every errno below was collapsed to 1
+ * before it reached the caller, which stores the result in an int status.
+ *
+ * NOTE(review): the failure paths before PCIB_MAP_MSI() do not release the
+ * MSI vector (or the IRQ resource entry) obtained earlier; whether the
+ * caller cleans up after a failed setup is not visible from here.
+ */
+static int
+amdvi_alloc_intr_resources(struct amdvi_softc *softc)
+{
+	struct amdvi_ctrl *ctrl;
+	device_t dev, pcib;
+	device_t mmio_dev;
+	uint64_t msi_addr;
+	uint32_t msi_data;
+	int err;
+
+	dev = softc->dev;
+	pcib = device_get_parent(device_get_parent(dev));
+	mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid),
+	    PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid));
+	/*
+	 * pci_find_bsf() returns NULL when no such PCI function exists;
+	 * mmio_dev is dereferenced below, so bail out instead of faulting.
+	 */
+	if (mmio_dev == NULL) {
+		device_printf(dev, "Couldn't find IOMMU PCI device.\n");
+		return (ENXIO);
+	}
+	if (device_is_attached(mmio_dev)) {
+		device_printf(dev,
+		    "warning: IOMMU device is claimed by another driver %s\n",
+		    device_get_driver(mmio_dev)->name);
+	}
+
+	softc->event_irq = -1;
+	softc->event_rid = 0;
+
+	/*
+	 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
+	 * interrupt. XXX: Enable MSI/X support.
+	 */
+	err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
+	if (err) {
+		device_printf(dev,
+		    "Couldn't find event MSI IRQ resource.\n");
+		return (ENOENT);
+	}
+
+	err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
+	    softc->event_irq, 1);
+	if (err) {
+		device_printf(dev, "Couldn't set event MSI resource.\n");
+		return (ENXIO);
+	}
+
+	softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &softc->event_rid, RF_ACTIVE);
+	if (!softc->event_res) {
+		device_printf(dev,
+		    "Unable to allocate event INTR resource.\n");
+		return (ENOMEM);
+	}
+
+	if (bus_setup_intr(dev, softc->event_res,
+	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
+	    softc, &softc->event_tag)) {
+		device_printf(dev, "Fail to setup event intr\n");
+		bus_release_resource(softc->dev, SYS_RES_IRQ,
+		    softc->event_rid, softc->event_res);
+		softc->event_res = NULL;
+		return (ENXIO);
+	}
+
+	bus_describe_intr(dev, softc->event_res, softc->event_tag,
+	    "fault");
+
+	err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
+	    &msi_data);
+	if (err) {
+		device_printf(dev,
+		    "Event interrupt config failed, err=%d.\n",
+		    err);
+		amdvi_free_evt_intr_res(softc->dev);
+		return (err);
+	}
+
+	/* Clear interrupt status bits. */
+	ctrl = softc->ctrl;
+	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
+
+	/* Now enable MSI interrupt. */
+	pci_enable_msi(mmio_dev, msi_addr, msi_data);
+	return (0);
+}
+
+
+/*
+ * Print the BIOS (IVHD) provided configuration of every device range
+ * recorded in softc->dev_cfg.
+ *
+ * The %b descriptor names bits by 1-based position in octal.  LINT1 is
+ * bit 8 and must be written "\010"; the former "\008" is not an octal
+ * escape (it parses as NUL followed by '8') and cut the descriptor short.
+ */
+static void
+amdvi_print_dev_cap(struct amdvi_softc *softc)
+{
+	struct ivhd_dev_cfg *cfg;
+	int i;
+
+	cfg = softc->dev_cfg;
+	for (i = 0; i < softc->dev_cfg_cnt; i++) {
+		device_printf(softc->dev, "device [0x%x - 0x%x]"
+		    "config:%b%s\n", cfg->start_id, cfg->end_id,
+		    cfg->data,
+		    "\020\001INIT\002ExtInt\003NMI"
+		    "\007LINT0\010LINT1",
+		    cfg->enable_ats ? "ATS enabled" : "");
+		cfg++;
+	}
+}
+
+/*
+ * Shared read handler for the ring pointer sysctls.  arg1 is the softc and
+ * arg2 selects the register: 0 = command head, 1 = command tail,
+ * 2 = event head, 3 = event tail.  An unknown selector is logged and
+ * reported as success (0).
+ */
+static int
+amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct amdvi_softc *sc = (struct amdvi_softc *)arg1;
+	int which = arg2;
+	int value;
+
+	switch (which) {
+	case 0:
+		value = sc->ctrl->cmd_head;
+		break;
+	case 1:
+		value = sc->ctrl->cmd_tail;
+		break;
+	case 2:
+		value = sc->ctrl->evt_head;
+		break;
+	case 3:
+		value = sc->ctrl->evt_tail;
+		break;
+	default:
+		device_printf(sc->dev, "Unknown sysctl:%d\n", which);
+		return (0);
+	}
+
+	return (sysctl_handle_int(oidp, &value, 0, req));
+}
+
+/*
+ * Publish per-IOMMU statistics and ring pointers under the device's sysctl
+ * tree.  The four ring pointer nodes share amdvi_handle_sysctl(); the
+ * integer passed as arg2 (0..3) selects which register is read.
+ */
+static void
+amdvi_add_sysctl(struct amdvi_softc *softc)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	device_t dev;
+
+	dev = softc->dev;
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
+	    &softc->event_intr_cnt, "Event interrupt count");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
+	    &softc->total_cmd, "Command submitted count");
+	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
+	    &softc->pci_rid, 0, "IOMMU RID");
+	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
+	    &softc->start_dev_rid, 0, "Start of device under this IOMMU");
+	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
+	    &softc->end_dev_rid, 0, "End of device under this IOMMU");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
+	    CTLTYPE_UINT | CTLFLAG_RD, softc, 0,
+	    amdvi_handle_sysctl, "IU", "Command head");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
+	    CTLTYPE_UINT | CTLFLAG_RD, softc, 1,
+	    amdvi_handle_sysctl, "IU", "Command tail");
+	/* Descriptions below used to repeat the command ring text. */
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
+	    CTLTYPE_UINT | CTLFLAG_RD, softc, 2,
+	    amdvi_handle_sysctl, "IU", "Event head");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
+	    CTLTYPE_UINT | CTLFLAG_RD, softc, 3,
+	    amdvi_handle_sysctl, "IU", "Event tail");
+}
+
+/*
+ * Bring up one IOMMU: enable IOTLB support, print the device and PCI
+ * capabilities, set up the command buffer, event buffer and device table,
+ * allocate the event interrupt and register the sysctl nodes.
+ *
+ * Returns 0 on success; otherwise the non-zero status of the first step
+ * that failed.  Nothing set up by earlier steps is undone here.
+ */
+int
+amdvi_setup_hw(struct amdvi_softc *softc)
+{
+	device_t self = softc->dev;
+	int error;
+
+	amdvi_hw_enable_iotlb(softc);
+	amdvi_print_dev_cap(softc);
+
+	error = amdvi_print_pci_cap(self);
+	if (error != 0) {
+		device_printf(self, "PCI capability.\n");
+		return (error);
+	}
+
+	error = amdvi_init_cmd(softc);
+	if (error != 0) {
+		device_printf(self, "Couldn't configure command buffer.\n");
+		return (error);
+	}
+
+	error = amdvi_init_event(softc);
+	if (error != 0) {
+		device_printf(self, "Couldn't configure event buffer.\n");
+		return (error);
+	}
+
+	error = amdvi_init_dte(softc);
+	if (error != 0) {
+		device_printf(self, "Couldn't configure device table.\n");
+		return (error);
+	}
+
+	/* The interrupt allocator prints its own diagnostics. */
+	error = amdvi_alloc_intr_resources(softc);
+	if (error != 0)
+		return (error);
+
+	amdvi_add_sysctl(softc);
+	return (0);
+}
+
+/*
+ * Release what amdvi_setup_hw() acquired: the event interrupt resources
+ * and the command and event buffers.  Expected to run after the hardware
+ * has been disabled.  Always returns 0.
+ */
+int
+amdvi_teardown_hw(struct amdvi_softc *softc)
+{
+
+	/* H/w is stopped by now, so the interrupt can go first. */
+	amdvi_free_evt_intr_res(softc->dev);
+
+	if (softc->cmd != NULL)
+		free(softc->cmd, M_AMDVI);
+	if (softc->event != NULL)
+		free(softc->event, M_AMDVI);
+
+	return (0);
+}
+
+/*********** bhyve interfaces *********************/
+/*
+ * iommu_ops init hook.
+ *
+ * Returns EIO when no IVHD device was found, EINVAL (after printing a hint)
+ * when devices exist but amdvi_enable_user is not set, and 0 otherwise.
+ */
+static int
+amdvi_init(void)
+{
+	if (ivhd_count == 0)
+		return (EIO);
+
+	if (amdvi_enable_user)
+		return (0);
+
+	/* Hardware is present but the user has not opted in. */
+	printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
+	    "use hw.vmm.amdvi.enable=1 to enable pass-through.\n",
+	    ivhd_count);
+	return (EINVAL);
+}
+
+/* iommu_ops cleanup hook; intentionally a no-op. */
+static void
+amdvi_cleanup(void)
+{
+}
+
+/*
+ * Hand out the next domain id from the global counter amdvi_dom_id.
+ *
+ * When the counter has reached AMDVI_MAX_DOMAIN it restarts at 1, which
+ * keeps id 0 reserved for the host domain.
+ * XXX: nothing checks that a recycled id is not still in use.
+ */
+static uint16_t
+amdvi_domainId(void)
+{
+	uint16_t id;
+
+	if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
+		amdvi_dom_id = 1;
+
+	id = (uint16_t)amdvi_dom_id;
+	amdvi_dom_id++;
+	return (id);
+}
+
+/*
+ * Issue a domain invalidation on every IOMMU and wait for each to finish.
+ * The create flag is only consulted by the disabled (#if 0) optimisation
+ * below.
+ */
+static void
+amdvi_do_inv_domain(uint16_t domain_id, bool create)
+{
+	struct amdvi_softc *sc;
+	int unit;
+
+	unit = 0;
+	while (unit < ivhd_count) {
+		sc = device_get_softc(ivhd_devs[unit]);
+		KASSERT(sc, ("softc is NULL"));
+		/*
+		 * If not present pages are cached, invalidate page after
+		 * creating domain.
+		 */
+#if 0
+		if (create && ((sc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0)) {
+			unit++;
+			continue;
+		}
+#endif
+		amdvi_inv_domain(sc, domain_id);
+		amdvi_wait(sc);
+		unit++;
+	}
+}
+
+/*
+ * iommu_ops create_domain hook.
+ *
+ * Allocates a zeroed amdvi_domain, assigns it the next domain id, gives it
+ * a root page table page (skipped for the host domain, id 0, unless
+ * amdvi_host_ptp is set), invalidates the domain on every IOMMU and links
+ * it onto dom_head.  maxaddr is currently unused.
+ *
+ * Returns the new domain as an opaque pointer.
+ */
+static void *
+amdvi_create_domain(vm_paddr_t maxaddr)
+{
+	struct amdvi_domain *domain;
+
+	domain = malloc(sizeof(struct amdvi_domain), M_AMDVI,
+	    M_ZERO | M_WAITOK);
+	domain->id = amdvi_domainId();
+#ifdef AMDVI_DEBUG_CMD
+	printf("Created domain #%d\n", domain->id);
+#endif
+	/* Host domain(#0) gets no translation table by default. */
+	if (domain->id != 0 || amdvi_host_ptp)
+		domain->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
+
+	domain->ptp_level = amdvi_ptp_level;
+
+	amdvi_do_inv_domain(domain->id, true);
+	SLIST_INSERT_HEAD(&dom_head, domain, next);
+
+	return (domain);
+}
+
+/*
+ * Recursively free a page table page and every table reachable through its
+ * present entries.  level is the level of ptp; a level below 1 is not a
+ * table and is left alone.
+ */
+static void
+amdvi_free_ptp(uint64_t *ptp, int level)
+{
+	uint64_t *child;
+	int slot;
+
+	if (level < 1)
+		return;
+
+	for (slot = 0; slot < NPTEPG; slot++) {
+		if ((ptp[slot] & AMDVI_PT_PRESENT) == 0)
+			continue;
+		/* XXX: Add super-page or PTE mapping > 4KB. */
+#ifdef notyet
+		/* Super-page mapping. */
+		if (AMDVI_PD_SUPER(ptp[slot]))
+			continue;
+#endif
+		child = (uint64_t *)PHYS_TO_DMAP(ptp[slot] & AMDVI_PT_MASK);
+		amdvi_free_ptp(child, level - 1);
+	}
+
+	free(ptp, M_AMDVI);
+}
+
+/*
+ * iommu_ops destroy_domain hook; arg is the amdvi_domain to destroy.
+ *
+ * Frees the domain's page tables (if any), invalidates the domain on every
+ * IOMMU, unlinks it from dom_head and frees the descriptor.
+ */
+static void
+amdvi_destroy_domain(void *arg)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+	KASSERT(dom, ("domain is NULL"));
+#ifdef AMDVI_DEBUG_CMD
+	printf("Destroying domain %d\n", dom->id);
+#endif
+	if (dom->ptp != NULL)
+		amdvi_free_ptp(dom->ptp, dom->ptp_level);
+
+	amdvi_do_inv_domain(dom->id, false);
+	SLIST_REMOVE(&dom_head, dom, amdvi_domain, next);
+	free(dom, M_AMDVI);
+}
+
+/*
+ * Install (create == true) or clear (create == false) the leaf entry that
+ * translates gpa in the page table rooted at pt.
+ *
+ * pt      - root page table (kernel virtual address).
+ * level   - level of the root table; used for the level field written into
+ *           newly created intermediate entries.
+ * gpa/hpa - guest and host physical addresses; both must be aligned to
+ *           pg_size.
+ * pg_size - requested mapping size.
+ *
+ * Returns the number of bytes covered by the leaf level that was reached
+ * (1 << shift), or 0 if pg_size is zero or an address is misaligned.
+ *
+ * When unmapping, an absent intermediate entry means nothing is mapped
+ * below it.  The walk then stops touching memory instead of following a
+ * zero entry through PHYS_TO_DMAP(0) and writing into an unrelated page;
+ * the return value is unchanged so the caller still advances.
+ */
+static uint64_t
+amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
+    vm_paddr_t hpa, uint64_t pg_size, bool create)
+{
+	uint64_t *page, pa;
+	int shift, index;
+	const int PT_SHIFT = 9;
+	const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1;	/* Based on PT_SHIFT */
+
+	if (!pg_size)
+		return (0);
+
+	if (hpa & (pg_size - 1)) {
+		printf("HPA is not size aligned.\n");
+		return (0);
+	}
+	if (gpa & (pg_size - 1)) {
+		printf("GPA is not size aligned.\n");
+		return (0);
+	}
+	shift = PML4SHIFT;
+	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
+		if (pt != NULL) {
+			index = (gpa >> shift) & PT_INDEX_MASK;
+
+			if ((pt[index] == 0) && create) {
+				page = malloc(PAGE_SIZE, M_AMDVI,
+				    M_WAITOK | M_ZERO);
+				pa = vtophys(page);
+				pt[index] = pa | AMDVI_PT_PRESENT |
+				    AMDVI_PT_RW |
+				    ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
+			}
+#ifdef AMDVI_DEBUG_PTE
+			if ((gpa % 0x1000000) == 0)
+				printf("[level%d, shift = %d]PTE:0x%lx\n",
+				    level, shift, pt[index]);
+#endif
+#define PTE2PA(x)	((uint64_t)(x) & AMDVI_PT_MASK)
+			if (pt[index] == 0) {
+				/* Only reachable when unmapping. */
+				pt = NULL;
+			} else {
+				pa = PTE2PA(pt[index]);
+				pt = (uint64_t *)PHYS_TO_DMAP(pa);
+			}
+		}
+		/* Keep descending so the returned size is unaffected. */
+		shift -= PT_SHIFT;
+		level--;
+	}
+
+	if (pt != NULL) {
+		/* Leaf entry. */
+		index = (gpa >> shift) & PT_INDEX_MASK;
+
+		if (create) {
+			pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
+		} else
+			pt[index] = 0;
+
+#ifdef AMDVI_DEBUG_PTE
+		if ((gpa % 0x1000000) == 0)
+			printf("[Last level%d, shift = %d]PTE:0x%lx\n",
+			    level, shift, pt[index]);
+#endif
+	}
+	return (1ULL << shift);
+}
+
+/*
+ * Map (create == true) or unmap (create == false) size bytes starting at
+ * gpa in the domain's page table, one PAGE_SIZE request at a time.
+ *
+ * Returns the number of bytes processed, or 0 if amdvi_set_pt() fails for
+ * any page.
+ */
+static uint64_t
+amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
+    vm_paddr_t hpa, uint64_t size, bool create)
+{
+	uint64_t done, step, *root;
+	int depth;
+
+	KASSERT(domain, ("domain is NULL"));
+	depth = domain->ptp_level;
+	KASSERT(depth, ("Page table level is 0"));
+
+	root = domain->ptp;
+	KASSERT(root, ("PTP is NULL"));
+
+	for (done = 0; done < size; done += step) {
+		step = amdvi_set_pt(root, depth, gpa + done, hpa + done,
+		    PAGE_SIZE, create);
+		if (step == 0) {
+			printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
+			    hpa, gpa);
+			return (0);
+		}
+	}
+
+	return (done);
+}
+
+/*
+ * iommu_ops create_mapping hook; arg is the amdvi_domain.
+ *
+ * With a page table present the range is mapped through
+ * amdvi_update_mapping().  A host domain (id 0) without a page table needs
+ * no IOMMU set-up, so len is returned unchanged.  A non-host domain without
+ * a page table is an error: a message is printed and -1, converted to
+ * uint64_t, is returned.
+ */
+static uint64_t
+amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
+    uint64_t len)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+	if (dom->ptp != NULL)
+		return (amdvi_update_mapping(dom, gpa, hpa, len, true));
+
+	if (dom->id != 0) {
+		printf("ptp is NULL");
+		return (-1);
+	}
+
+	return (len);
+}
+
+/*
+ * iommu_ops destroy_mapping hook; arg is the amdvi_domain.
+ *
+ * A domain created without a page table (the host domain) has nothing to
+ * unmap, so len is returned as is; otherwise the range is cleared through
+ * amdvi_update_mapping().
+ */
+static uint64_t
+amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+	if (dom->ptp == NULL)
+		return (len);
+
+	return (amdvi_update_mapping(dom, gpa, 0, len, false));
+}
+
+/*
+ * Return the softc of the IOMMU whose [start_dev_rid, end_dev_rid] range
+ * contains devid.  If no range matches, a BIOS bug is reported and the
+ * first IOMMU is returned.
+ */
+static struct amdvi_softc *
+amdvi_find_iommu(uint16_t devid)
+{
+	struct amdvi_softc *sc;
+	int unit;
+
+	for (unit = 0; unit < ivhd_count; unit++) {
+		sc = device_get_softc(ivhd_devs[unit]);
+		if (devid < sc->start_dev_rid || devid > sc->end_dev_rid)
+			continue;
+		return (sc);
+	}
+
+	/*
+	 * XXX: BIOS bug, device not in IVRS table, assume it is behind the
+	 * first IOMMU.
+	 */
+	printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
+	    RID2PCI_STR(devid));
+
+	return (device_get_softc(ivhd_devs[0]));
+}
+
+/*
+ * Set-up device table entry.
+ * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
+ * be set concurrently, e.g. read and write bits.
+ *
+ * domain - domain the device is being bound to; its id is always written
+ *          into the entry.
+ * devid  - PCI RID of the device, used directly as the index into the
+ *          global amdvi_dte table.
+ * enable - when true, also install the domain's page table root (if it has
+ *          one) and set the translation-valid, read and write bits.
+ *
+ * NOTE(review): with enable == false the translation fields (pt_valid,
+ * read_allow, write_allow, pt_base, pt_level) are left as they were; this
+ * function never clears them.
+ */
+static void
+amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
+{
+ struct amdvi_softc *softc;
+ struct amdvi_dte* temp;
+
+ KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid));
+
+ /* softc is only consulted by the ATS block below and the assertion. */
+ softc = amdvi_find_iommu(devid);
+ KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
+
+ temp = &amdvi_dte[devid];
+
+#ifdef AMDVI_ATS_ENABLE
+ /* If IOMMU and device support IOTLB, enable it. */
+ if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
+ temp->iotlb_enable = 1;
+#endif
+
+ /* Avoid duplicate I/O faults. */
+ temp->sup_second_io_fault = 1;
+ temp->sup_all_io_fault = amdvi_disable_io_fault;
+
+ /* The entry is marked valid and tagged for both enable and disable. */
+ temp->dt_valid = 1;
+ temp->domain_id = domain->id;
+
+ if (enable) {
+ if (domain->ptp) {
+ /* pt_base holds the root table address shifted right by 12. */
+ temp->pt_base = vtophys(domain->ptp) >> 12;
+ temp->pt_level = amdvi_ptp_level;
+ }
+ /*
+ * XXX: Page table valid[TV] bit must be set even if host domain
+ * page tables are not enabled.
+ */
+ temp->pt_valid = 1;
+ temp->read_allow = 1;
+ temp->write_allow = 1;
+ }
+}
+
+/*
+ * Invalidate the cached device table entry for devid on the IOMMU that
+ * owns it (plus the device IOTLB when ATS support is compiled in), then
+ * wait for the IOMMU to complete the commands.
+ */
+static void
+amdvi_inv_device(uint16_t devid)
+{
+	struct amdvi_softc *sc = amdvi_find_iommu(devid);
+
+	KASSERT(sc, ("softc is NULL"));
+
+	amdvi_cmd_inv_dte(sc, devid);
+#ifdef AMDVI_ATS_ENABLE
+	if (amdvi_dev_support_iotlb(sc, devid))
+		amdvi_cmd_inv_iotlb(sc, devid);
+#endif
+	amdvi_wait(sc);
+}
+
+/*
+ * iommu_ops add_device hook: bind the device identified by devid to the
+ * domain passed in arg and flush its cached device table entry.
+ */
+static void
+amdvi_add_device(void *arg, uint16_t devid)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+	KASSERT(dom != NULL, ("domain is NULL"));
+#ifdef AMDVI_DEBUG_CMD
+	printf("Assigning device(%d.%d.%d) to domain:%d\n",
+	    RID2PCI_STR(devid), dom->id);
+#endif
+	amdvi_set_dte(dom, devid, true);
+	amdvi_inv_device(devid);
+}
+
+/*
+ * iommu_ops remove_device hook: rewrite the device table entry for devid
+ * with translation not enabled and flush the cached entry.
+ */
+static void
+amdvi_remove_device(void *arg, uint16_t devid)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+#ifdef AMDVI_DEBUG_CMD
+	printf("Remove device(0x%x) from domain:%d\n",
+	    devid, dom->id);
+#endif
+	amdvi_set_dte(dom, devid, false);
+	amdvi_inv_device(devid);
+}
+
+/*
+ * iommu_ops enable hook: turn on translation, the command buffer, event
+ * logging and the event interrupt on every IOMMU, and copy the relevant
+ * IVHD flags into the control register.
+ *
+ * The AMDVI_CTRL_INV_TO_* constants are raw field values (0..6), not bit
+ * masks.  The invalidation timeout field lives in bits [7:5] of the control
+ * register, so the value has to be shifted into place; OR-ing it in
+ * unshifted merely re-set bit 2 (AMDVI_CTRL_ELOG) and left the timeout
+ * disabled.
+ */
+static void
+amdvi_enable(void)
+{
+	struct amdvi_ctrl *ctrl;
+	struct amdvi_softc *softc;
+	uint64_t val;
+	int i;
+	const int inv_to_shift = 5;	/* InvTimeOut is control[7:5]. */
+
+	for (i = 0; i < ivhd_count; i++) {
+		softc = device_get_softc(ivhd_devs[i]);
+		KASSERT(softc, ("softc is NULL\n"));
+		ctrl = softc->ctrl;
+		KASSERT(ctrl, ("ctrl is NULL\n"));
+
+		val = (AMDVI_CTRL_EN |
+		    AMDVI_CTRL_CMD |
+		    AMDVI_CTRL_ELOG |
+		    AMDVI_CTRL_ELOGINT |
+		    ((uint64_t)AMDVI_CTRL_INV_TO_1S << inv_to_shift));
+
+		if (softc->ivhd_flag & IVHD_FLAG_COH)
+			val |= AMDVI_CTRL_COH;
+		if (softc->ivhd_flag & IVHD_FLAG_HTT)
+			val |= AMDVI_CTRL_HTT;
+		if (softc->ivhd_flag & IVHD_FLAG_RPPW)
+			val |= AMDVI_CTRL_RPPW;
+		if (softc->ivhd_flag & IVHD_FLAG_PPW)
+			val |= AMDVI_CTRL_PPW;
+		if (softc->ivhd_flag & IVHD_FLAG_ISOC)
+			val |= AMDVI_CTRL_ISOC;
+
+		ctrl->control = val;
+	}
+}
+
+/*
+ * iommu_ops disable hook: write zero to the control register of every
+ * IOMMU.
+ */
+static void
+amdvi_disable(void)
+{
+	struct amdvi_softc *sc;
+	struct amdvi_ctrl *regs;
+	int unit;
+
+	for (unit = 0; unit < ivhd_count; unit++) {
+		sc = device_get_softc(ivhd_devs[unit]);
+		KASSERT(sc, ("softc is NULL\n"));
+		regs = sc->ctrl;
+		KASSERT(regs, ("ctrl is NULL\n"));
+
+		regs->control = 0;
+	}
+}
+
+/*
+ * iommu_ops TLB invalidation hook: invalidate the domain passed in arg on
+ * every IOMMU.
+ */
+static void
+amdvi_inv_tlb(void *arg)
+{
+	struct amdvi_domain *dom = (struct amdvi_domain *)arg;
+
+	KASSERT(dom, ("domain is NULL"));
+	amdvi_do_inv_domain(dom->id, false);
+}
+
+/*
+ * AMD-Vi implementation of the generic IOMMU operations vector.
+ * The initializer is positional, so the order of the entries must match
+ * the member order of struct iommu_ops (declared outside this file).
+ */
+struct iommu_ops iommu_ops_amd = {
+ amdvi_init,
+ amdvi_cleanup,
+ amdvi_enable,
+ amdvi_disable,
+ amdvi_create_domain,
+ amdvi_destroy_domain,
+ amdvi_create_mapping,
+ amdvi_destroy_mapping,
+ amdvi_add_device,
+ amdvi_remove_device,
+ amdvi_inv_tlb
+};
diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h
new file mode 100644
index 0000000000..6ee6c36632
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h
@@ -0,0 +1,431 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Anish Gupta (anish@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _AMDVI_PRIV_H_
+#define _AMDVI_PRIV_H_
+
+#include <contrib/dev/acpica/include/acpi.h>
+
+/* 64-bit value with only bit n set. */
+#define BIT(n) (1ULL << (n))
+/*
+ * Return the value of bits[n:m] of x, where n and m are bit positions and
+ * n >= m.  The mask is built from an int-typed 1, so the field width
+ * (n - m + 1) must stay below the width of int.
+ */
+#define REG_BITS(x, n, m) (((x) >> (m)) & \
+ ((1 << (((n) - (m)) + 1)) - 1))
+
+/*
+ * IOMMU PCI capability.
+ * Bit masks for the capability byte cached in amdvi_softc.pci_cap.
+ */
+#define AMDVI_PCI_CAP_IOTLB BIT(0) /* IOTLB is supported. */
+#define AMDVI_PCI_CAP_HT BIT(1) /* HyperTransport tunnel support. */
+#define AMDVI_PCI_CAP_NPCACHE BIT(2) /* Not present page cached. */
+#define AMDVI_PCI_CAP_EFR BIT(3) /* Extended features. */
+#define AMDVI_PCI_CAP_EXT BIT(4) /* Miscellaneous information reg. */
+
+/*
+ * IOMMU extended features.
+ */
+#define AMDVI_EX_FEA_PREFSUP BIT(0) /* Prefetch command support. */
+#define AMDVI_EX_FEA_PPRSUP BIT(1) /* PPR support */
+#define AMDVI_EX_FEA_XTSUP BIT(2) /* Reserved */
+#define AMDVI_EX_FEA_NXSUP BIT(3) /* No-execute. */
+#define AMDVI_EX_FEA_GTSUP BIT(4) /* Guest translation support. */
+#define AMDVI_EX_FEA_EFRW BIT(5) /* Reserved */
+#define AMDVI_EX_FEA_IASUP BIT(6) /* Invalidate all command supp. */
+#define AMDVI_EX_FEA_GASUP BIT(7) /* Guest APIC or AVIC support. */
+#define AMDVI_EX_FEA_HESUP BIT(8) /* Hardware Error. */
+#define AMDVI_EX_FEA_PCSUP BIT(9) /* Performance counters support. */
+/* XXX: add more EFR (Extended Feature Register) bits. */
+
+/*
+ * Device table entry or DTE
+ * NOTE: Must be 256-bits/32 bytes aligned.
+ *
+ * One entry describes how the IOMMU treats a single device.  The bitfields
+ * are declared in ascending bit order (see the Reserved[...] ranges) and
+ * the structure is packed; the CTASSERT below verifies that the whole
+ * entry is exactly 32 bytes.
+ */
+struct amdvi_dte {
+ uint32_t dt_valid:1; /* Device Table valid. */
+ uint32_t pt_valid:1; /* Page translation valid. */
+ uint16_t :7; /* Reserved[8:2] */
+ uint8_t pt_level:3; /* Paging level, 0 to disable. */
+ uint64_t pt_base:40; /* Page table root pointer. */
+ uint8_t :3; /* Reserved[54:52] */
+ uint8_t gv_valid:1; /* Revision 2, GVA to SPA. */
+ uint8_t gv_level:2; /* Revision 2, GLX level. */
+ uint8_t gv_cr3_lsb:3; /* Revision 2, GCR3[14:12] */
+ uint8_t read_allow:1; /* I/O read enabled. */
+ uint8_t write_allow:1; /* I/O write enabled. */
+ uint8_t :1; /* Reserved[63] */
+ uint16_t domain_id:16; /* Domain ID */
+ uint16_t gv_cr3_lsb2:16; /* Revision 2, GCR3[30:15] */
+ uint8_t iotlb_enable:1; /* Device support IOTLB */
+ uint8_t sup_second_io_fault:1; /* Suppress subsequent I/O faults. */
+ uint8_t sup_all_io_fault:1; /* Suppress all I/O page faults. */
+ uint8_t IOctl:2; /* Port I/O control. */
+ uint8_t iotlb_cache_disable:1; /* IOTLB cache hints. */
+ uint8_t snoop_disable:1; /* Snoop disable. */
+ uint8_t allow_ex:1; /* Allow exclusion. */
+ uint8_t sysmgmt:2; /* System management message.*/
+ uint8_t :1; /* Reserved[106] */
+ uint32_t gv_cr3_msb:21; /* Revision 2, GCR3[51:31] */
+ uint8_t intmap_valid:1; /* Interrupt map valid. */
+ uint8_t intmap_len:4; /* Interrupt map table length. */
+ uint8_t intmap_ign:1; /* Ignore unmapped interrupts. */
+ uint64_t intmap_base:46; /* IntMap base. */
+ uint8_t :4; /* Reserved[183:180] */
+ uint8_t init_pass:1; /* INIT pass through or PT */
+ uint8_t extintr_pass:1; /* External Interrupt PT */
+ uint8_t nmi_pass:1; /* NMI PT */
+ uint8_t :1; /* Reserved[187] */
+ uint8_t intr_ctrl:2; /* Interrupt control */
+ uint8_t lint0_pass:1; /* LINT0 PT */
+ uint8_t lint1_pass:1; /* LINT1 PT */
+ uint64_t :64; /* Reserved[255:192] */
+} __attribute__((__packed__));
+CTASSERT(sizeof(struct amdvi_dte) == 32);
+
+/*
+ * IOMMU command entry.
+ * Packed layout: a 32-bit word, a 28-bit word sharing its dword with the
+ * 4-bit opcode in the top bits, then a 64-bit address (16 bytes total).
+ */
+struct amdvi_cmd {
+ uint32_t word0;
+ uint32_t word1:28;
+ uint8_t opcode:4; /* One of the *_OPCODE command opcodes. */
+ uint64_t addr;
+} __attribute__((__packed__));
+
+/* Command opcodes, stored in amdvi_cmd.opcode. */
+#define AMDVI_CMP_WAIT_OPCODE 0x1 /* Completion wait. */
+#define AMDVI_INVD_DTE_OPCODE 0x2 /* Invalidate device table entry. */
+#define AMDVI_INVD_PAGE_OPCODE 0x3 /* Invalidate pages. */
+#define AMDVI_INVD_IOTLB_OPCODE 0x4 /* Invalidate IOTLB pages. */
+#define AMDVI_INVD_INTR_OPCODE 0x5 /* Invalidate Interrupt table. */
+#define AMDVI_PREFETCH_PAGES_OPCODE 0x6 /* Prefetch IOMMU pages. */
+#define AMDVI_COMP_PPR_OPCODE 0x7 /* Complete PPR request. */
+#define AMDVI_INV_ALL_OPCODE 0x8 /* Invalidate all. */
+
+/* Completion wait attributes. */
+#define AMDVI_CMP_WAIT_STORE BIT(0) /* Write back data. */
+#define AMDVI_CMP_WAIT_INTR BIT(1) /* Completion wait interrupt. */
+#define AMDVI_CMP_WAIT_FLUSH BIT(2) /* Flush queue. */
+
+/* Invalidate page. */
+#define AMDVI_INVD_PAGE_S BIT(0) /* Invalidation size. */
+#define AMDVI_INVD_PAGE_PDE BIT(1) /* Invalidate PDE. */
+#define AMDVI_INVD_PAGE_GN_GVA BIT(2) /* GPA or GVA. */
+
+/* Address value with bits [62:12] all set. */
+#define AMDVI_INVD_PAGE_ALL_ADDR (0x7FFFFFFFFFFFFULL << 12)
+
+/* Invalidate IOTLB. */
+#define AMDVI_INVD_IOTLB_S BIT(0) /* Invalidation size 4k or addr */
+#define AMDVI_INVD_IOTLB_GN_GVA BIT(2) /* GPA or GVA. */
+
+/* Address value with bits [62:12] all set. */
+#define AMDVI_INVD_IOTLB_ALL_ADDR (0x7FFFFFFFFFFFFULL << 12)
+/* XXX: add more command entries. */
+
+/*
+ * IOMMU event entry.
+ * One 16-byte record of the event log (size checked by the CTASSERT
+ * below).  The opcode selects one of the AMDVI_EVENT_* types; how the
+ * other fields are interpreted depends on it.
+ */
+struct amdvi_event {
+ uint16_t devid;
+ uint16_t pasid_hi;
+ uint16_t pasid_domid; /* PASID low or DomainID */
+ uint16_t flag:12;
+ uint8_t opcode:4;
+ uint64_t addr;
+} __attribute__((__packed__));
+CTASSERT(sizeof(struct amdvi_event) == 16);
+
+/* Various event types, as found in amdvi_event.opcode. */
+#define AMDVI_EVENT_INVALID_DTE 0x1
+#define AMDVI_EVENT_PFAULT 0x2
+#define AMDVI_EVENT_DTE_HW_ERROR 0x3
+#define AMDVI_EVENT_PAGE_HW_ERROR 0x4
+#define AMDVI_EVENT_ILLEGAL_CMD 0x5
+#define AMDVI_EVENT_CMD_HW_ERROR 0x6
+#define AMDVI_EVENT_IOTLB_TIMEOUT 0x7
+#define AMDVI_EVENT_INVALID_DTE_REQ 0x8
+#define AMDVI_EVENT_INVALID_PPR_REQ 0x9
+#define AMDVI_EVENT_COUNTER_ZERO 0xA
+
+#define AMDVI_EVENT_FLAG_MASK 0x1FF /* Mask for event flags. */
+/* Two-bit type field held in bits [10:9] of the event flag. */
+#define AMDVI_EVENT_FLAG_TYPE(x) (((x) >> 9) & 0x3)
+
+/*
+ * IOMMU control block.
+ *
+ * Overlay for the IOMMU register window.  The structure is packed and the
+ * CTASSERTs that follow pin pad1 at offset 0x58, pad2 at 0x2028 and pad3
+ * at 0x2040, which places the ring pointer registers at 0x2000 onward.
+ */
+struct amdvi_ctrl {
+ /* Device table base register, offset 0x00. */
+ struct {
+ uint16_t size:9;
+ uint16_t :3;
+ uint64_t base:40; /* Devtable register base. */
+ uint16_t :12;
+ } dte;
+ /* Command buffer base register, offset 0x08. */
+ struct {
+ uint16_t :12;
+ uint64_t base:40;
+ uint8_t :4;
+ uint8_t len:4;
+ uint8_t :4;
+ } cmd;
+ /* Event log base register, offset 0x10. */
+ struct {
+ uint16_t :12;
+ uint64_t base:40;
+ uint8_t :4;
+ uint8_t len:4;
+ uint8_t :4;
+ } event;
+ /* Control register, offset 0x18; only the low 13 bits are exposed. */
+ uint16_t control :13;
+ uint64_t :51;
+ /* Exclusion range base and limit, offsets 0x20 and 0x28. */
+ struct {
+ uint8_t enable:1;
+ uint8_t allow:1;
+ uint16_t :10;
+ uint64_t base:40;
+ uint16_t :12;
+ uint16_t :12;
+ uint64_t limit:40;
+ uint16_t :12;
+ } excl;
+ /*
+ * Revision 2 only.
+ */
+ uint64_t ex_feature;
+ struct {
+ uint16_t :12;
+ uint64_t base:40;
+ uint8_t :4;
+ uint8_t len:4;
+ uint8_t :4;
+ } ppr;
+ uint64_t first_event;
+ uint64_t second_event;
+ uint64_t event_status;
+ /* Revision 2 only, end. */
+ uint8_t pad1[0x1FA8]; /* Padding. */
+ /* Ring pointers and status: 0x2000, 0x2008, 0x2010, 0x2018, 0x2020. */
+ uint32_t cmd_head:19;
+ uint64_t :45;
+ uint32_t cmd_tail:19;
+ uint64_t :45;
+ uint32_t evt_head:19;
+ uint64_t :45;
+ uint32_t evt_tail:19;
+ uint64_t :45;
+ uint32_t status:19;
+ uint64_t :45;
+ uint64_t pad2;
+ /* PPR log head and tail pointers, offsets 0x2030 and 0x2038. */
+ uint8_t :4;
+ uint16_t ppr_head:15;
+ uint64_t :45;
+ uint8_t :4;
+ uint16_t ppr_tail:15;
+ uint64_t :45;
+ uint8_t pad3[0x1FC0]; /* Padding. */
+
+ /* XXX: More for rev2. */
+} __attribute__((__packed__));
+CTASSERT(offsetof(struct amdvi_ctrl, pad1)== 0x58);
+CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028);
+CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040);
+
+#define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */
+/*
+ * AMD IOMMU v2 size including event counters
+ */
+#define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE)
+
+/* struct amdvi_ctrl must cover exactly the v1 register window. */
+CTASSERT(sizeof(struct amdvi_ctrl) == 0x4000);
+CTASSERT(sizeof(struct amdvi_ctrl) == AMDVI_MMIO_V1_SIZE);
+
+/* IVHD flag bits, as stored in amdvi_softc.ivhd_flag. */
+#define IVHD_FLAG_HTT BIT(0) /* Hypertransport Tunnel. */
+#define IVHD_FLAG_PPW BIT(1) /* Pass posted write. */
+#define IVHD_FLAG_RPPW BIT(2) /* Response pass posted write. */
+#define IVHD_FLAG_ISOC BIT(3) /* Isoc support. */
+#define IVHD_FLAG_IOTLB BIT(4) /* IOTLB support. */
+#define IVHD_FLAG_COH BIT(5) /* Coherent control, default 1 */
+#define IVHD_FLAG_PFS BIT(6) /* Prefetch IOMMU pages. */
+#define IVHD_FLAG_PPRS BIT(7) /* Peripheral page support. */
+
+/* IVHD device entry data setting (bits of ivhd_dev_cfg.data). */
+#define IVHD_DEV_LINT0_PASS BIT(6) /* LINT0 interrupts. */
+#define IVHD_DEV_LINT1_PASS BIT(7) /* LINT1 interrupts. */
+
+/* Bit[5:4] for System Mgmt. Bit3 is reserved. */
+#define IVHD_DEV_INIT_PASS BIT(0) /* INIT */
+#define IVHD_DEV_EXTINTR_PASS BIT(1) /* ExtInt */
+#define IVHD_DEV_NMI_PASS BIT(2) /* NMI */
+
+/* IVHD 8-byte extended data settings. */
+#define IVHD_DEV_EXT_ATS_DISABLE BIT(31) /* Disable ATS */
+
+/* IOMMU control register bit masks. */
+#define AMDVI_CTRL_EN BIT(0) /* IOMMU enable. */
+#define AMDVI_CTRL_HTT BIT(1) /* Hypertransport tunnel enable. */
+#define AMDVI_CTRL_ELOG BIT(2) /* Event log enable. */
+#define AMDVI_CTRL_ELOGINT BIT(3) /* Event log interrupt. */
+#define AMDVI_CTRL_COMINT BIT(4) /* Completion wait interrupt. */
+#define AMDVI_CTRL_PPW BIT(8)
+#define AMDVI_CTRL_RPPW BIT(9)
+#define AMDVI_CTRL_COH BIT(10)
+#define AMDVI_CTRL_ISOC BIT(11)
+#define AMDVI_CTRL_CMD BIT(12) /* Command buffer enable. */
+#define AMDVI_CTRL_PPRLOG BIT(13)
+#define AMDVI_CTRL_PPRINT BIT(14)
+#define AMDVI_CTRL_PPREN BIT(15)
+#define AMDVI_CTRL_GTE BIT(16) /* Guest translation enable. */
+#define AMDVI_CTRL_GAE BIT(17) /* Guest APIC enable. */
+
+/*
+ * Invalidation timeout.
+ * NOTE(review): unlike the masks above these are small enumerated values
+ * (0..6), not BIT() masks; they presumably need shifting into the timeout
+ * field (bits 5-7 are unassigned above) before being OR-ed into the
+ * control register -- confirm against the IOMMU specification.
+ */
+#define AMDVI_CTRL_INV_NO_TO 0 /* No timeout. */
+#define AMDVI_CTRL_INV_TO_1ms 1 /* 1 ms */
+#define AMDVI_CTRL_INV_TO_10ms 2 /* 10 ms */
+#define AMDVI_CTRL_INV_TO_100ms 3 /* 100 ms */
+#define AMDVI_CTRL_INV_TO_1S 4 /* 1 second */
+#define AMDVI_CTRL_INV_TO_10S 5 /* 10 second */
+#define AMDVI_CTRL_INV_TO_100S 6 /* 100 second */
+
+/*
+ * Max number of PCI devices.
+ * 256 bus x 32 slot/devices x 8 functions.
+ */
+#define PCI_NUM_DEV_MAX 0x10000
+
+/* Maximum number of domains supported by IOMMU (0xFFFF). */
+#define AMDVI_MAX_DOMAIN (BIT(16) - 1)
+
+/*
+ * IOMMU Page Table attributes.
+ */
+#define AMDVI_PT_PRESENT BIT(0)
+#define AMDVI_PT_COHERENT BIT(60)
+#define AMDVI_PT_READ BIT(61)
+#define AMDVI_PT_WRITE BIT(62)
+
+#define AMDVI_PT_RW (AMDVI_PT_READ | AMDVI_PT_WRITE)
+#define AMDVI_PT_MASK 0xFFFFFFFFFF000UL /* Only [51:12] for PA */
+
+/* Bit position of the next-level field in a page directory entry. */
+#define AMDVI_PD_LEVEL_SHIFT 9
+/*
+ * NOTE(review): compares everything above bit 8 with 7 without masking, so
+ * this is false for any entry that also has address bits set -- confirm
+ * the intent before enabling the "notyet" users.
+ */
+#define AMDVI_PD_SUPER(x) (((x) >> AMDVI_PD_LEVEL_SHIFT) == 7)
+/*
+ * IOMMU Status, offset 0x2020
+ */
+#define AMDVI_STATUS_EV_OF BIT(0) /* Event overflow. */
+#define AMDVI_STATUS_EV_INTR BIT(1) /* Event interrupt. */
+/* Completion wait command completed. */
+#define AMDVI_STATUS_CMP BIT(2)
+
+#define IVRS_CTRL_RID 1 /* MMIO RID */
+
+/*
+ * ACPI IVHD
+ * BIOS-provided configuration for one range of device ids.
+ */
+struct ivhd_dev_cfg {
+ uint32_t start_id; /* First device id of the range. */
+ uint32_t end_id; /* Last device id of the range. */
+ uint8_t data; /* Device configuration. */
+ bool enable_ats; /* ATS enabled for the device. */
+ int ats_qlen; /* ATS invalidation queue depth. */
+};
+
+/*
+ * One IOMMU protection domain.  ptp may be NULL for a domain created
+ * without a translation table.
+ */
+struct amdvi_domain {
+ uint64_t *ptp; /* Highest level page table */
+ int ptp_level; /* Level of page tables */
+ u_int id; /* Domain id */
+ SLIST_ENTRY (amdvi_domain) next; /* Linkage on the domain list. */
+};
+
+/*
+ * I/O Virtualization Hardware Definition Block (IVHD) type 0x10 (legacy)
+ * uses ACPI_IVRS_HARDWARE define in contrib/dev/acpica/include/actbl2.h
+ * New IVHD types 0x11 and 0x40 as defined in AMD IOMMU spec[48882] are missing in
+ * ACPI code. These new types add extra field EFR(Extended Feature Register).
+ * XXX : Use definition from ACPI when it is available.
+ *
+ * The structure is packed and the CTASSERT below checks it is 40 bytes.
+ */
+typedef struct acpi_ivrs_hardware_efr_sup
+{
+ ACPI_IVRS_HEADER Header;
+ UINT16 CapabilityOffset; /* Offset for IOMMU control fields */
+ UINT64 BaseAddress; /* IOMMU control registers */
+ UINT16 PciSegmentGroup;
+ UINT16 Info; /* MSI number and unit ID */
+ UINT32 Attr; /* IOMMU Feature */
+ UINT64 ExtFR; /* IOMMU Extended Feature */
+ UINT64 Reserved; /* v1 feature or v2 attribute */
+} __attribute__ ((__packed__)) ACPI_IVRS_HARDWARE_EFRSUP;
+CTASSERT(sizeof(ACPI_IVRS_HARDWARE_EFRSUP) == 40);
+
+/*
+ * Different type of IVHD.
+ * XXX: Use AcpiIvrsType once new IVHD types are available.
+*/
+enum IvrsType
+{
+ IVRS_TYPE_HARDWARE_LEGACY = 0x10, /* Legacy without EFR support. */
+ IVRS_TYPE_HARDWARE_EFR = 0x11, /* With EFR support. */
+ IVRS_TYPE_HARDWARE_MIXED = 0x40, /* Mixed with EFR support. */
+};
+
+/*
+ * AMD IOMMU softc.
+ * Per-IOMMU driver state: register mapping, command and event rings,
+ * interrupt resources, ACPI/PCI identification and statistics.
+ */
+struct amdvi_softc {
+ struct amdvi_ctrl *ctrl; /* Control area. */
+ device_t dev; /* IOMMU device. */
+ enum IvrsType ivhd_type; /* IOMMU IVHD type. */
+ bool iotlb; /* IOTLB supported by IOMMU */
+ struct amdvi_cmd *cmd; /* Command descriptor area. */
+ int cmd_max; /* Max number of commands. */
+ uint64_t cmp_data; /* Command completion write back. */
+ struct amdvi_event *event; /* Event descriptor area. */
+ struct resource *event_res; /* Event interrupt resource. */
+ void *event_tag; /* Event interrupt tag. */
+ int event_max; /* Max number of events. */
+ int event_irq; /* MSI IRQ number for the event interrupt. */
+ int event_rid; /* Resource id of the event IRQ. */
+ /* ACPI various flags. */
+ uint32_t ivhd_flag; /* ACPI IVHD flag. */
+ uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */
+ uint64_t ext_feature; /* IVHD EFR */
+ /* PCI related. */
+ uint16_t cap_off; /* PCI Capability offset. */
+ uint8_t pci_cap; /* PCI capability. */
+ uint16_t pci_seg; /* IOMMU PCI domain/segment. */
+ uint16_t pci_rid; /* PCI BDF of IOMMU */
+ /* Device range under this IOMMU. */
+ uint16_t start_dev_rid; /* First device under this IOMMU. */
+ uint16_t end_dev_rid; /* Last device under this IOMMU. */
+
+ /* BIOS provided device configuration for end points. */
+ struct ivhd_dev_cfg dev_cfg[10];
+ int dev_cfg_cnt; /* Number of valid dev_cfg entries. */
+
+ /* Software statistics. */
+ uint64_t event_intr_cnt; /* Total event INTR count. */
+ uint64_t total_cmd; /* Total number of commands. */
+};
+
+/* Hardware bring-up and teardown for one IOMMU; both return 0 on success. */
+int amdvi_setup_hw(struct amdvi_softc *softc);
+int amdvi_teardown_hw(struct amdvi_softc *softc);
+#endif /* _AMDVI_PRIV_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c
new file mode 100644
index 0000000000..370c20fb01
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c
@@ -0,0 +1,735 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/malloc.h>
+
+#include <machine/vmparam.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+
+#include "io/iommu.h"
+#include "amdvi_priv.h"
+
+device_t *ivhd_devs; /* IVHD or AMD-Vi device list. */
+int ivhd_count; /* Number of IVHD headers. */
+/*
+ * Cached IVHD header list, indexed by device unit number.
+ * One entry per IVHD; ivhd_identify() drops a legacy entry when a newer
+ * format entry describes the same device.
+ */
+ACPI_IVRS_HARDWARE *ivhd_hdrs[10];
+
+extern int amdvi_ptp_level; /* Page table levels. */
+
+/* IVRS sub-table visitor; returning 0 stops ivrs_hdr_iterate_tbl(). */
+typedef int (*ivhd_iter_t)(ACPI_IVRS_HEADER *ptr, void *arg);
+/*
+ * Walk the IVHD and IVMD sub-tables of the ACPI IVRS table.
+ *
+ * iter is invoked as iter(hdr, arg) for every IVHD/IVMD sub-table; a zero
+ * return from iter ends the walk early.  Sub-tables of any other type are
+ * logged and skipped.  The walk also stops, with a message, at the first
+ * sub-table whose length is smaller than its own header or which runs past
+ * the end of the IVRS table.
+ */
+static void
+ivrs_hdr_iterate_tbl(ivhd_iter_t iter, void *arg)
+{
+	ACPI_TABLE_IVRS *ivrs;
+	ACPI_IVRS_HEADER *ivrs_hdr, *end;
+	ACPI_STATUS status;
+
+	status = AcpiGetTable(ACPI_SIG_IVRS, 1, (ACPI_TABLE_HEADER **)&ivrs);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (ivrs->Header.Length == 0) {
+		return;
+	}
+
+	/* Sub-tables start right after the fixed IVRS table header. */
+	ivrs_hdr = (ACPI_IVRS_HEADER *)(ivrs + 1);
+	end = (ACPI_IVRS_HEADER *)((char *)ivrs + ivrs->Header.Length);
+
+	while (ivrs_hdr < end) {
+		/*
+		 * A length shorter than the sub-table header (notably zero)
+		 * would never advance the cursor below and loop forever; a
+		 * length past the table end would be read out of bounds.
+		 */
+		if (ivrs_hdr->Length < sizeof(*ivrs_hdr) ||
+		    (uint8_t *)ivrs_hdr + ivrs_hdr->Length > (uint8_t *)end) {
+			printf("AMD-Vi:IVHD/IVMD is corrupted, length : %d\n",
+			    ivrs_hdr->Length);
+			break;
+		}
+
+		switch (ivrs_hdr->Type) {
+		case IVRS_TYPE_HARDWARE_LEGACY:	/* Legacy */
+		case IVRS_TYPE_HARDWARE_EFR:
+		case IVRS_TYPE_HARDWARE_MIXED:
+		case ACPI_IVRS_TYPE_MEMORY1:
+		case ACPI_IVRS_TYPE_MEMORY2:
+		case ACPI_IVRS_TYPE_MEMORY3:
+			if (!iter(ivrs_hdr, arg))
+				return;
+			break;
+
+		default:
+			printf("AMD-Vi:Not IVHD/IVMD type(%d)\n",
+			    ivrs_hdr->Type);
+			break;
+		}
+
+		ivrs_hdr = (ACPI_IVRS_HEADER *)((uint8_t *)ivrs_hdr +
+		    ivrs_hdr->Length);
+	}
+}
+
+/* Report whether an IVRS sub-table type is one of the three IVHD types. */
+static bool
+ivrs_is_ivhd(UINT8 type)
+{
+	bool is_ivhd;
+
+	is_ivhd = (type == IVRS_TYPE_HARDWARE_LEGACY ||
+	    type == IVRS_TYPE_HARDWARE_EFR ||
+	    type == IVRS_TYPE_HARDWARE_MIXED);
+
+	return (is_ivhd);
+}
+
+/*
+ * Iterator callback: add one to the global ivhd_count for every IVHD
+ * sub-table visited.  Always returns 1 so the table walk continues.
+ */
+static int
+ivhd_count_iter(ACPI_IVRS_HEADER * ivrs_he, void *arg)
+{
+	const bool is_ivhd = ivrs_is_ivhd(ivrs_he->Type);
+
+	if (is_ivhd)
+		ivhd_count += 1;
+
+	return (1);
+}
+
+/* Cursor handed to ivrs_hdr_find_iter() to locate the N-th IVHD sub-table. */
+struct find_ivrs_hdr_args {
+ int i; /* IVHD sub-tables still to skip before the match. */
+ ACPI_IVRS_HEADER *ptr; /* Set to the matching sub-table when found. */
+};
+
+/*
+ * Iterator callback: count down args->i over IVHD sub-tables and, when it
+ * has reached zero, store the current sub-table in args->ptr and return 0
+ * to stop the walk.  Returns 1 (keep walking) in every other case.
+ */
+static int
+ivrs_hdr_find_iter(ACPI_IVRS_HEADER * ivrs_hdr, void *args)
+{
+	struct find_ivrs_hdr_args *const find = args;
+
+	/* Sub-tables that are not IVHDs do not count toward the index. */
+	if (!ivrs_is_ivhd(ivrs_hdr->Type))
+		return (1);
+
+	if (find->i != 0) {
+		find->i--;
+		return (1);
+	}
+
+	find->ptr = ivrs_hdr;
+	return (0);
+}
+
+/*
+ * Return the idx-th (zero-based) IVHD sub-table of the IVRS table, or NULL
+ * when the walk finds fewer IVHD sub-tables than that.
+ */
+static ACPI_IVRS_HARDWARE *
+ivhd_find_by_index(int idx)
+{
+	struct find_ivrs_hdr_args cursor;
+
+	cursor.ptr = NULL;
+	cursor.i = idx;
+	ivrs_hdr_iterate_tbl(ivrs_hdr_find_iter, &cursor);
+
+	return ((ACPI_IVRS_HARDWARE *)cursor.ptr);
+}
+
+/*
+ * Record one BIOS-provided device configuration range in softc->dev_cfg[].
+ *
+ * Entries whose cfg byte is zero carry no special data and are ignored.
+ * When dev_cfg[] is already full the entry is dropped with a warning
+ * instead of being written past the end of the array.
+ */
+static void
+ivhd_dev_add_entry(struct amdvi_softc *softc, uint32_t start_id,
+    uint32_t end_id, uint8_t cfg, bool ats)
+{
+	struct ivhd_dev_cfg *dev_cfg;
+
+	/* If device doesn't have special data, don't add it. */
+	if (!cfg)
+		return;
+
+	/*
+	 * dev_cfg[] has a fixed size; refuse to index beyond it.  The cast
+	 * makes a (bogus) negative count look "full" as well.
+	 */
+	if ((size_t)softc->dev_cfg_cnt >=
+	    sizeof(softc->dev_cfg) / sizeof(softc->dev_cfg[0])) {
+		device_printf(softc->dev,
+		    "WARN Too many device entries, dropping 0x%x - 0x%x.\n",
+		    start_id, end_id);
+		return;
+	}
+
+	dev_cfg = &softc->dev_cfg[softc->dev_cfg_cnt++];
+	dev_cfg->start_id = start_id;
+	dev_cfg->end_id = end_id;
+	dev_cfg->data = cfg;
+	dev_cfg->enable_ats = ats;
+}
+
+/*
+ * Record device attributes as suggested by BIOS.
+ *
+ * Walks the device entries that follow the IVHD header, tracks the lowest
+ * and highest device ID seen in softc->start_dev_rid / end_dev_rid, and
+ * passes select and range entries to ivhd_dev_add_entry().
+ *
+ * Returns 0 on success, -1 for an unrecognised IVHD type, and EINVAL when
+ * softc->dev_cfg_cnt exceeds the capacity of softc->dev_cfg[].
+ */
+static int
+ivhd_dev_parse(ACPI_IVRS_HARDWARE* ivhd, struct amdvi_softc *softc)
+{
+	ACPI_IVRS_DE_HEADER *de;
+	uint8_t *p, *end;
+	int range_start_id = 0, range_end_id = 0;
+	uint32_t *extended;
+	uint8_t all_data = 0, range_data = 0;
+	bool range_enable_ats = false, enable_ats;
+
+	softc->start_dev_rid = ~0;
+	softc->end_dev_rid = 0;
+
+	/* Device entries start right after the type-specific IVHD header. */
+	switch (ivhd->Header.Type) {
+	case IVRS_TYPE_HARDWARE_LEGACY:
+		p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE);
+		break;
+
+	case IVRS_TYPE_HARDWARE_EFR:
+	case IVRS_TYPE_HARDWARE_MIXED:
+		p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE_EFRSUP);
+		break;
+
+	default:
+		device_printf(softc->dev,
+		    "unknown type: 0x%x\n", ivhd->Header.Type);
+		return (-1);
+	}
+
+	end = (uint8_t *)ivhd + ivhd->Header.Length;
+
+	while (p < end) {
+		de = (ACPI_IVRS_DE_HEADER *)p;
+		softc->start_dev_rid = MIN(softc->start_dev_rid, de->Id);
+		softc->end_dev_rid = MAX(softc->end_dev_rid, de->Id);
+		switch (de->Type) {
+		case ACPI_IVRS_TYPE_ALL:
+			all_data = de->DataSetting;
+			break;
+
+		case ACPI_IVRS_TYPE_SELECT:
+		case ACPI_IVRS_TYPE_ALIAS_SELECT:
+		case ACPI_IVRS_TYPE_EXT_SELECT:
+			enable_ats = false;
+			if (de->Type == ACPI_IVRS_TYPE_EXT_SELECT) {
+				extended = (uint32_t *)(de + 1);
+				enable_ats =
+				    (*extended & IVHD_DEV_EXT_ATS_DISABLE) ?
+				    false : true;
+			}
+			ivhd_dev_add_entry(softc, de->Id, de->Id,
+			    de->DataSetting | all_data, enable_ats);
+			break;
+
+		case ACPI_IVRS_TYPE_START:
+		case ACPI_IVRS_TYPE_ALIAS_START:
+		case ACPI_IVRS_TYPE_EXT_START:
+			range_start_id = de->Id;
+			range_data = de->DataSetting;
+			/*
+			 * Only an extended start entry carries an ATS
+			 * setting; do not inherit the one left behind by an
+			 * earlier range (same default as the select case).
+			 */
+			range_enable_ats = false;
+			if (de->Type == ACPI_IVRS_TYPE_EXT_START) {
+				extended = (uint32_t *)(de + 1);
+				range_enable_ats =
+				    (*extended & IVHD_DEV_EXT_ATS_DISABLE) ?
+				    false : true;
+			}
+			break;
+
+		case ACPI_IVRS_TYPE_END:
+			range_end_id = de->Id;
+			ivhd_dev_add_entry(softc, range_start_id, range_end_id,
+			    range_data | all_data, range_enable_ats);
+			range_start_id = range_end_id = 0;
+			range_data = 0;
+			all_data = 0;
+			break;
+
+		case ACPI_IVRS_TYPE_PAD4:
+			break;
+
+		case ACPI_IVRS_TYPE_SPECIAL:
+			/* HPET or IOAPIC */
+			break;
+		default:
+			if ((de->Type < 5) ||
+			    (de->Type >= ACPI_IVRS_TYPE_PAD8))
+				device_printf(softc->dev,
+				    "Unknown dev entry:0x%x\n", de->Type);
+		}
+
+		if (softc->dev_cfg_cnt >
+		    (sizeof(softc->dev_cfg) / sizeof(softc->dev_cfg[0]))) {
+			device_printf(softc->dev,
+			    "WARN Too many device entries.\n");
+			return (EINVAL);
+		}
+
+		/* Entry size is selected by the entry type value. */
+		if (de->Type < 0x40)
+			p += sizeof(ACPI_IVRS_DEVICE4);
+		else if (de->Type < 0x80)
+			p += sizeof(ACPI_IVRS_DEVICE8A);
+		else {
+			printf("Variable size IVHD type 0x%x not supported\n",
+			    de->Type);
+			break;
+		}
+	}
+
+	KASSERT((softc->end_dev_rid >= softc->start_dev_rid),
+	    ("Device end[0x%x] < start[0x%x].\n",
+	    softc->end_dev_rid, softc->start_dev_rid));
+
+	return (0);
+}
+
+/*
+ * Report whether "new" takes precedence over "old": both have the same
+ * DeviceId, old is a legacy IVHD and new is an EFR or mixed-format IVHD.
+ */
+static bool
+ivhd_is_newer(ACPI_IVRS_HEADER *old, ACPI_IVRS_HEADER *new)
+{
+	/* Headers with different device IDs never supersede each other. */
+	if (old->DeviceId != new->DeviceId)
+		return (false);
+
+	/* Only a legacy header can be superseded. */
+	if (old->Type != IVRS_TYPE_HARDWARE_LEGACY)
+		return (false);
+
+	return (new->Type == IVRS_TYPE_HARDWARE_EFR ||
+	    new->Type == IVRS_TYPE_HARDWARE_MIXED);
+}
+
+/*
+ * Identify method: find the IVHD sub-tables of the ACPI IVRS table, cache
+ * one header per device in ivhd_hdrs[] and add an "ivhd" child of parent
+ * for each cached header.
+ *
+ * When the table holds both a legacy and a newer-format IVHD with the same
+ * DeviceId, only the newer one is kept.  On return ivhd_count is the number
+ * of children that were added or located.
+ */
+static void
+ivhd_identify(driver_t *driver, device_t parent)
+{
+	ACPI_TABLE_IVRS *ivrs;
+	ACPI_IVRS_HARDWARE *ivhd;
+	ACPI_STATUS status;
+	int i, j, total, count = 0;
+	uint32_t ivrs_ivinfo;
+
+	if (acpi_disabled("ivhd"))
+		return;
+
+	status = AcpiGetTable(ACPI_SIG_IVRS, 1, (ACPI_TABLE_HEADER **)&ivrs);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (ivrs->Header.Length == 0) {
+		return;
+	}
+
+	ivrs_ivinfo = ivrs->Info;
+	printf("AMD-Vi: IVRS Info VAsize = %d PAsize = %d GVAsize = %d"
+	    " flags:%b\n",
+	    REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8),
+	    REG_BITS(ivrs_ivinfo, 7, 5), REG_BITS(ivrs_ivinfo, 22, 22),
+	    "\020\001EFRSup");
+
+	/*
+	 * ivhd_count_iter() accumulates into the global counter, so start
+	 * from zero in case this method runs more than once.
+	 */
+	ivhd_count = 0;
+	ivrs_hdr_iterate_tbl(ivhd_count_iter, NULL);
+	total = ivhd_count;
+	if (total == 0)
+		return;
+
+	/*
+	 * Cache one header per device.  A newer-format IVHD replaces, in
+	 * place, the legacy IVHD already cached for the same device; any
+	 * other IVHD is appended.  ivhd_count tracks the cached entries.
+	 */
+	ivhd_count = 0;
+	for (i = 0; i < total; i++) {
+		ivhd = ivhd_find_by_index(i);
+		KASSERT(ivhd, ("ivhd%d is NULL\n", i));
+		if (ivhd == NULL)
+			continue;
+
+		for (j = 0; j < ivhd_count; j++) {
+			if (ivhd_is_newer(&ivhd_hdrs[j]->Header,
+			    &ivhd->Header))
+				break;
+		}
+		if (j == ivhd_count) {
+			/* Appending: ivhd_hdrs[] has a fixed size. */
+			if ((size_t)ivhd_count >=
+			    sizeof(ivhd_hdrs) / sizeof(ivhd_hdrs[0])) {
+				printf("AMD-Vi: too many IVHDs, ignoring "
+				    "the rest\n");
+				break;
+			}
+			ivhd_count++;
+		}
+		ivhd_hdrs[j] = ivhd;
+	}
+
+	ivhd_devs = malloc(sizeof(device_t) * ivhd_count, M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < ivhd_count; i++) {
+		ivhd = ivhd_hdrs[i];
+		KASSERT(ivhd, ("ivhd%d is NULL\n", i));
+
+		/*
+		 * Use a high order to ensure that this driver is probed after
+		 * the Host-PCI bridge and the root PCI bus.
+		 */
+		ivhd_devs[i] = BUS_ADD_CHILD(parent,
+		    ACPI_DEV_BASE_ORDER + 10 * 10, "ivhd", i);
+
+		/*
+		 * XXX: In case device was not destroyed before, add will fail.
+		 * locate the old device instance.
+		 */
+		if (ivhd_devs[i] == NULL) {
+			ivhd_devs[i] = device_find_child(parent, "ivhd", i);
+			if (ivhd_devs[i] == NULL) {
+				printf("AMD-Vi: cant find ivhd%d\n", i);
+				break;
+			}
+		}
+		count++;
+	}
+
+	/*
+	 * Update device count in case failed to attach.
+	 */
+	ivhd_count = count;
+}
+
+/*
+ * Probe method: reject devices that have an ACPI handle (ENXIO); otherwise
+ * set a description chosen by the cached IVHD type for this unit and return
+ * BUS_PROBE_NOWILDCARD.
+ */
+static int
+ivhd_probe(device_t dev)
+{
+	ACPI_IVRS_HARDWARE *hdr;
+	const char *desc;
+	int unit;
+
+	if (acpi_get_handle(dev) != NULL)
+		return (ENXIO);
+
+	unit = device_get_unit(dev);
+	KASSERT((unit < ivhd_count),
+	    ("ivhd unit %d > count %d", unit, ivhd_count));
+	hdr = ivhd_hdrs[unit];
+	KASSERT(hdr, ("ivhd is NULL"));
+
+	/* Legacy and any unrecognised type share the plain description. */
+	if (hdr->Header.Type == IVRS_TYPE_HARDWARE_EFR)
+		desc = "AMD-Vi/IOMMU ivhd with EFR";
+	else if (hdr->Header.Type == IVRS_TYPE_HARDWARE_MIXED)
+		desc = "AMD-Vi/IOMMU ivhd in mixed format";
+	else
+		desc = "AMD-Vi/IOMMU ivhd";
+	device_set_desc(dev, desc);
+
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+/*
+ * Decode and print the IVHD flag byte for the given IVHD type.
+ *
+ * Entries in a %b description string are 1-based bit numbers written as
+ * octal escapes, so the eighth bit is "\010".  ("\008" is not an octal
+ * escape: it is a NUL followed by "08" and would end the string early.)
+ */
+static void
+ivhd_print_flag(device_t dev, enum IvrsType ivhd_type, uint8_t flag)
+{
+	/*
+	 * IVHD legacy type has two extra high bits in flag which has
+	 * been moved to EFR for non-legacy device.
+	 */
+	switch (ivhd_type) {
+	case IVRS_TYPE_HARDWARE_LEGACY:
+		device_printf(dev, "Flag:%b\n", flag,
+		    "\020"
+		    "\001HtTunEn"
+		    "\002PassPW"
+		    "\003ResPassPW"
+		    "\004Isoc"
+		    "\005IotlbSup"
+		    "\006Coherent"
+		    "\007PreFSup"
+		    "\010PPRSup");
+		break;
+
+	case IVRS_TYPE_HARDWARE_EFR:
+	case IVRS_TYPE_HARDWARE_MIXED:
+		device_printf(dev, "Flag:%b\n", flag,
+		    "\020"
+		    "\001HtTunEn"
+		    "\002PassPW"
+		    "\003ResPassPW"
+		    "\004Isoc"
+		    "\005IotlbSup"
+		    "\006Coherent");
+		break;
+
+	default:
+		device_printf(dev, "Can't decode flag of ivhd type :0x%x\n",
+		    ivhd_type);
+		break;
+	}
+}
+
+/*
+ * Print the IVHD feature word: "feature" in the legacy IVHD type (0x10),
+ * "attribute" in the newer types (0x11 and 0x40).  Fewer fields are printed
+ * for the newer types; any other type only gets a "can't decode" message.
+ *
+ * NOTE(review): in the legacy %b list "\004" and "\005" select bits 3 and
+ * 4, the same bits printed as GLXSup above - confirm the IASup/GASup/HESup
+ * bit positions against the AMD IOMMU specification.
+ */
+static void
+ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature)
+{
+	if (ivhd_type == IVRS_TYPE_HARDWARE_LEGACY) {
+		device_printf(dev, "Features(type:0x%x) HATS = %d GATS = %d"
+		    " MsiNumPPR = %d PNBanks= %d PNCounters= %d\n",
+		    ivhd_type,
+		    REG_BITS(feature, 31, 30),
+		    REG_BITS(feature, 29, 28),
+		    REG_BITS(feature, 27, 23),
+		    REG_BITS(feature, 22, 17),
+		    REG_BITS(feature, 16, 13));
+		device_printf(dev, "max PASID = %d GLXSup = %d Feature:%b\n",
+		    REG_BITS(feature, 12, 8),
+		    REG_BITS(feature, 4, 3),
+		    feature,
+		    "\020"
+		    "\002NXSup"
+		    "\003GTSup"
+		    "\004<b4>"
+		    "\005IASup"
+		    "\006GASup"
+		    "\007HESup");
+		return;
+	}
+
+	if (ivhd_type == IVRS_TYPE_HARDWARE_EFR ||
+	    ivhd_type == IVRS_TYPE_HARDWARE_MIXED) {
+		device_printf(dev, "Features(type:0x%x) MsiNumPPR = %d"
+		    " PNBanks= %d PNCounters= %d\n",
+		    ivhd_type,
+		    REG_BITS(feature, 27, 23),
+		    REG_BITS(feature, 22, 17),
+		    REG_BITS(feature, 16, 13));
+		return;
+	}
+
+	/* Other ivhd type features are not decoded. */
+	device_printf(dev, "Can't decode ivhd type :0x%x\n", ivhd_type);
+}
+
+/*
+ * Print extended features of IOMMU; nothing is printed when ext_feature
+ * is zero.
+ *
+ * Entries in a %b description string are 1-based bit numbers written as
+ * octal escapes ("\010" is the eighth bit, "\011" the ninth, ...).  Writing
+ * them in decimal does not work: "\008" and "\009" are a NUL followed by a
+ * digit and end the string, and "\018"/"\019" are "\01" followed by a digit.
+ *
+ * NOTE(review): "Max PASID" is taken from ext_high bits 5:0 and
+ * DevTblSegSup from bits 8:7, while the flag list places USSup at bit 5 -
+ * confirm these field positions against the AMD IOMMU specification.
+ */
+static void
+ivhd_print_ext_feature(device_t dev, uint64_t ext_feature)
+{
+	uint32_t ext_low, ext_high;
+
+	if (!ext_feature)
+		return;
+
+	ext_low = ext_feature;
+	device_printf(dev, "Extended features[31:0]:%b "
+	    "HATS = 0x%x GATS = 0x%x "
+	    "GLXSup = 0x%x SmiFSup = 0x%x SmiFRC = 0x%x "
+	    "GAMSup = 0x%x DualPortLogSup = 0x%x DualEventLogSup = 0x%x\n",
+	    (int)ext_low,
+	    "\020"
+	    "\001PreFSup"
+	    "\002PPRSup"
+	    "\003<b2>"
+	    "\004NXSup"
+	    "\005GTSup"
+	    "\006<b5>"
+	    "\007IASup"
+	    "\010GASup"
+	    "\011HESup"
+	    "\012PCSup",
+	    REG_BITS(ext_low, 11, 10),
+	    REG_BITS(ext_low, 13, 12),
+	    REG_BITS(ext_low, 15, 14),
+	    REG_BITS(ext_low, 17, 16),
+	    REG_BITS(ext_low, 20, 18),
+	    REG_BITS(ext_low, 23, 21),
+	    REG_BITS(ext_low, 25, 24),
+	    REG_BITS(ext_low, 29, 28));
+
+	ext_high = ext_feature >> 32;
+	device_printf(dev, "Extended features[62:32]:%b "
+	    "Max PASID: 0x%x DevTblSegSup = 0x%x "
+	    "MarcSup = 0x%x\n",
+	    (int)(ext_high),
+	    "\020"
+	    "\006USSup"
+	    "\011PprOvrflwEarlySup"
+	    "\012PPRAutoRspSup"
+	    "\015BlKStopMrkSup"
+	    "\016PerfOptSup"
+	    "\017MsiCapMmioSup"
+	    "\021GIOSup"
+	    "\022HASup"
+	    "\023EPHSup"
+	    "\024AttrFWSup"
+	    "\025HDSup"
+	    "\027InvIotlbSup",
+	    REG_BITS(ext_high, 5, 0),
+	    REG_BITS(ext_high, 8, 7),
+	    REG_BITS(ext_high, 11, 10));
+}
+
+/*
+ * Print the flag, feature and extended-feature words recorded in softc,
+ * then check the requested page-table depth (amdvi_ptp_level) against the
+ * hard-coded maximum of 7 levels.
+ *
+ * Returns EINVAL when the requested depth exceeds the maximum, 0 otherwise.
+ * The ivhd argument is not used.
+ */
+static int
+ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE * ivhd)
+{
+	const int max_ptp_level = 7;
+	device_t dev = softc->dev;
+
+	ivhd_print_flag(dev, softc->ivhd_type, softc->ivhd_flag);
+	ivhd_print_feature(dev, softc->ivhd_type, softc->ivhd_feature);
+	ivhd_print_ext_feature(dev, softc->ext_feature);
+
+	/* Make sure device support minimum page level as requested by user. */
+	if (max_ptp_level < amdvi_ptp_level) {
+		device_printf(dev, "insufficient PTP level:%d\n",
+		    max_ptp_level);
+		return (EINVAL);
+	}
+
+	device_printf(dev, "supported paging level:%d, will use only: %d\n",
+	    max_ptp_level, amdvi_ptp_level);
+	device_printf(dev, "device range: 0x%x - 0x%x\n",
+	    softc->start_dev_rid, softc->end_dev_rid);
+
+	return (0);
+}
+
+/*
+ * Attach method: copy the cached IVHD header fields for this unit into the
+ * softc, parse the IVHD device entries, print the capabilities and set up
+ * the hardware.
+ *
+ * Returns 0 on success, or the error from ivhd_print_cap() or
+ * amdvi_setup_hw().  A device-entry parsing error is only logged.
+ */
+static int
+ivhd_attach(device_t dev)
+{
+ ACPI_IVRS_HARDWARE *ivhd;
+ ACPI_IVRS_HARDWARE_EFRSUP *ivhd_efr;
+ struct amdvi_softc *softc;
+ int status, unit;
+
+ unit = device_get_unit(dev);
+ KASSERT((unit < ivhd_count),
+ ("ivhd unit %d > count %d", unit, ivhd_count));
+ /* Make sure its same device for which attach is called. */
+ KASSERT((ivhd_devs[unit] == dev),
+ ("Not same device old %p new %p", ivhd_devs[unit], dev));
+
+ softc = device_get_softc(dev);
+ softc->dev = dev;
+ /* The unit number indexes the header cache built by ivhd_identify(). */
+ ivhd = ivhd_hdrs[unit];
+ KASSERT(ivhd, ("ivhd is NULL"));
+
+ softc->ivhd_type = ivhd->Header.Type;
+ softc->pci_seg = ivhd->PciSegmentGroup;
+ softc->pci_rid = ivhd->Header.DeviceId;
+ softc->ivhd_flag = ivhd->Header.Flags;
+ /*
+ * On legacy IVHD type(0x10), it is documented as feature
+ * but in newer type it is attribute.
+ */
+ softc->ivhd_feature = ivhd->Reserved;
+ /*
+ * PCI capability has more capabilities that are not part of IVRS.
+ */
+ softc->cap_off = ivhd->CapabilityOffset;
+
+#ifdef notyet
+ /* IVHD Info bit[4:0] is event MSI/X number. */
+ softc->event_msix = ivhd->Info & 0x1F;
+#endif
+ /* Only the EFR and mixed IVHD layouts carry the extended feature word. */
+ switch (ivhd->Header.Type) {
+ case IVRS_TYPE_HARDWARE_EFR:
+ case IVRS_TYPE_HARDWARE_MIXED:
+ ivhd_efr = (ACPI_IVRS_HARDWARE_EFRSUP *)ivhd;
+ softc->ext_feature = ivhd_efr->ExtFR;
+ break;
+
+ }
+
+ softc->ctrl = (struct amdvi_ctrl *) PHYS_TO_DMAP(ivhd->BaseAddress);
+ status = ivhd_dev_parse(ivhd, softc);
+ /* A parse failure is reported but does not abort the attach. */
+ if (status != 0) {
+ device_printf(dev,
+ "endpoint device parsing error=%d\n", status);
+ }
+
+ status = ivhd_print_cap(softc, ivhd);
+ if (status != 0) {
+ return (status);
+ }
+
+ status = amdvi_setup_hw(softc);
+ if (status != 0) {
+ device_printf(dev, "couldn't be initialised, error=%d\n",
+ status);
+ return (status);
+ }
+
+ return (0);
+}
+
+/*
+ * Detach method: tear down the hardware state of this device.  Always
+ * returns 0; the result of amdvi_teardown_hw() is not checked.
+ */
+static int
+ivhd_detach(device_t dev)
+{
+	struct amdvi_softc *const sc = device_get_softc(dev);
+
+	amdvi_teardown_hw(sc);
+
+	/*
+	 * XXX: delete the device.
+	 * don't allow detach, return EBUSY.
+	 */
+	return (0);
+}
+
+/* Suspend method: does nothing and reports success. */
+static int
+ivhd_suspend(device_t dev)
+{
+
+ return (0);
+}
+
+/* Resume method: does nothing and reports success. */
+static int
+ivhd_resume(device_t dev)
+{
+
+ return (0);
+}
+
+/* Device method table for the "ivhd" driver. */
+static device_method_t ivhd_methods[] = {
+ DEVMETHOD(device_identify, ivhd_identify),
+ DEVMETHOD(device_probe, ivhd_probe),
+ DEVMETHOD(device_attach, ivhd_attach),
+ DEVMETHOD(device_detach, ivhd_detach),
+ DEVMETHOD(device_suspend, ivhd_suspend),
+ DEVMETHOD(device_resume, ivhd_resume),
+ DEVMETHOD_END
+};
+
+/* Driver description: name, method table and softc size. */
+static driver_t ivhd_driver = {
+ "ivhd",
+ ivhd_methods,
+ sizeof(struct amdvi_softc),
+};
+
+static devclass_t ivhd_devclass;
+
+/*
+ * Load this module at the end after PCI re-probing to configure interrupt.
+ * The driver is registered on the acpi bus and declares dependencies on
+ * the acpi and pci modules.
+ */
+DRIVER_MODULE_ORDERED(ivhd, acpi, ivhd_driver, ivhd_devclass, 0, 0,
+ SI_ORDER_ANY);
+MODULE_DEPEND(ivhd, acpi, 1, 1, 1);
+MODULE_DEPEND(ivhd, pci, 1, 1, 1);
diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.c b/usr/src/uts/i86pc/io/vmm/amd/npt.c
index e1c1b79e1b..e61464a964 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/npt.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/npt.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.h b/usr/src/uts/i86pc/io/vmm/amd/npt.h
index 5966474711..35530d7833 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/npt.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/npt.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index cb6251a791..9c22fc2532 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
@@ -50,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/psl.h>
#include <machine/md_var.h>
+#include <machine/reg.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/vmm.h>
@@ -528,8 +531,8 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
PAT_VALUE(7, PAT_UNCACHEABLE);
/* Set up DR6/7 to power-on state */
- state->dr6 = 0xffff0ff0;
- state->dr7 = 0x400;
+ state->dr6 = DBREG_DR6_RESERVED1;
+ state->dr7 = DBREG_DR7_RESERVED1;
}
/*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.h b/usr/src/uts/i86pc/io/vmm/amd/svm.h
index 2f4277df2f..c78f7eb067 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
index 49208a351c..0417983233 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014, Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h
index 07716c86de..1dba8101ab 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
index 9377bf529a..8735353bb4 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
index b1232c713d..5075b69867 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 163f48f010..ec7caa91f9 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index 1df2271b3a..769780e0d9 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <machine/psl.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/reg.h>
#include <machine/segments.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
@@ -227,6 +228,15 @@ static u_int vpid_alloc_failed;
SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
&vpid_alloc_failed, 0, NULL);
+static int guest_l1d_flush;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
+ &guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+ &guest_l1d_flush_sw, 0, NULL);
+
+static struct msr_entry msr_load_list[1] __aligned(16);
+
/*
* The definitions of SDT probes for VMX.
*/
@@ -297,6 +307,9 @@ SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor,
SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait,
"struct vmx *", "int", "struct vm_exit *");
+SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn,
+ "struct vmx *", "int", "struct vm_exit *");
+
SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown,
"struct vmx *", "int", "struct vm_exit *", "uint32_t");
@@ -627,6 +640,9 @@ vmx_cleanup(void)
vpid_unr = NULL;
}
+ if (nmi_flush_l1d_sw == 1)
+ nmi_flush_l1d_sw = 0;
+
smp_rendezvous(NULL, vmx_disable, NULL, NULL);
return (0);
@@ -886,6 +902,36 @@ vmx_init(int ipinum)
return (error);
}
+#ifdef __FreeBSD__
+ guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
+ TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
+
+ /*
+ * L1D cache flush is enabled. Use IA32_FLUSH_CMD MSR when
+ * available. Otherwise fall back to the software flush
+ * method which loads enough data from the kernel text to
+ * flush existing L1D content, both on VMX entry and on NMI
+ * return.
+ */
+ if (guest_l1d_flush) {
+ if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+ guest_l1d_flush_sw = 1;
+ TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
+ &guest_l1d_flush_sw);
+ }
+ if (guest_l1d_flush_sw) {
+ if (nmi_flush_l1d_sw <= 1)
+ nmi_flush_l1d_sw = 1;
+ } else {
+ msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+ msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+ }
+ }
+#else
+ /* L1D flushing is taken care of by ht_acquire() and friends */
+ guest_l1d_flush = 0;
+#endif /* __FreeBSD__ */
+
/*
* Stash the cr0 and cr4 bits that must be fixed to 0 or 1
*/
@@ -1109,6 +1155,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
#endif
error += vmwrite(VMCS_VPID, vpid[i]);
+ if (guest_l1d_flush && !guest_l1d_flush_sw) {
+ vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+ (vm_offset_t)&msr_load_list[0]));
+ vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+ nitems(msr_load_list));
+ vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+ vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+ }
+
/* exception bitmap */
if (vcpu_trace_exceptions(vm, i))
exc_bitmap = 0xffffffff;
@@ -1116,8 +1171,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
exc_bitmap = 1 << IDT_MC;
error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap);
- vmx->ctx[i].guest_dr6 = 0xffff0ff0;
- error += vmwrite(VMCS_GUEST_DR7, 0x400);
+ vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
+ error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
if (virtual_interrupt_delivery) {
error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
@@ -2976,6 +3031,19 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_MWAIT;
break;
+ case EXIT_REASON_VMCALL:
+ case EXIT_REASON_VMCLEAR:
+ case EXIT_REASON_VMLAUNCH:
+ case EXIT_REASON_VMPTRLD:
+ case EXIT_REASON_VMPTRST:
+ case EXIT_REASON_VMREAD:
+ case EXIT_REASON_VMRESUME:
+ case EXIT_REASON_VMWRITE:
+ case EXIT_REASON_VMXOFF:
+ case EXIT_REASON_VMXON:
+ SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit);
+ vmexit->exitcode = VM_EXITCODE_VMINSN;
+ break;
default:
SDT_PROBE4(vmm, vmx, exit, unknown,
vmx, vcpu, vmexit, reason);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.c b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
index 6e94f5bd9a..ba4cd7785e 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.h b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
index 12f2db2c61..4bf9fe048d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 1e7bb93d7b..4df909777d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014, Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
index 039a28145b..e6562da5c0 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index 0d61631626..f12d22fc26 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014, Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.h b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
index ffab3a5af0..13abbedeb9 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Neel Natu (neel@freebsd.org)
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 4d8cf1748d..9a4bbad9c1 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -991,8 +991,8 @@ sysmem_mapping(struct vm *vm, struct mem_map *mm)
return (false);
}
-static vm_paddr_t
-sysmem_maxaddr(struct vm *vm)
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
{
struct mem_map *mm;
vm_paddr_t maxaddr;
@@ -1127,7 +1127,7 @@ vm_assign_pptdev(struct vm *vm, int pptfd)
if (ppt_assigned_devices(vm) == 0) {
KASSERT(vm->iommu == NULL,
("vm_assign_pptdev: iommu must be NULL"));
- maxaddr = sysmem_maxaddr(vm);
+ maxaddr = vmm_sysmem_maxaddr(vm);
vm->iommu = iommu_create_domain(maxaddr);
if (vm->iommu == NULL)
return (ENXIO);
@@ -2190,6 +2190,7 @@ restart:
break;
case VM_EXITCODE_MONITOR:
case VM_EXITCODE_MWAIT:
+ case VM_EXITCODE_VMINSN:
vm_inject_ud(vm, vcpuid);
break;
#ifndef __FreeBSD__
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
index 934e01a38f..3d08fd5e85 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
index ba51989b1a..14e315f400 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* All rights reserved.
*
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index c200a5eb33..163c0781cf 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -240,6 +240,7 @@ int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
int prot, void **cookie);
void vm_gpa_release(void *cookie);
@@ -587,6 +588,7 @@ enum vm_exitcode {
VM_EXITCODE_SVM,
VM_EXITCODE_REQIDLE,
VM_EXITCODE_DEBUG,
+ VM_EXITCODE_VMINSN,
#ifndef __FreeBSD__
VM_EXITCODE_HT,
#endif