summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/Makefile1
-rw-r--r--usr/src/cmd/bhyve/pci_passthru.c433
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c8
-rw-r--r--usr/src/cmd/devfsadm/i386/misc_link_i386.c19
-rw-r--r--usr/src/cmd/pptadm/Makefile43
-rw-r--r--usr/src/cmd/pptadm/pptadm.c205
-rw-r--r--usr/src/compat/freebsd/amd64/machine/cpu.h23
-rw-r--r--usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h21
-rw-r--r--usr/src/compat/freebsd/dev/pci/pcivar.h38
-rw-r--r--usr/src/compat/freebsd/sys/bus.h21
-rw-r--r--usr/src/lib/Makefile3
-rw-r--r--usr/src/lib/libppt/Makefile44
-rw-r--r--usr/src/lib/libppt/Makefile.com46
-rw-r--r--usr/src/lib/libppt/amd64/Makefile19
-rw-r--r--usr/src/lib/libppt/common/libppt.c506
-rw-r--r--usr/src/lib/libppt/common/libppt.h36
-rw-r--r--usr/src/lib/libppt/common/llib-lppt19
-rw-r--r--usr/src/lib/libppt/common/mapfile-vers40
-rw-r--r--usr/src/lib/libppt/i386/Makefile18
-rw-r--r--usr/src/lib/libppt/sparc/Makefile18
-rw-r--r--usr/src/lib/libppt/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libvmmapi/common/mapfile-vers1
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c100
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.h15
-rw-r--r--usr/src/man/man1m/Makefile1
-rw-r--r--usr/src/man/man1m/pptadm.1m74
-rw-r--r--usr/src/pkg/manifests/system-bhyve.mf8
-rw-r--r--usr/src/pkg/manifests/system-library-bhyve.mf3
-rw-r--r--usr/src/uts/common/os/modsysfile.c26
-rw-r--r--usr/src/uts/i86pc/Makefile.files7
-rw-r--r--usr/src/uts/i86pc/Makefile.i86pc1
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vtd.c103
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c83
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/iommu.c383
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.c1436
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.conf14
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.h27
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.mapfile52
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/sol_iommu.c86
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/sol_ppt.c92
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c27
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c57
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c15
-rw-r--r--usr/src/uts/i86pc/ppt/Makefile86
-rw-r--r--usr/src/uts/i86pc/sys/ppt_dev.h56
-rw-r--r--usr/src/uts/i86pc/sys/vmm.h5
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h47
-rw-r--r--usr/src/uts/i86pc/vmm/Makefile5
-rw-r--r--usr/src/uts/intel/ia32/ml/modstubs.s18
49 files changed, 3960 insertions, 448 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index f20274bd35..0f2cc306aa 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -323,6 +323,7 @@ COMMON_SUBDIRS= \
ppgsz \
pg \
plockstat \
+ pptadm \
pr \
prctl \
print \
diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c
index d2c69e795c..3782914cd5 100644
--- a/usr/src/cmd/bhyve/pci_passthru.c
+++ b/usr/src/cmd/bhyve/pci_passthru.c
@@ -40,6 +40,8 @@ __FBSDID("$FreeBSD$");
#include <sys/pciio.h>
#include <sys/ioctl.h>
+#include <sys/pci.h>
+
#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>
@@ -59,30 +61,15 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <vmmapi.h>
+#include <sys/ppt_dev.h>
#include "pci_emul.h"
#include "mem.h"
-#ifndef _PATH_DEVPCI
-#define _PATH_DEVPCI "/dev/pci"
-#endif
-
-#ifndef _PATH_DEVIO
-#define _PATH_DEVIO "/dev/io"
-#endif
-
-#ifndef _PATH_MEM
-#define _PATH_MEM "/dev/mem"
-#endif
-
#define LEGACY_SUPPORT 1
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
-static int pcifd = -1;
-static int iofd = -1;
-static int memfd = -1;
-
struct passthru_softc {
struct pci_devinst *psc_pi;
struct pcibar psc_bar[PCI_BARMAX + 1];
@@ -94,14 +81,16 @@ struct passthru_softc {
struct {
int capoff;
} psc_msix;
- struct pcisel psc_sel;
+ int pptfd;
+ int msi_limit;
+ int msix_limit;
};
static int
msi_caplen(int msgctrl)
{
int len;
-
+
len = 10; /* minimum length of msi capability */
if (msgctrl & PCIM_MSICTRL_64BIT)
@@ -120,33 +109,76 @@ msi_caplen(int msgctrl)
}
static uint32_t
-read_config(const struct pcisel *sel, long reg, int width)
+read_config(const struct passthru_softc *sc, long reg, int width)
{
- struct pci_io pi;
+ struct ppt_cfg_io pi;
- bzero(&pi, sizeof(pi));
- pi.pi_sel = *sel;
- pi.pi_reg = reg;
- pi.pi_width = width;
+ pi.pci_off = reg;
+ pi.pci_width = width;
- if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
- return (0); /* XXX */
- else
- return (pi.pi_data);
+ if (ioctl(sc->pptfd, PPT_CFG_READ, &pi) != 0) {
+ return (0);
+ }
+ return (pi.pci_data);
}
static void
-write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
+write_config(const struct passthru_softc *sc, long reg, int width,
+ uint32_t data)
{
- struct pci_io pi;
+ struct ppt_cfg_io pi;
- bzero(&pi, sizeof(pi));
- pi.pi_sel = *sel;
- pi.pi_reg = reg;
- pi.pi_width = width;
- pi.pi_data = data;
+ pi.pci_off = reg;
+ pi.pci_width = width;
+ pi.pci_data = data;
- (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
+ (void) ioctl(sc->pptfd, PPT_CFG_WRITE, &pi);
+}
+
+/*
+ * Query the ppt driver for the type, base and size of one BAR of the
+ * passed-through device.
+ *
+ * sc	- passthru state; the ioctl is issued on sc->pptfd
+ * bar	- BAR index, handed to the driver in pbq_baridx
+ * type	- out: PCIBAR_IO, PCIBAR_MEM32 or PCIBAR_MEM64
+ * base	- out: pbq_base as reported by the driver
+ * size	- out: pbq_size as reported by the driver
+ *
+ * Returns 0 on success, or -1 if the PPT_BAR_QUERY ioctl fails, in which
+ * case the out parameters are left untouched.  A BAR type that is none of
+ * PCI_ADDR_IO/PCI_ADDR_MEM32/PCI_ADDR_MEM64 is fatal and terminates the
+ * process.
+ */
+static int
+passthru_get_bar(struct passthru_softc *sc, int bar, enum pcibar_type *type,
+    uint64_t *base, uint64_t *size)
+{
+	struct ppt_bar_query pb;
+
+	pb.pbq_baridx = bar;
+
+	if (ioctl(sc->pptfd, PPT_BAR_QUERY, &pb) != 0) {
+		return (-1);
+	}
+
+	switch (pb.pbq_type) {
+	case PCI_ADDR_IO:
+		*type = PCIBAR_IO;
+		break;
+	case PCI_ADDR_MEM32:
+		*type = PCIBAR_MEM32;
+		break;
+	case PCI_ADDR_MEM64:
+		*type = PCIBAR_MEM64;
+		break;
+	default:
+		/*
+		 * The ioctl succeeded, so errno holds nothing relevant at
+		 * this point: use errx() so that no unrelated strerror()
+		 * text is appended.  errx() emits the trailing newline
+		 * itself, so the format must not carry one.
+		 */
+		errx(1, "unrecognized BAR type: %u", pb.pbq_type);
+		break;
+	}
+
+	*base = pb.pbq_base;
+	*size = pb.pbq_size;
+	return (0);
+}
+
+/*
+ * Open the ppt device node at 'path' for reading and writing.
+ *
+ * On success the descriptor is stored through 'pptfdp' and 0 is returned.
+ * On failure the errno value left by open() is returned and *pptfdp is not
+ * modified.
+ */
+static int
+passthru_dev_open(const char *path, int *pptfdp)
+{
+	const int fd = open(path, O_RDWR);
+
+	if (fd < 0) {
+		return (errno);
+	}
+
+	/* XXX: nothing yet confirms that fd really is a ppt device. */
+	*pptfdp = fd;
+
+	return (0);
}
#ifdef LEGACY_SUPPORT
@@ -174,55 +206,87 @@ passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
}
#endif /* LEGACY_SUPPORT */
+/*
+ * Clamp the MSI and MSI-X vector counts advertised in the emulated config
+ * space of the device to the limits stored in sc->msi_limit and
+ * sc->msix_limit.
+ *
+ * A capability offset of 0 means the capability was not found, and a limit
+ * of -1 disables clamping for that capability; in either case the
+ * corresponding register is left alone.
+ *
+ * msixcap is the caller's copy of the MSI-X capability; its msgctrl field is
+ * updated in place when the table size is reduced.  It is only dereferenced
+ * when sc->psc_msix.capoff is non-zero.
+ */
+static void
+passthru_intr_limit(struct passthru_softc *sc, struct msixcap *msixcap)
+{
+ struct pci_devinst *pi = sc->psc_pi;
+ int off;
+
+ /* Reduce the number of MSI vectors if higher than OS limit */
+ if ((off = sc->psc_msi.capoff) != 0 && sc->msi_limit != -1) {
+ int msi_limit, mmc;
+
+ /*
+ * Translate the numeric limit into a Multiple Message Capable
+ * field encoding.
+ * NOTE(review): a limit that is not a power of two selects the
+ * next larger encoding (e.g. 5 picks MMC_8) -- confirm that the
+ * driver only ever reports power-of-two limits.
+ */
+ msi_limit =
+ sc->msi_limit > 16 ? PCIM_MSICTRL_MMC_32 :
+ sc->msi_limit > 8 ? PCIM_MSICTRL_MMC_16 :
+ sc->msi_limit > 4 ? PCIM_MSICTRL_MMC_8 :
+ sc->msi_limit > 2 ? PCIM_MSICTRL_MMC_4 :
+ sc->msi_limit > 1 ? PCIM_MSICTRL_MMC_2 :
+ PCIM_MSICTRL_MMC_1;
+ mmc = sc->psc_msi.msgctrl & PCIM_MSICTRL_MMC_MASK;
+
+ /* Compares the encoded field values, not vector counts. */
+ if (mmc > msi_limit) {
+ sc->psc_msi.msgctrl &= ~PCIM_MSICTRL_MMC_MASK;
+ sc->psc_msi.msgctrl |= msi_limit;
+ /* Message control sits 2 bytes into the capability. */
+ pci_set_cfgdata16(pi, off + 2, sc->psc_msi.msgctrl);
+ }
+ }
+
+ /* Reduce the number of MSI-X vectors if higher than OS limit */
+ if ((off = sc->psc_msix.capoff) != 0 && sc->msix_limit != -1) {
+ if (MSIX_TABLE_COUNT(msixcap->msgctrl) > sc->msix_limit) {
+ msixcap->msgctrl &= ~PCIM_MSIXCTRL_TABLE_SIZE;
+ /*
+ * The table size field holds the count minus one (see
+ * MSIX_TABLE_COUNT).
+ * NOTE(review): a limit of 0 would OR in -1 here --
+ * confirm the driver never reports 0.
+ */
+ msixcap->msgctrl |= sc->msix_limit - 1;
+ pci_set_cfgdata16(pi, off + 2, msixcap->msgctrl);
+ }
+ }
+}
+
static int
cfginitmsi(struct passthru_softc *sc)
{
int i, ptr, capptr, cap, sts, caplen, table_size;
uint32_t u32;
- struct pcisel sel;
- struct pci_devinst *pi;
+ struct pci_devinst *pi = sc->psc_pi;
struct msixcap msixcap;
uint32_t *msixcap_ptr;
- pi = sc->psc_pi;
- sel = sc->psc_sel;
-
/*
* Parse the capabilities and cache the location of the MSI
* and MSI-X capabilities.
*/
- sts = read_config(&sel, PCIR_STATUS, 2);
+ sts = read_config(sc, PCIR_STATUS, 2);
if (sts & PCIM_STATUS_CAPPRESENT) {
- ptr = read_config(&sel, PCIR_CAP_PTR, 1);
+ ptr = read_config(sc, PCIR_CAP_PTR, 1);
while (ptr != 0 && ptr != 0xff) {
- cap = read_config(&sel, ptr + PCICAP_ID, 1);
+ cap = read_config(sc, ptr + PCICAP_ID, 1);
if (cap == PCIY_MSI) {
/*
* Copy the MSI capability into the config
* space of the emulated pci device
*/
sc->psc_msi.capoff = ptr;
- sc->psc_msi.msgctrl = read_config(&sel,
- ptr + 2, 2);
+ sc->psc_msi.msgctrl = read_config(sc,
+ ptr + 2, 2);
sc->psc_msi.emulated = 0;
caplen = msi_caplen(sc->psc_msi.msgctrl);
capptr = ptr;
while (caplen > 0) {
- u32 = read_config(&sel, capptr, 4);
+ u32 = read_config(sc, capptr, 4);
pci_set_cfgdata32(pi, capptr, u32);
caplen -= 4;
capptr += 4;
}
} else if (cap == PCIY_MSIX) {
/*
- * Copy the MSI-X capability
+ * Copy the MSI-X capability
*/
sc->psc_msix.capoff = ptr;
caplen = 12;
msixcap_ptr = (uint32_t*) &msixcap;
capptr = ptr;
while (caplen > 0) {
- u32 = read_config(&sel, capptr, 4);
+ u32 = read_config(sc, capptr, 4);
*msixcap_ptr = u32;
pci_set_cfgdata32(pi, capptr, u32);
caplen -= 4;
@@ -230,10 +294,12 @@ cfginitmsi(struct passthru_softc *sc)
msixcap_ptr++;
}
}
- ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
+ ptr = read_config(sc, ptr + PCICAP_NEXTPTR, 1);
}
}
+ passthru_intr_limit(sc, &msixcap);
+
if (sc->psc_msix.capoff != 0) {
pi->pi_msix.pba_bar =
msixcap.pba_info & PCIM_MSIX_BIR_MASK;
@@ -265,7 +331,7 @@ cfginitmsi(struct passthru_softc *sc)
*/
if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
int origptr, msiptr;
- origptr = read_config(&sel, PCIR_CAP_PTR, 1);
+ origptr = read_config(sc, PCIR_CAP_PTR, 1);
msiptr = passthru_add_msicap(pi, 1, origptr);
sc->psc_msi.capoff = msiptr;
sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
@@ -275,14 +341,15 @@ cfginitmsi(struct passthru_softc *sc)
#endif
/* Make sure one of the capabilities is present */
- if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
+ if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) {
return (-1);
- else
+ } else {
return (0);
+ }
}
static uint64_t
-msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
+passthru_msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
{
struct pci_devinst *pi;
struct msix_table_entry *entry;
@@ -360,8 +427,8 @@ msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
}
static void
-msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
- uint64_t offset, int size, uint64_t data)
+passthru_msix_table_write(struct vmctx *ctx, int vcpu,
+ struct passthru_softc *sc, uint64_t offset, int size, uint64_t data)
{
struct pci_devinst *pi;
struct msix_table_entry *entry;
@@ -426,10 +493,9 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
/* If the entry is masked, don't set it up */
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
- (void)vm_setup_pptdev_msix(ctx, vcpu,
- sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, index, entry->addr,
- entry->msg_data, entry->vector_control);
+ (void) vm_setup_pptdev_msix(ctx, vcpu, sc->pptfd,
+ index, entry->addr, entry->msg_data,
+ entry->vector_control);
}
}
}
@@ -437,7 +503,6 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
static int
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
{
- int b, s, f;
int error, idx;
size_t len, remaining;
uint32_t table_size, table_offset;
@@ -447,14 +512,10 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);
- b = sc->psc_sel.pc_bus;
- s = sc->psc_sel.pc_dev;
- f = sc->psc_sel.pc_func;
-
- /*
+ /*
* If the MSI-X table BAR maps memory intended for
- * other uses, it is at least assured that the table
- * either resides in its own page within the region,
+ * other uses, it is at least assured that the table
+ * either resides in its own page within the region,
* or it resides in a page shared with only the PBA.
*/
table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
@@ -490,12 +551,11 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
pi->pi_msix.pba_page_offset = table_offset +
table_size - 4096;
pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ |
- PROT_WRITE, MAP_SHARED, memfd, start +
+ PROT_WRITE, MAP_SHARED, sc->pptfd,
pi->pi_msix.pba_page_offset);
if (pi->pi_msix.pba_page == MAP_FAILED) {
- warn(
- "Failed to map PBA page for MSI-X on %d/%d/%d",
- b, s, f);
+ warn("Failed to map PBA page for MSI-X on %d",
+ sc->pptfd);
return (-1);
}
}
@@ -504,7 +564,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
/* Map everything before the MSI-X table */
if (table_offset > 0) {
len = table_offset;
- error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
+ error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base);
if (error)
return (error);
@@ -521,7 +581,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
/* Map everything beyond the end of the MSI-X table */
if (remaining > 0) {
len = remaining;
- error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
+ error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base);
if (error)
return (error);
}
@@ -532,47 +592,26 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
static int
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
{
- int i, error;
- struct pci_devinst *pi;
- struct pci_bar_io bar;
- enum pcibar_type bartype;
- uint64_t base, size;
-
- pi = sc->psc_pi;
+ struct pci_devinst *pi = sc->psc_pi;
+ uint_t i;
/*
* Initialize BAR registers
*/
for (i = 0; i <= PCI_BARMAX; i++) {
- bzero(&bar, sizeof(bar));
- bar.pbi_sel = sc->psc_sel;
- bar.pbi_reg = PCIR_BAR(i);
+ enum pcibar_type bartype;
+ uint64_t base, size;
+ int error;
- if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
+ if (passthru_get_bar(sc, i, &bartype, &base, &size) != 0) {
continue;
-
- if (PCI_BAR_IO(bar.pbi_base)) {
- bartype = PCIBAR_IO;
- base = bar.pbi_base & PCIM_BAR_IO_BASE;
- } else {
- switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
- case PCIM_BAR_MEM_64:
- bartype = PCIBAR_MEM64;
- break;
- default:
- bartype = PCIBAR_MEM32;
- break;
- }
- base = bar.pbi_base & PCIM_BAR_MEM_BASE;
}
- size = bar.pbi_length;
if (bartype != PCIBAR_IO) {
if (((base | size) & PAGE_MASK) != 0) {
- warnx("passthru device %d/%d/%d BAR %d: "
+ warnx("passthru device %d BAR %d: "
"base %#lx or size %#lx not page aligned\n",
- sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, i, base, size);
+ sc->pptfd, i, base, size);
return (-1);
}
}
@@ -590,13 +629,12 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
/* The MSI-X table needs special handling */
if (i == pci_msix_table_bar(pi)) {
error = init_msix_table(ctx, sc, base);
- if (error)
+ if (error)
return (-1);
} else if (bartype != PCIBAR_IO) {
/* Map the physical BAR in the guest MMIO space */
- error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
- pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
+ error = vm_map_pptdev_mmio(ctx, sc->pptfd,
+ pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
if (error)
return (-1);
}
@@ -614,114 +652,43 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
}
static int
-cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
+cfginit(struct vmctx *ctx, struct passthru_softc *sc)
{
- int error;
- struct passthru_softc *sc;
-
- error = 1;
- sc = pi->pi_arg;
-
- bzero(&sc->psc_sel, sizeof(struct pcisel));
- sc->psc_sel.pc_bus = bus;
- sc->psc_sel.pc_dev = slot;
- sc->psc_sel.pc_func = func;
-
if (cfginitmsi(sc) != 0) {
- warnx("failed to initialize MSI for PCI %d/%d/%d",
- bus, slot, func);
- goto done;
+ warnx("failed to initialize MSI for PCI %d", sc->pptfd);
+ return (-1);
}
if (cfginitbar(ctx, sc) != 0) {
- warnx("failed to initialize BARs for PCI %d/%d/%d",
- bus, slot, func);
- goto done;
+ warnx("failed to initialize BARs for PCI %d", sc->pptfd);
+ return (-1);
}
- error = 0; /* success */
-done:
- return (error);
+ return (0);
}
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
- int bus, slot, func, error, memflags;
+ int error, memflags, pptfd;
struct passthru_softc *sc;
-#ifndef WITHOUT_CAPSICUM
- cap_rights_t rights;
- cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR };
- cap_ioctl_t io_ioctls[] = { IODEV_PIO };
-#endif
sc = NULL;
error = 1;
-#ifndef WITHOUT_CAPSICUM
- cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE);
-#endif
-
memflags = vm_get_memflags(ctx);
if (!(memflags & VM_MEM_F_WIRED)) {
warnx("passthru requires guest memory to be wired");
goto done;
}
- if (pcifd < 0) {
- pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
- if (pcifd < 0) {
- warn("failed to open %s", _PATH_DEVPCI);
- goto done;
- }
- }
-
-#ifndef WITHOUT_CAPSICUM
- if (cap_rights_limit(pcifd, &rights) == -1 && errno != ENOSYS)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
- if (cap_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1 && errno != ENOSYS)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- if (iofd < 0) {
- iofd = open(_PATH_DEVIO, O_RDWR, 0);
- if (iofd < 0) {
- warn("failed to open %s", _PATH_DEVIO);
- goto done;
- }
- }
-
-#ifndef WITHOUT_CAPSICUM
- if (cap_rights_limit(iofd, &rights) == -1 && errno != ENOSYS)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
- if (cap_ioctls_limit(iofd, io_ioctls, nitems(io_ioctls)) == -1 && errno != ENOSYS)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- if (memfd < 0) {
- memfd = open(_PATH_MEM, O_RDWR, 0);
- if (memfd < 0) {
- warn("failed to open %s", _PATH_MEM);
- goto done;
- }
- }
-
-#ifndef WITHOUT_CAPSICUM
- cap_rights_clear(&rights, CAP_IOCTL);
- cap_rights_set(&rights, CAP_MMAP_RW);
- if (cap_rights_limit(memfd, &rights) == -1 && errno != ENOSYS)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- if (opts == NULL ||
- sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) {
+ if (opts == NULL || passthru_dev_open(opts, &pptfd) != 0) {
warnx("invalid passthru options");
goto done;
}
- if (vm_assign_pptdev(ctx, bus, slot, func) != 0) {
- warnx("PCI device at %d/%d/%d is not using the ppt(4) driver",
- bus, slot, func);
+ if (vm_assign_pptdev(ctx, pptfd) != 0) {
+ warnx("PCI device at %d is not using the ppt driver", pptfd);
goto done;
}
@@ -729,16 +696,21 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pi->pi_arg = sc;
sc->psc_pi = pi;
+ sc->pptfd = pptfd;
+
+ if ((error = vm_get_pptdev_limits(ctx, pptfd, &sc->msi_limit,
+ &sc->msix_limit)) != 0)
+ goto done;
/* initialize config space */
- if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ if ((error = cfginit(ctx, sc)) != 0)
goto done;
-
+
error = 0; /* success */
done:
if (error) {
free(sc);
- vm_unassign_pptdev(ctx, bus, slot, func);
+ vm_unassign_pptdev(ctx, pptfd);
}
return (error);
}
@@ -768,7 +740,7 @@ msicap_access(struct passthru_softc *sc, int coff)
return (0);
}
-static int
+static int
msixcap_access(struct passthru_softc *sc, int coff)
{
if (sc->psc_msix.capoff == 0)
@@ -780,7 +752,7 @@ msixcap_access(struct passthru_softc *sc, int coff)
static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t *rv)
+ int coff, int bytes, uint32_t *rv)
{
struct passthru_softc *sc;
@@ -792,6 +764,13 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
if (bar_access(coff) || msicap_access(sc, coff))
return (-1);
+ /*
+ * MSI-X is also emulated since a limit on interrupts may be imposed by
+ * the OS, altering the perceived register state.
+ */
+ if (msixcap_access(sc, coff))
+ return (-1);
+
#ifdef LEGACY_SUPPORT
/*
* Emulate PCIR_CAP_PTR if this device does not support MSI capability
@@ -804,14 +783,14 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
#endif
/* Everything else just read from the device's config space */
- *rv = read_config(&sc->psc_sel, coff, bytes);
+ *rv = read_config(sc, coff, bytes);
return (0);
}
static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int coff, int bytes, uint32_t val)
+ int coff, int bytes, uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@@ -830,10 +809,8 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
if (msicap_access(sc, coff)) {
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
- error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
- pi->pi_msi.addr, pi->pi_msi.msg_data,
- pi->pi_msi.maxmsgnum);
+ error = vm_setup_pptdev_msi(ctx, vcpu, sc->pptfd,
+ pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum);
if (error != 0)
err(1, "vm_setup_pptdev_msi");
return (0);
@@ -845,12 +822,11 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
msix_table_entries = pi->pi_msix.table_count;
for (i = 0; i < msix_table_entries; i++) {
error = vm_setup_pptdev_msix(ctx, vcpu,
- sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, i,
+ sc->pptfd, i,
pi->pi_msix.table[i].addr,
pi->pi_msix.table[i].msg_data,
pi->pi_msix.table[i].vector_control);
-
+
if (error)
err(1, "vm_setup_pptdev_msix");
}
@@ -870,57 +846,54 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
}
#endif
- write_config(&sc->psc_sel, coff, bytes, val);
+ write_config(sc, coff, bytes, val);
return (0);
}
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
- uint64_t offset, int size, uint64_t value)
+ uint64_t offset, int size, uint64_t value)
{
- struct passthru_softc *sc;
- struct iodev_pio_req pio;
-
- sc = pi->pi_arg;
+ struct passthru_softc *sc = pi->pi_arg;
if (baridx == pci_msix_table_bar(pi)) {
- msix_table_write(ctx, vcpu, sc, offset, size, value);
+ passthru_msix_table_write(ctx, vcpu, sc, offset, size, value);
} else {
+ struct ppt_bar_io pbi;
+
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
- bzero(&pio, sizeof(struct iodev_pio_req));
- pio.access = IODEV_PIO_WRITE;
- pio.port = sc->psc_bar[baridx].addr + offset;
- pio.width = size;
- pio.val = value;
-
- (void)ioctl(iofd, IODEV_PIO, &pio);
+
+ pbi.pbi_bar = baridx;
+ pbi.pbi_width = size;
+ pbi.pbi_off = offset;
+ pbi.pbi_data = value;
+ (void) ioctl(sc->pptfd, PPT_BAR_WRITE, &pbi);
}
}
static uint64_t
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
- uint64_t offset, int size)
+ uint64_t offset, int size)
{
- struct passthru_softc *sc;
- struct iodev_pio_req pio;
+ struct passthru_softc *sc = pi->pi_arg;
uint64_t val;
- sc = pi->pi_arg;
-
if (baridx == pci_msix_table_bar(pi)) {
- val = msix_table_read(sc, offset, size);
+ val = passthru_msix_table_read(sc, offset, size);
} else {
- assert(pi->pi_bar[baridx].type == PCIBAR_IO);
- bzero(&pio, sizeof(struct iodev_pio_req));
- pio.access = IODEV_PIO_READ;
- pio.port = sc->psc_bar[baridx].addr + offset;
- pio.width = size;
- pio.val = 0;
+ struct ppt_bar_io pbi;
- (void)ioctl(iofd, IODEV_PIO, &pio);
+ assert(pi->pi_bar[baridx].type == PCIBAR_IO);
- val = pio.val;
+ pbi.pbi_bar = baridx;
+ pbi.pbi_width = size;
+ pbi.pbi_off = offset;
+ if (ioctl(sc->pptfd, PPT_BAR_READ, &pbi) == 0) {
+ val = pbi.pbi_data;
+ } else {
+ val = 0;
+ }
}
return (val);
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index b8bdf524a9..bbe36917fd 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -183,7 +183,9 @@ usage(bool cpu_intel)
" [--get-ldtr]\n"
" [--set-x2apic-state=<state>]\n"
" [--get-x2apic-state]\n"
+#ifdef __FreeBSD__
" [--unassign-pptdev=<bus/slot/func>]\n"
+#endif
" [--set-mem=<memory in units of MB>]\n"
" [--get-lowmem]\n"
" [--get-highmem]\n"
@@ -302,7 +304,9 @@ static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
static int set_x2apic_state, get_x2apic_state;
enum x2apic_state x2apic_state;
+#ifdef __FreeBSD__
static int unassign_pptdev, bus, slot, func;
+#endif
static int run;
static int get_cpu_topology;
#ifndef __FreeBSD__
@@ -1875,11 +1879,13 @@ main(int argc, char *argv[])
case CAPNAME:
capname = optarg;
break;
+#ifdef __FreeBSD__
case UNASSIGN_PPTDEV:
unassign_pptdev = 1;
if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
usage(cpu_intel);
break;
+#endif
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
@@ -2040,8 +2046,10 @@ main(int argc, char *argv[])
if (!error && set_x2apic_state)
error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
+#ifdef __FreeBSD__
if (!error && unassign_pptdev)
error = vm_unassign_pptdev(ctx, bus, slot, func);
+#endif /* __FreeBSD__ */
if (!error && set_exception_bitmap) {
if (cpu_intel)
diff --git a/usr/src/cmd/devfsadm/i386/misc_link_i386.c b/usr/src/cmd/devfsadm/i386/misc_link_i386.c
index 0f8e64551d..eb5f789c37 100644
--- a/usr/src/cmd/devfsadm/i386/misc_link_i386.c
+++ b/usr/src/cmd/devfsadm/i386/misc_link_i386.c
@@ -46,6 +46,7 @@ static int kdmouse(di_minor_t minor, di_node_t node);
static int ipmi(di_minor_t minor, di_node_t node);
static int mc_node(di_minor_t minor, di_node_t node);
static int vmmctl(di_minor_t minor, di_node_t node);
+static int ppt(di_minor_t minor, di_node_t node);
static devfsadm_create_t misc_cbt[] = {
{ "vt00", "ddi_display", NULL,
@@ -90,6 +91,9 @@ static devfsadm_create_t misc_cbt[] = {
},
{ "pseudo", "ddi_pseudo", "vmm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, vmmctl,
+ },
+ { "pseudo", "ddi_pseudo", "ppt",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, ppt,
}
};
@@ -122,6 +126,9 @@ static devfsadm_remove_t misc_remove_cbt[] = {
},
{ "pseudo", "^vmmctl$", RM_ALWAYS | RM_PRE | RM_HOT,
ILEVEL_0, devfsadm_rm_all
+ },
+ { "pseudo", "^ppt$", RM_ALWAYS | RM_PRE | RM_HOT,
+ ILEVEL_0, devfsadm_rm_all
}
};
@@ -369,3 +376,15 @@ vmmctl(di_minor_t minor, di_node_t node)
(void) devfsadm_mklink("vmmctl", node, minor, 0);
return (DEVFSADM_CONTINUE);
}
+
+/*
+ * devfsadm create callback for minor nodes of the "ppt" driver: make a link
+ * named ppt<instance> for the node and let devfsadm carry on with any
+ * remaining rules.
+ */
+static int
+ppt(di_minor_t minor, di_node_t node)
+{
+	char name[PATH_MAX];
+	const int inst = di_instance(node);
+
+	(void) snprintf(name, sizeof (name), "ppt%d", inst);
+	(void) devfsadm_mklink(name, node, minor, 0);
+
+	return (DEVFSADM_CONTINUE);
+}
diff --git a/usr/src/cmd/pptadm/Makefile b/usr/src/cmd/pptadm/Makefile
new file mode 100644
index 0000000000..3be558a7a0
--- /dev/null
+++ b/usr/src/cmd/pptadm/Makefile
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+PROG = pptadm
+OBJS = pptadm.o
+SRCS = $(OBJS:%.o=%.c)
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+LDLIBS += -lofmt -lppt -lnvpair
+
+CSTD = $(CSTD_GNU99)
+C99LMODE = -Xc99=%all
+
+CLEANFILES += $(OBJS)
+
+.KEEP_STATE:
+
+all: $(OBJS) $(PROG)
+
+install: all $(ROOTUSRSBINPROG)
+
+clean:
+ -$(RM) $(CLEANFILES)
+
+lint: lint_SRCS
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/pptadm/pptadm.c b/usr/src/cmd/pptadm/pptadm.c
new file mode 100644
index 0000000000..c6b9094408
--- /dev/null
+++ b/usr/src/cmd/pptadm/pptadm.c
@@ -0,0 +1,205 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <string.h>
+#include <ofmt.h>
+#include <err.h>
+
+#include <libppt.h>
+
+/*
+ * Identifiers for the output columns.  Each value is used as the ofmt field
+ * id in fields[] and as the index into valname[], so the order here must
+ * stay in step with valname[].
+ */
+typedef enum field {
+ PPT_DEV,
+ PPT_VENDOR,
+ PPT_DEVICE,
+ PPT_SUBVENDOR,
+ PPT_SUBDEVICE,
+ PPT_REV,
+ PPT_PATH,
+ PPT_LABEL
+} field_t;
+
+/*
+ * Name of the nvlist property backing each column, indexed by field_t.
+ * The table is private to this file and never modified, hence static and
+ * const on both the pointers and the strings.
+ */
+static const char *const valname[] = {
+	"dev",
+	"vendor-id",
+	"device-id",
+	"subsystem-vendor-id",
+	"subsystem-id",
+	"revision-id",
+	"path",
+	"label"
+};
+
+static ofmt_cb_t print_field;
+
+/*
+ * Column table handed to ofmt_open().  Every column is rendered by
+ * print_field(), which uses the index (a field_t) to pick the property name
+ * out of valname[].  Widths written as sizeof ("...") include the string's
+ * terminating NUL.  The all-NULL/zero entry terminates the table.
+ */
+static ofmt_field_t fields[] = {
+/* name, field width, index, callback */
+{ "DEV", sizeof ("/dev/pptXX"), PPT_DEV, print_field },
+{ "VENDOR", sizeof ("VENDOR"), PPT_VENDOR, print_field },
+{ "DEVICE", sizeof ("DEVICE"), PPT_DEVICE, print_field },
+{ "SUBVENDOR", sizeof ("SUBVENDOR"), PPT_SUBVENDOR, print_field },
+{ "SUBDEVICE", sizeof ("SUBDEVICE"), PPT_SUBDEVICE, print_field },
+{ "REV", sizeof ("REV"), PPT_REV, print_field },
+{ "PATH", 50, PPT_PATH, print_field },
+{ "LABEL", 60, PPT_LABEL, print_field },
+{ NULL, 0, 0, NULL },
+};
+
+/*
+ * Print the usage summary and exit.
+ *
+ * With a non-NULL errmsg, the message and the summary are written to stderr
+ * and the exit status is EXIT_FAILURE.  With a NULL errmsg, the summary is
+ * written to stdout and the exit status is EXIT_SUCCESS.
+ */
+static void
+usage(const char *errmsg)
+{
+	const boolean_t failed = (errmsg != NULL);
+	FILE *out = failed ? stderr : stdout;
+
+	if (failed)
+		(void) fprintf(stderr, "pptadm: %s\n", errmsg);
+
+	(void) fprintf(out,
+	    "Usage:\n"
+	    "pptadm list [ -j ]\n"
+	    "pptadm list [-ap] [-o fields]\n");
+
+	exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
+}
+
+/*
+ * Report a fatal, printf-style formatted error through verrx() and exit
+ * with EXIT_FAILURE.  list() passes this function to ofmt_check() as one of
+ * its two reporting callbacks.
+ */
+/* PRINTFLIKE1 */
+static void
+die(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ verrx(EXIT_FAILURE, fmt, ap);
+ /* verrx() is documented not to return; va_end() is kept for form. */
+ va_end(ap);
+}
+
+/*
+ * ofmt callback: render one column of one device.
+ *
+ * arg->ofmt_cbarg is the nvlist of properties for the device and
+ * arg->ofmt_id is the field_t of the column, which indexes valname[] to
+ * obtain the property name.  The property's string value is copied into
+ * buf (at most bufsize bytes, always NUL-terminated by snprintf()).  When
+ * the property is missing, or is present but not a string, "--" is written
+ * instead.
+ *
+ * Always returns B_TRUE.
+ */
+static boolean_t
+print_field(ofmt_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *nvl = arg->ofmt_cbarg;
+	char *val = NULL;
+
+	/*
+	 * nvlist_lookup_string() succeeds only for a pair of string type, so
+	 * val can never reach the "%s" conversion below as NULL.
+	 */
+	if (nvlist_lookup_string(nvl, valname[arg->ofmt_id], &val) != 0 ||
+	    val == NULL) {
+		(void) snprintf(buf, bufsize, "--");
+		return (B_TRUE);
+	}
+
+	(void) snprintf(buf, bufsize, "%s", val);
+	return (B_TRUE);
+}
+
+/*
+ * Implementation of "pptadm list".
+ *
+ * argc/argv have already been shifted by main() so that argv[0] is the
+ * sub-command name and getopt() parses what follows it.
+ *
+ * Options:
+ *	-a	list every device returned by ppt_list() instead of only
+ *		those returned by ppt_list_assigned()
+ *	-h	print the usage summary and exit successfully
+ *	-j	emit JSON; cannot be combined with -p or -o
+ *	-o	comma-separated list of columns to print
+ *	-p	parsable output; requires -o
+ *
+ * Any operand left over after the options is an error.  Failures are
+ * reported by exiting, through usage(), die() or err(); when the function
+ * returns at all it returns EXIT_SUCCESS.
+ */
+static int
+list(int argc, char *argv[])
+{
+	const char *fields_str = NULL;
+	boolean_t parsable = B_FALSE;
+	boolean_t json = B_FALSE;
+	boolean_t all = B_FALSE;
+	uint_t ofmtflags = 0;
+	ofmt_status_t oferr;
+	ofmt_handle_t ofmt;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "ahjo:p")) != -1) {
+		switch (opt) {
+		case 'a':
+			all = B_TRUE;
+			break;
+		case 'h':
+			usage(NULL);
+			break;
+		case 'j':
+			json = B_TRUE;
+			break;
+		case 'o':
+			fields_str = optarg;
+			break;
+		case 'p':
+			ofmtflags |= OFMT_PARSABLE;
+			parsable = B_TRUE;
+			break;
+		default:
+			usage("unrecognized option");
+			break;
+		}
+	}
+
+	/*
+	 * getopt() leaves optind at the first operand.  The sub-command
+	 * takes none, so anything remaining is rejected -- however many
+	 * operands there are, not just when exactly one is left.
+	 */
+	if (optind < argc)
+		usage("unused arguments");
+
+	if (json && (parsable || fields_str != NULL))
+		usage("-j option cannot be used with -p or -o options");
+
+	if (fields_str == NULL) {
+		if (parsable)
+			usage("-o must be provided when using -p option");
+		fields_str = "dev,vendor,device,path";
+	}
+
+	oferr = ofmt_open(fields_str, fields, ofmtflags, 0, &ofmt);
+
+	ofmt_check(oferr, parsable, ofmt, die, warn);
+
+	nvlist_t *nvl = all ? ppt_list() : ppt_list_assigned();
+	nvpair_t *nvp = NULL;
+
+	if (json) {
+		if (printf("{\n\t\"devices\": [\n") < 0)
+			err(EXIT_FAILURE, "failed to write JSON");
+	}
+
+	/* Each pair in the list carries the property nvlist of one device. */
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		nvlist_t *props;
+
+		(void) nvpair_value_nvlist(nvp, &props);
+
+		if (json) {
+			if (printf("\t\t") < 0)
+				err(EXIT_FAILURE, "failed to write JSON");
+			if (nvlist_print_json(stdout, props) < 0)
+				err(EXIT_FAILURE, "failed to write JSON");
+			/* Separate array elements; none after the last. */
+			if (nvlist_next_nvpair(nvl, nvp) != NULL)
+				(void) printf(",\n");
+		} else {
+			ofmt_print(ofmt, props);
+		}
+	}
+
+	if (json) {
+		if (printf("\n\t]\n}\n") < 0)
+			err(EXIT_FAILURE, "failed to write JSON");
+	}
+
+	nvlist_free(nvl);
+	ofmt_close(ofmt);
+	return (EXIT_SUCCESS);
+}
+
+/*
+ * Entry point.  With no arguments the device list is printed with default
+ * settings; otherwise the first argument must be the "list" sub-command,
+ * and list() is handed the argument vector starting at that sub-command.
+ * Any other sub-command ends in usage().
+ */
+int
+main(int argc, char *argv[])
+{
+	if (argc == 1)
+		return (list(argc - 1, argv));
+
+	if (strcmp(argv[1], "list") != 0) {
+		usage("unknown sub-command");
+		return (EXIT_SUCCESS);
+	}
+
+	return (list(argc - 1, &argv[1]));
+}
diff --git a/usr/src/compat/freebsd/amd64/machine/cpu.h b/usr/src/compat/freebsd/amd64/machine/cpu.h
new file mode 100644
index 0000000000..40253af108
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/cpu.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H
+#define _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H
+
+#include <sys/cpu.h>
+
+/* FreeBSD's cpu_spinwait() is provided here by expanding to SMT_PAUSE(). */
+#define cpu_spinwait() SMT_PAUSE()
+
+#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H */
diff --git a/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h b/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h
new file mode 100644
index 0000000000..2668f98ab3
--- /dev/null
+++ b/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H
+#define _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H
+
+#include <sys/acpi/acpi.h>
+
+#endif /* _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H */
diff --git a/usr/src/compat/freebsd/dev/pci/pcivar.h b/usr/src/compat/freebsd/dev/pci/pcivar.h
new file mode 100644
index 0000000000..064d983117
--- /dev/null
+++ b/usr/src/compat/freebsd/dev/pci/pcivar.h
@@ -0,0 +1,38 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H
+#define _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pcie.h>
+#include <sys/pcie_impl.h>
+
+/*
+ * Return the pcie_req_id_t that pcie_get_bdf_from_dip() reports for a device
+ * node.  The lookup result is checked with VERIFY(), so this function has no
+ * failure return of its own.
+ */
+static inline pcie_req_id_t
+pci_get_bdf(device_t dev)
+{
+	pcie_req_id_t rid;
+	int rv;
+
+	rv = pcie_get_bdf_from_dip(dev, &rid);
+	VERIFY(rv == DDI_SUCCESS);
+
+	return (rid);
+}
+
+/* FreeBSD's pci_get_rid() maps directly onto pci_get_bdf(). */
+#define pci_get_rid(dev) (pci_get_bdf(dev))
+
+#endif /* _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H */
diff --git a/usr/src/compat/freebsd/sys/bus.h b/usr/src/compat/freebsd/sys/bus.h
new file mode 100644
index 0000000000..e3b5e0e69d
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/bus.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_BUS_H
+#define _COMPAT_FREEBSD_SYS_BUS_H
+
+/* FreeBSD's device_get_softc() is expressed via ddi_get_driver_private(). */
+#define device_get_softc(dev) ddi_get_driver_private(dev)
+
+#endif /* _COMPAT_FREEBSD_SYS_BUS_H */
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index b64d4c2bc1..c40721fd55 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -186,6 +186,7 @@ SUBDIRS += \
libpkg \
libpool \
libpp \
+ libppt \
libproc \
libproject \
libpthread \
@@ -446,6 +447,7 @@ HDRSUBDIRS= \
libpicltree \
libpool \
libpp \
+ libppt \
libproc \
libraidcfg \
librcm \
@@ -657,6 +659,7 @@ libpctx: libproc
libpkg: libscf libadm
libpool: libscf libexacct
libpp: libast
+libppt: libpcidb libdevinfo libcmdutils
libproc: ../cmd/sgs/librtld_db ../cmd/sgs/libelf libctf
$(INTEL_BLD)libproc: libsaveargs
libproject: libpool libproc libsecdb
diff --git a/usr/src/lib/libppt/Makefile b/usr/src/lib/libppt/Makefile
new file mode 100644
index 0000000000..21c26d447e
--- /dev/null
+++ b/usr/src/lib/libppt/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+include $(SRC)/lib/Makefile.lib
+
+SUBDIRS = $(MACH) $(BUILD64) $(MACH64)
+
+HDRS = libppt.h
+HDRDIR = common
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+all install: install_h
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/libppt/Makefile.com b/usr/src/lib/libppt/Makefile.com
new file mode 100644
index 0000000000..7b2ff4885f
--- /dev/null
+++ b/usr/src/lib/libppt/Makefile.com
@@ -0,0 +1,46 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+LIBRARY = libppt.a
+VERS = .1
+
+OBJECTS = libppt.o
+
+include $(SRC)/lib/Makefile.lib
+
+SRCDIR = ../common
+
+LIBS = $(DYNLIB) $(LINTLIB)
+SRCS = $(SRCDIR)/libppt.c
+
+CSTD= $(CSTD_GNU99)
+C99LMODE= -Xc99=%all
+
+#
+# lint doesn't like %4s in sscanf().
+#
+LINTFLAGS += -erroff=E_BAD_FORMAT_ARG_TYPE2
+LINTFLAGS64 += -erroff=E_BAD_FORMAT_ARG_TYPE2
+
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
+LDLIBS += -lpcidb -ldevinfo -lcmdutils -lnvpair -lc
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/libppt/amd64/Makefile b/usr/src/lib/libppt/amd64/Makefile
new file mode 100644
index 0000000000..5a304d7fe7
--- /dev/null
+++ b/usr/src/lib/libppt/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libppt/common/libppt.c b/usr/src/lib/libppt/common/libppt.c
new file mode 100644
index 0000000000..7e8385da06
--- /dev/null
+++ b/usr/src/lib/libppt/common/libppt.c
@@ -0,0 +1,506 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ *
+ * Convenience routines for identifying current or available devices that are
+ * suitable for PCI passthrough to a bhyve guest.
+ */
+
+#include <libdevinfo.h>
+#include <libppt.h>
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/list.h>
+#include <strings.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pcidb.h>
+#include <glob.h>
+
+/*
+ * State shared between ppt_list() and its per-node callback, inspect_node().
+ */
+typedef struct node_data {
+ pcidb_hdl_t *nd_db; /* PCI database handle handed to dev_getinfo() */
+ list_t nd_matches; /* ppt_match_t entries loaded by get_matches() */
+ nvlist_t *nd_nvl; /* result: device path -> property nvlist */
+ int nd_err; /* error recorded by inspect_node(), else 0 */
+} node_data_t;
+
+/*
+ * One entry read from a ppt_matches file. get_matches() fills in either
+ * pm_path, or pm_vendor with an optional pm_device; fields that are not used
+ * by an entry remain empty strings.
+ */
+typedef struct ppt_match {
+ list_node_t pm_list; /* linkage on the list built by get_matches() */
+ char pm_path[MAXPATHLEN]; /* set for lines that start with '/' */
+ char pm_vendor[5]; /* up to four characters plus terminator */
+ char pm_device[5]; /* up to four characters plus terminator */
+} ppt_match_t;
+
+/*
+ * Report whether di_node sits directly below a PCI or PCI Express parent:
+ * B_TRUE only if the parent node has exactly one "device_type" string and
+ * that string is "pci" or "pciex".
+ */
+static boolean_t
+is_pci(di_node_t di_node)
+{
+	di_node_t parent = di_parent_node(di_node);
+	char *devtype;
+	int nvals;
+
+	nvals = di_prop_lookup_strings(DDI_DEV_T_ANY, parent, "device_type",
+	    &devtype);
+
+	if (nvals != 1)
+		return (B_FALSE);
+
+	if (strcmp(devtype, "pci") == 0)
+		return (B_TRUE);
+
+	return (strcmp(devtype, "pciex") == 0 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Look up the single-valued integer property "name" on di_node and add it to
+ * nvl under the same name, formatted as a hexadecimal string without prefix.
+ * If ival is non-NULL it also receives the raw integer value.
+ *
+ * Returns 0 on success or an error number on failure.  A property that does
+ * not have exactly one value is always treated as a failure.
+ */
+static int
+populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival)
+{
+	char val[20];
+	int *ivals;
+	int nvals;
+	int err;
+
+	nvals = di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals);
+
+	if (nvals != 1) {
+		/*
+		 * errno is only meaningful when the lookup itself failed.  A
+		 * property with zero or several values must still be reported
+		 * as an error: returning a stale errno of 0 would signal
+		 * success without the property being added or *ival being set.
+		 */
+		if (nvals < 0 && errno != 0)
+			return (errno);
+		return (EINVAL);
+	}
+
+	(void) snprintf(val, sizeof (val), "%x", ivals[0]);
+
+	err = nvlist_add_string(nvl, name, val);
+
+	if (err == 0 && ival != NULL)
+		*ival = ivals[0];
+
+	return (err);
+}
+
+/*
+ * Build a "<vendor name> <device name>" label for the given vendor and device
+ * IDs from the PCI database.  Returns 0 with buf filled in, or ENOENT when
+ * either the vendor or the device cannot be found.
+ */
+static int
+dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen)
+{
+	pcidb_vendor_t *vendor;
+	pcidb_device_t *device;
+
+	vendor = pcidb_lookup_vendor(db, vid);
+	if (vendor == NULL)
+		return (ENOENT);
+
+	device = pcidb_lookup_device_by_vendor(vendor, did);
+	if (device == NULL)
+		return (ENOENT);
+
+	(void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vendor),
+	    pcidb_device_name(device));
+
+	return (0);
+}
+
+/*
+ * Allocate and fill an nvlist describing one device: the optional "dev" name,
+ * its "path", the five PCI ID properties and, when the PCI database knows the
+ * vendor/device pair, a "label".  A missing database entry is not an error.
+ *
+ * Returns the new nvlist, or NULL with errno set to the failure cause.
+ */
+static nvlist_t *
+dev_getinfo(di_node_t di_node, pcidb_hdl_t *db,
+    const char *dev, const char *path)
+{
+	char label[MAXPATHLEN];
+	nvlist_t *nvl = NULL;
+	int vid, did;
+	int err;
+
+	/* Each step runs only while every earlier step has succeeded. */
+	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+
+	if (err == 0 && dev != NULL)
+		err = nvlist_add_string(nvl, "dev", dev);
+	if (err == 0)
+		err = nvlist_add_string(nvl, "path", path);
+	if (err == 0)
+		err = populate_int_prop(di_node, nvl, "vendor-id", &vid);
+	if (err == 0)
+		err = populate_int_prop(di_node, nvl, "device-id", &did);
+	if (err == 0) {
+		err = populate_int_prop(di_node, nvl,
+		    "subsystem-vendor-id", NULL);
+	}
+	if (err == 0)
+		err = populate_int_prop(di_node, nvl, "subsystem-id", NULL);
+	if (err == 0)
+		err = populate_int_prop(di_node, nvl, "revision-id", NULL);
+
+	if (err == 0) {
+		err = dev_getlabel(db, vid, did, label, sizeof (label));
+
+		if (err == 0)
+			err = nvlist_add_string(nvl, "label", label);
+		else if (err == ENOENT)
+			err = 0;	/* no label available; carry on */
+	}
+
+	if (err != 0) {
+		nvlist_free(nvl);
+		errno = err;
+		return (NULL);
+	}
+
+	return (nvl);
+}
+
+/*
+ * Convert a filesystem path into a physical device path, e.g.
+ *
+ *	/devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1
+ *
+ * A trailing ":ppt" is cut off by writing into fspath, and a leading
+ * "/devices" is skipped.  The result points into fspath.
+ */
+static const char *
+fs_to_phys_path(char *fspath)
+{
+	static const char devices[] = "/devices";
+	const size_t devlen = sizeof (devices) - 1;
+	char *minor = strrchr(fspath, ':');
+
+	if (minor != NULL && strcmp(minor, ":ppt") == 0)
+		*minor = '\0';
+
+	if (strncmp(fspath, devices, devlen) != 0)
+		return (fspath);
+
+	return (fspath + devlen);
+}
+
+/*
+ * Return an nvlist representing the mappings of /dev/ppt* devices to physical
+ * devices. Of the form:
+ *
+ * /pci@0,0/... {
+ * dev: "/dev/ppt0"
+ * path: "/pci@0,0/..."
+ * vendor-id: "8086"
+ * device-id: "1528"
+ * subsystem-vendor-id: "8086"
+ * subsystem-id: "1528"
+ * revision-id: "1"
+ * label: "Intel Corporation ..."
+ * },
+ * /pci@0,0/...
+ *
+ * The nvlist should be freed by the caller.
+ *
+ * If nothing matches /dev/ppt*, an empty nvlist is returned. On failure NULL
+ * is returned; apart from the di_init() failure path, errno is set here to
+ * the recorded error.
+ */
+nvlist_t *
+ppt_list_assigned(void)
+{
+ di_node_t di_root = DI_NODE_NIL;
+ pcidb_hdl_t *db = NULL;
+ nvlist_t *nvl = NULL;
+ glob_t gl;
+ int err;
+
+ /* Zeroed up front so that globfree() at "out" is safe on every path. */
+ bzero(&gl, sizeof (gl));
+
+ if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
+ return (NULL);
+
+ if ((db = pcidb_open(PCIDB_VERSION)) == NULL) {
+ err = errno;
+ goto out;
+ }
+
+ if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
+ goto out;
+
+ if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR,
+ NULL, &gl)) != 0) {
+ /* No matches is not an error: the empty nvlist is returned. */
+ err = (err == GLOB_NOMATCH) ? 0 : errno;
+ goto out;
+ }
+
+ for (size_t i = 0; i < gl.gl_pathc; i++) {
+ char fspath[MAXPATHLEN];
+ nvlist_t *info_nvl;
+ di_node_t di_node;
+ const char *path;
+
+ /* Only symbolic links are considered; anything else is skipped. */
+ if (!S_ISLNK(gl.gl_statv[i]->st_mode))
+ continue;
+
+ if (realpath(gl.gl_pathv[i], fspath) == NULL) {
+ err = errno;
+ goto out;
+ }
+
+ /* Points into fspath, which is modified in place. */
+ path = fs_to_phys_path(fspath);
+
+ /*
+ * path argument is treated as const.
+ */
+ if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) {
+ err = errno;
+ goto out;
+ }
+
+ if (!is_pci(di_node))
+ continue;
+
+ info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path);
+
+ if (info_nvl == NULL) {
+ err = errno;
+ goto out;
+ }
+
+ /* info_nvl is freed here whether or not the add succeeded. */
+ err = nvlist_add_nvlist(nvl, path, info_nvl);
+ nvlist_free(info_nvl);
+
+ if (err)
+ goto out;
+ }
+
+out:
+ if (di_root != DI_NODE_NIL)
+ di_fini(di_root);
+
+ pcidb_close(db);
+ globfree(&gl);
+
+ if (err) {
+ nvlist_free(nvl);
+ errno = err;
+ return (NULL);
+ }
+
+ return (nvl);
+}
+
+/*
+ * Read in our list of potential PPT devices.  A boot-module provided file
+ * explicitly over-rides anything delivered.
+ *
+ * Each line is either a PCI ID of the form "pci<vendor>[,<device>]" or
+ * "pciex<vendor>[,<device>]", or a path starting with '/'.  Lines in any
+ * other form are ignored.
+ *
+ * listp is always initialised, even on failure; entries appended before a
+ * failure are left on the list for the caller to free.
+ *
+ * Returns 0 on success (including when neither file exists), otherwise an
+ * error number.
+ */
+static int
+get_matches(list_t *listp)
+{
+	int err = 0;
+	FILE *fp;
+
+	list_create(listp, sizeof (ppt_match_t),
+	    offsetof(ppt_match_t, pm_list));
+
+	if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) {
+		if (errno != ENOENT)
+			return (errno);
+
+		if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) {
+			if (errno == ENOENT)
+				return (0);
+			return (errno);
+		}
+	}
+
+	for (;;) {
+		char *line = NULL;
+		ppt_match_t *pm;
+		size_t cap = 0;
+		ssize_t read;
+
+		if ((read = getline(&line, &cap, fp)) <= 0) {
+			free(line);
+			break;
+		}
+
+		if (line[read - 1] == '\n')
+			line[read - 1] = '\0';
+
+		/* Zero-filled so that unused match fields are empty strings. */
+		if ((pm = calloc(1, sizeof (*pm))) == NULL) {
+			err = errno;
+			free(line);
+			goto out;
+		}
+
+		/*
+		 * %4s expects a char *, so pass the arrays themselves (which
+		 * decay to char *) rather than their addresses.
+		 */
+		if (sscanf(line, "pciex%4s,%4s", pm->pm_vendor,
+		    pm->pm_device) == 2 ||
+		    sscanf(line, "pci%4s,%4s", pm->pm_vendor,
+		    pm->pm_device) == 2 ||
+		    sscanf(line, "pciex%4s", pm->pm_vendor) == 1 ||
+		    sscanf(line, "pci%4s", pm->pm_vendor) == 1) {
+			list_insert_tail(listp, pm);
+		} else if (line[0] == '/') {
+			(void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path));
+			list_insert_tail(listp, pm);
+		} else {
+			/*
+			 * Ignore any line we don't understand.
+			 */
+			free(pm);
+		}
+
+		free(line);
+	}
+
+out:
+	(void) fclose(fp);
+	return (err);
+}
+
+/*
+ * Decide whether the device described by nvl is selected by any entry on the
+ * matches list.  An entry selects a device either by exact path, or by vendor
+ * ID with an optional device ID (an empty device ID matches any device of
+ * that vendor).  Returns B_FALSE if nvl lacks "path", "vendor-id" or
+ * "device-id".
+ */
+static boolean_t
+match_ppt(list_t *matches, nvlist_t *nvl)
+{
+	char *dev_path;
+	char *dev_vid;
+	char *dev_did;
+	ppt_match_t *ent;
+
+	if (nvlist_lookup_string(nvl, "path", &dev_path) != 0)
+		return (B_FALSE);
+	if (nvlist_lookup_string(nvl, "vendor-id", &dev_vid) != 0)
+		return (B_FALSE);
+	if (nvlist_lookup_string(nvl, "device-id", &dev_did) != 0)
+		return (B_FALSE);
+
+	ent = list_head(matches);
+
+	while (ent != NULL) {
+		if (ent->pm_path[0] != '\0' &&
+		    strcmp(ent->pm_path, dev_path) == 0)
+			return (B_TRUE);
+
+		if (ent->pm_vendor[0] != '\0' &&
+		    strcmp(ent->pm_vendor, dev_vid) == 0 &&
+		    (ent->pm_device[0] == '\0' ||
+		    strcmp(ent->pm_device, dev_did) == 0))
+			return (B_TRUE);
+
+		ent = list_next(matches, ent);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * di_walk_node() callback used by ppt_list(); arg is the node_data_t.
+ *
+ * Nodes without a PCI parent are skipped.  A node bound to the "ppt" driver
+ * is always reported, with a "/dev/ppt<instance>" name; any other node is
+ * reported only if it is selected by the matches list.  A node whose
+ * information cannot be gathered is skipped without recording an error.
+ *
+ * Returns DI_WALK_TERMINATE once an error has been recorded in nd_err,
+ * otherwise DI_WALK_CONTINUE.
+ */
+static int
+inspect_node(di_node_t di_node, void *arg)
+{
+	node_data_t *data = arg;
+	nvlist_t *info_nvl = NULL;
+	char *devname = NULL;
+	const char *driver;
+	char *path = NULL;
+
+	if (!is_pci(di_node))
+		return (DI_WALK_CONTINUE);
+
+	driver = di_driver_name(di_node);
+
+	if (driver != NULL && strcmp(driver, "ppt") == 0) {
+		if (asprintf(&devname, "/dev/ppt%d",
+		    di_instance(di_node)) < 0) {
+			data->nd_err = errno;
+			/* Don't hand an indeterminate pointer to free(). */
+			devname = NULL;
+			goto out;
+		}
+	}
+
+	if ((path = di_devfs_path(di_node)) == NULL) {
+		data->nd_err = ENOENT;
+		goto out;
+	}
+
+	info_nvl = dev_getinfo(di_node, data->nd_db, devname, path);
+
+	if (info_nvl == NULL)
+		goto out;
+
+	if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl))
+		goto out;
+
+	data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl);
+
+out:
+	/* Paths from di_devfs_path() are released with their own routine. */
+	if (path != NULL)
+		di_devfs_path_free(path);
+	free(devname);
+	nvlist_free(info_nvl);
+	return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
+}
+
+/*
+ * Like ppt_list_assigned() output, but includes all devices that could be used
+ * for passthrough, whether assigned or not.
+ *
+ * Returns NULL on failure; apart from the di_init() failure path, errno is
+ * set here to the recorded error.
+ */
+nvlist_t *
+ppt_list(void)
+{
+	node_data_t nd = { NULL, };
+	di_node_t di_root;
+	int err;
+
+	if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
+		return (NULL);
+
+	if ((err = get_matches(&nd.nd_matches)) != 0)
+		goto out;
+
+	if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) {
+		err = errno;
+		goto out;
+	}
+
+	if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0)
+		goto out;
+
+	/*
+	 * di_walk_node() reports failure as -1 with the cause in errno, so
+	 * its return value must not be used as the error number itself.
+	 */
+	if (di_walk_node(di_root, DI_WALK_CLDFIRST, &nd, inspect_node) != 0) {
+		err = (errno != 0) ? errno : EIO;
+		goto out;
+	}
+
+	/* Pick up any error recorded by inspect_node() during the walk. */
+	err = nd.nd_err;
+
+out:
+	pcidb_close(nd.nd_db);
+
+	/* get_matches() always initialises the list, so this is safe. */
+	for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) {
+		ppt_match_t *next = list_next(&nd.nd_matches, pm);
+		free(pm);
+		pm = next;
+	}
+
+	if (di_root != DI_NODE_NIL)
+		di_fini(di_root);
+
+	if (err) {
+		nvlist_free(nd.nd_nvl);
+		errno = err;
+		return (NULL);
+	}
+
+	return (nd.nd_nvl);
+}
+
+/*
+ * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX"
+ * that is bound to it, if any.  The "/devices/" prefix is optional.  The
+ * physical path may have the ":ppt" minor name suffix.
+ *
+ * On success 0 is returned and the name is copied into buf.  Returns ENOENT
+ * if no such PPT device exists, ENAMETOOLONG if inpath does not fit in
+ * MAXPATHLEN or the name does not fit in buflen (buf then holds a truncated
+ * copy), or the errno left by ppt_list_assigned() if the list could not be
+ * built.
+ */
+int
+ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen)
+{
+	char fspath[MAXPATHLEN] = "";
+	nvpair_t *nvp = NULL;
+	const char *devpath;
+	int err = ENOENT;
+	nvlist_t *nvl;
+
+	/* Work on a private copy: fs_to_phys_path() edits its argument. */
+	if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath))
+		return (ENAMETOOLONG);
+
+	devpath = fs_to_phys_path(fspath);
+
+	if ((nvl = ppt_list_assigned()) == NULL)
+		return (errno);
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		char *ppt = NULL;
+		nvlist_t *props;
+
+		if (strcmp(nvpair_name(nvp), devpath) != 0)
+			continue;
+
+		/*
+		 * The list is built with unique names, so this is the only
+		 * candidate.  If it carries no usable "dev" entry, report
+		 * ENOENT rather than copying from a NULL pointer.
+		 */
+		if (nvpair_value_nvlist(nvp, &props) != 0 ||
+		    nvlist_lookup_string(props, "dev", &ppt) != 0 ||
+		    ppt == NULL)
+			break;
+
+		err = 0;
+
+		if (strlcpy(buf, ppt, buflen) >= buflen)
+			err = ENAMETOOLONG;
+		break;
+	}
+
+	nvlist_free(nvl);
+	return (err);
+}
diff --git a/usr/src/lib/libppt/common/libppt.h b/usr/src/lib/libppt/common/libppt.h
new file mode 100644
index 0000000000..efbf2c7b8b
--- /dev/null
+++ b/usr/src/lib/libppt/common/libppt.h
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ *
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _LIBPPT_H
+#define _LIBPPT_H
+
+#include <sys/types.h>
+
+#include <libnvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Map a physical device path to the "/dev/ppt*" name bound to it, copying
+ * the name into the supplied buffer. Returns 0 or an error number.
+ */
+extern int ppt_devpath_to_dev(const char *, char *, size_t);
+
+/* List devices found through /dev/ppt* links; the caller frees the nvlist. */
+extern nvlist_t *ppt_list_assigned(void);
+
+/* List all devices usable for passthrough; the caller frees the nvlist. */
+extern nvlist_t *ppt_list(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBPPT_H */
diff --git a/usr/src/lib/libppt/common/llib-lppt b/usr/src/lib/libppt/common/llib-lppt
new file mode 100644
index 0000000000..dadd992a31
--- /dev/null
+++ b/usr/src/lib/libppt/common/llib-lppt
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libppt.h>
diff --git a/usr/src/lib/libppt/common/mapfile-vers b/usr/src/lib/libppt/common/mapfile-vers
new file mode 100644
index 0000000000..d9d882874b
--- /dev/null
+++ b/usr/src/lib/libppt/common/mapfile-vers
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ ppt_devpath_to_dev;
+ ppt_list_assigned;
+ ppt_list;
+
+ local:
+ *;
+};
diff --git a/usr/src/lib/libppt/i386/Makefile b/usr/src/lib/libppt/i386/Makefile
new file mode 100644
index 0000000000..3f11e556d4
--- /dev/null
+++ b/usr/src/lib/libppt/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libppt/sparc/Makefile b/usr/src/lib/libppt/sparc/Makefile
new file mode 100644
index 0000000000..3f11e556d4
--- /dev/null
+++ b/usr/src/lib/libppt/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libppt/sparcv9/Makefile b/usr/src/lib/libppt/sparcv9/Makefile
new file mode 100644
index 0000000000..5a304d7fe7
--- /dev/null
+++ b/usr/src/lib/libppt/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
index a64231ad1c..397ebd7d59 100644
--- a/usr/src/lib/libvmmapi/common/mapfile-vers
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -61,6 +61,7 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_get_lowmem_size;
vm_get_memflags;
vm_get_memseg;
+ vm_get_pptdev_limits;
vm_get_register;
vm_get_register_set;
vm_get_seg_desc;
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 0b9b871081..ceac495746 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -995,6 +995,7 @@ vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
}
+#ifdef __FreeBSD__
int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
@@ -1056,7 +1057,7 @@ vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}
-int
+int
vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
@@ -1075,6 +1076,103 @@ vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}
+/*
+ * Query the MSI and MSI-X limits for the passthrough device at
+ * bus/slot/func via VM_GET_PPTDEV_LIMITS.  Both outputs are always written,
+ * from a request structure that is zeroed before the call; the ioctl()
+ * result is returned.
+ */
+int
+vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func,
+    int *msi_limit, int *msix_limit)
+{
+	struct vm_pptdev_limits lim;
+	int rv;
+
+	bzero(&lim, sizeof (lim));
+	lim.func = func;
+	lim.slot = slot;
+	lim.bus = bus;
+
+	rv = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &lim);
+
+	*msix_limit = lim.msix_limit;
+	*msi_limit = lim.msi_limit;
+
+	return (rv);
+}
+#else /* __FreeBSD__ */
+/*
+ * Issue VM_BIND_PPTDEV on the VM for the passthrough device identified by
+ * pptfd, returning the ioctl() result.
+ */
+int
+vm_assign_pptdev(struct vmctx *ctx, int pptfd)
+{
+	struct vm_pptdev req;
+
+	req.pptfd = pptfd;
+
+	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &req));
+}
+
+/*
+ * Issue VM_UNBIND_PPTDEV on the VM for the passthrough device identified by
+ * pptfd, returning the ioctl() result.
+ */
+int
+vm_unassign_pptdev(struct vmctx *ctx, int pptfd)
+{
+	struct vm_pptdev req;
+
+	req.pptfd = pptfd;
+
+	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &req));
+}
+
+/*
+ * Issue VM_MAP_PPTDEV_MMIO for the passthrough device identified by pptfd,
+ * passing the gpa, len and hpa values through unchanged.  Returns the
+ * ioctl() result.
+ */
+int
+vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len,
+    vm_paddr_t hpa)
+{
+	struct vm_pptdev_mmio req;
+
+	req.pptfd = pptfd;
+	req.hpa = hpa;
+	req.gpa = gpa;
+	req.len = len;
+
+	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &req));
+}
+
+/*
+ * Issue VM_PPTDEV_MSI for the passthrough device identified by pptfd,
+ * passing the vcpu, addr, msg and numvec values through unchanged.  Returns
+ * the ioctl() result.
+ */
+int
+vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr,
+    uint64_t msg, int numvec)
+{
+	struct vm_pptdev_msi req;
+
+	req.pptfd = pptfd;
+	req.vcpu = vcpu;
+	req.addr = addr;
+	req.msg = msg;
+	req.numvec = numvec;
+
+	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &req));
+}
+
+/*
+ * Issue VM_PPTDEV_MSIX for the passthrough device identified by pptfd,
+ * passing the vcpu, idx, addr, msg and vector_control values through
+ * unchanged.  Returns the ioctl() result.
+ */
+int
+vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx,
+    uint64_t addr, uint64_t msg, uint32_t vector_control)
+{
+	struct vm_pptdev_msix req;
+
+	req.pptfd = pptfd;
+	req.vcpu = vcpu;
+	req.idx = idx;
+	req.addr = addr;
+	req.msg = msg;
+	req.vector_control = vector_control;
+
+	return (ioctl(ctx->fd, VM_PPTDEV_MSIX, &req));
+}
+
+/*
+ * Query the MSI and MSI-X limits for the passthrough device identified by
+ * pptfd via VM_GET_PPTDEV_LIMITS.  Both outputs are always written, from a
+ * request structure that is zeroed before the call; the ioctl() result is
+ * returned.
+ */
+int
+vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
+    int *msix_limit)
+{
+	struct vm_pptdev_limits lim;
+	int rv;
+
+	bzero(&lim, sizeof (lim));
+	lim.pptfd = pptfd;
+
+	rv = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &lim);
+
+	*msix_limit = lim.msix_limit;
+	*msi_limit = lim.msi_limit;
+
+	return (rv);
+}
+#endif /* __FreeBSD__ */
+
uint64_t *
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
int *ret_entries)
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index a1507255cb..1b08a9cae5 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -177,6 +177,7 @@ int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
int *retval);
int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
int val);
+#ifdef __FreeBSD__
int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
@@ -186,6 +187,20 @@ int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
int func, int idx, uint64_t addr, uint64_t msg,
uint32_t vector_control);
+/* Fetch the MSI and MSI-X limits for the device at bus/slot/func. */
+int vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func,
+ int *msi_limit, int *msix_limit);
+#else /* __FreeBSD__ */
+/*
+ * In this branch passthrough devices are identified by pptfd rather than by
+ * bus/slot/func.
+ */
+int vm_assign_pptdev(struct vmctx *ctx, int pptfd);
+int vm_unassign_pptdev(struct vmctx *ctx, int pptfd);
+int vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa,
+ size_t len, vm_paddr_t hpa);
+int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd,
+ uint64_t addr, uint64_t msg, int numvec);
+int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd,
+ int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
+int vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
+ int *msix_limit);
+#endif /* __FreeBSD__ */
int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index 9f01ad7606..fc43842db2 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -547,6 +547,7 @@ i386_MANFILES= \
acpidump.1m \
acpixtract.1m \
nvmeadm.1m \
+ pptadm.1m \
rdmsr.1m
sparc_MANFILES= dcs.1m \
diff --git a/usr/src/man/man1m/pptadm.1m b/usr/src/man/man1m/pptadm.1m
new file mode 100644
index 0000000000..f13a5e32a4
--- /dev/null
+++ b/usr/src/man/man1m/pptadm.1m
@@ -0,0 +1,74 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2018 Joyent, Inc.
+.\"
+.Dd April 10, 2018
+.Dt PPTADM 1M
+.Os
+.Sh NAME
+.Nm pptadm
+.Nd PPT administration utility
+.Sh SYNOPSIS
+.Nm
+.Cm list -j
+.Op Fl a
+.Nm
+.Cm list
+.Op Fl ap Op Fl o Ar fields
+.Sh DESCRIPTION
+The
+.Nm
+utility can enumerate passthrough devices for use by a virtualized guest.
+.Sh OPTIONS
+The following options to the
+.Cm list
+command are supported:
+.Bl -tag -width Ds
+.It Fl a
+Show all PPT devices, both available and assigned.
+.It Fl j
+Output JSON.
+.It Fl o
+Specify fields to output, or "all". Available fields are
+dev,path,vendor,device,subvendor,subdevice,rev,label
+.It Fl p
+Output in a parsable format; this requires the -o option to be specified.
+.El
+.Sh JSON OUTPUT
+The JSON output consists of an array under the key "devices" with the fields:
+.Bl -tag -width Ds
+.It dev
+The PPT /dev path, if assigned and bound.
+.It path
+The physical /devices path.
+.It vendor-id
+The PCI vendor ID.
+.It device-id
+The PCI device ID.
+.It subsystem-vendor-id
+The PCI subsystem vendor ID.
+.It subsystem-id
+The PCI subsystem ID.
+.It revision-id
+The PCI device revision.
+.It label
+Human-readable description from the PCI database.
+.El
+.Sh FILES
+.Bl -tag -width Ds
+.It /etc/ppt_aliases
+Contains the bindings of PPT devices in the same format as /etc/driver_aliases.
+.It /etc/ppt_matches
+Identifies devices that PPT could be bound to, either by physical path, or by
+PCI ID.
+.El
+.Sh EXIT STATUS
+.Ex -std
diff --git a/usr/src/pkg/manifests/system-bhyve.mf b/usr/src/pkg/manifests/system-bhyve.mf
index 7fdeb81254..002bef64cc 100644
--- a/usr/src/pkg/manifests/system-bhyve.mf
+++ b/usr/src/pkg/manifests/system-bhyve.mf
@@ -35,14 +35,22 @@ dir path=usr group=sys
dir path=usr/kernel/drv group=sys
dir path=usr/kernel/drv/$(ARCH64) group=sys
dir path=usr/sbin
+dir path=usr/share
+dir path=usr/share/man
+dir path=usr/share/man/man1m
+driver name=ppt
driver name=viona
driver name=vmm
+file path=usr/kernel/drv/$(ARCH64)/ppt
file path=usr/kernel/drv/$(ARCH64)/viona
file path=usr/kernel/drv/$(ARCH64)/vmm
+file path=usr/kernel/drv/ppt.conf
file path=usr/kernel/drv/viona.conf
file path=usr/kernel/drv/vmm.conf
file path=usr/sbin/bhyve mode=0555
file path=usr/sbin/bhyvectl mode=0555
+file path=usr/sbin/pptadm mode=0555
+file path=usr/share/man/man1m/pptadm.1m
license lic_CDDL license=lic_CDDL
depend fmri=developer/acpi type=require
depend fmri=system/bhyve/firmware type=require
diff --git a/usr/src/pkg/manifests/system-library-bhyve.mf b/usr/src/pkg/manifests/system-library-bhyve.mf
index d9a15e1b37..3c7e52c938 100644
--- a/usr/src/pkg/manifests/system-library-bhyve.mf
+++ b/usr/src/pkg/manifests/system-library-bhyve.mf
@@ -27,5 +27,8 @@ dir path=lib group=bin
dir path=lib/$(ARCH64) group=bin
dir path=usr group=sys
dir path=usr/lib group=bin
+dir path=usr/lib/$(ARCH64) group=bin
file path=lib/$(ARCH64)/libvmmapi.so.1
+file path=usr/lib/$(ARCH64)/libppt.so.1
+file path=usr/lib/libppt.so.1
license lic_CDDL license=lic_CDDL
diff --git a/usr/src/uts/common/os/modsysfile.c b/usr/src/uts/common/os/modsysfile.c
index 7875824a86..2015cfefae 100644
--- a/usr/src/uts/common/os/modsysfile.c
+++ b/usr/src/uts/common/os/modsysfile.c
@@ -58,10 +58,12 @@ struct hwc_class *hcl_head; /* head of list of classes */
static kmutex_t hcl_lock; /* for accessing list of classes */
#define DAFILE "/etc/driver_aliases"
+#define PPTFILE "/etc/ppt_aliases"
#define CLASSFILE "/etc/driver_classes"
#define DACFFILE "/etc/dacf.conf"
static char class_file[] = CLASSFILE;
+static char pptfile[] = PPTFILE;
static char dafile[] = DAFILE;
static char dacffile[] = DACFFILE;
@@ -2170,14 +2172,13 @@ hwc_parse_now(char *fname, struct par_list **pl, ddi_prop_t **props)
return (0); /* always return success */
}
-void
-make_aliases(struct bind **bhash)
+static void
+parse_aliases(struct bind **bhash, struct _buf *file)
{
enum {
AL_NEW, AL_DRVNAME, AL_DRVNAME_COMMA, AL_ALIAS, AL_ALIAS_COMMA
} state;
- struct _buf *file;
char tokbuf[MAXPATHLEN];
char drvbuf[MAXPATHLEN];
token_t token;
@@ -2186,9 +2187,6 @@ make_aliases(struct bind **bhash)
static char dupwarn[] = "!Driver alias \"%s\" conflicts with "
"an existing driver name or alias.";
- if ((file = kobj_open_file(dafile)) == (struct _buf *)-1)
- return;
-
state = AL_NEW;
major = DDI_MAJOR_T_NONE;
while (!done) {
@@ -2273,8 +2271,22 @@ make_aliases(struct bind **bhash)
kobj_file_err(CE_WARN, file, tok_err, tokbuf);
}
}
+}
- kobj_close_file(file);
+/*
+ * Feed every alias file into the binding hash.  PPTFILE is parsed before
+ * DAFILE; a file that cannot be opened is skipped without complaint.
+ */
+void
+make_aliases(struct bind **bhash)
+{
+	char *sources[] = { pptfile, dafile };
+	uint_t idx;
+
+	for (idx = 0; idx < sizeof (sources) / sizeof (sources[0]); idx++) {
+		struct _buf *fp = kobj_open_file(sources[idx]);
+
+		if (fp == (struct _buf *)-1)
+			continue;
+
+		parse_aliases(bhash, fp);
+		kobj_close_file(fp);
+	}
}
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 312c0f233d..ed404d3d6d 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -247,6 +247,7 @@ VMM_OBJS += vmm.o \
vmm_stat.o \
vmm_util.o \
x86.o \
+ iommu.o \
vdev.o \
vatpic.o \
vatpit.o \
@@ -260,14 +261,14 @@ VMM_OBJS += vmm.o \
vmx_msr.o \
vmx.o \
vmx_support.o \
+ vtd.o \
+ vtd_sol.o \
svm.o \
svm_msr.o \
npt.o \
vmcb.o \
svm_support.o \
amdv.o \
- sol_iommu.o \
- sol_ppt.o \
gipt.o \
vmm_sol_vm.o \
vmm_sol_glue.o \
@@ -282,6 +283,8 @@ VIONA_OBJS += viona_main.o \
viona_tx.o \
viona_hook.o \
+PPT_OBJS += ppt.o
+
#
# Build up defines and paths.
#
diff --git a/usr/src/uts/i86pc/Makefile.i86pc b/usr/src/uts/i86pc/Makefile.i86pc
index b60d24d82c..0c921b4028 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc
+++ b/usr/src/uts/i86pc/Makefile.i86pc
@@ -248,6 +248,7 @@ DRV_KMODS += fipe
DRV_KMODS += imc imcstub
DRV_KMODS += vmm
DRV_KMODS += viona
+DRV_KMODS += ppt
DRV_KMODS += cpudrv
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
index 9474b30fc6..902080e34c 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
@@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <contrib/dev/acpica/include/acpi.h>
+#include <sys/sunndi.h>
+
#include "io/iommu.h"
/*
@@ -120,6 +122,9 @@ static int drhd_num;
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
static int max_domains;
typedef int (*drhd_ident_func_t)(void);
+#ifndef __FreeBSD__
+static dev_info_t *vtddips[DRHD_MAX_UNITS];
+#endif
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
@@ -237,19 +242,63 @@ vtd_translation_disable(struct vtdmap *vtdmap)
;
}
+/*
+ * Map the first PAGE_SIZE bytes of register set 0 of a remapping-unit
+ * devinfo node.  On success the access handle is stored in the node's
+ * driver-private pointer and the mapped register address is returned;
+ * NULL is returned when the mapping cannot be established.
+ */
+static void *
+vtd_map(dev_info_t *dip)
+{
+	static ddi_device_acc_attr_t regs_attr = {
+		DDI_DEVICE_ATTR_V0,
+		DDI_NEVERSWAP_ACC,
+		DDI_STRICTORDER_ACC,
+	};
+	ddi_acc_handle_t handle;
+	caddr_t va;
+
+	if (ddi_regs_map_setup(dip, 0, &va, 0, PAGE_SIZE, &regs_attr,
+	    &handle) != DDI_SUCCESS) {
+		return (NULL);
+	}
+
+	ddi_set_driver_private(dip, handle);
+	return (va);
+}
+
+/*
+ * Release the register mapping recorded in this devinfo node's
+ * driver-private pointer, if there is one.
+ *
+ * The devinfo nodes are kept and reused across module reloads, so the
+ * pointer has to be cleared once the handle is freed; otherwise a later
+ * call for the same node (e.g. after a failed re-map) would free the stale
+ * handle a second time.
+ */
+static void
+vtd_unmap(dev_info_t *dip)
+{
+	ddi_acc_handle_t hdl = ddi_get_driver_private(dip);
+
+	if (hdl == NULL)
+		return;
+
+	ddi_regs_map_free(&hdl);
+	ddi_set_driver_private(dip, NULL);
+}
+
+#ifndef __FreeBSD__
+/*
+ * This lives in vtd_sol.c for license reasons.
+ */
+extern dev_info_t *vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *, int);
+#endif
+
static int
vtd_init(void)
{
int i, units, remaining;
struct vtdmap *vtdmap;
vm_paddr_t ctx_paddr;
- char *end, envname[32];
+ char *end;
+#ifdef __FreeBSD__
+ char envname[32];
unsigned long mapaddr;
+#endif
ACPI_STATUS status;
ACPI_TABLE_DMAR *dmar;
ACPI_DMAR_HEADER *hdr;
ACPI_DMAR_HARDWARE_UNIT *drhd;
+#ifdef __FreeBSD__
/*
* Allow the user to override the ACPI DMAR table by specifying the
* physical address of each remapping unit.
@@ -268,7 +317,9 @@ vtd_init(void)
if (units > 0)
goto skip_dmar;
-
+#else
+ units = 0;
+#endif
/* Search for DMAR table. */
status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
if (ACPI_FAILURE(status))
@@ -291,7 +342,15 @@ vtd_init(void)
break;
drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
+#ifdef __FreeBSD__
vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
+#else
+ vtddips[units] = vtd_get_dip(drhd, units);
+ vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]);
+ if (vtdmaps[units] == NULL)
+ goto fail;
+ units++;
+#endif
if (units >= DRHD_MAX_UNITS)
break;
remaining -= hdr->Length;
@@ -300,7 +359,9 @@ vtd_init(void)
if (units <= 0)
return (ENXIO);
+#ifdef __FreeBSD__
skip_dmar:
+#endif
drhd_num = units;
vtdmap = vtdmaps[0];
@@ -321,11 +382,36 @@ skip_dmar:
}
return (0);
+
+#ifndef __FreeBSD__
+fail:
+ for (i = 0; i <= units; i++)
+ vtd_unmap(vtddips[i]);
+ return (ENXIO);
+#endif
}
static void
vtd_cleanup(void)
{
+#ifndef __FreeBSD__
+ int i;
+
+ KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty"));
+
+ bzero(root_table, sizeof (root_table));
+
+ for (i = 0; i <= drhd_num; i++) {
+ vtdmaps[i] = NULL;
+ /*
+ * Unmap the vtd registers. Note that the devinfo nodes
+ * themselves aren't removed, they are considered system state
+ * and can be reused when the module is reloaded.
+ */
+ if (vtddips[i] != NULL)
+ vtd_unmap(vtddips[i]);
+ }
+#endif
}
static void
@@ -619,6 +705,7 @@ vtd_create_domain(vm_paddr_t maxaddr)
if ((uintptr_t)dom->ptp & PAGE_MASK)
panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
+#ifdef __FreeBSD__
#ifdef notyet
/*
* XXX superpage mappings for the iommu do not work correctly.
@@ -636,6 +723,18 @@ vtd_create_domain(vm_paddr_t maxaddr)
*/
dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
#endif
+#else
+ /*
+ * On illumos we decidedly do not remove memory mapped to a VM's domain
+ * from the host_domain, so we don't have to deal with page demotion and
+ * can just use large pages.
+ *
+ * Since VM memory is currently allocated as 4k pages and mapped into
+ * the VM domain page by page, the use of large pages is essentially
+ * limited to the host_domain.
+ */
+ dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
+#endif
SLIST_INSERT_HEAD(&domhead, dom, next);
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c b/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c
new file mode 100644
index 0000000000..1dbe8ffa48
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c
@@ -0,0 +1,83 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#include <sys/sunndi.h>
+#include <contrib/dev/acpica/include/acpi.h>
+
+/*
+ * Return the devinfo node named "vtd" with instance number 'unit'.  If no
+ * such node exists yet, one is created as a child of the root node and given
+ * a "reg" property plus parent-private data describing a PAGE_SIZE memory
+ * register region at drhd->Address.
+ */
+dev_info_t *
+vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *drhd, int unit)
+{
+ dev_info_t *dip;
+ struct ddi_parent_private_data *pdptr;
+ struct regspec reg;
+ int circ;
+
+ /*
+ * Try to find an existing devinfo node for this vtd unit.
+ */
+ ndi_devi_enter(ddi_root_node(), &circ);
+ dip = ddi_find_devinfo("vtd", unit, 0);
+ ndi_devi_exit(ddi_root_node(), circ);
+
+ if (dip != NULL)
+ return (dip);
+
+ /*
+ * None found, construct a devinfo node for this vtd unit.
+ */
+ /* NOTE(review): the result of ddi_add_child() is used unchecked. */
+ dip = ddi_add_child(ddi_root_node(), "vtd",
+ DEVI_SID_NODEID, unit);
+
+ reg.regspec_bustype = 0;
+ reg.regspec_addr = drhd->Address;
+ reg.regspec_size = PAGE_SIZE;
+
+ /*
+ * update the reg properties
+ *
+ * reg property will be used for register
+ * set access
+ *
+ * refer to the bus_map of root nexus driver
+ * I/O or memory mapping:
+ *
+ * <bustype=0, addr=x, len=x>: memory
+ * <bustype=1, addr=x, len=x>: i/o
+ * <bustype>1, addr=0, len=x>: x86-compatibility i/o
+ */
+ (void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
+ dip, "reg", (int *)&reg,
+ sizeof (struct regspec) / sizeof (int));
+
+ /*
+ * This is an artificially constructed dev_info, and we
+ * need to set a few more things to be able to use it
+ * for ddi_dma_alloc_handle/free_handle.
+ */
+ ddi_set_driver(dip, ddi_get_driver(ddi_root_node()));
+ DEVI(dip)->devi_bus_dma_allochdl =
+ DEVI(ddi_get_driver((ddi_root_node())));
+
+ /*
+ * The parent-private data and its single regspec share one allocation;
+ * the regspec sits immediately after the header structure.
+ */
+ pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data)
+ + sizeof (struct regspec), KM_SLEEP);
+ pdptr->par_nreg = 1;
+ pdptr->par_reg = (struct regspec *)(pdptr + 1);
+ pdptr->par_reg->regspec_bustype = 0;
+ pdptr->par_reg->regspec_addr = drhd->Address;
+ pdptr->par_reg->regspec_size = PAGE_SIZE;
+ ddi_set_parent_data(dip, pdptr);
+
+ return (dip);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c
new file mode 100644
index 0000000000..b949573fe2
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c
@@ -0,0 +1,383 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+
+#include "vmm_util.h"
+#include "vmm_mem.h"
+#include "iommu.h"
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, iommu, CTLFLAG_RW, 0, "bhyve iommu parameters");
+
+static int iommu_avail;
+SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail,
+ 0, "bhyve iommu initialized?");
+
+static int iommu_enable = 1;
+SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0,
+ "Enable use of I/O MMU (required for PCI passthrough).");
+
+static struct iommu_ops *ops;
+static void *host_domain;
+#ifdef __FreeBSD__
+static eventhandler_tag add_tag, delete_tag;
+#endif
+
+#ifndef __FreeBSD__
+static volatile u_int iommu_initted;
+#endif
+
+/*
+ * Thin dispatchers onto the selected backend's iommu_ops vector.  Apart from
+ * IOMMU_INIT(), each one does nothing (or returns a neutral value) unless a
+ * backend has been selected in 'ops' and 'iommu_avail' is set.
+ */
+
+/* Run the backend's init hook; ENXIO when no backend has been selected. */
+static __inline int
+IOMMU_INIT(void)
+{
+ if (ops != NULL)
+ return ((*ops->init)());
+ else
+ return (ENXIO);
+}
+
+/* Run the backend's cleanup hook. */
+static __inline void
+IOMMU_CLEANUP(void)
+{
+ if (ops != NULL && iommu_avail)
+ (*ops->cleanup)();
+}
+
+/* Ask the backend for a new domain; NULL when the IOMMU is unavailable. */
+static __inline void *
+IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->create_domain)(maxaddr));
+ else
+ return (NULL);
+}
+
+/* Hand a domain back to the backend for destruction. */
+static __inline void
+IOMMU_DESTROY_DOMAIN(void *dom)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->destroy_domain)(dom);
+}
+
+/*
+ * Map [gpa, gpa + len) to hpa in 'domain' and return the backend's result,
+ * which callers in this file treat as the number of bytes handled.  When the
+ * IOMMU is unavailable the whole length is reported as handled.
+ */
+static __inline uint64_t
+IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->create_mapping)(domain, gpa, hpa, len));
+ else
+ return (len); /* XXX */
+}
+
+/*
+ * Remove the mapping for [gpa, gpa + len) from 'domain'; same return
+ * convention as IOMMU_CREATE_MAPPING().
+ */
+static __inline uint64_t
+IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len)
+{
+
+ if (ops != NULL && iommu_avail)
+ return ((*ops->remove_mapping)(domain, gpa, len));
+ else
+ return (len); /* XXX */
+}
+
+/* Attach the device identified by 'rid' to 'domain'. */
+static __inline void
+IOMMU_ADD_DEVICE(void *domain, uint16_t rid)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->add_device)(domain, rid);
+}
+
+/* Detach the device identified by 'rid' from 'domain'. */
+static __inline void
+IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->remove_device)(domain, rid);
+}
+
+/* Invoke the backend's TLB invalidation hook for 'domain'. */
+static __inline void
+IOMMU_INVALIDATE_TLB(void *domain)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->invalidate_tlb)(domain);
+}
+
+/* Invoke the backend's enable hook. */
+static __inline void
+IOMMU_ENABLE(void)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->enable)();
+}
+
+/* Invoke the backend's disable hook. */
+static __inline void
+IOMMU_DISABLE(void)
+{
+
+ if (ops != NULL && iommu_avail)
+ (*ops->disable)();
+}
+
+#ifdef __FreeBSD__
+/*
+ * Handler registered for the pci_add_device event: put the new device in the
+ * host domain.  'arg' is unused.
+ */
+static void
+iommu_pci_add(void *arg, device_t dev)
+{
+
+ /* Add new devices to the host domain. */
+ iommu_add_device(host_domain, pci_get_rid(dev));
+}
+
+/*
+ * Handler registered for the pci_delete_device event: take the departing
+ * device out of the host domain.  'arg' is unused.
+ */
+static void
+iommu_pci_delete(void *arg, device_t dev)
+{
+
+ iommu_remove_device(host_domain, pci_get_rid(dev));
+}
+#endif
+
+#ifndef __FreeBSD__
+/*
+ * ddi_walk_devs() callback.  'arg' carries a boolean_t: B_TRUE adds every
+ * PCI device found to the host domain, B_FALSE removes it.  Nodes that are
+ * not PCI devices are passed over; the walk always continues.
+ */
+static int
+iommu_find_device(dev_info_t *dip, void *arg)
+{
+	const boolean_t attach = (boolean_t)arg;
+
+	if (!pcie_is_pci_device(dip))
+		return (DDI_WALK_CONTINUE);
+
+	if (attach) {
+		iommu_add_device(host_domain, pci_get_rid(dip));
+	} else {
+		iommu_remove_device(host_domain, pci_get_rid(dip));
+	}
+
+	return (DDI_WALK_CONTINUE);
+}
+#endif
+
+/*
+ * Bring up the IOMMU: select the vendor backend, initialise it, create the
+ * host domain with a 1:1 mapping of [0, maxaddr), place the host's PCI
+ * devices in that domain and finally enable translation.
+ *
+ * Returns silently, leaving iommu_avail clear, when the IOMMU is disabled by
+ * the tunable, no backend matches, or the backend fails to initialise.
+ */
+static void
+iommu_init(void)
+{
+	int error;
+	vm_paddr_t maxaddr;
+#ifdef __FreeBSD__
+	/* Only the FreeBSD bus scan below needs these. */
+	int bus, slot, func;
+	devclass_t dc;
+	device_t dev;
+#endif
+
+	if (!iommu_enable)
+		return;
+
+	if (vmm_is_intel())
+		ops = &iommu_ops_intel;
+	else if (vmm_is_amd())
+		ops = &iommu_ops_amd;
+	else
+		ops = NULL;
+
+	error = IOMMU_INIT();
+	if (error)
+		return;
+
+	iommu_avail = 1;
+
+	/*
+	 * Create a domain for the devices owned by the host
+	 */
+	maxaddr = vmm_mem_maxaddr();
+	host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
+	if (host_domain == NULL) {
+		printf("iommu_init: unable to create a host domain\n");
+		IOMMU_CLEANUP();
+		ops = NULL;
+		iommu_avail = 0;
+		return;
+	}
+
+	/*
+	 * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to
+	 * the host
+	 */
+	iommu_create_mapping(host_domain, 0, 0, maxaddr);
+
+#ifdef __FreeBSD__
+	add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0);
+	delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete,
+	    NULL, 0);
+	dc = devclass_find("ppt");
+	for (bus = 0; bus <= PCI_BUSMAX; bus++) {
+		for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
+			for (func = 0; func <= PCI_FUNCMAX; func++) {
+				dev = pci_find_dbsf(0, bus, slot, func);
+				if (dev == NULL)
+					continue;
+
+				/* Skip passthrough devices. */
+				if (dc != NULL &&
+				    device_get_devclass(dev) == dc)
+					continue;
+
+				/*
+				 * Everything else belongs to the host
+				 * domain.
+				 */
+				iommu_add_device(host_domain,
+				    pci_get_rid(dev));
+			}
+		}
+	}
+#else
+	/* Walk the whole device tree, adding each PCI device found. */
+	ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_TRUE);
+#endif
+	IOMMU_ENABLE();
+
+}
+
+/*
+ * Undo iommu_init(): disable translation, take the host's devices out of the
+ * host domain, destroy that domain and let the backend clean up.
+ *
+ * On illumos the initialisation latch is reset so that the next
+ * iommu_create_domain() call initialises again.  For that to be safe every
+ * piece of state derived from the previous initialisation is reset too;
+ * otherwise a later initialisation that fails would leave iommu_avail set
+ * and the dispatch wrappers would call into an uninitialised backend.
+ */
+void
+iommu_cleanup(void)
+{
+#ifdef __FreeBSD__
+	if (add_tag != NULL) {
+		EVENTHANDLER_DEREGISTER(pci_add_device, add_tag);
+		add_tag = NULL;
+	}
+	if (delete_tag != NULL) {
+		EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag);
+		delete_tag = NULL;
+	}
+#else
+	atomic_store_rel_int(&iommu_initted, 0);
+#endif
+	IOMMU_DISABLE();
+#ifndef __FreeBSD__
+	ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_FALSE);
+#endif
+	IOMMU_DESTROY_DOMAIN(host_domain);
+	IOMMU_CLEANUP();
+#ifndef __FreeBSD__
+	/*
+	 * These must be cleared only after the calls above: the wrappers
+	 * consult both 'ops' and 'iommu_avail' before dispatching.
+	 */
+	host_domain = NULL;
+	iommu_avail = 0;
+	ops = NULL;
+#endif
+}
+
+/*
+ * Create a new IOMMU domain covering addresses up to 'maxaddr'.
+ *
+ * The first caller also performs the one-time IOMMU initialisation.
+ * iommu_initted moves 0 -> 1 (in progress) -> 2 (done); callers that find it
+ * at 1 spin until it changes.
+ */
+void *
+iommu_create_domain(vm_paddr_t maxaddr)
+{
+	if (iommu_initted < 2) {
+		if (!atomic_cmpset_int(&iommu_initted, 0, 1)) {
+			/* Somebody else got there first; wait for them. */
+			while (iommu_initted == 1)
+				cpu_spinwait();
+		} else {
+			iommu_init();
+			atomic_store_rel_int(&iommu_initted, 2);
+		}
+	}
+
+	return (IOMMU_CREATE_DOMAIN(maxaddr));
+}
+
+/*
+ * Destroy a domain obtained from iommu_create_domain().  Does nothing when
+ * no IOMMU backend is available.
+ */
+void
+iommu_destroy_domain(void *dom)
+{
+
+ IOMMU_DESTROY_DOMAIN(dom);
+}
+
+/*
+ * Map the 'len' bytes at 'gpa' in domain 'dom' to the host physical range
+ * starting at 'hpa'.  The backend may map less than asked for per call, so
+ * keep calling it, advancing by whatever it reports, until nothing is left.
+ */
+void
+iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
+{
+	uint64_t done, todo;
+
+	for (todo = len; todo > 0; todo -= done) {
+		done = IOMMU_CREATE_MAPPING(dom, gpa, hpa, todo);
+		gpa += done;
+		hpa += done;
+	}
+}
+
+/*
+ * Remove the mapping of the 'len' bytes at 'gpa' from domain 'dom'.  The
+ * backend may unmap less than asked for per call, so keep calling it,
+ * advancing by whatever it reports, until nothing is left.
+ */
+void
+iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len)
+{
+	uint64_t done, todo;
+
+	for (todo = len; todo > 0; todo -= done) {
+		done = IOMMU_REMOVE_MAPPING(dom, gpa, todo);
+		gpa += done;
+	}
+}
+
+/*
+ * Return the domain created for host-owned devices by iommu_init(); NULL if
+ * it has never been created.
+ */
+void *
+iommu_host_domain(void)
+{
+
+ return (host_domain);
+}
+
+/*
+ * Attach the device with PCI requester ID 'rid' to domain 'dom'.  Does
+ * nothing when no IOMMU backend is available.
+ */
+void
+iommu_add_device(void *dom, uint16_t rid)
+{
+
+ IOMMU_ADD_DEVICE(dom, rid);
+}
+
+/*
+ * Detach the device with PCI requester ID 'rid' from domain 'dom'.  Does
+ * nothing when no IOMMU backend is available.
+ */
+void
+iommu_remove_device(void *dom, uint16_t rid)
+{
+
+ IOMMU_REMOVE_DEVICE(dom, rid);
+}
+
+/*
+ * Ask the backend to invalidate its TLB for 'domain'.  Does nothing when no
+ * IOMMU backend is available.
+ */
+void
+iommu_invalidate_tlb(void *domain)
+{
+
+ IOMMU_INVALIDATE_TLB(domain);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c
new file mode 100644
index 0000000000..a71ce86c2d
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c
@@ -0,0 +1,1436 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/pciio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+#include <sys/pci_cap.h>
+#include <sys/pcie_impl.h>
+#include <sys/ppt_dev.h>
+#include <sys/mkdev.h>
+#include <sys/sysmacros.h>
+
+#include "vmm_lapic.h"
+#include "vmm_ktr.h"
+
+#include "iommu.h"
+#include "ppt.h"
+
+#define MAX_MSIMSGS 32
+
+/*
+ * If the MSI-X table is located in the middle of a BAR then that MMIO
+ * region gets split into two segments - one segment above the MSI-X table
+ * and the other segment below the MSI-X table - with a hole in place of
+ * the MSI-X table so accesses to it can be trapped and emulated.
+ *
+ * So, allocate a MMIO segment for each BAR register + 1 additional segment.
+ */
+#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
+
+/*
+ * Per-vector interrupt argument.
+ * NOTE(review): presumably addr/msg_data are the guest's MSI address and
+ * data for the vector; the code using them is outside this excerpt.
+ */
+struct pptintr_arg {
+ struct pptdev *pptdev;
+ uint64_t addr;
+ uint64_t msg_data;
+};
+
+/*
+ * One MMIO segment (see MAX_MMIOSEGS above).
+ * NOTE(review): presumably gpa/len give the guest-physical range; confirm
+ * against the mapping code.
+ */
+struct pptseg {
+ vm_paddr_t gpa;
+ size_t len;
+ int wired;
+};
+
+/*
+ * One PCI BAR as recorded by ppt_bar_crawl(): base and size come from the
+ * "assigned-addresses" property, type is the address-space code from
+ * pci_phys_hi.  io_handle/io_ptr are only set for I/O-space BARs, which are
+ * mapped so that PPT_BAR_READ/PPT_BAR_WRITE can reach them.
+ */
+struct pptbar {
+ uint64_t base;
+ uint64_t size;
+ uint_t type;
+ ddi_acc_handle_t io_handle;
+ caddr_t io_ptr;
+};
+
+/*
+ * Soft state for one passthrough device instance.
+ */
+struct pptdev {
+ /* devinfo node of this instance */
+ dev_info_t *pptd_dip;
+ /* linkage on pptdev_list */
+ list_node_t pptd_node;
+ /* PCI config space access handle */
+ ddi_acc_handle_t pptd_cfg;
+ struct pptbar pptd_bars[PCI_BASE_NUM];
+ /* detach is refused while this is non-NULL */
+ struct vm *vm;
+ struct pptseg mmio[MAX_MMIOSEGS];
+ struct {
+ int num_msgs; /* guest state */
+ boolean_t is_fixed;
+ size_t inth_sz;
+ ddi_intr_handle_t *inth;
+ struct pptintr_arg arg[MAX_MSIMSGS];
+ } msi;
+
+ struct {
+ int num_msgs;
+ size_t inth_sz;
+ size_t arg_sz;
+ ddi_intr_handle_t *inth;
+ struct pptintr_arg *arg;
+ } msix;
+};
+
+
+/* Major number of the "ppt" driver, looked up in _init(). */
+static major_t ppt_major;
+/* Soft state anchor; instances are struct pptdev, indexed by instance. */
+static void *ppt_state;
+/* Held around pptdev_list changes and the ppt->vm check in detach. */
+static kmutex_t pptdev_mtx;
+/* All attached pptdev instances. */
+static list_t pptdev_list;
+
+#define PPT_MINOR_NAME "ppt"
+
+/* Access attributes used for I/O BAR mappings and for devmap. */
+static ddi_device_acc_attr_t ppt_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STORECACHING_OK_ACC,
+ DDI_DEFAULT_ACC
+};
+
+/*
+ * open(9E) entry point.  Every open succeeds; no per-open state is kept and
+ * no privilege check is made.
+ */
+static int
+ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
+{
+ /* XXX: require extra privs? */
+ return (0);
+}
+
+/*
+ * BAR_TO_IDX converts the config-space offset of a BAR register into its
+ * 0-based BAR index.  BAR_VALID is true when the offset lies between
+ * PCI_CONF_BASE0 and PCI_CONF_BASE5 and is a multiple of PCI_BAR_SZ_32.
+ */
+#define BAR_TO_IDX(bar) (((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
+#define BAR_VALID(b) ( \
+ (b) >= PCI_CONF_BASE0 && \
+ (b) <= PCI_CONF_BASE5 && \
+ ((b) & (PCI_BAR_SZ_32-1)) == 0)
+
+/*
+ * Validate a PPT_BAR_READ/PPT_BAR_WRITE request.
+ *
+ * The request must name an I/O-space BAR that has been mapped, use an access
+ * width of 1, 2 or 4 bytes, and lie entirely inside the BAR.  On success
+ * *pbarp is the BAR and *addrp the mapped address to access.  Returns 0 or
+ * EINVAL.
+ */
+static int
+ppt_bar_io_setup(struct pptdev *ppt, const struct ppt_bar_io *bio,
+    struct pptbar **pbarp, void **addrp)
+{
+	/*
+	 * Convert to unsigned up front so that the range checks also reject
+	 * negative values, should any of these fields be signed.
+	 */
+	const uint_t rnum = bio->pbi_bar;
+	const uint64_t off = bio->pbi_off;
+	const uint64_t width = bio->pbi_width;
+	struct pptbar *pbar;
+
+	if (rnum >= PCI_BASE_NUM) {
+		return (EINVAL);
+	}
+	pbar = &ppt->pptd_bars[rnum];
+	if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
+		return (EINVAL);
+	}
+	if (width != 1 && width != 2 && width != 4) {
+		return (EINVAL);
+	}
+	/* The whole access must fit; written so the sum cannot overflow. */
+	if (pbar->size < width || off > pbar->size - width) {
+		return (EINVAL);
+	}
+
+	*pbarp = pbar;
+	*addrp = pbar->io_ptr + off;
+	return (0);
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * PPT_CFG_READ/PPT_CFG_WRITE	1, 2 or 4 byte PCI config space access.
+ * PPT_BAR_QUERY		report type, base and size of one BAR.
+ * PPT_BAR_READ/PPT_BAR_WRITE	1, 2 or 4 byte access to an I/O-space BAR.
+ *
+ * Returns 0 on success, ENOENT for an unknown minor or an unpopulated BAR,
+ * EFAULT when the argument cannot be copied, EINVAL for a bad width, BAR or
+ * offset, and ENOTTY for an unknown command.
+ */
+static int
+ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+	minor_t minor = getminor(dev);
+	struct pptdev *ppt;
+	void *data = (void *)arg;
+
+	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
+		return (ENOENT);
+	}
+
+	switch (cmd) {
+	case PPT_CFG_READ: {
+		struct ppt_cfg_io cio;
+		ddi_acc_handle_t cfg = ppt->pptd_cfg;
+
+		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
+			return (EFAULT);
+		}
+		/*
+		 * NOTE(review): pci_off is handed to the config accessors
+		 * without a range or alignment check - confirm the accessors
+		 * tolerate arbitrary offsets.
+		 */
+		switch (cio.pci_width) {
+		case 4:
+			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
+			break;
+		case 2:
+			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
+			break;
+		case 1:
+			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
+			break;
+		default:
+			return (EINVAL);
+		}
+
+		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
+			return (EFAULT);
+		}
+		return (0);
+	}
+	case PPT_CFG_WRITE: {
+		struct ppt_cfg_io cio;
+		ddi_acc_handle_t cfg = ppt->pptd_cfg;
+
+		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
+			return (EFAULT);
+		}
+		/* NOTE(review): pci_off is unchecked here as well. */
+		switch (cio.pci_width) {
+		case 4:
+			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
+			break;
+		case 2:
+			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
+			break;
+		case 1:
+			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
+			break;
+		default:
+			return (EINVAL);
+		}
+
+		return (0);
+	}
+	case PPT_BAR_QUERY: {
+		struct ppt_bar_query barg;
+		struct pptbar *pbar;
+		uint_t idx;
+
+		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
+			return (EFAULT);
+		}
+		/* Unsigned compare so a negative index cannot slip by. */
+		idx = barg.pbq_baridx;
+		if (idx >= PCI_BASE_NUM) {
+			return (EINVAL);
+		}
+		pbar = &ppt->pptd_bars[idx];
+
+		if (pbar->base == 0 || pbar->size == 0) {
+			return (ENOENT);
+		}
+		barg.pbq_type = pbar->type;
+		barg.pbq_base = pbar->base;
+		barg.pbq_size = pbar->size;
+
+		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
+			return (EFAULT);
+		}
+		return (0);
+	}
+	case PPT_BAR_READ: {
+		struct ppt_bar_io bio;
+		struct pptbar *pbar;
+		void *addr;
+		int err;
+
+		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
+			return (EFAULT);
+		}
+		if ((err = ppt_bar_io_setup(ppt, &bio, &pbar, &addr)) != 0) {
+			return (err);
+		}
+
+		switch (bio.pbi_width) {
+		case 4:
+			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
+			break;
+		case 2:
+			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
+			break;
+		case 1:
+			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
+			break;
+		default:
+			return (EINVAL);
+		}
+
+		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
+			return (EFAULT);
+		}
+		return (0);
+	}
+	case PPT_BAR_WRITE: {
+		struct ppt_bar_io bio;
+		struct pptbar *pbar;
+		void *addr;
+		int err;
+
+		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
+			return (EFAULT);
+		}
+		if ((err = ppt_bar_io_setup(ppt, &bio, &pbar, &addr)) != 0) {
+			return (err);
+		}
+
+		switch (bio.pbi_width) {
+		case 4:
+			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
+			break;
+		case 2:
+			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
+			break;
+		case 1:
+			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
+			break;
+		default:
+			return (EINVAL);
+		}
+
+		return (0);
+	}
+
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
+}
+
+/*
+ * Read the MSI-X capability's PBA offset register and return its BIR field,
+ * i.e. the index of the BAR that holds the Pending Bit Array.  Returns -1
+ * when the device has no MSI-X capability or the register reads back as
+ * PCI_EINVAL32.
+ */
+static int
+ppt_find_pba_bar(struct pptdev *ppt)
+{
+	uint16_t cap_base;
+	uint32_t pba_reg;
+
+	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &cap_base) !=
+	    DDI_SUCCESS) {
+		return (-1);
+	}
+
+	pba_reg = pci_config_get32(ppt->pptd_cfg,
+	    cap_base + PCI_MSIX_PBA_OFFSET);
+	if (pba_reg != PCI_EINVAL32) {
+		return (pba_reg & PCI_MSIX_PBA_BIR_MASK);
+	}
+
+	return (-1);
+}
+
+/*
+ * devmap(9E) entry point: export the BAR that holds the MSI-X Pending Bit
+ * Array to the caller, readable and writable.
+ *
+ * 'off' must be non-negative and page aligned.  Returns ENXIO for an unknown
+ * minor or a non-native data model, EINVAL for a bad offset or a device
+ * without a usable PBA BAR, otherwise the result of devmap_devmem_setup().
+ */
+static int
+ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
+    size_t *maplen, uint_t model)
+{
+	struct pptdev *ppt;
+	int pba_bir;
+	int rv;
+
+	ppt = ddi_get_soft_state(ppt_state, getminor(dev));
+	if (ppt == NULL)
+		return (ENXIO);
+
+#ifdef _MULTI_DATAMODEL
+	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
+		return (ENXIO);
+#endif
+
+	if (off < 0 || P2ALIGN(off, PAGESIZE) != off)
+		return (EINVAL);
+
+	pba_bir = ppt_find_pba_bar(ppt);
+	if (pba_bir == -1)
+		return (EINVAL);
+
+	/*
+	 * DDI register numbers are one higher than BAR numbers: register 0
+	 * is PCI config space and the BARs follow from register 1.
+	 */
+	rv = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, pba_bir + 1, off,
+	    len, PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED,
+	    &ppt_attr);
+	if (rv == DDI_SUCCESS)
+		*maplen = len;
+
+	return (rv);
+}
+
+
+/*
+ * Forget everything recorded about the device's BARs: release the register
+ * mapping of every mapped I/O-space BAR, then zero the whole table.
+ */
+static void
+ppt_bar_wipe(struct pptdev *ppt)
+{
+	struct pptbar *pbar = &ppt->pptd_bars[0];
+	struct pptbar *const end = &ppt->pptd_bars[PCI_BASE_NUM];
+
+	for (; pbar != end; pbar++) {
+		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL)
+			continue;
+
+		ddi_regs_map_free(&pbar->io_handle);
+	}
+
+	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
+}
+
+/*
+ * Fill in ppt->pptd_bars from the device's "assigned-addresses" property and
+ * map every I/O-space BAR so that it can be accessed through ioctl.
+ *
+ * Returns 0 on success.  On failure the BAR table is wiped and an errno is
+ * returned: EIO when the property is missing or a BAR cannot be mapped,
+ * EEXIST when two entries describe the same BAR.
+ */
+static int
+ppt_bar_crawl(struct pptdev *ppt)
+{
+	pci_regspec_t *regs;
+	uint_t rcount, i;
+	int err = 0, rlen;
+
+	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
+	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
+		return (EIO);
+	}
+
+	VERIFY3S(rlen, >, 0);
+	rcount = rlen / sizeof (pci_regspec_t);
+	for (i = 0; i < rcount; i++) {
+		pci_regspec_t *reg = &regs[i];
+		struct pptbar *pbar;
+		uint_t bar, idx;
+
+		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
+		bar = PCI_REG_REG_G(reg->pci_phys_hi);
+		if (!BAR_VALID(bar)) {
+			continue;
+		}
+
+		idx = BAR_TO_IDX(bar);
+		pbar = &ppt->pptd_bars[idx];
+		/* is this somehow already populated? */
+		if (pbar->base != 0 || pbar->size != 0) {
+			err = EEXIST;
+			break;
+		}
+
+		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
+		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
+		    (uint64_t)reg->pci_phys_low;
+		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
+		    (uint64_t)reg->pci_size_low;
+		if (pbar->type == PCI_ADDR_IO) {
+			/*
+			 * The register number is NOT the BAR index.  DDI
+			 * register 0 is PCI config space, and the registers
+			 * matching the assigned-addresses entries follow it,
+			 * so entry i is register i + 1.
+			 */
+			if (ddi_regs_map_setup(ppt->pptd_dip, i + 1,
+			    &pbar->io_ptr, 0, 0, &ppt_attr,
+			    &pbar->io_handle) != DDI_SUCCESS) {
+				/* Nothing was mapped; keep the wipe honest. */
+				pbar->io_handle = NULL;
+				err = EIO;
+				break;
+			}
+		}
+	}
+	kmem_free(regs, rlen);
+
+	if (err != 0) {
+		ppt_bar_wipe(ppt);
+	}
+	return (err);
+}
+
+/*
+ * Decide whether [base, base + size) may be mapped as MMIO for this device.
+ * The range must be non-empty, must not wrap, must be page aligned at both
+ * ends and must lie entirely inside one of the device's memory BARs.
+ */
+static boolean_t
+ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
+{
+	const uint64_t limit = base + size;
+
+	/* Rejects both size == 0 and a sum that wrapped around. */
+	if (limit <= base) {
+		return (B_FALSE);
+	}
+	/* Both ends have to sit on a page boundary. */
+	if (((base | size) & PAGEOFFSET) != 0) {
+		return (B_FALSE);
+	}
+
+	for (uint_t idx = 0; idx < PCI_BASE_NUM; idx++) {
+		const struct pptbar *pbar = &ppt->pptd_bars[idx];
+		const uint64_t pbar_limit = pbar->base + pbar->size;
+		const boolean_t is_mem = (pbar->type == PCI_ADDR_MEM32 ||
+		    pbar->type == PCI_ADDR_MEM64) ? B_TRUE : B_FALSE;
+
+		/* I/O BARs and empty slots can never satisfy the request. */
+		if (!is_mem) {
+			continue;
+		}
+
+		if (base >= pbar->base && base < pbar_limit &&
+		    limit >= pbar->base && limit <= pbar_limit) {
+			return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH is supported.
+ *
+ * Allocates the instance's soft state, opens PCI config space, records the
+ * device's BARs, creates the minor node and puts the instance on
+ * pptdev_list.  Everything done so far is undone if any step fails.
+ */
+static int
+ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	struct pptdev *ppt = NULL;
+	int inst;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	inst = ddi_get_instance(dip);
+
+	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
+		goto fail;
+	}
+	ppt = ddi_get_soft_state(ppt_state, inst);
+	VERIFY(ppt != NULL);
+	ppt->pptd_dip = dip;
+	ddi_set_driver_private(dip, ppt);
+
+	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
+		goto fail;
+	}
+	if (ppt_bar_crawl(ppt) != 0) {
+		goto fail;
+	}
+	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
+	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
+		goto fail;
+	}
+
+	mutex_enter(&pptdev_mtx);
+	list_insert_tail(&pptdev_list, ppt);
+	mutex_exit(&pptdev_mtx);
+
+	return (DDI_SUCCESS);
+
+fail:
+	if (ppt != NULL) {
+		ddi_remove_minor_node(dip, NULL);
+		if (ppt->pptd_cfg != NULL) {
+			pci_config_teardown(&ppt->pptd_cfg);
+		}
+		ppt_bar_wipe(ppt);
+		/* Don't leave the node pointing at state about to be freed. */
+		ddi_set_driver_private(dip, NULL);
+		ddi_soft_state_free(ppt_state, inst);
+	}
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported, and it is refused
+ * while the device is assigned to a VM (ppt->vm is set).  Otherwise the
+ * instance comes off pptdev_list and everything attach set up is released.
+ */
+static int
+ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	struct pptdev *ppt;
+	boolean_t busy;
+	int instance;
+
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ppt = ddi_get_driver_private(dip);
+	instance = ddi_get_instance(dip);
+
+	ASSERT3P(ddi_get_soft_state(ppt_state, instance), ==, ppt);
+
+	/* Check for an owner and unlink under the same lock hold. */
+	mutex_enter(&pptdev_mtx);
+	busy = (ppt->vm != NULL) ? B_TRUE : B_FALSE;
+	if (!busy)
+		list_remove(&pptdev_list, ppt);
+	mutex_exit(&pptdev_mtx);
+
+	if (busy)
+		return (DDI_FAILURE);
+
+	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
+	ppt_bar_wipe(ppt);
+	pci_config_teardown(&ppt->pptd_cfg);
+	ddi_set_driver_private(dip, NULL);
+	ddi_soft_state_free(ppt_state, instance);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E) entry point.  'arg' is a dev_t whose minor number doubles as
+ * the instance number.
+ *
+ * DDI_INFO_DEVT2INSTANCE always succeeds; DDI_INFO_DEVT2DEVINFO succeeds only
+ * when soft state exists for the instance.  Anything else fails.
+ */
+static int
+ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+	const int instance = getminor((dev_t)arg);
+	struct pptdev *ppt;
+
+	if (cmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)(uintptr_t)instance;
+		return (DDI_SUCCESS);
+	}
+
+	if (cmd != DDI_INFO_DEVT2DEVINFO)
+		return (DDI_FAILURE);
+
+	ppt = ddi_get_soft_state(ppt_state, instance);
+	if (ppt == NULL)
+		return (DDI_FAILURE);
+
+	*result = (void *)ppt->pptd_dip;
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Character/block entry points for the ppt device node.  Only open, ioctl
+ * and devmap are implemented by this driver; the remaining slots are filled
+ * with the stock nulldev/nodev/nochpoll stubs or left NULL.
+ */
+static struct cb_ops ppt_cb_ops = {
+ ppt_open, /* open */
+ nulldev, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ ppt_ioctl, /* ioctl */
+ ppt_devmap, /* devmap */
+ NULL, /* mmap */
+ NULL, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* streamtab */
+ D_NEW | D_MP | D_64BIT | D_DEVMAP, /* driver flags */
+ CB_REV /* cb_ops revision */
+};
+
+/*
+ * Device operations for the ppt driver: getinfo/attach/detach are provided
+ * above, and the character entry points come from ppt_cb_ops.
+ */
+static struct dev_ops ppt_ops = {
+ DEVO_REV, /* dev_ops revision */
+ 0, /* reference count */
+ ppt_ddi_info, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ ppt_ddi_attach, /* attach */
+ ppt_ddi_detach, /* detach */
+ nodev, /* reset */
+ &ppt_cb_ops, /* cb_ops */
+ (struct bus_ops *)NULL /* bus_ops */
+};
+
+/* Loadable-driver linkage: ties the description string to ppt_ops. */
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "bhyve pci pass-thru",
+ &ppt_ops
+};
+
+/* Module linkage handed to mod_install()/mod_remove()/mod_info(). */
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+/*
+ * Module load entry point.
+ *
+ * Sets up the global device list and its lock, the per-instance soft state
+ * and finally installs the module.  Every step that succeeded is undone if a
+ * later step fails, so a failed load leaves nothing behind.
+ *
+ * Returns 0 on success or the error from ddi_soft_state_init()/mod_install().
+ */
+int
+_init(void)
+{
+	int error;
+
+	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
+	list_create(&pptdev_list, sizeof (struct pptdev),
+	    offsetof(struct pptdev, pptd_node));
+
+	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
+	if (error != 0) {
+		/* Soft state was never set up; there is nothing to fini. */
+		goto fail;
+	}
+
+	error = mod_install(&modlinkage);
+	if (error != 0) {
+		ddi_soft_state_fini(&ppt_state);
+		goto fail;
+	}
+
+	/* Cached so ppt_findf() can recognise file descriptors for ppt. */
+	ppt_major = ddi_name_to_major("ppt");
+	return (0);
+
+fail:
+	list_destroy(&pptdev_list);
+	mutex_destroy(&pptdev_mtx);
+	return (error);
+}
+
+/*
+ * Module unload entry point.
+ *
+ * If mod_remove() refuses, nothing is torn down and its error is returned.
+ * Otherwise the soft state, the device list and its lock created by _init()
+ * are released.  ppt_ddi_detach() unlinks each instance from pptdev_list, so
+ * the list is expected to be empty by the time it is destroyed here.
+ */
+int
+_fini(void)
+{
+	int error;
+
+	error = mod_remove(&modlinkage);
+	if (error != 0)
+		return (error);
+
+	ddi_soft_state_fini(&ppt_state);
+	list_destroy(&pptdev_list);
+	mutex_destroy(&pptdev_mtx);
+
+	return (0);
+}
+
+/* Module information entry point; defers entirely to mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Poll the PCIe Device Status register until the Transactions Pending bit
+ * clears or the budget of 'max_delay_us' microseconds is used up.
+ *
+ * Returns B_TRUE once no transactions are pending.  Returns B_FALSE if config
+ * space cannot be accessed, the device has no PCIe capability, or the bit is
+ * still set when the budget reaches zero.
+ */
+static boolean_t
+ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
+{
+	ddi_acc_handle_t hdl;
+	uint16_t cap_ptr;
+	boolean_t idle = B_FALSE;
+
+	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
+		return (B_FALSE);
+
+	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) == DDI_SUCCESS) {
+		for (;;) {
+			uint16_t sts;
+			uint_t nap;
+
+			sts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
+			if ((sts & PCIE_DEVSTS_TRANS_PENDING) == 0) {
+				idle = B_TRUE;
+				break;
+			}
+			if (max_delay_us == 0)
+				break;
+
+			/* Sleep in slices of at most 100 milliseconds. */
+			nap = (max_delay_us > 100000) ? 100000 : max_delay_us;
+			delay(drv_usectohz(nap));
+			max_delay_us -= nap;
+		}
+	}
+
+	pci_config_teardown(&hdl);
+	return (idle);
+}
+
+/*
+ * Return the upper bound, in microseconds, of the device's currently
+ * programmed PCIe Completion Timeout.
+ *
+ * The value is looked up from the Completion Timeout Value field of Device
+ * Control 2.  The 50ms default is returned when config space cannot be
+ * accessed, the device has no PCIe capability, the capability is older than
+ * version 2, Device Capabilities 2 advertises no programmable timeout ranges,
+ * or the programmed encoding has no entry in the table.
+ */
+static uint_t
+ppt_max_completion_tmo_us(dev_info_t *dip)
+{
+	/* Indexed by the Completion Timeout Value encoding; 0 == no entry. */
+	static const uint_t timo_ranges[] = {
+		50000,		/* 50ms */
+		100,		/* 100us */
+		10000,		/* 10ms */
+		0,
+		0,
+		55000,		/* 55ms */
+		210000,		/* 210ms */
+		0,
+		0,
+		900000,		/* 900ms */
+		3500000,	/* 3.5s */
+		0,
+		0,
+		13000000,	/* 13s */
+		64000000,	/* 64s */
+		0
+	};
+	const uint_t nranges = sizeof (timo_ranges) / sizeof (timo_ranges[0]);
+	uint_t timo = 0;
+	uint_t idx;
+	uint16_t cap_ptr;
+	ddi_acc_handle_t hdl;
+
+	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
+		return (50000); /* default 50ms */
+
+	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
+		goto out;
+
+	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
+	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
+		goto out;
+
+	/* Capability register is tested with the capability mask ... */
+	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
+	    PCIE_DEVCAP2_COM_TO_RANGE_MASK) == 0)
+		goto out;
+
+	/* ... and the control register with the control mask. */
+	idx = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
+	    PCIE_DEVCTL2_COM_TO_RANGE_MASK;
+	if (idx < nranges)
+		timo = timo_ranges[idx];
+
+out:
+	if (timo == 0)
+		timo = 50000; /* default 50ms */
+
+	pci_config_teardown(&hdl);
+	return (timo);
+}
+
+/*
+ * Attempt a PCIe Function Level Reset of the device.
+ *
+ * Returns B_FALSE without resetting when config space cannot be accessed,
+ * the device has no PCIe capability, the FLR bit is absent from Device
+ * Capabilities, or (when 'force' is false) transactions are still pending
+ * after the wait.  With 'force' set the reset is issued anyway, a warning is
+ * logged and the post-reset delay is lengthened.  Returns B_TRUE once the
+ * reset has been initiated and the delay has elapsed.
+ */
+static boolean_t
+ppt_flr(dev_info_t *dip, boolean_t force)
+{
+ uint16_t cap_ptr, ctl, cmd;
+ ddi_acc_handle_t hdl;
+ uint_t compl_delay = 0, max_delay_us;
+
+ if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
+ return (B_FALSE);
+
+ if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
+ goto fail;
+
+ if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
+ == 0)
+ goto fail;
+
+ /* Wait budget: the completion timeout, but never less than 10ms. */
+ max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);
+
+ /*
+ * Disable busmastering to prevent generation of new transactions while
+ * waiting for the device to go idle. If the idle timeout fails, the
+ * command register is restored which will re-enable busmastering.
+ */
+ cmd = pci_config_get16(hdl, PCI_CONF_COMM);
+ pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
+ if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
+ if (!force) {
+ pci_config_put16(hdl, PCI_CONF_COMM, cmd);
+ goto fail;
+ }
+ dev_err(dip, CE_WARN,
+ "?Resetting with transactions pending after %u us\n",
+ max_delay_us);
+
+ /*
+ * Extend the post-FLR delay to cover the maximum Completion
+ * Timeout delay of anything in flight during the FLR delay.
+ * Enforce a minimum delay of at least 10ms.
+ */
+ compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
+ }
+
+ /* Initiate the reset. */
+ ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
+ (void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
+ ctl | PCIE_DEVCTL_INITIATE_FLR);
+
+ /* Wait for at least 100ms; compl_delay is in milliseconds. */
+ delay(drv_usectohz((100 + compl_delay) * 1000));
+
+ pci_config_teardown(&hdl);
+ return (B_TRUE);
+
+fail:
+ /*
+ * TODO: If the FLR fails for some reason, we should attempt a reset
+ * using the PCI power management facilities (if possible).
+ */
+ pci_config_teardown(&hdl);
+ return (B_FALSE);
+}
+
+
+/*
+ * Translate a file descriptor into the ppt instance it refers to.
+ *
+ * The descriptor must name a device whose major number is ppt_major and
+ * whose minor number has soft state.  On success the hold taken by getf() is
+ * kept and the caller must drop it with releasef(fd).  On any failure the
+ * hold (if one was taken) is released here and NULL is returned.
+ */
+static struct pptdev *
+ppt_findf(int fd)
+{
+	struct pptdev *found = NULL;
+	file_t *fp = getf(fd);
+	vattr_t attr;
+
+	if (fp == NULL)
+		return (NULL);
+
+	attr.va_mask = AT_RDEV;
+	if (VOP_GETATTR(fp->f_vnode, &attr, NO_FOLLOW, fp->f_cred, NULL) == 0 &&
+	    getmajor(attr.va_rdev) == ppt_major) {
+		found = ddi_get_soft_state(ppt_state, getminor(attr.va_rdev));
+	}
+
+	if (found == NULL)
+		releasef(fd);
+
+	return (found);
+}
+
+/*
+ * Remove every recorded MMIO mapping of 'ppt' from the guest and clear the
+ * corresponding segment slots.  Slots with a zero length are unused and are
+ * skipped.  The result of vm_unmap_mmio() is deliberately ignored.
+ */
+static void
+ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
+{
+	for (int slot = 0; slot < MAX_MMIOSEGS; slot++) {
+		struct pptseg *cur = &ppt->mmio[slot];
+
+		if (cur->len != 0) {
+			(void) vm_unmap_mmio(vm, cur->gpa, cur->len);
+			bzero(cur, sizeof (struct pptseg));
+		}
+	}
+}
+
+/*
+ * Disable and release every MSI (or fixed) interrupt currently set up for
+ * 'ppt', then free the handle array and reset the MSI bookkeeping.
+ *
+ * With no messages configured only the is_fixed flag is cleared, so that a
+ * flag left behind by an earlier failed setup cannot leak into the next one.
+ */
+static void
+ppt_teardown_msi(struct pptdev *ppt)
+{
+	int i;
+
+	if (ppt->msi.num_msgs == 0) {
+		ppt->msi.is_fixed = B_FALSE;
+		return;
+	}
+
+	for (i = 0; i < ppt->msi.num_msgs; i++) {
+		/* Stays 0 (non-block) if the capability query fails. */
+		int intr_cap = 0;
+
+		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
+		if (intr_cap & DDI_INTR_FLAG_BLOCK)
+			(void) ddi_intr_block_disable(&ppt->msi.inth[i], 1);
+		else
+			(void) ddi_intr_disable(ppt->msi.inth[i]);
+
+		(void) ddi_intr_remove_handler(ppt->msi.inth[i]);
+		(void) ddi_intr_free(ppt->msi.inth[i]);
+
+		ppt->msi.inth[i] = NULL;
+	}
+
+	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
+	ppt->msi.inth = NULL;
+	ppt->msi.inth_sz = 0;
+	ppt->msi.is_fixed = B_FALSE;
+
+	ppt->msi.num_msgs = 0;
+}
+
+/*
+ * Disable MSI-X vector 'idx' of 'ppt' and remove its handler.  The interrupt
+ * handle itself stays allocated; it is freed by ppt_teardown_msix().  Does
+ * nothing when the handle array or the individual handle is absent.
+ */
+static void
+ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
+{
+	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
+		/* Stays 0 (non-block) if the capability query fails. */
+		int intr_cap = 0;
+
+		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
+		if (intr_cap & DDI_INTR_FLAG_BLOCK)
+			(void) ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
+		else
+			(void) ddi_intr_disable(ppt->msix.inth[idx]);
+
+		(void) ddi_intr_remove_handler(ppt->msix.inth[idx]);
+	}
+}
+
+/*
+ * Tear down all MSI-X state of 'ppt': every vector is first disabled via
+ * ppt_teardown_msix_intr(), then (if the handle array exists) each handle is
+ * freed and both the handle and argument arrays are released.  A device with
+ * no MSI-X messages configured is left untouched.
+ */
+static void
+ppt_teardown_msix(struct pptdev *ppt)
+{
+	uint_t vec;
+
+	if (ppt->msix.num_msgs != 0) {
+		for (vec = 0; vec < ppt->msix.num_msgs; vec++) {
+			ppt_teardown_msix_intr(ppt, vec);
+		}
+
+		if (ppt->msix.inth != NULL) {
+			for (vec = 0; vec < ppt->msix.num_msgs; vec++) {
+				ddi_intr_free(ppt->msix.inth[vec]);
+			}
+
+			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
+			ppt->msix.inth_sz = 0;
+			ppt->msix.inth = NULL;
+
+			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
+			ppt->msix.arg_sz = 0;
+			ppt->msix.arg = NULL;
+		}
+
+		ppt->msix.num_msgs = 0;
+	}
+}
+
+/*
+ * Count the pass-through devices currently assigned to 'vm'.  The device
+ * list is walked under pptdev_mtx.
+ */
+int
+ppt_assigned_devices(struct vm *vm)
+{
+	struct pptdev *cur;
+	uint_t count = 0;
+
+	mutex_enter(&pptdev_mtx);
+	cur = list_head(&pptdev_list);
+	while (cur != NULL) {
+		if (cur->vm == vm)
+			count++;
+		cur = list_next(&pptdev_list, cur);
+	}
+	mutex_exit(&pptdev_mtx);
+
+	return (count);
+}
+
+/*
+ * Report whether guest-physical address 'gpa' falls inside an MMIO segment
+ * mapped for any pass-through device assigned to 'vm'.
+ *
+ * The device list is only touched while pptdev_mtx is held.
+ */
+boolean_t
+ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
+{
+	struct pptdev *ppt;
+	boolean_t found = B_FALSE;
+
+	/* XXX: this should probably be restructured to avoid the lock */
+	mutex_enter(&pptdev_mtx);
+	for (ppt = list_head(&pptdev_list); ppt != NULL && !found;
+	    ppt = list_next(&pptdev_list, ppt)) {
+		if (ppt->vm != vm) {
+			continue;
+		}
+
+		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
+			struct pptseg *seg = &ppt->mmio[i];
+
+			if (seg->len == 0)
+				continue;
+			/*
+			 * Compare the offset into the segment rather than
+			 * computing gpa + len, which could wrap.
+			 */
+			if (gpa >= seg->gpa && (gpa - seg->gpa) < seg->len) {
+				found = B_TRUE;
+				break;
+			}
+		}
+	}
+	mutex_exit(&pptdev_mtx);
+
+	return (found);
+}
+
+/*
+ * Assign the pass-through device referenced by 'pptfd' to 'vm'.
+ *
+ * Returns EBADF if the descriptor does not refer to a ppt instance, EBUSY if
+ * the device is already owned by a different VM, and EIO if saving or
+ * restoring PCI config state fails.  On success the device is reset, marked
+ * as owned by 'vm' and moved from the host IOMMU domain to the VM's domain.
+ * The whole operation runs under pptdev_mtx.
+ */
+int
+ppt_assign_device(struct vm *vm, int pptfd)
+{
+ struct pptdev *ppt;
+ int err = 0;
+
+ mutex_enter(&pptdev_mtx);
+ ppt = ppt_findf(pptfd);
+ if (ppt == NULL) {
+ mutex_exit(&pptdev_mtx);
+ return (EBADF);
+ }
+
+ /* Only one VM may own a device at any given time */
+ if (ppt->vm != NULL && ppt->vm != vm) {
+ err = EBUSY;
+ goto done;
+ }
+
+ if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
+ err = EIO;
+ goto done;
+ }
+ /* The reset is forced; its result is not checked. */
+ ppt_flr(ppt->pptd_dip, B_TRUE);
+
+ /*
+ * Restore the device state after reset and then perform another save
+ * so the "pristine" state can be restored when the device is removed
+ * from the guest.
+ */
+ if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
+ pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
+ err = EIO;
+ goto done;
+ }
+
+ ppt->vm = vm;
+ iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
+ iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
+ pf_set_passthru(ppt->pptd_dip, B_TRUE);
+
+done:
+ /* Drop the file hold taken by ppt_findf(). */
+ releasef(pptfd);
+ mutex_exit(&pptdev_mtx);
+ return (err);
+}
+
+/*
+ * If the device has a PCI power management capability and its PMCSR does
+ * not report power state D0, rewrite PMCSR with the state field set to D0.
+ * Nothing is done when config space cannot be accessed or the capability is
+ * absent.
+ */
+static void
+ppt_reset_pci_power_state(dev_info_t *dip)
+{
+	ddi_acc_handle_t hdl;
+	uint16_t pm_cap;
+	uint16_t pmcsr;
+
+	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
+		return;
+
+	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PM, &pm_cap) != DDI_SUCCESS) {
+		pci_config_teardown(&hdl);
+		return;
+	}
+
+	pmcsr = PCI_CAP_GET16(hdl, 0, pm_cap, PCI_PMCSR);
+	if ((pmcsr & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
+		pmcsr = (pmcsr & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
+		(void) PCI_CAP_PUT16(hdl, 0, pm_cap, PCI_PMCSR, pmcsr);
+	}
+
+	pci_config_teardown(&hdl);
+}
+
+/*
+ * Detach 'ppt' from the VM that currently owns it.
+ *
+ * The caller must hold pptdev_mtx and the device must be assigned (both are
+ * asserted).  The device is reset and its saved config state restored, the
+ * guest MMIO mappings and MSI/MSI-X interrupts are torn down, and the device
+ * is moved from the VM's IOMMU domain back to the host domain.
+ */
+static void
+ppt_do_unassign(struct pptdev *ppt)
+{
+ struct vm *vm = ppt->vm;
+
+ ASSERT3P(vm, !=, NULL);
+ ASSERT(MUTEX_HELD(&pptdev_mtx));
+
+
+ /* The reset is forced; its result is not checked. */
+ ppt_flr(ppt->pptd_dip, B_TRUE);
+
+ /*
+ * Restore from the state saved during device assignment.
+ * If the device power state has been altered, that must be remedied
+ * first, as it will reset register state during the transition.
+ */
+ ppt_reset_pci_power_state(ppt->pptd_dip);
+ (void) pci_restore_config_regs(ppt->pptd_dip);
+
+ pf_set_passthru(ppt->pptd_dip, B_FALSE);
+
+ ppt_unmap_mmio(vm, ppt);
+ ppt_teardown_msi(ppt);
+ ppt_teardown_msix(ppt);
+ iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
+ iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
+ /* Cleared last: the device is unowned from here on. */
+ ppt->vm = NULL;
+}
+
+/*
+ * Unassign the device referenced by 'pptfd' from 'vm'.
+ *
+ * Returns EBADF if the descriptor does not refer to a ppt instance and EBUSY
+ * if the device is not owned by 'vm'; otherwise the device is released via
+ * ppt_do_unassign() and 0 is returned.
+ */
+int
+ppt_unassign_device(struct vm *vm, int pptfd)
+{
+	struct pptdev *dev;
+	int rv;
+
+	mutex_enter(&pptdev_mtx);
+
+	dev = ppt_findf(pptfd);
+	if (dev == NULL) {
+		mutex_exit(&pptdev_mtx);
+		return (EBADF);
+	}
+
+	if (dev->vm == vm) {
+		ppt_do_unassign(dev);
+		rv = 0;
+	} else {
+		/* If this device is not owned by this 'vm' then bail out. */
+		rv = EBUSY;
+	}
+
+	releasef(pptfd);
+	mutex_exit(&pptdev_mtx);
+	return (rv);
+}
+
+/*
+ * Release every pass-through device that is currently assigned to 'vm'.
+ * Always returns 0.
+ */
+int
+ppt_unassign_all(struct vm *vm)
+{
+	struct pptdev *cur;
+
+	mutex_enter(&pptdev_mtx);
+	cur = list_head(&pptdev_list);
+	while (cur != NULL) {
+		if (cur->vm == vm)
+			ppt_do_unassign(cur);
+		cur = list_next(&pptdev_list, cur);
+	}
+	mutex_exit(&pptdev_mtx);
+
+	return (0);
+}
+
+/*
+ * Map 'len' bytes of device MMIO at host-physical 'hpa' into 'vm' at
+ * guest-physical 'gpa' and record the mapping in the first free segment
+ * slot of the device.
+ *
+ * Returns EBADF if 'pptfd' is not a ppt instance, EBUSY if the device is not
+ * owned by 'vm', EINVAL if the host range fails ppt_bar_verify_mmio(), ENOSPC
+ * if all MAX_MMIOSEGS slots are in use, or the result of vm_map_mmio().
+ */
+int
+ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
+    vm_paddr_t hpa)
+{
+	struct pptdev *dev;
+	struct pptseg *slot = NULL;
+	int rv;
+
+	mutex_enter(&pptdev_mtx);
+	dev = ppt_findf(pptfd);
+	if (dev == NULL) {
+		mutex_exit(&pptdev_mtx);
+		return (EBADF);
+	}
+
+	if (dev->vm != vm) {
+		rv = EBUSY;
+	} else if (!ppt_bar_verify_mmio(dev, hpa, len)) {
+		/*
+		 * The host-physical range of the requested mapping must fit
+		 * within one of the MMIO BARs of the device.
+		 */
+		rv = EINVAL;
+	} else {
+		/* Locate the first unused segment slot. */
+		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
+			if (dev->mmio[i].len == 0) {
+				slot = &dev->mmio[i];
+				break;
+			}
+		}
+
+		if (slot == NULL) {
+			rv = ENOSPC;
+		} else {
+			rv = vm_map_mmio(vm, gpa, len, hpa);
+			if (rv == 0) {
+				slot->gpa = gpa;
+				slot->len = len;
+			}
+		}
+	}
+
+	releasef(pptfd);
+	mutex_exit(&pptdev_mtx);
+	return (rv);
+}
+
+/*
+ * Interrupt handler shared by all pass-through vectors.  'arg' is the
+ * pptintr_arg registered for the vector; its address/data pair is forwarded
+ * to the owning VM as an MSI.  The second argument is unused.
+ */
+static uint_t
+pptintr(caddr_t arg, caddr_t unused)
+{
+	struct pptintr_arg *ia = (struct pptintr_arg *)arg;
+	struct pptdev *dev = ia->pptdev;
+
+	/*
+	 * XXX
+	 * An interrupt on an unassigned device is not expected to happen -
+	 * panic?  For now it is silently dropped.
+	 */
+	if (dev->vm != NULL)
+		lapic_intr_msi(dev->vm, ia->addr, ia->msg_data);
+
+	/*
+	 * For legacy interrupts give other filters a chance in case
+	 * the interrupt was not generated by the passthrough device.
+	 */
+	if (dev->msi.is_fixed)
+		return (DDI_INTR_UNCLAIMED);
+
+	return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * Undo a partially completed ppt_setup_msi().
+ *
+ * 'nalloc' handles were obtained from ddi_intr_alloc(); the first 'nhandler'
+ * of them have pptintr registered and the first 'nenabled' of them have been
+ * enabled.  Everything is released and the MSI bookkeeping is reset so that
+ * no stale pointer or flag survives the failure.
+ */
+static void
+ppt_setup_msi_unwind(struct pptdev *ppt, int nalloc, int nhandler,
+    int nenabled)
+{
+	for (int i = 0; i < nalloc; i++) {
+		if (i < nenabled) {
+			int intr_cap = 0;
+
+			(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
+			if (intr_cap & DDI_INTR_FLAG_BLOCK) {
+				(void) ddi_intr_block_disable(
+				    &ppt->msi.inth[i], 1);
+			} else {
+				(void) ddi_intr_disable(ppt->msi.inth[i]);
+			}
+		}
+		if (i < nhandler)
+			(void) ddi_intr_remove_handler(ppt->msi.inth[i]);
+
+		(void) ddi_intr_free(ppt->msi.inth[i]);
+		ppt->msi.inth[i] = NULL;
+	}
+
+	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
+	ppt->msi.inth = NULL;
+	ppt->msi.inth_sz = 0;
+	ppt->msi.is_fixed = B_FALSE;
+	ppt->msi.num_msgs = 0;
+}
+
+/*
+ * Configure 'numvec' MSI vectors (or fixed interrupts when MSI is not
+ * available) for the device referenced by 'pptfd', delivering them to 'vm'
+ * as MSIs with address 'addr' and data 'msg' + vector index.
+ *
+ * Any previous MSI configuration is torn down first; numvec == 0 therefore
+ * simply disables MSI.  Returns EINVAL for an out-of-range 'numvec' or when
+ * the requested vectors cannot be allocated, EBADF if 'pptfd' is not a ppt
+ * instance, EBUSY if the device is not owned by 'vm', and ENXIO if a handler
+ * cannot be added or enabled.  On every failure all interrupt resources
+ * acquired by this call are released again.
+ */
+int
+ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
+    int numvec)
+{
+	int i, msi_count, intr_type;
+	int nhandler = 0, nenabled = 0;
+	boolean_t is_fixed = B_FALSE;
+	struct pptdev *ppt;
+	int err = 0;
+
+	if (numvec < 0 || numvec > MAX_MSIMSGS)
+		return (EINVAL);
+
+	mutex_enter(&pptdev_mtx);
+	ppt = ppt_findf(pptfd);
+	if (ppt == NULL) {
+		mutex_exit(&pptdev_mtx);
+		return (EBADF);
+	}
+	if (ppt->vm != vm) {
+		/* Make sure we own this device */
+		err = EBUSY;
+		goto done;
+	}
+
+	/* Free any allocated resources */
+	ppt_teardown_msi(ppt);
+
+	if (numvec == 0) {
+		/* nothing more to do */
+		goto done;
+	}
+
+	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
+	    &msi_count) != DDI_SUCCESS) {
+		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
+		    &msi_count) != DDI_SUCCESS) {
+			err = EINVAL;
+			goto done;
+		}
+
+		intr_type = DDI_INTR_TYPE_FIXED;
+		is_fixed = B_TRUE;
+	} else {
+		intr_type = DDI_INTR_TYPE_MSI;
+	}
+
+	/*
+	 * The device must be capable of supporting the number of vectors
+	 * the guest wants to allocate.
+	 */
+	if (numvec > msi_count) {
+		err = EINVAL;
+		goto done;
+	}
+
+	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
+	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
+	msi_count = 0;
+	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
+	    numvec, &msi_count, 0) != DDI_SUCCESS) {
+		/* No handles were obtained; release only the array. */
+		ppt_setup_msi_unwind(ppt, 0, 0, 0);
+		err = EINVAL;
+		goto done;
+	}
+
+	/* Verify that we got as many vectors as the guest requested */
+	if (numvec != msi_count) {
+		ppt_setup_msi_unwind(ppt, msi_count, 0, 0);
+		err = EINVAL;
+		goto done;
+	}
+
+	/* pptintr() consults this flag, so set it before enabling. */
+	ppt->msi.is_fixed = is_fixed;
+
+	/* Set up & enable interrupt handler for each vector. */
+	for (i = 0; i < numvec; i++) {
+		int res, intr_cap = 0;
+
+		ppt->msi.arg[i].pptdev = ppt;
+		ppt->msi.arg[i].addr = addr;
+		ppt->msi.arg[i].msg_data = msg + i;
+
+		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
+		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
+			break;
+		nhandler++;
+
+		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
+		if (intr_cap & DDI_INTR_FLAG_BLOCK)
+			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
+		else
+			res = ddi_intr_enable(ppt->msi.inth[i]);
+
+		if (res != DDI_SUCCESS)
+			break;
+		nenabled++;
+	}
+	if (i < numvec) {
+		/* Release all numvec handles, not just those reached. */
+		ppt_setup_msi_unwind(ppt, numvec, nhandler, nenabled);
+		err = ENXIO;
+	} else {
+		ppt->msi.num_msgs = numvec;
+	}
+
+done:
+	releasef(pptfd);
+	mutex_exit(&pptdev_mtx);
+	return (err);
+}
+
+/*
+ * Configure (or mask) MSI-X vector 'idx' of the device referenced by
+ * 'pptfd'.
+ *
+ * On the first call for a device all available MSI-X vectors are allocated.
+ * If the mask bit in 'vector_control' is clear, the vector is (re)bound to
+ * pptintr with the given address/data pair and enabled; if it is set, the
+ * vector is torn down.
+ *
+ * Returns EBADF if 'pptfd' is not a ppt instance, EBUSY if the device is not
+ * owned by 'vm', EINVAL if the vectors cannot be allocated or 'idx' is out of
+ * range, and ENXIO if the handler cannot be added or enabled.
+ */
+int
+ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
+    uint64_t msg, uint32_t vector_control)
+{
+	struct pptdev *ppt;
+	int numvec, alloced = 0;
+	int err = 0;
+
+	mutex_enter(&pptdev_mtx);
+	ppt = ppt_findf(pptfd);
+	if (ppt == NULL) {
+		mutex_exit(&pptdev_mtx);
+		return (EBADF);
+	}
+	/* Make sure we own this device */
+	if (ppt->vm != vm) {
+		err = EBUSY;
+		goto done;
+	}
+
+	/*
+	 * First-time configuration:
+	 *	Allocate the MSI-X table
+	 *	Allocate the IRQ resources
+	 *	Set up some variables in ppt->msix
+	 */
+	if (ppt->msix.num_msgs == 0) {
+		dev_info_t *dip = ppt->pptd_dip;
+
+		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
+		    &numvec) != DDI_SUCCESS || numvec <= 0) {
+			err = EINVAL;
+			goto done;
+		}
+
+		ppt->msix.num_msgs = numvec;
+
+		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
+		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
+		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
+		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);
+
+		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
+		    numvec, &alloced, 0) != DDI_SUCCESS) {
+			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
+			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
+			ppt->msix.arg = NULL;
+			ppt->msix.inth = NULL;
+			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
+			/*
+			 * Leave the device looking unconfigured; otherwise a
+			 * later call would skip this setup and index the
+			 * arrays that were just freed.
+			 */
+			ppt->msix.num_msgs = 0;
+			err = EINVAL;
+			goto done;
+		}
+
+		if (numvec != alloced) {
+			ppt_teardown_msix(ppt);
+			err = EINVAL;
+			goto done;
+		}
+	}
+
+	/* 'idx' originates from the ioctl caller; reject both bounds. */
+	if (idx < 0 || idx >= ppt->msix.num_msgs) {
+		err = EINVAL;
+		goto done;
+	}
+
+	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+		int intr_cap = 0, res;
+
+		/* Tear down the IRQ if it's already set up */
+		ppt_teardown_msix_intr(ppt, idx);
+
+		ppt->msix.arg[idx].pptdev = ppt;
+		ppt->msix.arg[idx].addr = addr;
+		ppt->msix.arg[idx].msg_data = msg;
+
+		/* Setup the MSI-X interrupt */
+		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
+		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
+			err = ENXIO;
+			goto done;
+		}
+
+		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
+		if (intr_cap & DDI_INTR_FLAG_BLOCK)
+			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
+		else
+			res = ddi_intr_enable(ppt->msix.inth[idx]);
+
+		if (res != DDI_SUCCESS) {
+			(void) ddi_intr_remove_handler(ppt->msix.inth[idx]);
+			err = ENXIO;
+			goto done;
+		}
+	} else {
+		/* Masked, tear it down if it's already been set up */
+		ppt_teardown_msix_intr(ppt, idx);
+	}
+
+done:
+	releasef(pptfd);
+	mutex_exit(&pptdev_mtx);
+	return (err);
+}
+
+/*
+ * Report how many MSI and MSI-X vectors are available to the device
+ * referenced by 'pptfd'.  A limit is set to -1 when the corresponding query
+ * fails.
+ *
+ * Returns EBADF if 'pptfd' is not a ppt instance and EBUSY if the device is
+ * not owned by 'vm'; in both cases the output arguments are left untouched.
+ */
+int
+ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
+{
+	struct pptdev *dev;
+	int rv = 0;
+
+	mutex_enter(&pptdev_mtx);
+	dev = ppt_findf(pptfd);
+	if (dev == NULL) {
+		mutex_exit(&pptdev_mtx);
+		return (EBADF);
+	}
+
+	if (dev->vm == vm) {
+		if (ddi_intr_get_navail(dev->pptd_dip, DDI_INTR_TYPE_MSI,
+		    msilimit) != DDI_SUCCESS) {
+			*msilimit = -1;
+		}
+		if (ddi_intr_get_navail(dev->pptd_dip, DDI_INTR_TYPE_MSIX,
+		    msixlimit) != DDI_SUCCESS) {
+			*msixlimit = -1;
+		}
+	} else {
+		rv = EBUSY;
+	}
+
+	releasef(pptfd);
+	mutex_exit(&pptdev_mtx);
+	return (rv);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.conf b/usr/src/uts/i86pc/io/vmm/io/ppt.conf
new file mode 100644
index 0000000000..698cecb6f8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.conf
@@ -0,0 +1,14 @@
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2017 Joyent, Inc.
+#
+
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.h b/usr/src/uts/i86pc/io/vmm/io/ppt.h
index 686b15db49..979c0e18ac 100644
--- a/usr/src/uts/i86pc/io/vmm/io/ppt.h
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.h
@@ -31,26 +31,21 @@
#ifndef _IO_PPT_H_
#define _IO_PPT_H_
-int ppt_unassign_all(struct vm *vm);
-int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
- vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
-int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
- uint64_t addr, uint64_t msg, int numvec);
-int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
- int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
-int ppt_assigned_devices(struct vm *vm);
+int ppt_unassign_all(struct vm *vm);
+int ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
+ vm_paddr_t hpa);
+int ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr,
+ uint64_t msg, int numvec);
+int ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
+ uint64_t msg, uint32_t vector_control);
+int ppt_assigned_devices(struct vm *vm);
boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);
-
-/*
- * Returns the number of devices sequestered by the ppt driver for assignment
- * to virtual machines.
- */
-int ppt_avail_devices(void);
+int ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit);
/*
* The following functions should never be called directly.
* Use 'vm_assign_pptdev()' and 'vm_unassign_pptdev()' instead.
*/
-int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
-int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
+int ppt_assign_device(struct vm *vm, int pptfd);
+int ppt_unassign_device(struct vm *vm, int pptfd);
#endif
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile
new file mode 100644
index 0000000000..aac896e89e
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile
@@ -0,0 +1,52 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ # DDI Interfaces
+ _fini;
+ _init;
+ _info;
+
+ # PCI pass-thru API for bhyve
+ ppt_assigned_devices;
+ ppt_is_mmio;
+ ppt_assign_device;
+ ppt_unassign_device;
+ ppt_unassign_all;
+ ppt_map_mmio;
+ ppt_setup_msi;
+ ppt_setup_msix;
+ ppt_get_limits;
+
+ local:
+ *;
+};
diff --git a/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c b/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c
deleted file mode 100644
index 989e88e17b..0000000000
--- a/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2017 Joyent, Inc.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/cmn_err.h>
-
-/*
- * IOMMU Stub
- *
- * Until proper iommu support can be wired into bhyve, stub out all the
- * functions to either fail, if reasonable, or panic.
- */
-
-void
-iommu_cleanup(void)
-{
-}
-
-void *
-iommu_host_domain(void)
-{
- return (NULL);
-}
-
-/*ARGSUSED*/
-void *
-iommu_create_domain(vm_paddr_t maxaddr)
-{
- return (NULL);
-}
-
-/*ARGSUSED*/
-void
-iommu_destroy_domain(void *dom)
-{
- panic("unimplemented");
-}
-
-/*ARGSUSED*/
-void
-iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
-{
- panic("unimplemented");
-}
-
-/*ARGSUSED*/
-void
-iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len)
-{
- panic("unimplemented");
-}
-
-/*ARGSUSED*/
-void
-iommu_add_device(void *dom, uint16_t rid)
-{
- panic("unimplemented");
-}
-
-/*ARGSUSED*/
-void
-iommu_remove_device(void *dom, uint16_t rid)
-{
- panic("unimplemented");
-}
-
-/*ARGSUSED*/
-void
-iommu_invalidate_tlb(void *domain)
-{
- panic("unimplemented");
-}
-
diff --git a/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c b/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c
deleted file mode 100644
index 9d5b1f5cdc..0000000000
--- a/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2017 Joyent, Inc.
- */
-
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/cmn_err.h>
-
-#include <sys/vmm.h>
-
-/*
- * PCI Pass-Through Stub
- *
- * Until proper passthrough support can be wired into bhyve, stub out all the
- * functions to either fail or no-op.
- */
-
-int
-ppt_unassign_all(struct vm *vm)
-{
- return (0);
-}
-
-/*ARGSUSED*/
-int
-ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa,
- size_t len, vm_paddr_t hpa)
-{
- return (ENXIO);
-}
-
-/*ARGSUSED*/
-int
-ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
- uint64_t addr, uint64_t msg, int numvec)
-{
- return (ENXIO);
-}
-
-/*ARGSUSED*/
-int
-ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, int idx,
- uint64_t addr, uint64_t msg, uint32_t vector_control)
-{
- return (ENXIO);
-}
-
-/*ARGSUSED*/
-int
-ppt_assigned_devices(struct vm *vm)
-{
- return (0);
-}
-
-/*ARGSUSED*/
-boolean_t
-ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
-{
- return (B_FALSE);
-}
-
-/*ARGSUSED*/
-int
-ppt_avail_devices(void)
-{
- return (0);
-}
-
-/*ARGSUSED*/
-int
-ppt_assign_device(struct vm *vm, int bus, int slot, int func)
-{
- return (ENOENT);
-}
-
-/*ARGSUSED*/
-int
-ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
-{
- return (ENXIO);
-}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 6df094b50e..dd24a18f6a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -495,6 +495,7 @@ vmm_mod_unload()
{
int error;
+ iommu_cleanup();
error = VMM_CLEANUP();
if (error)
return (error);
@@ -1054,10 +1055,14 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
hpa = DMAP_TO_PHYS((uintptr_t)vp);
if (map) {
iommu_create_mapping(vm->iommu, gpa, hpa, sz);
+#ifdef __FreeBSD__
iommu_remove_mapping(host_domain, hpa, sz);
+#endif
} else {
iommu_remove_mapping(vm->iommu, gpa, sz);
+#ifdef __FreeBSD__
iommu_create_mapping(host_domain, hpa, hpa, sz);
+#endif
}
gpa += PAGE_SIZE;
@@ -1068,21 +1073,34 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
* Invalidate the cached translations associated with the domain
* from which pages were removed.
*/
+#ifdef __FreeBSD__
if (map)
iommu_invalidate_tlb(host_domain);
else
iommu_invalidate_tlb(vm->iommu);
+#else
+ iommu_invalidate_tlb(vm->iommu);
+#endif
}
#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE)
#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE)
+#ifdef __FreeBSD__
int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
+#else
+int
+vm_unassign_pptdev(struct vm *vm, int pptfd)
+#endif /* __FreeBSD__ */
{
int error;
+#ifdef __FreeBSD__
error = ppt_unassign_device(vm, bus, slot, func);
+#else
+ error = ppt_unassign_device(vm, pptfd);
+#endif /* __FreeBSD__ */
if (error)
return (error);
@@ -1092,8 +1110,13 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
return (0);
}
+#ifdef __FreeBSD__
int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
+#else
+int
+vm_assign_pptdev(struct vm *vm, int pptfd)
+#endif /* __FreeBSD__ */
{
int error;
vm_paddr_t maxaddr;
@@ -1109,7 +1132,11 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
vm_iommu_map(vm);
}
+#ifdef __FreeBSD__
error = ppt_assign_device(vm, bus, slot, func);
+#else
+ error = ppt_assign_device(vm, pptfd);
+#endif /* __FreeBSD__ */
return (error);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 2b612b20e9..d84580d04c 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -42,6 +42,7 @@
#include <vm/vm.h>
#include <vm/seg_dev.h>
+#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vrtc.h"
@@ -564,7 +565,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
break;
}
- /* XXXJOY: punt on these for now */
case VM_PPTDEV_MSI: {
struct vm_pptdev_msi pptmsi;
@@ -572,7 +572,9 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
error = EFAULT;
break;
}
- return (ENOTTY);
+ error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd,
+ pptmsi.addr, pptmsi.msg, pptmsi.numvec);
+ break;
}
case VM_PPTDEV_MSIX: {
struct vm_pptdev_msix pptmsix;
@@ -581,7 +583,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
error = EFAULT;
break;
}
- return (ENOTTY);
+ error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd,
+ pptmsix.idx, pptmsix.addr, pptmsix.msg,
+ pptmsix.vector_control);
+ break;
}
case VM_MAP_PPTDEV_MMIO: {
struct vm_pptdev_mmio pptmmio;
@@ -590,9 +595,20 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
error = EFAULT;
break;
}
- return (ENOTTY);
+ error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
+ pptmmio.len, pptmmio.hpa);
+ break;
+ }
+ case VM_BIND_PPTDEV: {
+ struct vm_pptdev pptdev;
+
+ if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
+ error = EFAULT;
+ break;
+ }
+ error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd);
+ break;
}
- case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV: {
struct vm_pptdev pptdev;
@@ -600,12 +616,27 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
error = EFAULT;
break;
}
- return (ENOTTY);
+ error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd);
+ break;
}
+ case VM_GET_PPTDEV_LIMITS: {
+ struct vm_pptdev_limits pptlimits;
+ if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) {
+ error = EFAULT;
+ break;
+ }
+ error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd,
+ &pptlimits.msi_limit, &pptlimits.msix_limit);
+ if (error == 0 &&
+ ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) {
+ error = EFAULT;
+ break;
+ }
+ break;
+ }
case VM_INJECT_EXCEPTION: {
struct vm_exception vmexc;
-
if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) {
error = EFAULT;
break;
@@ -2091,8 +2122,16 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
}
- /* Ensure that all resources have been cleaned up */
- mutex_enter(&vmmdev_mtx);
+ /*
+ * Ensure that all resources have been cleaned up.
+ *
+ * To prevent a deadlock with iommu_cleanup() we'll fail the detach if
+ * vmmdev_mtx is already held. We can't wait for vmmdev_mtx with our
+ * devinfo locked as iommu_cleanup() tries to recursively lock each
+ * devinfo, including our own, while holding vmmdev_mtx.
+ */
+ if (mutex_tryenter(&vmmdev_mtx) == 0)
+ return (DDI_FAILURE);
mutex_enter(&vmm_mtx);
if (!list_is_empty(&vmm_list) || !list_is_empty(&vmm_destroy_list)) {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index e2522858dd..2401774ab7 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -55,6 +55,8 @@
#include <sys/modhash.h>
#include <sys/hma.h>
+#include <sys/x86_archext.h>
+
#include <machine/cpufunc.h>
#include <machine/fpu.h>
#include <machine/md_var.h>
@@ -91,6 +93,19 @@ u_char const bin2bcd_data[] = {
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
};
+/*
+ * Run invalidate_cache() on every CPU: the other CPUs are reached with a
+ * cross-call and the current CPU calls it directly.  Preemption is disabled
+ * for the duration so the CPU excluded from the cross-call set is the one
+ * that performs the local call.
+ */
+void
+pmap_invalidate_cache(void)
+{
+ cpuset_t cpuset;
+
+ kpreempt_disable();
+ /* Target set: every CPU except the one we are running on. */
+ cpuset_all_but(&cpuset, CPU->cpu_id);
+ xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL,
+ CPUSET2BV(cpuset), (xc_func_t)invalidate_cache);
+ invalidate_cache();
+ kpreempt_enable();
+}
+
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
diff --git a/usr/src/uts/i86pc/ppt/Makefile b/usr/src/uts/i86pc/ppt/Makefile
new file mode 100644
index 0000000000..f231dfddf6
--- /dev/null
+++ b/usr/src/uts/i86pc/ppt/Makefile
@@ -0,0 +1,86 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+# Copyright 2019 Joyent, Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ppt
+OBJECTS = $(PPT_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PPT_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/i86pc/io/vmm/io
+MAPFILE = $(UTSBASE)/i86pc/io/vmm/io/ppt.mapfile
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Overrides and additions
+#
+ALL_BUILDS = $(ALL_BUILDSONLY64)
+DEF_BUILDS = $(DEF_BUILDSONLY64)
+PRE_INC_PATH = -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64 \
+ -I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64
+INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io
+AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
+
+#
+# Record explicit dependencies on drv/vmm and misc/pcie (ld -N) and link
+# with the ppt mapfile named by MAPFILE above.
+#
+LDFLAGS += -dy -N drv/vmm -N misc/pcie
+LDFLAGS += -M $(MAPFILE)
+
+$(OBJS_DIR)/ppt.o := CERRWARN += -_gcc=-Wno-unused-variable
+
+# needs work: the smatch all_func_returns check is disabled for this module
+SMOFF += all_func_returns
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/sys/ppt_dev.h b/usr/src/uts/i86pc/sys/ppt_dev.h
new file mode 100644
index 0000000000..e25f941f14
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/ppt_dev.h
@@ -0,0 +1,56 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc
+ */
+
+#ifndef _PPT_DEV_H
+#define _PPT_DEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Command codes for the ppt device (presumably ioctl commands -- confirm
+ * against the driver). Each code is the PPT_IOC prefix, built from the
+ * characters 'P' (shifted left 16) and 'T' (shifted left 8), ORed with a
+ * command number in the low byte.
+ */
+#define PPT_IOC (('P' << 16)|('T' << 8))
+
+#define PPT_CFG_READ (PPT_IOC | 0x01)
+#define PPT_CFG_WRITE (PPT_IOC | 0x02)
+#define PPT_BAR_QUERY (PPT_IOC | 0x03)
+#define PPT_BAR_READ (PPT_IOC | 0x04)
+#define PPT_BAR_WRITE (PPT_IOC | 0x05)
+
+#define PPT_MAXNAMELEN 32
+
+/*
+ * NOTE(review): presumably the argument for PPT_CFG_READ and PPT_CFG_WRITE,
+ * with pci_off the offset into PCI config space, pci_width the access size
+ * and pci_data the value read or written -- confirm against the ppt driver.
+ */
+struct ppt_cfg_io {
+ uint64_t pci_off;
+ uint32_t pci_width;
+ uint32_t pci_data;
+};
+/*
+ * NOTE(review): presumably the argument for PPT_BAR_READ and PPT_BAR_WRITE,
+ * with pbi_bar selecting the BAR, pbi_off the offset within it, pbi_width the
+ * access size and pbi_data the value read or written -- confirm against the
+ * ppt driver.
+ */
+struct ppt_bar_io {
+ uint32_t pbi_bar;
+ uint32_t pbi_off;
+ uint32_t pbi_width;
+ uint32_t pbi_data;
+};
+
+/*
+ * NOTE(review): presumably the argument for PPT_BAR_QUERY, where the caller
+ * supplies pbq_baridx and the driver reports the BAR's type, base and size
+ * in the remaining fields -- confirm against the ppt driver.
+ */
+struct ppt_bar_query {
+ uint32_t pbq_baridx;
+ uint32_t pbq_type;
+ uint64_t pbq_base;
+ uint64_t pbq_size;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PPT_DEV_H */
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 8a35d123c7..ac8f14b042 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -224,8 +224,13 @@ int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+/*
+ * Passthrough device assignment. The __FreeBSD__ prototypes identify the
+ * device by bus/slot/func; otherwise it is identified by a single pptfd
+ * (presumably an open descriptor on the ppt device -- confirm).
+ */
+#ifdef __FreeBSD__
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
+#else
+int vm_assign_pptdev(struct vm *vm, int pptfd);
+int vm_unassign_pptdev(struct vm *vm, int pptfd);
+#endif /* __FreeBSD__ */
/*
* APIs that inspect the guest memory map require only a *single* vcpu to
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 58e581a60d..dd87dcb0a6 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -127,6 +127,7 @@ struct vm_capability {
int allcpus;
};
+#ifdef __FreeBSD__
struct vm_pptdev {
int bus;
int slot;
@@ -163,6 +164,49 @@ struct vm_pptdev_msix {
uint64_t addr;
};
+/*
+ * __FreeBSD__ layout of the MSI/MSI-X limits structure: the device is
+ * identified by bus/slot/func rather than by pptfd.
+ */
+struct vm_pptdev_limits {
+ int bus;
+ int slot;
+ int func;
+ int msi_limit;
+ int msix_limit;
+};
+#else /* __FreeBSD__ */
+/*
+ * Non-FreeBSD layout: the passthrough device is identified solely by pptfd
+ * (presumably an open descriptor on the ppt device -- confirm).
+ */
+struct vm_pptdev {
+ int pptfd;
+};
+
+/*
+ * Non-FreeBSD layout of the passthrough MMIO mapping request.
+ * NOTE(review): gpa, hpa and len presumably carry the same meaning as the
+ * like-named arguments of vm_map_mmio() -- confirm against the ioctl handler.
+ */
+struct vm_pptdev_mmio {
+ int pptfd;
+ vm_paddr_t gpa;
+ vm_paddr_t hpa;
+ size_t len;
+};
+
+/*
+ * Non-FreeBSD layout of the VM_PPTDEV_MSI argument; the device is named by
+ * pptfd.
+ */
+struct vm_pptdev_msi {
+ int vcpu;
+ int pptfd;
+ int numvec; /* 0 means disabled */
+ uint64_t msg;
+ uint64_t addr;
+};
+
+/*
+ * Non-FreeBSD layout of the VM_PPTDEV_MSIX argument; the device is named by
+ * pptfd.
+ * NOTE(review): idx presumably selects the MSI-X table entry that msg, addr
+ * and vector_control describe -- confirm against the ioctl handler.
+ */
+struct vm_pptdev_msix {
+ int vcpu;
+ int pptfd;
+ int idx;
+ uint64_t msg;
+ uint32_t vector_control;
+ uint64_t addr;
+};
+
+/*
+ * Non-FreeBSD layout of the VM_GET_PPTDEV_LIMITS argument. The vmm ioctl
+ * code passes pptfd to ppt_get_limits(), which is handed pointers to
+ * msi_limit and msix_limit; on success the structure is copied back out to
+ * the caller.
+ */
+struct vm_pptdev_limits {
+ int pptfd;
+ int msi_limit;
+ int msix_limit;
+};
+#endif /* __FreeBSD__ */
+
struct vm_nmi {
int cpuid;
};
@@ -307,6 +351,7 @@ enum {
IOCNUM_MAP_PPTDEV_MMIO = 42,
IOCNUM_PPTDEV_MSI = 43,
IOCNUM_PPTDEV_MSIX = 44,
+ IOCNUM_GET_PPTDEV_LIMITS = 45,
/* statistics */
IOCNUM_VM_STATS = 50,
@@ -410,6 +455,8 @@ enum {
_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
#define VM_PPTDEV_MSIX \
_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
+#define VM_GET_PPTDEV_LIMITS \
+ _IOR('v', IOCNUM_GET_PPTDEV_LIMITS, struct vm_pptdev_limits)
#define VM_INJECT_NMI \
_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
#define VM_STATS_IOC \
diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile
index 5b93db987b..d5dc8d7124 100644
--- a/usr/src/uts/i86pc/vmm/Makefile
+++ b/usr/src/uts/i86pc/vmm/Makefile
@@ -104,11 +104,12 @@ CFLAGS += -_gcc=-Wno-format
# enable collection of VMM statistics
CFLAGS += -DVMM_KEEP_STATS
-LDFLAGS += -Nfs/dev
-
$(OBJS_DIR)/vmm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits
$(OBJS_DIR)/svm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits
+$(OBJS_DIR)/vmx.o := CERRWARN += -_gcc=-Wno-unused-variable
+$(OBJS_DIR)/iommu.o := CERRWARN += -_gcc=-Wno-unused-variable
+LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev
LDFLAGS += -z type=kmod -M $(MAPFILE)
OFFSETS_VMX = $(CONF_SRCDIR)/intel/offsets.in
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index 2562f9ec4b..49c0cce31c 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -1293,9 +1293,25 @@ fcnname/**/_info: \
#endif
/*
- * this is just a marker for the area of text that contains stubs
+ * Stubs for ppt module (bhyve PCI passthrough driver)
*/
+#ifndef PPT_MODULE
+/*
+ * NOTE(review): WSTUB is presumably the weak-stub form defined earlier in
+ * this file, i.e. a call made while ppt is not loaded runs the named nomod_*
+ * routine instead of loading the module -- confirm against the macro.
+ * ppt_unassign_all, ppt_assigned_devices and ppt_is_mmio use nomod_zero;
+ * every other entry uses nomod_einval.
+ */
+ MODULE(ppt,drv);
+ WSTUB(ppt, ppt_unassign_all, nomod_zero);
+ WSTUB(ppt, ppt_map_mmio, nomod_einval);
+ WSTUB(ppt, ppt_setup_msi, nomod_einval);
+ WSTUB(ppt, ppt_setup_msix, nomod_einval);
+ WSTUB(ppt, ppt_assigned_devices, nomod_zero);
+ WSTUB(ppt, ppt_is_mmio, nomod_zero);
+ WSTUB(ppt, ppt_assign_device, nomod_einval);
+ WSTUB(ppt, ppt_unassign_device, nomod_einval);
+ WSTUB(ppt, ppt_get_limits, nomod_einval);
+ END_MODULE(ppt);
+#endif
+/*
+ * this is just a marker for the area of text that contains stubs
+ */
ENTRY_NP(stubs_end)
nop