diff options
author | Hans Rosenfeld <hans.rosenfeld@joyent.com> | 2018-07-16 09:03:51 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2020-05-22 14:27:14 +0000 |
commit | eb9a1df2aeb866bf1de4494433b6d7e5fa07b3ae (patch) | |
tree | f9cc894879d944a84aa3e16bfabc9e16a4fcaf6b /usr/src | |
parent | 3c5f2a9de9c6554ce899ad4ebf7978ea7293994a (diff) | |
download | illumos-joyent-eb9a1df2aeb866bf1de4494433b6d7e5fa07b3ae.tar.gz |
12680 want PCI pass-thru in bhyve
Portions contributed by: Patrick Mooney <patrick.mooney@joyent.com>
Portions contributed by: John Levon <john.levon@joyent.com>
Portions contributed by: Andy Fiddaman <omnios@citrus-it.co.uk>
Reviewed by: Patrick Mooney <pmooney@oxide.computer>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src')
49 files changed, 3960 insertions, 448 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index f20274bd35..0f2cc306aa 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -323,6 +323,7 @@ COMMON_SUBDIRS= \ ppgsz \ pg \ plockstat \ + pptadm \ pr \ prctl \ print \ diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index d2c69e795c..3782914cd5 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -40,6 +40,8 @@ __FBSDID("$FreeBSD$"); #include <sys/pciio.h> #include <sys/ioctl.h> +#include <sys/pci.h> + #include <dev/io/iodev.h> #include <dev/pci/pcireg.h> @@ -59,30 +61,15 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <vmmapi.h> +#include <sys/ppt_dev.h> #include "pci_emul.h" #include "mem.h" -#ifndef _PATH_DEVPCI -#define _PATH_DEVPCI "/dev/pci" -#endif - -#ifndef _PATH_DEVIO -#define _PATH_DEVIO "/dev/io" -#endif - -#ifndef _PATH_MEM -#define _PATH_MEM "/dev/mem" -#endif - #define LEGACY_SUPPORT 1 #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) #define MSIX_CAPLEN 12 -static int pcifd = -1; -static int iofd = -1; -static int memfd = -1; - struct passthru_softc { struct pci_devinst *psc_pi; struct pcibar psc_bar[PCI_BARMAX + 1]; @@ -94,14 +81,16 @@ struct passthru_softc { struct { int capoff; } psc_msix; - struct pcisel psc_sel; + int pptfd; + int msi_limit; + int msix_limit; }; static int msi_caplen(int msgctrl) { int len; - + len = 10; /* minimum length of msi capability */ if (msgctrl & PCIM_MSICTRL_64BIT) @@ -120,33 +109,76 @@ msi_caplen(int msgctrl) } static uint32_t -read_config(const struct pcisel *sel, long reg, int width) +read_config(const struct passthru_softc *sc, long reg, int width) { - struct pci_io pi; + struct ppt_cfg_io pi; - bzero(&pi, sizeof(pi)); - pi.pi_sel = *sel; - pi.pi_reg = reg; - pi.pi_width = width; + pi.pci_off = reg; + pi.pci_width = width; - if (ioctl(pcifd, PCIOCREAD, &pi) < 0) - return (0); /* XXX */ - else - return (pi.pi_data); + if (ioctl(sc->pptfd, 
PPT_CFG_READ, &pi) != 0) { + return (0); + } + return (pi.pci_data); } static void -write_config(const struct pcisel *sel, long reg, int width, uint32_t data) +write_config(const struct passthru_softc *sc, long reg, int width, + uint32_t data) { - struct pci_io pi; + struct ppt_cfg_io pi; - bzero(&pi, sizeof(pi)); - pi.pi_sel = *sel; - pi.pi_reg = reg; - pi.pi_width = width; - pi.pi_data = data; + pi.pci_off = reg; + pi.pci_width = width; + pi.pci_data = data; - (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ + (void) ioctl(sc->pptfd, PPT_CFG_WRITE, &pi); +} + +static int +passthru_get_bar(struct passthru_softc *sc, int bar, enum pcibar_type *type, + uint64_t *base, uint64_t *size) +{ + struct ppt_bar_query pb; + + pb.pbq_baridx = bar; + + if (ioctl(sc->pptfd, PPT_BAR_QUERY, &pb) != 0) { + return (-1); + } + + switch (pb.pbq_type) { + case PCI_ADDR_IO: + *type = PCIBAR_IO; + break; + case PCI_ADDR_MEM32: + *type = PCIBAR_MEM32; + break; + case PCI_ADDR_MEM64: + *type = PCIBAR_MEM64; + break; + default: + err(1, "unrecognized BAR type: %u\n", pb.pbq_type); + break; + } + + *base = pb.pbq_base; + *size = pb.pbq_size; + return (0); +} + +static int +passthru_dev_open(const char *path, int *pptfdp) +{ + int pptfd; + + if ((pptfd = open(path, O_RDWR)) < 0) { + return (errno); + } + + /* XXX: verify fd with ioctl? */ + *pptfdp = pptfd; + return (0); } #ifdef LEGACY_SUPPORT @@ -174,55 +206,87 @@ passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) } #endif /* LEGACY_SUPPORT */ +static void +passthru_intr_limit(struct passthru_softc *sc, struct msixcap *msixcap) +{ + struct pci_devinst *pi = sc->psc_pi; + int off; + + /* Reduce the number of MSI vectors if higher than OS limit */ + if ((off = sc->psc_msi.capoff) != 0 && sc->msi_limit != -1) { + int msi_limit, mmc; + + msi_limit = + sc->msi_limit > 16 ? PCIM_MSICTRL_MMC_32 : + sc->msi_limit > 8 ? PCIM_MSICTRL_MMC_16 : + sc->msi_limit > 4 ? PCIM_MSICTRL_MMC_8 : + sc->msi_limit > 2 ? 
PCIM_MSICTRL_MMC_4 : + sc->msi_limit > 1 ? PCIM_MSICTRL_MMC_2 : + PCIM_MSICTRL_MMC_1; + mmc = sc->psc_msi.msgctrl & PCIM_MSICTRL_MMC_MASK; + + if (mmc > msi_limit) { + sc->psc_msi.msgctrl &= ~PCIM_MSICTRL_MMC_MASK; + sc->psc_msi.msgctrl |= msi_limit; + pci_set_cfgdata16(pi, off + 2, sc->psc_msi.msgctrl); + } + } + + /* Reduce the number of MSI-X vectors if higher than OS limit */ + if ((off = sc->psc_msix.capoff) != 0 && sc->msix_limit != -1) { + if (MSIX_TABLE_COUNT(msixcap->msgctrl) > sc->msix_limit) { + msixcap->msgctrl &= ~PCIM_MSIXCTRL_TABLE_SIZE; + msixcap->msgctrl |= sc->msix_limit - 1; + pci_set_cfgdata16(pi, off + 2, msixcap->msgctrl); + } + } +} + static int cfginitmsi(struct passthru_softc *sc) { int i, ptr, capptr, cap, sts, caplen, table_size; uint32_t u32; - struct pcisel sel; - struct pci_devinst *pi; + struct pci_devinst *pi = sc->psc_pi; struct msixcap msixcap; uint32_t *msixcap_ptr; - pi = sc->psc_pi; - sel = sc->psc_sel; - /* * Parse the capabilities and cache the location of the MSI * and MSI-X capabilities. 
*/ - sts = read_config(&sel, PCIR_STATUS, 2); + sts = read_config(sc, PCIR_STATUS, 2); if (sts & PCIM_STATUS_CAPPRESENT) { - ptr = read_config(&sel, PCIR_CAP_PTR, 1); + ptr = read_config(sc, PCIR_CAP_PTR, 1); while (ptr != 0 && ptr != 0xff) { - cap = read_config(&sel, ptr + PCICAP_ID, 1); + cap = read_config(sc, ptr + PCICAP_ID, 1); if (cap == PCIY_MSI) { /* * Copy the MSI capability into the config * space of the emulated pci device */ sc->psc_msi.capoff = ptr; - sc->psc_msi.msgctrl = read_config(&sel, - ptr + 2, 2); + sc->psc_msi.msgctrl = read_config(sc, + ptr + 2, 2); sc->psc_msi.emulated = 0; caplen = msi_caplen(sc->psc_msi.msgctrl); capptr = ptr; while (caplen > 0) { - u32 = read_config(&sel, capptr, 4); + u32 = read_config(sc, capptr, 4); pci_set_cfgdata32(pi, capptr, u32); caplen -= 4; capptr += 4; } } else if (cap == PCIY_MSIX) { /* - * Copy the MSI-X capability + * Copy the MSI-X capability */ sc->psc_msix.capoff = ptr; caplen = 12; msixcap_ptr = (uint32_t*) &msixcap; capptr = ptr; while (caplen > 0) { - u32 = read_config(&sel, capptr, 4); + u32 = read_config(sc, capptr, 4); *msixcap_ptr = u32; pci_set_cfgdata32(pi, capptr, u32); caplen -= 4; @@ -230,10 +294,12 @@ cfginitmsi(struct passthru_softc *sc) msixcap_ptr++; } } - ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); + ptr = read_config(sc, ptr + PCICAP_NEXTPTR, 1); } } + passthru_intr_limit(sc, &msixcap); + if (sc->psc_msix.capoff != 0) { pi->pi_msix.pba_bar = msixcap.pba_info & PCIM_MSIX_BIR_MASK; @@ -265,7 +331,7 @@ cfginitmsi(struct passthru_softc *sc) */ if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { int origptr, msiptr; - origptr = read_config(&sel, PCIR_CAP_PTR, 1); + origptr = read_config(sc, PCIR_CAP_PTR, 1); msiptr = passthru_add_msicap(pi, 1, origptr); sc->psc_msi.capoff = msiptr; sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); @@ -275,14 +341,15 @@ cfginitmsi(struct passthru_softc *sc) #endif /* Make sure one of the capabilities is present */ - if 
(sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) + if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) { return (-1); - else + } else { return (0); + } } static uint64_t -msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) +passthru_msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) { struct pci_devinst *pi; struct msix_table_entry *entry; @@ -360,8 +427,8 @@ msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) } static void -msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, - uint64_t offset, int size, uint64_t data) +passthru_msix_table_write(struct vmctx *ctx, int vcpu, + struct passthru_softc *sc, uint64_t offset, int size, uint64_t data) { struct pci_devinst *pi; struct msix_table_entry *entry; @@ -426,10 +493,9 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, /* If the entry is masked, don't set it up */ if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { - (void)vm_setup_pptdev_msix(ctx, vcpu, - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, index, entry->addr, - entry->msg_data, entry->vector_control); + (void) vm_setup_pptdev_msix(ctx, vcpu, sc->pptfd, + index, entry->addr, entry->msg_data, + entry->vector_control); } } } @@ -437,7 +503,6 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, static int init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) { - int b, s, f; int error, idx; size_t len, remaining; uint32_t table_size, table_offset; @@ -447,14 +512,10 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); - b = sc->psc_sel.pc_bus; - s = sc->psc_sel.pc_dev; - f = sc->psc_sel.pc_func; - - /* + /* * If the MSI-X table BAR maps memory intended for - * other uses, it is at least assured that the table - * either resides in its own 
page within the region, + * other uses, it is at least assured that the table + * either resides in its own page within the region, * or it resides in a page shared with only the PBA. */ table_offset = rounddown2(pi->pi_msix.table_offset, 4096); @@ -490,12 +551,11 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) pi->pi_msix.pba_page_offset = table_offset + table_size - 4096; pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ | - PROT_WRITE, MAP_SHARED, memfd, start + + PROT_WRITE, MAP_SHARED, sc->pptfd, pi->pi_msix.pba_page_offset); if (pi->pi_msix.pba_page == MAP_FAILED) { - warn( - "Failed to map PBA page for MSI-X on %d/%d/%d", - b, s, f); + warn("Failed to map PBA page for MSI-X on %d", + sc->pptfd); return (-1); } } @@ -504,7 +564,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) /* Map everything before the MSI-X table */ if (table_offset > 0) { len = table_offset; - error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); + error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base); if (error) return (error); @@ -521,7 +581,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) /* Map everything beyond the end of the MSI-X table */ if (remaining > 0) { len = remaining; - error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); + error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base); if (error) return (error); } @@ -532,47 +592,26 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) static int cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) { - int i, error; - struct pci_devinst *pi; - struct pci_bar_io bar; - enum pcibar_type bartype; - uint64_t base, size; - - pi = sc->psc_pi; + struct pci_devinst *pi = sc->psc_pi; + uint_t i; /* * Initialize BAR registers */ for (i = 0; i <= PCI_BARMAX; i++) { - bzero(&bar, sizeof(bar)); - bar.pbi_sel = sc->psc_sel; - bar.pbi_reg = PCIR_BAR(i); + enum pcibar_type bartype; + uint64_t 
base, size; + int error; - if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) + if (passthru_get_bar(sc, i, &bartype, &base, &size) != 0) { continue; - - if (PCI_BAR_IO(bar.pbi_base)) { - bartype = PCIBAR_IO; - base = bar.pbi_base & PCIM_BAR_IO_BASE; - } else { - switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { - case PCIM_BAR_MEM_64: - bartype = PCIBAR_MEM64; - break; - default: - bartype = PCIBAR_MEM32; - break; - } - base = bar.pbi_base & PCIM_BAR_MEM_BASE; } - size = bar.pbi_length; if (bartype != PCIBAR_IO) { if (((base | size) & PAGE_MASK) != 0) { - warnx("passthru device %d/%d/%d BAR %d: " + warnx("passthru device %d BAR %d: " "base %#lx or size %#lx not page aligned\n", - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, i, base, size); + sc->pptfd, i, base, size); return (-1); } } @@ -590,13 +629,12 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) /* The MSI-X table needs special handling */ if (i == pci_msix_table_bar(pi)) { error = init_msix_table(ctx, sc, base); - if (error) + if (error) return (-1); } else if (bartype != PCIBAR_IO) { /* Map the physical BAR in the guest MMIO space */ - error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, - pi->pi_bar[i].addr, pi->pi_bar[i].size, base); + error = vm_map_pptdev_mmio(ctx, sc->pptfd, + pi->pi_bar[i].addr, pi->pi_bar[i].size, base); if (error) return (-1); } @@ -614,114 +652,43 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) } static int -cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) +cfginit(struct vmctx *ctx, struct passthru_softc *sc) { - int error; - struct passthru_softc *sc; - - error = 1; - sc = pi->pi_arg; - - bzero(&sc->psc_sel, sizeof(struct pcisel)); - sc->psc_sel.pc_bus = bus; - sc->psc_sel.pc_dev = slot; - sc->psc_sel.pc_func = func; - if (cfginitmsi(sc) != 0) { - warnx("failed to initialize MSI for PCI %d/%d/%d", - bus, slot, func); - goto done; + warnx("failed to initialize MSI for PCI %d", 
sc->pptfd); + return (-1); } if (cfginitbar(ctx, sc) != 0) { - warnx("failed to initialize BARs for PCI %d/%d/%d", - bus, slot, func); - goto done; + warnx("failed to initialize BARs for PCI %d", sc->pptfd); + return (-1); } - error = 0; /* success */ -done: - return (error); + return (0); } static int passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { - int bus, slot, func, error, memflags; + int error, memflags, pptfd; struct passthru_softc *sc; -#ifndef WITHOUT_CAPSICUM - cap_rights_t rights; - cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR }; - cap_ioctl_t io_ioctls[] = { IODEV_PIO }; -#endif sc = NULL; error = 1; -#ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE); -#endif - memflags = vm_get_memflags(ctx); if (!(memflags & VM_MEM_F_WIRED)) { warnx("passthru requires guest memory to be wired"); goto done; } - if (pcifd < 0) { - pcifd = open(_PATH_DEVPCI, O_RDWR, 0); - if (pcifd < 0) { - warn("failed to open %s", _PATH_DEVPCI); - goto done; - } - } - -#ifndef WITHOUT_CAPSICUM - if (cap_rights_limit(pcifd, &rights) == -1 && errno != ENOSYS) - errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1 && errno != ENOSYS) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - if (iofd < 0) { - iofd = open(_PATH_DEVIO, O_RDWR, 0); - if (iofd < 0) { - warn("failed to open %s", _PATH_DEVIO); - goto done; - } - } - -#ifndef WITHOUT_CAPSICUM - if (cap_rights_limit(iofd, &rights) == -1 && errno != ENOSYS) - errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(iofd, io_ioctls, nitems(io_ioctls)) == -1 && errno != ENOSYS) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - if (memfd < 0) { - memfd = open(_PATH_MEM, O_RDWR, 0); - if (memfd < 0) { - warn("failed to open %s", _PATH_MEM); - goto done; - } - } - -#ifndef WITHOUT_CAPSICUM - cap_rights_clear(&rights, CAP_IOCTL); - 
cap_rights_set(&rights, CAP_MMAP_RW); - if (cap_rights_limit(memfd, &rights) == -1 && errno != ENOSYS) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - if (opts == NULL || - sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) { + if (opts == NULL || passthru_dev_open(opts, &pptfd) != 0) { warnx("invalid passthru options"); goto done; } - if (vm_assign_pptdev(ctx, bus, slot, func) != 0) { - warnx("PCI device at %d/%d/%d is not using the ppt(4) driver", - bus, slot, func); + if (vm_assign_pptdev(ctx, pptfd) != 0) { + warnx("PCI device at %d is not using the ppt driver", pptfd); goto done; } @@ -729,16 +696,21 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pi->pi_arg = sc; sc->psc_pi = pi; + sc->pptfd = pptfd; + + if ((error = vm_get_pptdev_limits(ctx, pptfd, &sc->msi_limit, + &sc->msix_limit)) != 0) + goto done; /* initialize config space */ - if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) + if ((error = cfginit(ctx, sc)) != 0) goto done; - + error = 0; /* success */ done: if (error) { free(sc); - vm_unassign_pptdev(ctx, bus, slot, func); + vm_unassign_pptdev(ctx, pptfd); } return (error); } @@ -768,7 +740,7 @@ msicap_access(struct passthru_softc *sc, int coff) return (0); } -static int +static int msixcap_access(struct passthru_softc *sc, int coff) { if (sc->psc_msix.capoff == 0) @@ -780,7 +752,7 @@ msixcap_access(struct passthru_softc *sc, int coff) static int passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int coff, int bytes, uint32_t *rv) + int coff, int bytes, uint32_t *rv) { struct passthru_softc *sc; @@ -792,6 +764,13 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (bar_access(coff) || msicap_access(sc, coff)) return (-1); + /* + * MSI-X is also emulated since a limit on interrupts may be imposed by + * the OS, altering the perceived register state. 
+ */ + if (msixcap_access(sc, coff)) + return (-1); + #ifdef LEGACY_SUPPORT /* * Emulate PCIR_CAP_PTR if this device does not support MSI capability @@ -804,14 +783,14 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, #endif /* Everything else just read from the device's config space */ - *rv = read_config(&sc->psc_sel, coff, bytes); + *rv = read_config(sc, coff, bytes); return (0); } static int passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int coff, int bytes, uint32_t val) + int coff, int bytes, uint32_t val) { int error, msix_table_entries, i; struct passthru_softc *sc; @@ -830,10 +809,8 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (msicap_access(sc, coff)) { msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); - error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, - pi->pi_msi.addr, pi->pi_msi.msg_data, - pi->pi_msi.maxmsgnum); + error = vm_setup_pptdev_msi(ctx, vcpu, sc->pptfd, + pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); if (error != 0) err(1, "vm_setup_pptdev_msi"); return (0); @@ -845,12 +822,11 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, msix_table_entries = pi->pi_msix.table_count; for (i = 0; i < msix_table_entries; i++) { error = vm_setup_pptdev_msix(ctx, vcpu, - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, i, + sc->pptfd, i, pi->pi_msix.table[i].addr, pi->pi_msix.table[i].msg_data, pi->pi_msix.table[i].vector_control); - + if (error) err(1, "vm_setup_pptdev_msix"); } @@ -870,57 +846,54 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, } #endif - write_config(&sc->psc_sel, coff, bytes, val); + write_config(sc, coff, bytes, val); return (0); } static void passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size, uint64_t value) + uint64_t offset, int size, uint64_t value) { - 
struct passthru_softc *sc; - struct iodev_pio_req pio; - - sc = pi->pi_arg; + struct passthru_softc *sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi)) { - msix_table_write(ctx, vcpu, sc, offset, size, value); + passthru_msix_table_write(ctx, vcpu, sc, offset, size, value); } else { + struct ppt_bar_io pbi; + assert(pi->pi_bar[baridx].type == PCIBAR_IO); - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_WRITE; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = value; - - (void)ioctl(iofd, IODEV_PIO, &pio); + + pbi.pbi_bar = baridx; + pbi.pbi_width = size; + pbi.pbi_off = offset; + pbi.pbi_data = value; + (void) ioctl(sc->pptfd, PPT_BAR_WRITE, &pbi); } } static uint64_t passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size) + uint64_t offset, int size) { - struct passthru_softc *sc; - struct iodev_pio_req pio; + struct passthru_softc *sc = pi->pi_arg; uint64_t val; - sc = pi->pi_arg; - if (baridx == pci_msix_table_bar(pi)) { - val = msix_table_read(sc, offset, size); + val = passthru_msix_table_read(sc, offset, size); } else { - assert(pi->pi_bar[baridx].type == PCIBAR_IO); - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_READ; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = 0; + struct ppt_bar_io pbi; - (void)ioctl(iofd, IODEV_PIO, &pio); + assert(pi->pi_bar[baridx].type == PCIBAR_IO); - val = pio.val; + pbi.pbi_bar = baridx; + pbi.pbi_width = size; + pbi.pbi_off = offset; + if (ioctl(sc->pptfd, PPT_BAR_READ, &pbi) == 0) { + val = pbi.pbi_data; + } else { + val = 0; + } } return (val); diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index b8bdf524a9..bbe36917fd 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -183,7 +183,9 @@ usage(bool cpu_intel) " [--get-ldtr]\n" " [--set-x2apic-state=<state>]\n" " [--get-x2apic-state]\n" +#ifdef 
__FreeBSD__ " [--unassign-pptdev=<bus/slot/func>]\n" +#endif " [--set-mem=<memory in units of MB>]\n" " [--get-lowmem]\n" " [--get-highmem]\n" @@ -302,7 +304,9 @@ static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; +#ifdef __FreeBSD__ static int unassign_pptdev, bus, slot, func; +#endif static int run; static int get_cpu_topology; #ifndef __FreeBSD__ @@ -1875,11 +1879,13 @@ main(int argc, char *argv[]) case CAPNAME: capname = optarg; break; +#ifdef __FreeBSD__ case UNASSIGN_PPTDEV: unassign_pptdev = 1; if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) usage(cpu_intel); break; +#endif case ASSERT_LAPIC_LVT: assert_lapic_lvt = atoi(optarg); break; @@ -2040,8 +2046,10 @@ main(int argc, char *argv[]) if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); +#ifdef __FreeBSD__ if (!error && unassign_pptdev) error = vm_unassign_pptdev(ctx, bus, slot, func); +#endif /* __FreeBSD__ */ if (!error && set_exception_bitmap) { if (cpu_intel) diff --git a/usr/src/cmd/devfsadm/i386/misc_link_i386.c b/usr/src/cmd/devfsadm/i386/misc_link_i386.c index 0f8e64551d..eb5f789c37 100644 --- a/usr/src/cmd/devfsadm/i386/misc_link_i386.c +++ b/usr/src/cmd/devfsadm/i386/misc_link_i386.c @@ -46,6 +46,7 @@ static int kdmouse(di_minor_t minor, di_node_t node); static int ipmi(di_minor_t minor, di_node_t node); static int mc_node(di_minor_t minor, di_node_t node); static int vmmctl(di_minor_t minor, di_node_t node); +static int ppt(di_minor_t minor, di_node_t node); static devfsadm_create_t misc_cbt[] = { { "vt00", "ddi_display", NULL, @@ -90,6 +91,9 @@ static devfsadm_create_t misc_cbt[] = { }, { "pseudo", "ddi_pseudo", "vmm", TYPE_EXACT | DRV_EXACT, ILEVEL_0, vmmctl, + }, + { "pseudo", "ddi_pseudo", "ppt", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, ppt, } }; @@ -122,6 +126,9 @@ static 
devfsadm_remove_t misc_remove_cbt[] = { }, { "pseudo", "^vmmctl$", RM_ALWAYS | RM_PRE | RM_HOT, ILEVEL_0, devfsadm_rm_all + }, + { "pseudo", "^ppt$", RM_ALWAYS | RM_PRE | RM_HOT, + ILEVEL_0, devfsadm_rm_all } }; @@ -369,3 +376,15 @@ vmmctl(di_minor_t minor, di_node_t node) (void) devfsadm_mklink("vmmctl", node, minor, 0); return (DEVFSADM_CONTINUE); } + +static int +ppt(di_minor_t minor, di_node_t node) +{ + char linkpath[PATH_MAX]; + + (void) snprintf(linkpath, sizeof (linkpath), "ppt%d", + di_instance(node)); + + (void) devfsadm_mklink(linkpath, node, minor, 0); + return (DEVFSADM_CONTINUE); +} diff --git a/usr/src/cmd/pptadm/Makefile b/usr/src/cmd/pptadm/Makefile new file mode 100644 index 0000000000..3be558a7a0 --- /dev/null +++ b/usr/src/cmd/pptadm/Makefile @@ -0,0 +1,43 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# Copyright 2018 Joyent, Inc. +# + +PROG = pptadm +OBJS = pptadm.o +SRCS = $(OBJS:%.o=%.c) + +include ../Makefile.cmd +include ../Makefile.ctf + +LDLIBS += -lofmt -lppt -lnvpair + +CSTD = $(CSTD_GNU99) +C99LMODE = -Xc99=%all + +CLEANFILES += $(OBJS) + +.KEEP_STATE: + +all: $(OBJS) $(PROG) + +install: all $(ROOTUSRSBINPROG) + +clean: + -$(RM) $(CLEANFILES) + +lint: lint_SRCS + +%.o: ../%.c + $(COMPILE.c) $< + $(POST_PROCESS_O) + +include ../Makefile.targ diff --git a/usr/src/cmd/pptadm/pptadm.c b/usr/src/cmd/pptadm/pptadm.c new file mode 100644 index 0000000000..c6b9094408 --- /dev/null +++ b/usr/src/cmd/pptadm/pptadm.c @@ -0,0 +1,205 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2018 Joyent, Inc. + */ + +#include <stdlib.h> +#include <stdarg.h> +#include <getopt.h> +#include <string.h> +#include <ofmt.h> +#include <err.h> + +#include <libppt.h> + +typedef enum field { + PPT_DEV, + PPT_VENDOR, + PPT_DEVICE, + PPT_SUBVENDOR, + PPT_SUBDEVICE, + PPT_REV, + PPT_PATH, + PPT_LABEL +} field_t; + +const char *valname[] = { + "dev", + "vendor-id", + "device-id", + "subsystem-vendor-id", + "subsystem-id", + "revision-id", + "path", + "label" +}; + +static ofmt_cb_t print_field; + +static ofmt_field_t fields[] = { +/* name, field width, index, callback */ +{ "DEV", sizeof ("/dev/pptXX"), PPT_DEV, print_field }, +{ "VENDOR", sizeof ("VENDOR"), PPT_VENDOR, print_field }, +{ "DEVICE", sizeof ("DEVICE"), PPT_DEVICE, print_field }, +{ "SUBVENDOR", sizeof ("SUBVENDOR"), PPT_SUBVENDOR, print_field }, +{ "SUBDEVICE", sizeof ("SUBDEVICE"), PPT_SUBDEVICE, print_field }, +{ "REV", sizeof ("REV"), PPT_REV, print_field }, +{ "PATH", 50, PPT_PATH, print_field }, +{ "LABEL", 60, PPT_LABEL, print_field }, +{ NULL, 0, 0, NULL }, +}; + +static void +usage(const char *errmsg) +{ + if (errmsg != NULL) + (void) fprintf(stderr, "pptadm: %s\n", errmsg); + (void) fprintf(errmsg != NULL ? stderr : stdout, + "Usage:\n" + "pptadm list [ -j ]\n" + "pptadm list [-ap] [-o fields]\n"); + exit(errmsg != NULL ? EXIT_FAILURE : EXIT_SUCCESS); +} + +/* PRINTFLIKE1 */ +static void +die(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + verrx(EXIT_FAILURE, fmt, ap); + va_end(ap); +} + +static boolean_t +print_field(ofmt_arg_t *arg, char *buf, uint_t bufsize) +{ + nvlist_t *nvl = arg->ofmt_cbarg; + nvpair_t *nvp = NULL; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *name = nvpair_name(nvp); + char *val = NULL; + + (void) nvpair_value_string(nvp, &val); + + if (strcmp(name, valname[arg->ofmt_id]) != 0) + continue; + + (void) snprintf(buf, bufsize, "%s", val); + return (B_TRUE); + } + + (void) snprintf(buf, bufsize, "--"); + return (B_TRUE); +} + +static int +list(int argc, char *argv[]) +{ + const char *fields_str = NULL; + boolean_t parsable = B_FALSE; + boolean_t json = B_FALSE; + boolean_t all = B_FALSE; + uint_t ofmtflags = 0; + ofmt_status_t oferr; + ofmt_handle_t ofmt; + int opt; + + while ((opt = getopt(argc, argv, "ahjo:p")) != -1) { + switch (opt) { + case 'a': + all = B_TRUE; + break; + case 'h': + usage(NULL); + break; + case 'j': + json = B_TRUE; + break; + case 'o': + fields_str = optarg; + break; + case 'p': + ofmtflags |= OFMT_PARSABLE; + parsable = B_TRUE; + break; + default: + usage("unrecognized option"); + break; + } + } + + if (optind == (argc - 1)) + usage("unused arguments"); + + if (json && (parsable || fields_str != NULL)) + usage("-j option cannot be used with -p or -o options"); + + if (fields_str == NULL) { + if (parsable) + usage("-o must be provided when using -p option"); + fields_str = "dev,vendor,device,path"; + } + + oferr = ofmt_open(fields_str, fields, ofmtflags, 0, &ofmt); + + ofmt_check(oferr, parsable, ofmt, die, warn); + + nvlist_t *nvl = all ? 
ppt_list() : ppt_list_assigned(); + nvpair_t *nvp = NULL; + + if (json) { + if (printf("{\n\t\"devices\": [\n") < 0) + err(EXIT_FAILURE, "failed to write JSON"); + } + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *props; + + (void) nvpair_value_nvlist(nvp, &props); + + if (json) { + if (printf("\t\t") < 0) + err(EXIT_FAILURE, "failed to write JSON"); + if (nvlist_print_json(stdout, props) < 0) + err(EXIT_FAILURE, "failed to write JSON"); + if (nvlist_next_nvpair(nvl, nvp) != NULL) + (void) printf(",\n"); + } else { + ofmt_print(ofmt, props); + } + } + + if (json) { + if (printf("\n\t]\n}\n") < 0) + err(EXIT_FAILURE, "failed to write JSON"); + } + + nvlist_free(nvl); + ofmt_close(ofmt); + return (EXIT_SUCCESS); +} + +int +main(int argc, char *argv[]) +{ + if (argc == 1) + return (list(argc - 1, argv)); + + if (strcmp(argv[1], "list") == 0) { + return (list(argc - 1, &argv[1])); + } else { + usage("unknown sub-command"); + } + + return (EXIT_SUCCESS); +} diff --git a/usr/src/compat/freebsd/amd64/machine/cpu.h b/usr/src/compat/freebsd/amd64/machine/cpu.h new file mode 100644 index 0000000000..40253af108 --- /dev/null +++ b/usr/src/compat/freebsd/amd64/machine/cpu.h @@ -0,0 +1,23 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2017 Joyent, Inc. 
+ */ + +#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H +#define _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H + +#include <sys/cpu.h> + +#define cpu_spinwait() SMT_PAUSE() + +#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_CPU_H */ diff --git a/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h b/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h new file mode 100644 index 0000000000..2668f98ab3 --- /dev/null +++ b/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h @@ -0,0 +1,21 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2017 Joyent, Inc. + */ + +#ifndef _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H +#define _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H + +#include <sys/acpi/acpi.h> + +#endif /* _COMPAT_FREEBSD_CONTRIB_DEV_ACPICA_INCLUDE_ACPI_H */ diff --git a/usr/src/compat/freebsd/dev/pci/pcivar.h b/usr/src/compat/freebsd/dev/pci/pcivar.h new file mode 100644 index 0000000000..064d983117 --- /dev/null +++ b/usr/src/compat/freebsd/dev/pci/pcivar.h @@ -0,0 +1,38 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. 
+ */ + +#ifndef _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H +#define _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H + +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/pcie.h> +#include <sys/pcie_impl.h> + +static inline pcie_req_id_t +pci_get_bdf(device_t dev) +{ + pcie_req_id_t bdf; + + VERIFY(pcie_get_bdf_from_dip(dev, &bdf) == DDI_SUCCESS); + + return (bdf); +} + +#define pci_get_rid(dev) (pci_get_bdf(dev)) + +#endif /* _COMPAT_FREEBSD_DEV_PCI_PCIVAR_H */ diff --git a/usr/src/compat/freebsd/sys/bus.h b/usr/src/compat/freebsd/sys/bus.h new file mode 100644 index 0000000000..e3b5e0e69d --- /dev/null +++ b/usr/src/compat/freebsd/sys/bus.h @@ -0,0 +1,21 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2017 Joyent, Inc. 
+ */ + +#ifndef _COMPAT_FREEBSD_SYS_BUS_H +#define _COMPAT_FREEBSD_SYS_BUS_H + +#define device_get_softc(dev) ddi_get_driver_private(dev) + +#endif /* _COMPAT_FREEBSD_SYS_BUS_H */ diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index b64d4c2bc1..c40721fd55 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -186,6 +186,7 @@ SUBDIRS += \ libpkg \ libpool \ libpp \ + libppt \ libproc \ libproject \ libpthread \ @@ -446,6 +447,7 @@ HDRSUBDIRS= \ libpicltree \ libpool \ libpp \ + libppt \ libproc \ libraidcfg \ librcm \ @@ -657,6 +659,7 @@ libpctx: libproc libpkg: libscf libadm libpool: libscf libexacct libpp: libast +libppt: libpcidb libdevinfo libcmdutils libproc: ../cmd/sgs/librtld_db ../cmd/sgs/libelf libctf $(INTEL_BLD)libproc: libsaveargs libproject: libpool libproc libsecdb diff --git a/usr/src/lib/libppt/Makefile b/usr/src/lib/libppt/Makefile new file mode 100644 index 0000000000..21c26d447e --- /dev/null +++ b/usr/src/lib/libppt/Makefile @@ -0,0 +1,44 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. 
+# + +include $(SRC)/lib/Makefile.lib + +SUBDIRS = $(MACH) $(BUILD64) $(MACH64) + +HDRS = libppt.h +HDRDIR = common + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all clean clobber install lint: $(SUBDIRS) + +install_h: $(ROOTHDRS) + +all install: install_h + +check: $(CHECKHDRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libppt/Makefile.com b/usr/src/lib/libppt/Makefile.com new file mode 100644 index 0000000000..7b2ff4885f --- /dev/null +++ b/usr/src/lib/libppt/Makefile.com @@ -0,0 +1,46 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +LIBRARY = libppt.a +VERS = .1 + +OBJECTS = libppt.o + +include $(SRC)/lib/Makefile.lib + +SRCDIR = ../common + +LIBS = $(DYNLIB) $(LINTLIB) +SRCS = $(SRCDIR)/libppt.c + +CSTD= $(CSTD_GNU99) +C99LMODE= -Xc99=%all + +# +# lint doesn't like %4s in sscanf(). 
+# +LINTFLAGS += -erroff=E_BAD_FORMAT_ARG_TYPE2 +LINTFLAGS64 += -erroff=E_BAD_FORMAT_ARG_TYPE2 + +$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) +LDLIBS += -lpcidb -ldevinfo -lcmdutils -lnvpair -lc + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libppt/amd64/Makefile b/usr/src/lib/libppt/amd64/Makefile new file mode 100644 index 0000000000..5a304d7fe7 --- /dev/null +++ b/usr/src/lib/libppt/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libppt/common/libppt.c b/usr/src/lib/libppt/common/libppt.c new file mode 100644 index 0000000000..7e8385da06 --- /dev/null +++ b/usr/src/lib/libppt/common/libppt.c @@ -0,0 +1,506 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + * + * Convenience routines for identifying current or available devices that are + * suitable for PCI passthrough to a bhyve guest. 
+ */ + +#include <libdevinfo.h> +#include <libppt.h> + +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/list.h> +#include <strings.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <pcidb.h> +#include <glob.h> + +typedef struct node_data { + pcidb_hdl_t *nd_db; + list_t nd_matches; + nvlist_t *nd_nvl; + int nd_err; +} node_data_t; + +typedef struct ppt_match { + list_node_t pm_list; + char pm_path[MAXPATHLEN]; + char pm_vendor[5]; + char pm_device[5]; +} ppt_match_t; + +static boolean_t +is_pci(di_node_t di_node) +{ + char *svals; + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node), + "device_type", &svals) != 1) + return (B_FALSE); + + return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0); +} + +static int +populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival) +{ + char val[20]; + int *ivals; + int err; + + if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1) + return (errno); + + (void) snprintf(val, sizeof (val), "%x", ivals[0]); + + err = nvlist_add_string(nvl, name, val); + + if (err == 0 && ival != NULL) + *ival = ivals[0]; + + return (err); +} + +static int +dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen) +{ + pcidb_vendor_t *vend = NULL; + pcidb_device_t *dev = NULL; + + if ((vend = pcidb_lookup_vendor(db, vid)) == NULL) + return (ENOENT); + + if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL) + return (ENOENT); + + (void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend), + pcidb_device_name(dev)); + + return (0); +} + +static nvlist_t * +dev_getinfo(di_node_t di_node, pcidb_hdl_t *db, + const char *dev, const char *path) +{ + char label[MAXPATHLEN]; + nvlist_t *nvl = NULL; + int vid, did; + int err; + + if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0) + goto out; + if ((err = 
nvlist_add_string(nvl, "path", path)) != 0) + goto out; + if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0) + goto out; + if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0) + goto out; + if ((err = populate_int_prop(di_node, nvl, + "subsystem-vendor-id", NULL)) != 0) + goto out; + if ((err = populate_int_prop(di_node, nvl, "subsystem-id", NULL)) != 0) + goto out; + if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0) + goto out; + + err = dev_getlabel(db, vid, did, label, sizeof (label)); + + if (err == 0) { + err = nvlist_add_string(nvl, "label", label); + } else if (err == ENOENT) { + err = 0; + } + +out: + if (err) { + nvlist_free(nvl); + errno = err; + return (NULL); + } + + return (nvl); +} + +/* + * /devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1 + */ +static const char * +fs_to_phys_path(char *fspath) +{ + const char prefix[] = "/devices"; + char *c; + + if ((c = strrchr(fspath, ':')) != NULL && strcmp(c, ":ppt") == 0) + *c = '\0'; + + c = fspath; + + if (strncmp(c, prefix, sizeof (prefix) - 1) == 0) + c += sizeof (prefix) - 1; + + return (c); +} + +/* + * Return an nvlist representing the mappings of /dev/ppt* devices to physical + * devices. Of the form: + * + * /pci@0,0/... { + * dev: "/dev/ppt0" + * path: "/pci@0,0/..." + * vendor-id: "8086" + * device-id: "1528" + * subsystem-vendor-id: "8086" + * subsystem-id: "1528" + * revision-id: "1" + * label: "Intel Corporation ..." + * }, + * /pci@0,0/... + * + * The nvlist should be freed by the caller. 
+ */ +nvlist_t * +ppt_list_assigned(void) +{ + di_node_t di_root = DI_NODE_NIL; + pcidb_hdl_t *db = NULL; + nvlist_t *nvl = NULL; + glob_t gl; + int err; + + bzero(&gl, sizeof (gl)); + + if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) + return (NULL); + + if ((db = pcidb_open(PCIDB_VERSION)) == NULL) { + err = errno; + goto out; + } + + if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR, + NULL, &gl)) != 0) { + err = (err == GLOB_NOMATCH) ? 0 : errno; + goto out; + } + + for (size_t i = 0; i < gl.gl_pathc; i++) { + char fspath[MAXPATHLEN]; + nvlist_t *info_nvl; + di_node_t di_node; + const char *path; + + if (!S_ISLNK(gl.gl_statv[i]->st_mode)) + continue; + + if (realpath(gl.gl_pathv[i], fspath) == NULL) { + err = errno; + goto out; + } + + path = fs_to_phys_path(fspath); + + /* + * path argument is treated as const. + */ + if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) { + err = errno; + goto out; + } + + if (!is_pci(di_node)) + continue; + + info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path); + + if (info_nvl == NULL) { + err = errno; + goto out; + } + + err = nvlist_add_nvlist(nvl, path, info_nvl); + nvlist_free(info_nvl); + + if (err) + goto out; + } + +out: + if (di_root != DI_NODE_NIL) + di_fini(di_root); + + pcidb_close(db); + globfree(&gl); + + if (err) { + nvlist_free(nvl); + errno = err; + return (NULL); + } + + return (nvl); +} + +/* + * Read in our list of potential PPT devices. A boot-module provided file + * explicitly over-rides anything delivered. 
+ */ +static int +get_matches(list_t *listp) +{ + int err = 0; + FILE *fp; + + list_create(listp, sizeof (ppt_match_t), + offsetof(ppt_match_t, pm_list)); + + if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) { + if (errno != ENOENT) + return (errno); + + if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) { + if (errno == ENOENT) + return (0); + return (errno); + } + } + + for (;;) { + char *line = NULL; + ppt_match_t *pm; + size_t cap = 0; + ssize_t read; + + if ((read = getline(&line, &cap, fp)) <= 0) { + free(line); + break; + } + + if (line[read - 1] == '\n') + line[read - 1] = '\0'; + + if ((pm = malloc(sizeof (*pm))) == NULL) { + err = errno; + free(line); + goto out; + } + + bzero(pm, sizeof (*pm)); + + if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor, + &pm->pm_device) == 2 || + sscanf(line, "pci%4s,%4s", &pm->pm_vendor, + &pm->pm_device) == 2 || + sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 || + sscanf(line, "pci%4s", &pm->pm_vendor) == 1) { + list_insert_tail(listp, pm); + } else if (line[0] == '/') { + (void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path)); + list_insert_tail(listp, pm); + } else { + /* + * Ignore any line we don't understand. 
+ */ + free(pm); + } + + free(line); + } + +out: + (void) fclose(fp); + return (err); +} + +static boolean_t +match_ppt(list_t *matches, nvlist_t *nvl) +{ + char *vendor; + char *device; + char *path; + + if (nvlist_lookup_string(nvl, "path", &path) != 0 || + nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 || + nvlist_lookup_string(nvl, "device-id", &device) != 0) + return (B_FALSE); + + for (ppt_match_t *pm = list_head(matches); pm != NULL; + pm = list_next(matches, pm)) { + if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0) + return (B_TRUE); + + if (pm->pm_vendor[0] != '\0' && + strcmp(pm->pm_vendor, vendor) == 0) { + if (pm->pm_device[0] == '\0') + return (B_TRUE); + if (strcmp(pm->pm_device, device) == 0) + return (B_TRUE); + } + } + + return (B_FALSE); +} + +static int +inspect_node(di_node_t di_node, void *arg) +{ + node_data_t *data = arg; + nvlist_t *info_nvl = NULL; + char *devname = NULL; + const char *driver; + char *path = NULL; + + if (!is_pci(di_node)) + return (DI_WALK_CONTINUE); + + driver = di_driver_name(di_node); + + if (driver != NULL && strcmp(driver, "ppt") == 0) { + if (asprintf(&devname, "/dev/ppt%d", + di_instance(di_node)) < 0) { + data->nd_err = errno; + goto out; + } + } + + if ((path = di_devfs_path(di_node)) == NULL) { + data->nd_err = ENOENT; + goto out; + } + + info_nvl = dev_getinfo(di_node, data->nd_db, devname, path); + + if (info_nvl == NULL) + goto out; + + if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl)) + goto out; + + data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl); + +out: + free(path); + free(devname); + nvlist_free(info_nvl); + return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE); +} + +/* + * Like ppt_list_assigned() output, but includes all devices that could be used + * for passthrough, whether assigned or not. 
+ */ +nvlist_t * +ppt_list(void) +{ + node_data_t nd = { NULL, }; + di_node_t di_root; + int err; + + if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL) + return (NULL); + + if ((err = get_matches(&nd.nd_matches)) != 0) + goto out; + + if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) { + err = errno; + goto out; + } + + if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0) + goto out; + + if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST, + &nd, inspect_node)) != 0) + goto out; + + err = nd.nd_err; + +out: + pcidb_close(nd.nd_db); + + for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) { + ppt_match_t *next = list_next(&nd.nd_matches, pm); + free(pm); + pm = next; + } + + if (di_root != DI_NODE_NIL) + di_fini(di_root); + + if (err) { + nvlist_free(nd.nd_nvl); + errno = err; + return (NULL); + } + + return (nd.nd_nvl); +} + +/* + * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX" + * that is bound to it, if any. The "/devices/" prefix is optional. The + * physical path may have the ":ppt" minor name suffix. + * + * Returns ENOENT if no such PPT device exists. 
+ */ +int +ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen) +{ + char fspath[MAXPATHLEN] = ""; + nvpair_t *nvp = NULL; + const char *devpath; + int err = ENOENT; + nvlist_t *nvl; + + if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath)) + return (ENAMETOOLONG); + + devpath = fs_to_phys_path(fspath); + + if ((nvl = ppt_list_assigned()) == NULL) + return (errno); + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *name = nvpair_name(nvp); + char *ppt = NULL; + nvlist_t *props; + + (void) nvpair_value_nvlist(nvp, &props); + + if (strcmp(name, devpath) == 0) { + (void) nvlist_lookup_string(props, "dev", &ppt); + + err = 0; + + if (strlcpy(buf, ppt, buflen) >= buflen) + err = ENAMETOOLONG; + break; + } + } + + nvlist_free(nvl); + return (err); +} diff --git a/usr/src/lib/libppt/common/libppt.h b/usr/src/lib/libppt/common/libppt.h new file mode 100644 index 0000000000..efbf2c7b8b --- /dev/null +++ b/usr/src/lib/libppt/common/libppt.h @@ -0,0 +1,36 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * + * Copyright 2018 Joyent, Inc. 
+ */ + +#ifndef _LIBPPT_H +#define _LIBPPT_H + +#include <sys/types.h> + +#include <libnvpair.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int ppt_devpath_to_dev(const char *, char *, size_t); + +extern nvlist_t *ppt_list_assigned(void); + +extern nvlist_t *ppt_list(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBPPT_H */ diff --git a/usr/src/lib/libppt/common/llib-lppt b/usr/src/lib/libppt/common/llib-lppt new file mode 100644 index 0000000000..dadd992a31 --- /dev/null +++ b/usr/src/lib/libppt/common/llib-lppt @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <libppt.h> diff --git a/usr/src/lib/libppt/common/mapfile-vers b/usr/src/lib/libppt/common/mapfile-vers new file mode 100644 index 0000000000..d9d882874b --- /dev/null +++ b/usr/src/lib/libppt/common/mapfile-vers @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. 
+# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOSprivate { + global: + ppt_devpath_to_dev; + ppt_list_assigned; + ppt_list; + + local: + *; +}; diff --git a/usr/src/lib/libppt/i386/Makefile b/usr/src/lib/libppt/i386/Makefile new file mode 100644 index 0000000000..3f11e556d4 --- /dev/null +++ b/usr/src/lib/libppt/i386/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libppt/sparc/Makefile b/usr/src/lib/libppt/sparc/Makefile new file mode 100644 index 0000000000..3f11e556d4 --- /dev/null +++ b/usr/src/lib/libppt/sparc/Makefile @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. 
+# + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libppt/sparcv9/Makefile b/usr/src/lib/libppt/sparcv9/Makefile new file mode 100644 index 0000000000..5a304d7fe7 --- /dev/null +++ b/usr/src/lib/libppt/sparcv9/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers index a64231ad1c..397ebd7d59 100644 --- a/usr/src/lib/libvmmapi/common/mapfile-vers +++ b/usr/src/lib/libvmmapi/common/mapfile-vers @@ -61,6 +61,7 @@ SYMBOL_VERSION ILLUMOSprivate { vm_get_lowmem_size; vm_get_memflags; vm_get_memseg; + vm_get_pptdev_limits; vm_get_register; vm_get_register_set; vm_get_seg_desc; diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index 0b9b871081..ceac495746 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -995,6 +995,7 @@ vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); } +#ifdef __FreeBSD__ int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { @@ -1056,7 +1057,7 @@ vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); } -int +int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, 
uint32_t vector_control) { @@ -1075,6 +1076,103 @@ vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); } +int +vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, + int *msi_limit, int *msix_limit) +{ + struct vm_pptdev_limits pptlimits; + int error; + + bzero(&pptlimits, sizeof (pptlimits)); + pptlimits.bus = bus; + pptlimits.slot = slot; + pptlimits.func = func; + + error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits); + + *msi_limit = pptlimits.msi_limit; + *msix_limit = pptlimits.msix_limit; + + return (error); +} +#else /* __FreeBSD__ */ +int +vm_assign_pptdev(struct vmctx *ctx, int pptfd) +{ + struct vm_pptdev pptdev; + + pptdev.pptfd = pptfd; + return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); +} + +int +vm_unassign_pptdev(struct vmctx *ctx, int pptfd) +{ + struct vm_pptdev pptdev; + + pptdev.pptfd = pptfd; + return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); +} + +int +vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len, + vm_paddr_t hpa) +{ + struct vm_pptdev_mmio pptmmio; + + pptmmio.pptfd = pptfd; + pptmmio.gpa = gpa; + pptmmio.len = len; + pptmmio.hpa = hpa; + return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); +} + +int +vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr, + uint64_t msg, int numvec) +{ + struct vm_pptdev_msi pptmsi; + + pptmsi.vcpu = vcpu; + pptmsi.pptfd = pptfd; + pptmsi.msg = msg; + pptmsi.addr = addr; + pptmsi.numvec = numvec; + return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); +} + +int +vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx, + uint64_t addr, uint64_t msg, uint32_t vector_control) +{ + struct vm_pptdev_msix pptmsix; + + pptmsix.vcpu = vcpu; + pptmsix.pptfd = pptfd; + pptmsix.idx = idx; + pptmsix.msg = msg; + pptmsix.addr = addr; + pptmsix.vector_control = vector_control; + return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); +} + +int 
+vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit, + int *msix_limit) +{ + struct vm_pptdev_limits pptlimits; + int error; + + bzero(&pptlimits, sizeof (pptlimits)); + pptlimits.pptfd = pptfd; + error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits); + + *msi_limit = pptlimits.msi_limit; + *msix_limit = pptlimits.msix_limit; + return (error); +} +#endif /* __FreeBSD__ */ + uint64_t * vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries) diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index a1507255cb..1b08a9cae5 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -177,6 +177,7 @@ int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval); int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val); +#ifdef __FreeBSD__ int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, @@ -186,6 +187,20 @@ int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, + int *msi_limit, int *msix_limit); +#else /* __FreeBSD__ */ +int vm_assign_pptdev(struct vmctx *ctx, int pptfd); +int vm_unassign_pptdev(struct vmctx *ctx, int pptfd); +int vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, + size_t len, vm_paddr_t hpa); +int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, + uint64_t addr, uint64_t msg, int numvec); +int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, + int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int vm_get_pptdev_limits(struct vmctx *ctx, 
int pptfd, int *msi_limit, + int *msix_limit); +#endif /* __FreeBSD__ */ int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2); int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo); diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile index 9f01ad7606..fc43842db2 100644 --- a/usr/src/man/man1m/Makefile +++ b/usr/src/man/man1m/Makefile @@ -547,6 +547,7 @@ i386_MANFILES= \ acpidump.1m \ acpixtract.1m \ nvmeadm.1m \ + pptadm.1m \ rdmsr.1m sparc_MANFILES= dcs.1m \ diff --git a/usr/src/man/man1m/pptadm.1m b/usr/src/man/man1m/pptadm.1m new file mode 100644 index 0000000000..f13a5e32a4 --- /dev/null +++ b/usr/src/man/man1m/pptadm.1m @@ -0,0 +1,74 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" Copyright 2018 Joyent, Inc. +.\" +.Dd April 10, 2018 +.Dt PPTADM 1M +.Os +.Sh NAME +.Nm pptadm +.Nd PPT administration utility +.Sh SYNOPSIS +.Nm +.Cm list -j +.Op Fl a +.Nm +.Cm list +.Op Fl ap Op Fl o Ar fields +.Sh DESCRIPTION +The +.Nm +utility can enumerate passthrough devices for use by a virtualized guest. +.Sh OPTIONS +The following options to the +.Cm list +command are supported: +.Bl -tag -width Ds +.It Fl a +Show all PPT devices, both available and assigned. +.It Fl j +Output JSON. +.It Fl o +Specify fields to output, or "all". Available fields are +dev,path,vendor,device,subvendor,subdevice,rev,label +.It Fl p +Output in a parsable format; this requires the -o option to be specified. 
+.El +.Sh JSON OUTPUT +The JSON output consists of an array under the key "devices" with the fields: +.Bl -tag -width Ds +.It dev +The PPT /dev path, if assigned and bound. +.It path +The physical /devices path. +.It vendor-id +The PCI vendor ID. +.It device-id +The PCI device ID. +.It subsystem-vendor-id +The PCI subsystem vendor ID. +.It subsystem-id +The PCI subsystem ID. +.It revision-id +The PCI device revision. +.It label +Human-readable description from the PCI database. +.El +.Sh FILES +.Bl -tag -width Ds +.It /etc/ppt_aliases +Contains the bindings of PPT devices in the same format as /etc/driver_aliases +.It /etc/ppt_matches +Identifies devices that PPT could be bound to, either by physical path, or by +PCI ID. +.El +.Sh EXIT STATUS +.Ex -std diff --git a/usr/src/pkg/manifests/system-bhyve.mf b/usr/src/pkg/manifests/system-bhyve.mf index 7fdeb81254..002bef64cc 100644 --- a/usr/src/pkg/manifests/system-bhyve.mf +++ b/usr/src/pkg/manifests/system-bhyve.mf @@ -35,14 +35,22 @@ dir path=usr group=sys dir path=usr/kernel/drv group=sys dir path=usr/kernel/drv/$(ARCH64) group=sys dir path=usr/sbin +dir path=usr/share +dir path=usr/share/man +dir path=usr/share/man/man1m +driver name=ppt driver name=viona driver name=vmm +file path=usr/kernel/drv/$(ARCH64)/ppt file path=usr/kernel/drv/$(ARCH64)/viona file path=usr/kernel/drv/$(ARCH64)/vmm +file path=usr/kernel/drv/ppt.conf file path=usr/kernel/drv/viona.conf file path=usr/kernel/drv/vmm.conf file path=usr/sbin/bhyve mode=0555 file path=usr/sbin/bhyvectl mode=0555 +file path=usr/sbin/pptadm mode=0555 +file path=usr/share/man/man1m/pptadm.1m license lic_CDDL license=lic_CDDL depend fmri=developer/acpi type=require depend fmri=system/bhyve/firmware type=require diff --git a/usr/src/pkg/manifests/system-library-bhyve.mf b/usr/src/pkg/manifests/system-library-bhyve.mf index d9a15e1b37..3c7e52c938 100644 --- a/usr/src/pkg/manifests/system-library-bhyve.mf +++ b/usr/src/pkg/manifests/system-library-bhyve.mf @@ -27,5 
+27,8 @@ dir path=lib group=bin dir path=lib/$(ARCH64) group=bin dir path=usr group=sys dir path=usr/lib group=bin +dir path=usr/lib/$(ARCH64) group=bin file path=lib/$(ARCH64)/libvmmapi.so.1 +file path=usr/lib/$(ARCH64)/libppt.so.1 +file path=usr/lib/libppt.so.1 license lic_CDDL license=lic_CDDL diff --git a/usr/src/uts/common/os/modsysfile.c b/usr/src/uts/common/os/modsysfile.c index 7875824a86..2015cfefae 100644 --- a/usr/src/uts/common/os/modsysfile.c +++ b/usr/src/uts/common/os/modsysfile.c @@ -58,10 +58,12 @@ struct hwc_class *hcl_head; /* head of list of classes */ static kmutex_t hcl_lock; /* for accessing list of classes */ #define DAFILE "/etc/driver_aliases" +#define PPTFILE "/etc/ppt_aliases" #define CLASSFILE "/etc/driver_classes" #define DACFFILE "/etc/dacf.conf" static char class_file[] = CLASSFILE; +static char pptfile[] = PPTFILE; static char dafile[] = DAFILE; static char dacffile[] = DACFFILE; @@ -2170,14 +2172,13 @@ hwc_parse_now(char *fname, struct par_list **pl, ddi_prop_t **props) return (0); /* always return success */ } -void -make_aliases(struct bind **bhash) +static void +parse_aliases(struct bind **bhash, struct _buf *file) { enum { AL_NEW, AL_DRVNAME, AL_DRVNAME_COMMA, AL_ALIAS, AL_ALIAS_COMMA } state; - struct _buf *file; char tokbuf[MAXPATHLEN]; char drvbuf[MAXPATHLEN]; token_t token; @@ -2186,9 +2187,6 @@ make_aliases(struct bind **bhash) static char dupwarn[] = "!Driver alias \"%s\" conflicts with " "an existing driver name or alias."; - if ((file = kobj_open_file(dafile)) == (struct _buf *)-1) - return; - state = AL_NEW; major = DDI_MAJOR_T_NONE; while (!done) { @@ -2273,8 +2271,22 @@ make_aliases(struct bind **bhash) kobj_file_err(CE_WARN, file, tok_err, tokbuf); } } +} - kobj_close_file(file); +void +make_aliases(struct bind **bhash) +{ + struct _buf *file; + + if ((file = kobj_open_file(pptfile)) != (struct _buf *)-1) { + parse_aliases(bhash, file); + kobj_close_file(file); + } + + if ((file = kobj_open_file(dafile)) != (struct 
_buf *)-1) { + parse_aliases(bhash, file); + kobj_close_file(file); + } } diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 312c0f233d..ed404d3d6d 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -247,6 +247,7 @@ VMM_OBJS += vmm.o \ vmm_stat.o \ vmm_util.o \ x86.o \ + iommu.o \ vdev.o \ vatpic.o \ vatpit.o \ @@ -260,14 +261,14 @@ VMM_OBJS += vmm.o \ vmx_msr.o \ vmx.o \ vmx_support.o \ + vtd.o \ + vtd_sol.o \ svm.o \ svm_msr.o \ npt.o \ vmcb.o \ svm_support.o \ amdv.o \ - sol_iommu.o \ - sol_ppt.o \ gipt.o \ vmm_sol_vm.o \ vmm_sol_glue.o \ @@ -282,6 +283,8 @@ VIONA_OBJS += viona_main.o \ viona_tx.o \ viona_hook.o \ +PPT_OBJS += ppt.o + # # Build up defines and paths. # diff --git a/usr/src/uts/i86pc/Makefile.i86pc b/usr/src/uts/i86pc/Makefile.i86pc index b60d24d82c..0c921b4028 100644 --- a/usr/src/uts/i86pc/Makefile.i86pc +++ b/usr/src/uts/i86pc/Makefile.i86pc @@ -248,6 +248,7 @@ DRV_KMODS += fipe DRV_KMODS += imc imcstub DRV_KMODS += vmm DRV_KMODS += viona +DRV_KMODS += ppt DRV_KMODS += cpudrv diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 9474b30fc6..902080e34c 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> #include <contrib/dev/acpica/include/acpi.h> +#include <sys/sunndi.h> + #include "io/iommu.h" /* @@ -120,6 +122,9 @@ static int drhd_num; static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; static int max_domains; typedef int (*drhd_ident_func_t)(void); +#ifndef __FreeBSD__ +static dev_info_t *vtddips[DRHD_MAX_UNITS]; +#endif static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); @@ -237,19 +242,63 @@ vtd_translation_disable(struct vtdmap *vtdmap) ; } +static void * +vtd_map(dev_info_t *dip) +{ + caddr_t regs; + ddi_acc_handle_t hdl; 
+ int error; + + static ddi_device_acc_attr_t regs_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STRICTORDER_ACC, + }; + + error = ddi_regs_map_setup(dip, 0, &regs, 0, PAGE_SIZE, &regs_attr, + &hdl); + + if (error != DDI_SUCCESS) + return (NULL); + + ddi_set_driver_private(dip, hdl); + + return (regs); +} + +static void +vtd_unmap(dev_info_t *dip) +{ + ddi_acc_handle_t hdl = ddi_get_driver_private(dip); + + if (hdl != NULL) + ddi_regs_map_free(&hdl); +} + +#ifndef __FreeBSD__ +/* + * This lives in vtd_sol.c for license reasons. + */ +extern dev_info_t *vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *, int); +#endif + static int vtd_init(void) { int i, units, remaining; struct vtdmap *vtdmap; vm_paddr_t ctx_paddr; - char *end, envname[32]; + char *end; +#ifdef __FreeBSD__ + char envname[32]; unsigned long mapaddr; +#endif ACPI_STATUS status; ACPI_TABLE_DMAR *dmar; ACPI_DMAR_HEADER *hdr; ACPI_DMAR_HARDWARE_UNIT *drhd; +#ifdef __FreeBSD__ /* * Allow the user to override the ACPI DMAR table by specifying the * physical address of each remapping unit. @@ -268,7 +317,9 @@ vtd_init(void) if (units > 0) goto skip_dmar; - +#else + units = 0; +#endif /* Search for DMAR table. 
*/ status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar); if (ACPI_FAILURE(status)) @@ -291,7 +342,15 @@ vtd_init(void) break; drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; +#ifdef __FreeBSD__ vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); +#else + vtddips[units] = vtd_get_dip(drhd, units); + vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]); + if (vtdmaps[units] == NULL) + goto fail; + units++; +#endif if (units >= DRHD_MAX_UNITS) break; remaining -= hdr->Length; @@ -300,7 +359,9 @@ vtd_init(void) if (units <= 0) return (ENXIO); +#ifdef __FreeBSD__ skip_dmar: +#endif drhd_num = units; vtdmap = vtdmaps[0]; @@ -321,11 +382,36 @@ skip_dmar: } return (0); + +#ifndef __FreeBSD__ +fail: + for (i = 0; i <= units; i++) + vtd_unmap(vtddips[i]); + return (ENXIO); +#endif } static void vtd_cleanup(void) { +#ifndef __FreeBSD__ + int i; + + KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty")); + + bzero(root_table, sizeof (root_table)); + + for (i = 0; i <= drhd_num; i++) { + vtdmaps[i] = NULL; + /* + * Unmap the vtd registers. Note that the devinfo nodes + * themselves aren't removed, they are considered system state + * and can be reused when the module is reloaded. + */ + if (vtddips[i] != NULL) + vtd_unmap(vtddips[i]); + } +#endif } static void @@ -619,6 +705,7 @@ vtd_create_domain(vm_paddr_t maxaddr) if ((uintptr_t)dom->ptp & PAGE_MASK) panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp); +#ifdef __FreeBSD__ #ifdef notyet /* * XXX superpage mappings for the iommu do not work correctly. @@ -636,6 +723,18 @@ vtd_create_domain(vm_paddr_t maxaddr) */ dom->spsmask = VTD_CAP_SPS(vtdmap->cap); #endif +#else + /* + * On illumos we decidedly do not remove memory mapped to a VM's domain + * from the host_domain, so we don't have to deal with page demotion and + * can just use large pages. 
+ * + * Since VM memory is currently allocated as 4k pages and mapped into + * the VM domain page by page, the use of large pages is essentially + * limited to the host_domain. + */ + dom->spsmask = VTD_CAP_SPS(vtdmap->cap); +#endif SLIST_INSERT_HEAD(&domhead, dom, next); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c b/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c new file mode 100644 index 0000000000..1dbe8ffa48 --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd_sol.c @@ -0,0 +1,83 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +#include <sys/sunndi.h> +#include <contrib/dev/acpica/include/acpi.h> + +dev_info_t * +vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *drhd, int unit) +{ + dev_info_t *dip; + struct ddi_parent_private_data *pdptr; + struct regspec reg; + int circ; + + /* + * Try to find an existing devinfo node for this vtd unit. + */ + ndi_devi_enter(ddi_root_node(), &circ); + dip = ddi_find_devinfo("vtd", unit, 0); + ndi_devi_exit(ddi_root_node(), circ); + + if (dip != NULL) + return (dip); + + /* + * None found, construct a devinfo node for this vtd unit. 
+ */ + dip = ddi_add_child(ddi_root_node(), "vtd", + DEVI_SID_NODEID, unit); + + reg.regspec_bustype = 0; + reg.regspec_addr = drhd->Address; + reg.regspec_size = PAGE_SIZE; + + /* + * update the reg properties + * + * reg property will be used for register + * set access + * + * refer to the bus_map of root nexus driver + * I/O or memory mapping: + * + * <bustype=0, addr=x, len=x>: memory + * <bustype=1, addr=x, len=x>: i/o + * <bustype>1, addr=0, len=x>: x86-compatibility i/o + */ + (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, + dip, "reg", (int *)&reg, + sizeof (struct regspec) / sizeof (int)); + + /* + * This is an artificially constructed dev_info, and we + * need to set a few more things to be able to use it + * for ddi_dma_alloc_handle/free_handle. + */ + ddi_set_driver(dip, ddi_get_driver(ddi_root_node())); + DEVI(dip)->devi_bus_dma_allochdl = + DEVI(ddi_get_driver((ddi_root_node()))); + + pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data) + + sizeof (struct regspec), KM_SLEEP); + pdptr->par_nreg = 1; + pdptr->par_reg = (struct regspec *)(pdptr + 1); + pdptr->par_reg->regspec_bustype = 0; + pdptr->par_reg->regspec_addr = drhd->Address; + pdptr->par_reg->regspec_size = PAGE_SIZE; + ddi_set_parent_data(dip, pdptr); + + return (dip); +} diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c new file mode 100644 index 0000000000..b949573fe2 --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -0,0 +1,383 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/eventhandler.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> + +#include <machine/cpu.h> +#include <machine/md_var.h> + +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/pci.h> + +#include "vmm_util.h" +#include "vmm_mem.h" +#include "iommu.h" + +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, iommu, CTLFLAG_RW, 0, "bhyve iommu parameters"); + +static int iommu_avail; +SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail, + 0, "bhyve iommu initialized?"); + +static int iommu_enable = 1; +SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0, + "Enable use of I/O MMU (required for PCI passthrough)."); + +static struct iommu_ops *ops; +static void *host_domain; +#ifdef __FreeBSD__ +static eventhandler_tag add_tag, delete_tag; +#endif + +#ifndef 
__FreeBSD__ +static volatile u_int iommu_initted; +#endif + +static __inline int +IOMMU_INIT(void) +{ + if (ops != NULL) + return ((*ops->init)()); + else + return (ENXIO); +} + +static __inline void +IOMMU_CLEANUP(void) +{ + if (ops != NULL && iommu_avail) + (*ops->cleanup)(); +} + +static __inline void * +IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr) +{ + + if (ops != NULL && iommu_avail) + return ((*ops->create_domain)(maxaddr)); + else + return (NULL); +} + +static __inline void +IOMMU_DESTROY_DOMAIN(void *dom) +{ + + if (ops != NULL && iommu_avail) + (*ops->destroy_domain)(dom); +} + +static __inline uint64_t +IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) +{ + + if (ops != NULL && iommu_avail) + return ((*ops->create_mapping)(domain, gpa, hpa, len)); + else + return (len); /* XXX */ +} + +static __inline uint64_t +IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len) +{ + + if (ops != NULL && iommu_avail) + return ((*ops->remove_mapping)(domain, gpa, len)); + else + return (len); /* XXX */ +} + +static __inline void +IOMMU_ADD_DEVICE(void *domain, uint16_t rid) +{ + + if (ops != NULL && iommu_avail) + (*ops->add_device)(domain, rid); +} + +static __inline void +IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid) +{ + + if (ops != NULL && iommu_avail) + (*ops->remove_device)(domain, rid); +} + +static __inline void +IOMMU_INVALIDATE_TLB(void *domain) +{ + + if (ops != NULL && iommu_avail) + (*ops->invalidate_tlb)(domain); +} + +static __inline void +IOMMU_ENABLE(void) +{ + + if (ops != NULL && iommu_avail) + (*ops->enable)(); +} + +static __inline void +IOMMU_DISABLE(void) +{ + + if (ops != NULL && iommu_avail) + (*ops->disable)(); +} + +#ifdef __FreeBSD__ +static void +iommu_pci_add(void *arg, device_t dev) +{ + + /* Add new devices to the host domain. 
*/ + iommu_add_device(host_domain, pci_get_rid(dev)); +} + +static void +iommu_pci_delete(void *arg, device_t dev) +{ + + iommu_remove_device(host_domain, pci_get_rid(dev)); +} +#endif + +#ifndef __FreeBSD__ +static int +iommu_find_device(dev_info_t *dip, void *arg) +{ + boolean_t add = (boolean_t)arg; + + if (pcie_is_pci_device(dip)) { + if (add) + iommu_add_device(host_domain, pci_get_rid(dip)); + else + iommu_remove_device(host_domain, pci_get_rid(dip)); + } + + return (DDI_WALK_CONTINUE); +} +#endif + +static void +iommu_init(void) +{ + int error, bus, slot, func; + vm_paddr_t maxaddr; +#ifdef __FreeBSD__ + devclass_t dc; +#endif + device_t dev; + + if (!iommu_enable) + return; + + if (vmm_is_intel()) + ops = &iommu_ops_intel; + else if (vmm_is_amd()) + ops = &iommu_ops_amd; + else + ops = NULL; + + error = IOMMU_INIT(); + if (error) + return; + + iommu_avail = 1; + + /* + * Create a domain for the devices owned by the host + */ + maxaddr = vmm_mem_maxaddr(); + host_domain = IOMMU_CREATE_DOMAIN(maxaddr); + if (host_domain == NULL) { + printf("iommu_init: unable to create a host domain"); + IOMMU_CLEANUP(); + ops = NULL; + iommu_avail = 0; + return; + } + + /* + * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to + * the host + */ + iommu_create_mapping(host_domain, 0, 0, maxaddr); + +#ifdef __FreeBSD__ + add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); + delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, + NULL, 0); + dc = devclass_find("ppt"); + for (bus = 0; bus <= PCI_BUSMAX; bus++) { + for (slot = 0; slot <= PCI_SLOTMAX; slot++) { + for (func = 0; func <= PCI_FUNCMAX; func++) { + dev = pci_find_dbsf(0, bus, slot, func); + if (dev == NULL) + continue; + + /* Skip passthrough devices. */ + if (dc != NULL && + device_get_devclass(dev) == dc) + continue; + + /* + * Everything else belongs to the host + * domain. 
+ */ + iommu_add_device(host_domain, + pci_get_rid(dev)); + } + } + } +#else + ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_TRUE); +#endif + IOMMU_ENABLE(); + +} + +void +iommu_cleanup(void) +{ +#ifdef __FreeBSD__ + if (add_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_add_device, add_tag); + add_tag = NULL; + } + if (delete_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag); + delete_tag = NULL; + } +#else + atomic_store_rel_int(&iommu_initted, 0); +#endif + IOMMU_DISABLE(); +#ifndef __FreeBSD__ + ddi_walk_devs(ddi_root_node(), iommu_find_device, (void *)B_FALSE); +#endif + IOMMU_DESTROY_DOMAIN(host_domain); + IOMMU_CLEANUP(); +#ifndef __FreeBSD__ + ops = NULL; +#endif +} + +void * +iommu_create_domain(vm_paddr_t maxaddr) +{ + if (iommu_initted < 2) { + if (atomic_cmpset_int(&iommu_initted, 0, 1)) { + iommu_init(); + atomic_store_rel_int(&iommu_initted, 2); + } else + while (iommu_initted == 1) + cpu_spinwait(); + } + return (IOMMU_CREATE_DOMAIN(maxaddr)); +} + +void +iommu_destroy_domain(void *dom) +{ + + IOMMU_DESTROY_DOMAIN(dom); +} + +void +iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len) +{ + uint64_t mapped, remaining; + + remaining = len; + + while (remaining > 0) { + mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining); + gpa += mapped; + hpa += mapped; + remaining -= mapped; + } +} + +void +iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len) +{ + uint64_t unmapped, remaining; + + remaining = len; + + while (remaining > 0) { + unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining); + gpa += unmapped; + remaining -= unmapped; + } +} + +void * +iommu_host_domain(void) +{ + + return (host_domain); +} + +void +iommu_add_device(void *dom, uint16_t rid) +{ + + IOMMU_ADD_DEVICE(dom, rid); +} + +void +iommu_remove_device(void *dom, uint16_t rid) +{ + + IOMMU_REMOVE_DEVICE(dom, rid); +} + +void +iommu_invalidate_tlb(void *domain) +{ + + IOMMU_INVALIDATE_TLB(domain); +} diff --git 
a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c new file mode 100644 index 0000000000..a71ce86c2d --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c @@ -0,0 +1,1436 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Copyright 2019 Joyent, Inc. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/pciio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> + +#include <machine/vmm.h> +#include <machine/vmm_dev.h> + +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/pci.h> +#include <sys/pci_cap.h> +#include <sys/pcie_impl.h> +#include <sys/ppt_dev.h> +#include <sys/mkdev.h> +#include <sys/sysmacros.h> + +#include "vmm_lapic.h" +#include "vmm_ktr.h" + +#include "iommu.h" +#include "ppt.h" + +#define MAX_MSIMSGS 32 + +/* + * If the MSI-X table is located in the middle of a BAR then that MMIO + * region gets split into two segments - one segment above the MSI-X table + * and the other segment below the MSI-X table - with a hole in place of + * the MSI-X table so accesses to it can be trapped and emulated. + * + * So, allocate a MMIO segment for each BAR register + 1 additional segment. 
+ */ +#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) + +struct pptintr_arg { + struct pptdev *pptdev; + uint64_t addr; + uint64_t msg_data; +}; + +struct pptseg { + vm_paddr_t gpa; + size_t len; + int wired; +}; + +struct pptbar { + uint64_t base; + uint64_t size; + uint_t type; + ddi_acc_handle_t io_handle; + caddr_t io_ptr; +}; + +struct pptdev { + dev_info_t *pptd_dip; + list_node_t pptd_node; + ddi_acc_handle_t pptd_cfg; + struct pptbar pptd_bars[PCI_BASE_NUM]; + struct vm *vm; + struct pptseg mmio[MAX_MMIOSEGS]; + struct { + int num_msgs; /* guest state */ + boolean_t is_fixed; + size_t inth_sz; + ddi_intr_handle_t *inth; + struct pptintr_arg arg[MAX_MSIMSGS]; + } msi; + + struct { + int num_msgs; + size_t inth_sz; + size_t arg_sz; + ddi_intr_handle_t *inth; + struct pptintr_arg *arg; + } msix; +}; + + +static major_t ppt_major; +static void *ppt_state; +static kmutex_t pptdev_mtx; +static list_t pptdev_list; + +#define PPT_MINOR_NAME "ppt" + +static ddi_device_acc_attr_t ppt_attr = { + DDI_DEVICE_ATTR_V0, + DDI_NEVERSWAP_ACC, + DDI_STORECACHING_OK_ACC, + DDI_DEFAULT_ACC +}; + +static int +ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr) +{ + /* XXX: require extra privs? 
*/ + return (0); +} + +#define BAR_TO_IDX(bar) (((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32) +#define BAR_VALID(b) ( \ + (b) >= PCI_CONF_BASE0 && \ + (b) <= PCI_CONF_BASE5 && \ + ((b) & (PCI_BAR_SZ_32-1)) == 0) + +static int +ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +{ + minor_t minor = getminor(dev); + struct pptdev *ppt; + void *data = (void *)arg; + + if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) { + return (ENOENT); + } + + switch (cmd) { + case PPT_CFG_READ: { + struct ppt_cfg_io cio; + ddi_acc_handle_t cfg = ppt->pptd_cfg; + + if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) { + return (EFAULT); + } + switch (cio.pci_width) { + case 4: + cio.pci_data = pci_config_get32(cfg, cio.pci_off); + break; + case 2: + cio.pci_data = pci_config_get16(cfg, cio.pci_off); + break; + case 1: + cio.pci_data = pci_config_get8(cfg, cio.pci_off); + break; + default: + return (EINVAL); + } + + if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) { + return (EFAULT); + } + return (0); + } + case PPT_CFG_WRITE: { + struct ppt_cfg_io cio; + ddi_acc_handle_t cfg = ppt->pptd_cfg; + + if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) { + return (EFAULT); + } + switch (cio.pci_width) { + case 4: + pci_config_put32(cfg, cio.pci_off, cio.pci_data); + break; + case 2: + pci_config_put16(cfg, cio.pci_off, cio.pci_data); + break; + case 1: + pci_config_put8(cfg, cio.pci_off, cio.pci_data); + break; + default: + return (EINVAL); + } + + return (0); + } + case PPT_BAR_QUERY: { + struct ppt_bar_query barg; + struct pptbar *pbar; + + if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) { + return (EFAULT); + } + if (barg.pbq_baridx >= PCI_BASE_NUM) { + return (EINVAL); + } + pbar = &ppt->pptd_bars[barg.pbq_baridx]; + + if (pbar->base == 0 || pbar->size == 0) { + return (ENOENT); + } + barg.pbq_type = pbar->type; + barg.pbq_base = pbar->base; + barg.pbq_size = pbar->size; + + if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) { + return (EFAULT); + } 
+ return (0); + } + case PPT_BAR_READ: { + struct ppt_bar_io bio; + struct pptbar *pbar; + void *addr; + uint_t rnum; + ddi_acc_handle_t cfg; + + if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) { + return (EFAULT); + } + rnum = bio.pbi_bar; + if (rnum >= PCI_BASE_NUM) { + return (EINVAL); + } + pbar = &ppt->pptd_bars[rnum]; + if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) { + return (EINVAL); + } + addr = pbar->io_ptr + bio.pbi_off; + + switch (bio.pbi_width) { + case 4: + bio.pbi_data = ddi_get32(pbar->io_handle, addr); + break; + case 2: + bio.pbi_data = ddi_get16(pbar->io_handle, addr); + break; + case 1: + bio.pbi_data = ddi_get8(pbar->io_handle, addr); + break; + default: + return (EINVAL); + } + + if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) { + return (EFAULT); + } + return (0); + } + case PPT_BAR_WRITE: { + struct ppt_bar_io bio; + struct pptbar *pbar; + void *addr; + uint_t rnum; + ddi_acc_handle_t cfg; + + if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) { + return (EFAULT); + } + rnum = bio.pbi_bar; + if (rnum >= PCI_BASE_NUM) { + return (EINVAL); + } + pbar = &ppt->pptd_bars[rnum]; + if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) { + return (EINVAL); + } + addr = pbar->io_ptr + bio.pbi_off; + + switch (bio.pbi_width) { + case 4: + ddi_put32(pbar->io_handle, addr, bio.pbi_data); + break; + case 2: + ddi_put16(pbar->io_handle, addr, bio.pbi_data); + break; + case 1: + ddi_put8(pbar->io_handle, addr, bio.pbi_data); + break; + default: + return (EINVAL); + } + + return (0); + } + + default: + return (ENOTTY); + } + + return (0); +} + +static int +ppt_find_pba_bar(struct pptdev *ppt) +{ + uint16_t base; + uint32_t pba_off; + + if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) != + DDI_SUCCESS) + return (-1); + + pba_off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_PBA_OFFSET); + + if (pba_off == PCI_EINVAL32) + return (-1); + + return (pba_off & PCI_MSIX_PBA_BIR_MASK); +} + +static int +ppt_devmap(dev_t dev, 
devmap_cookie_t dhp, offset_t off, size_t len, + size_t *maplen, uint_t model) +{ + minor_t minor; + struct pptdev *ppt; + int err; + int bar; + + minor = getminor(dev); + + if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) + return (ENXIO); + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(model) != DDI_MODEL_NONE) + return (ENXIO); +#endif + + if (off < 0 || off != P2ALIGN(off, PAGESIZE)) + return (EINVAL); + + if ((bar = ppt_find_pba_bar(ppt)) == -1) + return (EINVAL); + + /* + * Add 1 to the BAR number to get the register number used by DDI. + * Register 0 corresponds to PCI config space, the PCI BARs start at 1. + */ + bar += 1; + + err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, bar, off, len, + PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr); + + if (err == DDI_SUCCESS) + *maplen = len; + + return (err); +} + + +static void +ppt_bar_wipe(struct pptdev *ppt) +{ + uint_t i; + + for (i = 0; i < PCI_BASE_NUM; i++) { + struct pptbar *pbar = &ppt->pptd_bars[i]; + if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) { + ddi_regs_map_free(&pbar->io_handle); + } + } + bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars)); +} + +static int +ppt_bar_crawl(struct pptdev *ppt) +{ + pci_regspec_t *regs; + uint_t rcount, i; + int err = 0, rlen; + + if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS, + "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) { + return (EIO); + } + + VERIFY3S(rlen, >, 0); + rcount = rlen / sizeof (pci_regspec_t); + for (i = 0; i < rcount; i++) { + pci_regspec_t *reg = &regs[i]; + struct pptbar *pbar; + uint_t bar, rnum; + + DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg); + bar = PCI_REG_REG_G(reg->pci_phys_hi); + if (!BAR_VALID(bar)) { + continue; + } + + rnum = BAR_TO_IDX(bar); + pbar = &ppt->pptd_bars[rnum]; + /* is this somehow already populated? 
*/ + if (pbar->base != 0 || pbar->size != 0) { + err = EEXIST; + break; + } + + pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK; + pbar->base = ((uint64_t)reg->pci_phys_mid << 32) | + (uint64_t)reg->pci_phys_low; + pbar->size = ((uint64_t)reg->pci_size_hi << 32) | + (uint64_t)reg->pci_size_low; + if (pbar->type == PCI_ADDR_IO) { + err = ddi_regs_map_setup(ppt->pptd_dip, rnum, + &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle); + if (err != 0) { + break; + } + } + } + kmem_free(regs, rlen); + + if (err != 0) { + ppt_bar_wipe(ppt); + } + return (err); +} + +static boolean_t +ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size) +{ + const uint64_t map_end = base + size; + + /* Zero-length or overflow mappings are not valid */ + if (map_end <= base) { + return (B_FALSE); + } + /* MMIO bounds should be page-aligned */ + if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) { + return (B_FALSE); + } + + for (uint_t i = 0; i < PCI_BASE_NUM; i++) { + const struct pptbar *bar = &ppt->pptd_bars[i]; + const uint64_t bar_end = bar->base + bar->size; + + /* Only memory BARs can be mapped */ + if (bar->type != PCI_ADDR_MEM32 && + bar->type != PCI_ADDR_MEM64) { + continue; + } + + /* Does the mapping fit within this BAR? 
*/ + if (base < bar->base || base >= bar_end || + map_end < bar->base || map_end > bar_end) { + continue; + } + + /* This BAR satisfies the provided map */ + return (B_TRUE); + } + return (B_FALSE); +} + +static int +ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + struct pptdev *ppt = NULL; + char name[PPT_MAXNAMELEN]; + int inst; + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + inst = ddi_get_instance(dip); + + if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) { + goto fail; + } + VERIFY(ppt = ddi_get_soft_state(ppt_state, inst)); + ppt->pptd_dip = dip; + ddi_set_driver_private(dip, ppt); + + if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) { + goto fail; + } + if (ppt_bar_crawl(ppt) != 0) { + goto fail; + } + if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst, + DDI_PSEUDO, 0) != DDI_SUCCESS) { + goto fail; + } + + mutex_enter(&pptdev_mtx); + list_insert_tail(&pptdev_list, ppt); + mutex_exit(&pptdev_mtx); + + return (DDI_SUCCESS); + +fail: + if (ppt != NULL) { + ddi_remove_minor_node(dip, NULL); + if (ppt->pptd_cfg != NULL) { + pci_config_teardown(&ppt->pptd_cfg); + } + ppt_bar_wipe(ppt); + ddi_soft_state_free(ppt_state, inst); + } + return (DDI_FAILURE); +} + +static int +ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + struct pptdev *ppt; + int inst; + + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + ppt = ddi_get_driver_private(dip); + inst = ddi_get_instance(dip); + + ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt); + + mutex_enter(&pptdev_mtx); + if (ppt->vm != NULL) { + mutex_exit(&pptdev_mtx); + return (DDI_FAILURE); + } + list_remove(&pptdev_list, ppt); + mutex_exit(&pptdev_mtx); + + ddi_remove_minor_node(dip, PPT_MINOR_NAME); + ppt_bar_wipe(ppt); + pci_config_teardown(&ppt->pptd_cfg); + ddi_set_driver_private(dip, NULL); + ddi_soft_state_free(ppt_state, inst); + + return (DDI_SUCCESS); +} + +static int +ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) +{ + int 
error = DDI_FAILURE; + int inst = getminor((dev_t)arg); + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: { + struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst); + + if (ppt != NULL) { + *result = (void *)ppt->pptd_dip; + error = DDI_SUCCESS; + } + break; + } + case DDI_INFO_DEVT2INSTANCE: { + *result = (void *)(uintptr_t)inst; + error = DDI_SUCCESS; + break; + } + default: + break; + } + return (error); +} + +static struct cb_ops ppt_cb_ops = { + ppt_open, + nulldev, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + ppt_ioctl, + ppt_devmap, /* devmap */ + NULL, /* mmap */ + NULL, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, + NULL, + D_NEW | D_MP | D_64BIT | D_DEVMAP, + CB_REV +}; + +static struct dev_ops ppt_ops = { + DEVO_REV, + 0, + ppt_ddi_info, + nulldev, /* identify */ + nulldev, /* probe */ + ppt_ddi_attach, + ppt_ddi_detach, + nodev, /* reset */ + &ppt_cb_ops, + (struct bus_ops *)NULL +}; + +static struct modldrv modldrv = { + &mod_driverops, + "bhyve pci pass-thru", + &ppt_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +int +_init(void) +{ + int error; + + mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL); + list_create(&pptdev_list, sizeof (struct pptdev), + offsetof(struct pptdev, pptd_node)); + + error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0); + if (error) { + goto fail; + } + + error = mod_install(&modlinkage); + + ppt_major = ddi_name_to_major("ppt"); +fail: + if (error) { + ddi_soft_state_fini(&ppt_state); + } + return (error); +} + +int +_fini(void) +{ + int error; + + error = mod_remove(&modlinkage); + if (error) + return (error); + ddi_soft_state_fini(&ppt_state); + + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +static boolean_t +ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us) +{ + uint16_t cap_ptr, devsts; + ddi_acc_handle_t hdl; 
+ + if (pci_config_setup(dip, &hdl) != DDI_SUCCESS) + return (B_FALSE); + + if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) { + pci_config_teardown(&hdl); + return (B_FALSE); + } + + devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS); + while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) { + if (max_delay_us == 0) { + pci_config_teardown(&hdl); + return (B_FALSE); + } + + /* Poll once every 100 milliseconds up to the timeout. */ + if (max_delay_us > 100000) { + delay(drv_usectohz(100000)); + max_delay_us -= 100000; + } else { + delay(drv_usectohz(max_delay_us)); + max_delay_us = 0; + } + devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS); + } + + pci_config_teardown(&hdl); + return (B_TRUE); +} + +static uint_t +ppt_max_completion_tmo_us(dev_info_t *dip) +{ + uint_t timo = 0; + uint16_t cap_ptr; + ddi_acc_handle_t hdl; + uint_t timo_ranges[] = { /* timeout ranges */ + 50000, /* 50ms */ + 100, /* 100us */ + 10000, /* 10ms */ + 0, + 0, + 55000, /* 55ms */ + 210000, /* 210ms */ + 0, + 0, + 900000, /* 900ms */ + 3500000, /* 3.5s */ + 0, + 0, + 13000000, /* 13s */ + 64000000, /* 64s */ + 0 + }; + + if (pci_config_setup(dip, &hdl) != DDI_SUCCESS) + return (50000); /* default 50ms */ + + if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) + goto out; + + if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) & + PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0) + goto out; + + if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCAP2) & + PCIE_DEVCTL2_COM_TO_RANGE_MASK) == 0) + goto out; + + timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) & + PCIE_DEVCAP2_COM_TO_RANGE_MASK]; + +out: + if (timo == 0) + timo = 50000; /* default 50ms */ + + pci_config_teardown(&hdl); + return (timo); +} + +static boolean_t +ppt_flr(dev_info_t *dip, boolean_t force) +{ + uint16_t cap_ptr, ctl, cmd; + ddi_acc_handle_t hdl; + uint_t compl_delay = 0, max_delay_us; + + if (pci_config_setup(dip, &hdl) != DDI_SUCCESS) + return (B_FALSE); + + if 
(PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) + goto fail; + + if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR) + == 0) + goto fail; + + max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000); + + /* + * Disable busmastering to prevent generation of new transactions while + * waiting for the device to go idle. If the idle timeout fails, the + * command register is restored which will re-enable busmastering. + */ + cmd = pci_config_get16(hdl, PCI_CONF_COMM); + pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME); + if (!ppt_wait_for_pending_txn(dip, max_delay_us)) { + if (!force) { + pci_config_put16(hdl, PCI_CONF_COMM, cmd); + goto fail; + } + dev_err(dip, CE_WARN, + "?Resetting with transactions pending after %u us\n", + max_delay_us); + + /* + * Extend the post-FLR delay to cover the maximum Completion + * Timeout delay of anything in flight during the FLR delay. + * Enforce a minimum delay of at least 10ms. + */ + compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000)); + } + + /* Initiate the reset. */ + ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL); + (void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL, + ctl | PCIE_DEVCTL_INITIATE_FLR); + + /* Wait for at least 100ms */ + delay(drv_usectohz((100 + compl_delay) * 1000)); + + pci_config_teardown(&hdl); + return (B_TRUE); + +fail: + /* + * TODO: If the FLR fails for some reason, we should attempt a reset + * using the PCI power management facilities (if possible). 
+ */ + pci_config_teardown(&hdl); + return (B_FALSE); +} + + +static struct pptdev * +ppt_findf(int fd) +{ + struct pptdev *ppt = NULL; + file_t *fp; + vattr_t va; + + if ((fp = getf(fd)) == NULL) { + return (NULL); + } + + va.va_mask = AT_RDEV; + if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 || + getmajor(va.va_rdev) != ppt_major) + goto fail; + + ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev)); + + if (ppt != NULL) + return (ppt); + +fail: + releasef(fd); + return (NULL); +} + +static void +ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) +{ + int i; + struct pptseg *seg; + + for (i = 0; i < MAX_MMIOSEGS; i++) { + seg = &ppt->mmio[i]; + if (seg->len == 0) + continue; + (void) vm_unmap_mmio(vm, seg->gpa, seg->len); + bzero(seg, sizeof (struct pptseg)); + } +} + +static void +ppt_teardown_msi(struct pptdev *ppt) +{ + int i; + + if (ppt->msi.num_msgs == 0) + return; + + for (i = 0; i < ppt->msi.num_msgs; i++) { + int intr_cap; + + (void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap); + if (intr_cap & DDI_INTR_FLAG_BLOCK) + ddi_intr_block_disable(&ppt->msi.inth[i], 1); + else + ddi_intr_disable(ppt->msi.inth[i]); + + ddi_intr_remove_handler(ppt->msi.inth[i]); + ddi_intr_free(ppt->msi.inth[i]); + + ppt->msi.inth[i] = NULL; + } + + kmem_free(ppt->msi.inth, ppt->msi.inth_sz); + ppt->msi.inth = NULL; + ppt->msi.inth_sz = 0; + ppt->msi.is_fixed = B_FALSE; + + ppt->msi.num_msgs = 0; +} + +static void +ppt_teardown_msix_intr(struct pptdev *ppt, int idx) +{ + if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) { + int intr_cap; + + (void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap); + if (intr_cap & DDI_INTR_FLAG_BLOCK) + ddi_intr_block_disable(&ppt->msix.inth[idx], 1); + else + ddi_intr_disable(ppt->msix.inth[idx]); + + ddi_intr_remove_handler(ppt->msix.inth[idx]); + } +} + +static void +ppt_teardown_msix(struct pptdev *ppt) +{ + uint_t i; + + if (ppt->msix.num_msgs == 0) + return; + + for (i = 0; i < ppt->msix.num_msgs; i++) + 
ppt_teardown_msix_intr(ppt, i); + + if (ppt->msix.inth) { + for (i = 0; i < ppt->msix.num_msgs; i++) + ddi_intr_free(ppt->msix.inth[i]); + kmem_free(ppt->msix.inth, ppt->msix.inth_sz); + ppt->msix.inth = NULL; + ppt->msix.inth_sz = 0; + kmem_free(ppt->msix.arg, ppt->msix.arg_sz); + ppt->msix.arg = NULL; + ppt->msix.arg_sz = 0; + } + + ppt->msix.num_msgs = 0; +} + +int +ppt_assigned_devices(struct vm *vm) +{ + struct pptdev *ppt; + uint_t num = 0; + + mutex_enter(&pptdev_mtx); + for (ppt = list_head(&pptdev_list); ppt != NULL; + ppt = list_next(&pptdev_list, ppt)) { + if (ppt->vm == vm) { + num++; + } + } + mutex_exit(&pptdev_mtx); + return (num); +} + +boolean_t +ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) +{ + struct pptdev *ppt = list_head(&pptdev_list); + + /* XXX: this should probably be restructured to avoid the lock */ + mutex_enter(&pptdev_mtx); + for (ppt = list_head(&pptdev_list); ppt != NULL; + ppt = list_next(&pptdev_list, ppt)) { + if (ppt->vm != vm) { + continue; + } + + for (uint_t i = 0; i < MAX_MMIOSEGS; i++) { + struct pptseg *seg = &ppt->mmio[i]; + + if (seg->len == 0) + continue; + if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) { + mutex_exit(&pptdev_mtx); + return (B_TRUE); + } + } + } + + mutex_exit(&pptdev_mtx); + return (B_FALSE); +} + +int +ppt_assign_device(struct vm *vm, int pptfd) +{ + struct pptdev *ppt; + int err = 0; + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + + /* Only one VM may own a device at any given time */ + if (ppt->vm != NULL && ppt->vm != vm) { + err = EBUSY; + goto done; + } + + if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) { + err = EIO; + goto done; + } + ppt_flr(ppt->pptd_dip, B_TRUE); + + /* + * Restore the device state after reset and then perform another save + * so the "pristine" state can be restored when the device is removed + * from the guest. 
+ */ + if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS || + pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) { + err = EIO; + goto done; + } + + ppt->vm = vm; + iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip)); + iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip)); + pf_set_passthru(ppt->pptd_dip, B_TRUE); + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +static void +ppt_reset_pci_power_state(dev_info_t *dip) +{ + ddi_acc_handle_t cfg; + uint16_t cap_ptr; + + if (pci_config_setup(dip, &cfg) != DDI_SUCCESS) + return; + + if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) { + uint16_t val; + + val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR); + if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) { + val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0; + (void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR, + val); + } + } + + pci_config_teardown(&cfg); +} + +static void +ppt_do_unassign(struct pptdev *ppt) +{ + struct vm *vm = ppt->vm; + + ASSERT3P(vm, !=, NULL); + ASSERT(MUTEX_HELD(&pptdev_mtx)); + + + ppt_flr(ppt->pptd_dip, B_TRUE); + + /* + * Restore from the state saved during device assignment. + * If the device power state has been altered, that must be remedied + * first, as it will reset register state during the transition. + */ + ppt_reset_pci_power_state(ppt->pptd_dip); + (void) pci_restore_config_regs(ppt->pptd_dip); + + pf_set_passthru(ppt->pptd_dip, B_FALSE); + + ppt_unmap_mmio(vm, ppt); + ppt_teardown_msi(ppt); + ppt_teardown_msix(ppt); + iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip)); + iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip)); + ppt->vm = NULL; +} + +int +ppt_unassign_device(struct vm *vm, int pptfd) +{ + struct pptdev *ppt; + int err = 0; + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + + /* If this device is not owned by this 'vm' then bail out. 
*/ + if (ppt->vm != vm) { + err = EBUSY; + goto done; + } + ppt_do_unassign(ppt); + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +int +ppt_unassign_all(struct vm *vm) +{ + struct pptdev *ppt; + + mutex_enter(&pptdev_mtx); + for (ppt = list_head(&pptdev_list); ppt != NULL; + ppt = list_next(&pptdev_list, ppt)) { + if (ppt->vm == vm) { + ppt_do_unassign(ppt); + } + } + mutex_exit(&pptdev_mtx); + + return (0); +} + +int +ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len, + vm_paddr_t hpa) +{ + struct pptdev *ppt; + int err = 0; + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + if (ppt->vm != vm) { + err = EBUSY; + goto done; + } + + /* + * Ensure that the host-physical range of the requested mapping fits + * within one of the MMIO BARs of the device. + */ + if (!ppt_bar_verify_mmio(ppt, hpa, len)) { + err = EINVAL; + goto done; + } + + for (uint_t i = 0; i < MAX_MMIOSEGS; i++) { + struct pptseg *seg = &ppt->mmio[i]; + + if (seg->len == 0) { + err = vm_map_mmio(vm, gpa, len, hpa); + if (err == 0) { + seg->gpa = gpa; + seg->len = len; + } + goto done; + } + } + err = ENOSPC; + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +static uint_t +pptintr(caddr_t arg, caddr_t unused) +{ + struct pptintr_arg *pptarg = (struct pptintr_arg *)arg; + struct pptdev *ppt = pptarg->pptdev; + + if (ppt->vm != NULL) { + lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data); + } else { + /* + * XXX + * This is not expected to happen - panic? + */ + } + + /* + * For legacy interrupts give other filters a chance in case + * the interrupt was not generated by the passthrough device. + */ + return (ppt->msi.is_fixed ? 
DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED); +} + +int +ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg, + int numvec) +{ + int i, msi_count, intr_type; + struct pptdev *ppt; + int err = 0; + + if (numvec < 0 || numvec > MAX_MSIMSGS) + return (EINVAL); + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + if (ppt->vm != vm) { + /* Make sure we own this device */ + err = EBUSY; + goto done; + } + + /* Free any allocated resources */ + ppt_teardown_msi(ppt); + + if (numvec == 0) { + /* nothing more to do */ + goto done; + } + + if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI, + &msi_count) != DDI_SUCCESS) { + if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED, + &msi_count) != DDI_SUCCESS) { + err = EINVAL; + goto done; + } + + intr_type = DDI_INTR_TYPE_FIXED; + ppt->msi.is_fixed = B_TRUE; + } else { + intr_type = DDI_INTR_TYPE_MSI; + } + + /* + * The device must be capable of supporting the number of vectors + * the guest wants to allocate. + */ + if (numvec > msi_count) { + err = EINVAL; + goto done; + } + + ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t); + ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP); + if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0, + numvec, &msi_count, 0) != DDI_SUCCESS) { + kmem_free(ppt->msi.inth, ppt->msi.inth_sz); + err = EINVAL; + goto done; + } + + /* Verify that we got as many vectors as the guest requested */ + if (numvec != msi_count) { + ppt_teardown_msi(ppt); + err = EINVAL; + goto done; + } + + /* Set up & enable interrupt handler for each vector. 
*/ + for (i = 0; i < numvec; i++) { + int res, intr_cap = 0; + + ppt->msi.num_msgs = i + 1; + ppt->msi.arg[i].pptdev = ppt; + ppt->msi.arg[i].addr = addr; + ppt->msi.arg[i].msg_data = msg + i; + + if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr, + &ppt->msi.arg[i], NULL) != DDI_SUCCESS) + break; + + (void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap); + if (intr_cap & DDI_INTR_FLAG_BLOCK) + res = ddi_intr_block_enable(&ppt->msi.inth[i], 1); + else + res = ddi_intr_enable(ppt->msi.inth[i]); + + if (res != DDI_SUCCESS) + break; + } + if (i < numvec) { + ppt_teardown_msi(ppt); + err = ENXIO; + } + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +int +ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr, + uint64_t msg, uint32_t vector_control) +{ + struct pptdev *ppt; + int numvec, alloced; + int err = 0; + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + /* Make sure we own this device */ + if (ppt->vm != vm) { + err = EBUSY; + goto done; + } + + /* + * First-time configuration: + * Allocate the MSI-X table + * Allocate the IRQ resources + * Set up some variables in ppt->msix + */ + if (ppt->msix.num_msgs == 0) { + dev_info_t *dip = ppt->pptd_dip; + + if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, + &numvec) != DDI_SUCCESS) { + err = EINVAL; + goto done; + } + + ppt->msix.num_msgs = numvec; + + ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]); + ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP); + ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t); + ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP); + + if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0, + numvec, &alloced, 0) != DDI_SUCCESS) { + kmem_free(ppt->msix.arg, ppt->msix.arg_sz); + kmem_free(ppt->msix.inth, ppt->msix.inth_sz); + ppt->msix.arg = NULL; + ppt->msix.inth = NULL; + ppt->msix.arg_sz = ppt->msix.inth_sz = 0; + err = EINVAL; + goto 
done; + } + + if (numvec != alloced) { + ppt_teardown_msix(ppt); + err = EINVAL; + goto done; + } + } + + if (idx >= ppt->msix.num_msgs) { + err = EINVAL; + goto done; + } + + if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { + int intr_cap, res; + + /* Tear down the IRQ if it's already set up */ + ppt_teardown_msix_intr(ppt, idx); + + ppt->msix.arg[idx].pptdev = ppt; + ppt->msix.arg[idx].addr = addr; + ppt->msix.arg[idx].msg_data = msg; + + /* Setup the MSI-X interrupt */ + if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr, + &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) { + err = ENXIO; + goto done; + } + + (void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap); + if (intr_cap & DDI_INTR_FLAG_BLOCK) + res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1); + else + res = ddi_intr_enable(ppt->msix.inth[idx]); + + if (res != DDI_SUCCESS) { + ddi_intr_remove_handler(ppt->msix.inth[idx]); + err = ENXIO; + goto done; + } + } else { + /* Masked, tear it down if it's already been set up */ + ppt_teardown_msix_intr(ppt, idx); + } + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +int +ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit) +{ + struct pptdev *ppt; + int err = 0; + + mutex_enter(&pptdev_mtx); + ppt = ppt_findf(pptfd); + if (ppt == NULL) { + mutex_exit(&pptdev_mtx); + return (EBADF); + } + if (ppt->vm != vm) { + err = EBUSY; + goto done; + } + + if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI, + msilimit) != DDI_SUCCESS) { + *msilimit = -1; + } + if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX, + msixlimit) != DDI_SUCCESS) { + *msixlimit = -1; + } + +done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.conf b/usr/src/uts/i86pc/io/vmm/io/ppt.conf new file mode 100644 index 0000000000..698cecb6f8 --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.conf @@ -0,0 +1,14 @@ +# This file and its contents are supplied under the terms of the 
+# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2017 Joyent, Inc. +# + diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.h b/usr/src/uts/i86pc/io/vmm/io/ppt.h index 686b15db49..979c0e18ac 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.h +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.h @@ -31,26 +31,21 @@ #ifndef _IO_PPT_H_ #define _IO_PPT_H_ -int ppt_unassign_all(struct vm *vm); -int ppt_map_mmio(struct vm *vm, int bus, int slot, int func, - vm_paddr_t gpa, size_t len, vm_paddr_t hpa); -int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, - uint64_t addr, uint64_t msg, int numvec); -int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, - int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); -int ppt_assigned_devices(struct vm *vm); +int ppt_unassign_all(struct vm *vm); +int ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len, + vm_paddr_t hpa); +int ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, + uint64_t msg, int numvec); +int ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr, + uint64_t msg, uint32_t vector_control); +int ppt_assigned_devices(struct vm *vm); boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa); - -/* - * Returns the number of devices sequestered by the ppt driver for assignment - * to virtual machines. - */ -int ppt_avail_devices(void); +int ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit); /* * The following functions should never be called directly. * Use 'vm_assign_pptdev()' and 'vm_unassign_pptdev()' instead. 
*/ -int ppt_assign_device(struct vm *vm, int bus, int slot, int func); -int ppt_unassign_device(struct vm *vm, int bus, int slot, int func); +int ppt_assign_device(struct vm *vm, int pptfd); +int ppt_unassign_device(struct vm *vm, int pptfd); #endif diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile new file mode 100644 index 0000000000..aac896e89e --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile @@ -0,0 +1,52 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOSprivate { + global: + # DDI Interfaces + _fini; + _init; + _info; + + # PCI pass-thru API for bhyve + ppt_assigned_devices; + ppt_is_mmio; + ppt_assign_device; + ppt_unassign_device; + ppt_unassign_all; + ppt_map_mmio; + ppt_setup_msi; + ppt_setup_msix; + ppt_get_limits; + + local: + *; +}; diff --git a/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c b/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c deleted file mode 100644 index 989e88e17b..0000000000 --- a/usr/src/uts/i86pc/io/vmm/io/sol_iommu.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. 
- * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2017 Joyent, Inc. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/cmn_err.h> - -/* - * IOMMU Stub - * - * Until proper iommu support can be wired into bhyve, stub out all the - * functions to either fail, if reasonable, or panic. - */ - -void -iommu_cleanup(void) -{ -} - -void * -iommu_host_domain(void) -{ - return (NULL); -} - -/*ARGSUSED*/ -void * -iommu_create_domain(vm_paddr_t maxaddr) -{ - return (NULL); -} - -/*ARGSUSED*/ -void -iommu_destroy_domain(void *dom) -{ - panic("unimplemented"); -} - -/*ARGSUSED*/ -void -iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len) -{ - panic("unimplemented"); -} - -/*ARGSUSED*/ -void -iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len) -{ - panic("unimplemented"); -} - -/*ARGSUSED*/ -void -iommu_add_device(void *dom, uint16_t rid) -{ - panic("unimplemented"); -} - -/*ARGSUSED*/ -void -iommu_remove_device(void *dom, uint16_t rid) -{ - panic("unimplemented"); -} - -/*ARGSUSED*/ -void -iommu_invalidate_tlb(void *domain) -{ - panic("unimplemented"); -} - diff --git a/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c b/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c deleted file mode 100644 index 9d5b1f5cdc..0000000000 --- a/usr/src/uts/i86pc/io/vmm/io/sol_ppt.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. 
- */ - -/* - * Copyright 2017 Joyent, Inc. - */ - -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> - -#include <sys/vmm.h> - -/* - * PCI Pass-Through Stub - * - * Until proper passthrough support can be wired into bhyve, stub out all the - * functions to either fail or no-op. - */ - -int -ppt_unassign_all(struct vm *vm) -{ - return (0); -} - -/*ARGSUSED*/ -int -ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, - size_t len, vm_paddr_t hpa) -{ - return (ENXIO); -} - -/*ARGSUSED*/ -int -ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, - uint64_t addr, uint64_t msg, int numvec) -{ - return (ENXIO); -} - -/*ARGSUSED*/ -int -ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, int idx, - uint64_t addr, uint64_t msg, uint32_t vector_control) -{ - return (ENXIO); -} - -/*ARGSUSED*/ -int -ppt_assigned_devices(struct vm *vm) -{ - return (0); -} - -/*ARGSUSED*/ -boolean_t -ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) -{ - return (B_FALSE); -} - -/*ARGSUSED*/ -int -ppt_avail_devices(void) -{ - return (0); -} - -/*ARGSUSED*/ -int -ppt_assign_device(struct vm *vm, int bus, int slot, int func) -{ - return (ENOENT); -} - -/*ARGSUSED*/ -int -ppt_unassign_device(struct vm *vm, int bus, int slot, int func) -{ - return (ENXIO); -} diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 6df094b50e..dd24a18f6a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -495,6 +495,7 @@ vmm_mod_unload() { int error; + iommu_cleanup(); error = VMM_CLEANUP(); if (error) return (error); @@ -1054,10 +1055,14 @@ vm_iommu_modify(struct vm *vm, boolean_t map) hpa = DMAP_TO_PHYS((uintptr_t)vp); if (map) { iommu_create_mapping(vm->iommu, gpa, hpa, sz); +#ifdef __FreeBSD__ iommu_remove_mapping(host_domain, hpa, sz); +#endif } else { iommu_remove_mapping(vm->iommu, gpa, sz); +#ifdef __FreeBSD__ iommu_create_mapping(host_domain, hpa, hpa, sz); +#endif } gpa += 
PAGE_SIZE; @@ -1068,21 +1073,34 @@ vm_iommu_modify(struct vm *vm, boolean_t map) * Invalidate the cached translations associated with the domain * from which pages were removed. */ +#ifdef __FreeBSD__ if (map) iommu_invalidate_tlb(host_domain); else iommu_invalidate_tlb(vm->iommu); +#else + iommu_invalidate_tlb(vm->iommu); +#endif } #define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) #define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) +#ifdef __FreeBSD__ int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) +#else +int +vm_unassign_pptdev(struct vm *vm, int pptfd) +#endif /* __FreeBSD__ */ { int error; +#ifdef __FreeBSD__ error = ppt_unassign_device(vm, bus, slot, func); +#else + error = ppt_unassign_device(vm, pptfd); +#endif /* __FreeBSD__ */ if (error) return (error); @@ -1092,8 +1110,13 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) return (0); } +#ifdef __FreeBSD__ int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) +#else +int +vm_assign_pptdev(struct vm *vm, int pptfd) +#endif /* __FreeBSD__ */ { int error; vm_paddr_t maxaddr; @@ -1109,7 +1132,11 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) vm_iommu_map(vm); } +#ifdef __FreeBSD__ error = ppt_assign_device(vm, bus, slot, func); +#else + error = ppt_assign_device(vm, pptfd); +#endif /* __FreeBSD__ */ return (error); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 2b612b20e9..d84580d04c 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -42,6 +42,7 @@ #include <vm/vm.h> #include <vm/seg_dev.h> +#include "io/ppt.h" #include "io/vatpic.h" #include "io/vioapic.h" #include "io/vrtc.h" @@ -564,7 +565,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, break; } - /* XXXJOY: punt on these for now */ case VM_PPTDEV_MSI: { struct vm_pptdev_msi pptmsi; @@ -572,7 +572,9 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, 
error = EFAULT; break; } - return (ENOTTY); + error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd, + pptmsi.addr, pptmsi.msg, pptmsi.numvec); + break; } case VM_PPTDEV_MSIX: { struct vm_pptdev_msix pptmsix; @@ -581,7 +583,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = EFAULT; break; } - return (ENOTTY); + error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd, + pptmsix.idx, pptmsix.addr, pptmsix.msg, + pptmsix.vector_control); + break; } case VM_MAP_PPTDEV_MMIO: { struct vm_pptdev_mmio pptmmio; @@ -590,9 +595,20 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = EFAULT; break; } - return (ENOTTY); + error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa, + pptmmio.len, pptmmio.hpa); + break; + } + case VM_BIND_PPTDEV: { + struct vm_pptdev pptdev; + + if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) { + error = EFAULT; + break; + } + error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd); + break; } - case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: { struct vm_pptdev pptdev; @@ -600,12 +616,27 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = EFAULT; break; } - return (ENOTTY); + error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd); + break; } + case VM_GET_PPTDEV_LIMITS: { + struct vm_pptdev_limits pptlimits; + if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) { + error = EFAULT; + break; + } + error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd, + &pptlimits.msi_limit, &pptlimits.msix_limit); + if (error == 0 && + ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) { + error = EFAULT; + break; + } + break; + } case VM_INJECT_EXCEPTION: { struct vm_exception vmexc; - if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) { error = EFAULT; break; @@ -2091,8 +2122,16 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_FAILURE); } - /* Ensure that all resources have been cleaned up */ - mutex_enter(&vmmdev_mtx); + /* + * Ensure that all resources have 
been cleaned up. + * + * To prevent a deadlock with iommu_cleanup() we'll fail the detach if + * vmmdev_mtx is already held. We can't wait for vmmdev_mtx with our + * devinfo locked as iommu_cleanup() tries to recursively lock each + * devinfo, including our own, while holding vmmdev_mtx. + */ + if (mutex_tryenter(&vmmdev_mtx) == 0) + return (DDI_FAILURE); mutex_enter(&vmm_mtx); if (!list_is_empty(&vmm_list) || !list_is_empty(&vmm_destroy_list)) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index e2522858dd..2401774ab7 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -55,6 +55,8 @@ #include <sys/modhash.h> #include <sys/hma.h> +#include <sys/x86_archext.h> + #include <machine/cpufunc.h> #include <machine/fpu.h> #include <machine/md_var.h> @@ -91,6 +93,19 @@ u_char const bin2bcd_data[] = { 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 }; +void +pmap_invalidate_cache(void) +{ + cpuset_t cpuset; + + kpreempt_disable(); + cpuset_all_but(&cpuset, CPU->cpu_id); + xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL, + CPUSET2BV(cpuset), (xc_func_t)invalidate_cache); + invalidate_cache(); + kpreempt_enable(); +} + vm_paddr_t pmap_kextract(vm_offset_t va) { diff --git a/usr/src/uts/i86pc/ppt/Makefile b/usr/src/uts/i86pc/ppt/Makefile new file mode 100644 index 0000000000..f231dfddf6 --- /dev/null +++ b/usr/src/uts/i86pc/ppt/Makefile @@ -0,0 +1,86 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2013 Pluribus Networks Inc. +# Copyright 2019 Joyent, Inc. 
+# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = ppt +OBJECTS = $(PPT_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(PPT_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/i86pc/io/vmm/io +MAPFILE = $(UTSBASE)/i86pc/io/vmm/io/ppt.mapfile + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/Makefile.i86pc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Overrides and additions +# +ALL_BUILDS = $(ALL_BUILDSONLY64) +DEF_BUILDS = $(DEF_BUILDSONLY64) +PRE_INC_PATH = -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64 \ + -I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64 +INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io +AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR) + +LDFLAGS += -dy -N drv/vmm -N misc/pcie +LDFLAGS += -M $(MAPFILE) + +$(OBJS_DIR)/ppt.o := CERRWARN += -_gcc=-Wno-unused-variable + +# needs work +SMOFF += all_func_returns + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/Makefile.targ diff --git a/usr/src/uts/i86pc/sys/ppt_dev.h b/usr/src/uts/i86pc/sys/ppt_dev.h new file mode 100644 index 0000000000..e25f941f14 --- /dev/null +++ b/usr/src/uts/i86pc/sys/ppt_dev.h @@ -0,0 +1,56 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc + */ + +#ifndef _PPT_DEV_H +#define _PPT_DEV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define PPT_IOC (('P' << 16)|('T' << 8)) + +#define PPT_CFG_READ (PPT_IOC | 0x01) +#define PPT_CFG_WRITE (PPT_IOC | 0x02) +#define PPT_BAR_QUERY (PPT_IOC | 0x03) +#define PPT_BAR_READ (PPT_IOC | 0x04) +#define PPT_BAR_WRITE (PPT_IOC | 0x05) + +#define PPT_MAXNAMELEN 32 + +struct ppt_cfg_io { + uint64_t pci_off; + uint32_t pci_width; + uint32_t pci_data; +}; +struct ppt_bar_io { + uint32_t pbi_bar; + uint32_t pbi_off; + uint32_t pbi_width; + uint32_t pbi_data; +}; + +struct ppt_bar_query { + uint32_t pbq_baridx; + uint32_t pbq_type; + uint64_t pbq_base; + uint64_t pbq_size; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _PPT_DEV_H */ diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index 8a35d123c7..ac8f14b042 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -224,8 +224,13 @@ int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); void vm_free_memseg(struct vm *vm, int ident); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); +#ifdef __FreeBSD__ int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); +#else +int vm_assign_pptdev(struct vm *vm, int pptfd); +int vm_unassign_pptdev(struct vm *vm, int pptfd); +#endif /* __FreeBSD__ */ /* * APIs that inspect the guest memory map require only a *single* vcpu to diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index 58e581a60d..dd87dcb0a6 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -127,6 +127,7 @@ struct vm_capability { int allcpus; }; +#ifdef __FreeBSD__ struct vm_pptdev { int bus; int 
slot; @@ -163,6 +164,49 @@ struct vm_pptdev_msix { uint64_t addr; }; +struct vm_pptdev_limits { + int bus; + int slot; + int func; + int msi_limit; + int msix_limit; +}; +#else /* __FreeBSD__ */ +struct vm_pptdev { + int pptfd; +}; + +struct vm_pptdev_mmio { + int pptfd; + vm_paddr_t gpa; + vm_paddr_t hpa; + size_t len; +}; + +struct vm_pptdev_msi { + int vcpu; + int pptfd; + int numvec; /* 0 means disabled */ + uint64_t msg; + uint64_t addr; +}; + +struct vm_pptdev_msix { + int vcpu; + int pptfd; + int idx; + uint64_t msg; + uint32_t vector_control; + uint64_t addr; +}; + +struct vm_pptdev_limits { + int pptfd; + int msi_limit; + int msix_limit; +}; +#endif /* __FreeBSD__ */ + struct vm_nmi { int cpuid; }; @@ -307,6 +351,7 @@ enum { IOCNUM_MAP_PPTDEV_MMIO = 42, IOCNUM_PPTDEV_MSI = 43, IOCNUM_PPTDEV_MSIX = 44, + IOCNUM_GET_PPTDEV_LIMITS = 45, /* statistics */ IOCNUM_VM_STATS = 50, @@ -410,6 +455,8 @@ enum { _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) +#define VM_GET_PPTDEV_LIMITS \ + _IOR('v', IOCNUM_GET_PPTDEV_LIMITS, struct vm_pptdev_limits) #define VM_INJECT_NMI \ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS_IOC \ diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile index 5b93db987b..d5dc8d7124 100644 --- a/usr/src/uts/i86pc/vmm/Makefile +++ b/usr/src/uts/i86pc/vmm/Makefile @@ -104,11 +104,12 @@ CFLAGS += -_gcc=-Wno-format # enable collection of VMM statistics CFLAGS += -DVMM_KEEP_STATS -LDFLAGS += -Nfs/dev - $(OBJS_DIR)/vmm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits $(OBJS_DIR)/svm.o := CERRWARN += -_gcc=-Wno-pointer-sign -_gcc=-Wno-type-limits +$(OBJS_DIR)/vmx.o := CERRWARN += -_gcc=-Wno-unused-variable +$(OBJS_DIR)/iommu.o := CERRWARN += -_gcc=-Wno-unused-variable +LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev LDFLAGS += -z type=kmod -M $(MAPFILE) OFFSETS_VMX = $(CONF_SRCDIR)/intel/offsets.in diff --git 
a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 2562f9ec4b..49c0cce31c 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -1293,9 +1293,25 @@ fcnname/**/_info: \ #endif /* - * this is just a marker for the area of text that contains stubs + * Stubs for ppt module (bhyve PCI passthrough driver) */ +#ifndef PPT_MODULE + MODULE(ppt,drv); + WSTUB(ppt, ppt_unassign_all, nomod_zero); + WSTUB(ppt, ppt_map_mmio, nomod_einval); + WSTUB(ppt, ppt_setup_msi, nomod_einval); + WSTUB(ppt, ppt_setup_msix, nomod_einval); + WSTUB(ppt, ppt_assigned_devices, nomod_zero); + WSTUB(ppt, ppt_is_mmio, nomod_zero); + WSTUB(ppt, ppt_assign_device, nomod_einval); + WSTUB(ppt, ppt_unassign_device, nomod_einval); + WSTUB(ppt, ppt_get_limits, nomod_einval); + END_MODULE(ppt); +#endif +/* + * this is just a marker for the area of text that contains stubs + */ ENTRY_NP(stubs_end) nop |