diff options
author | Andy Fiddaman <illumos@fiddaman.net> | 2022-08-17 20:44:42 +0000 |
---|---|---|
committer | Andy Fiddaman <illumos@fiddaman.net> | 2022-11-05 13:48:21 +0000 |
commit | 4f3f3e9a1dee62c031fa67cfe64e11d6dd3fab1b (patch) | |
tree | c7ada529531e7e297db67acf08e1a325f79ec107 /usr/src | |
parent | cfed4d7055842c539437036c634e7fe84d10977d (diff) | |
download | illumos-gate-4f3f3e9a1dee62c031fa67cfe64e11d6dd3fab1b.tar.gz |
14763 bhyve upstream sync 2022 August
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
Diffstat (limited to 'usr/src')
43 files changed, 852 insertions, 437 deletions
diff --git a/usr/src/cmd/bhyve/README.sync b/usr/src/cmd/bhyve/README.sync index 2031c7ed0a..8175237b32 100644 --- a/usr/src/cmd/bhyve/README.sync +++ b/usr/src/cmd/bhyve/README.sync @@ -5,13 +5,11 @@ The bhyve userland code in this directory, and its associated libraries and parts of the kernel module have been updated to the latest upstream FreeBSD sources as of: - commit 3ebe1109348f53f64b395293578416abedef4090 - Author: Robert Wing <rew@FreeBSD.org> - Date: Thu Mar 17 21:55:52 2022 -0800 + commit fa46f3704b7618f9d9493c126df781faf59040a8 + Author: John Baldwin <jhb@FreeBSD.org> + Date: Wed Aug 17 10:01:16 2022 -0700 - bhyve: sweep MAX_VMNAME - - MAX_VMNAME is no longer used. + bhyve e1000: Skip packets with a small header. Divergence Notes: diff --git a/usr/src/cmd/bhyve/acpi.c b/usr/src/cmd/bhyve/acpi.c index 757111590f..fd0a6f732e 100644 --- a/usr/src/cmd/bhyve/acpi.c +++ b/usr/src/cmd/bhyve/acpi.c @@ -866,7 +866,7 @@ basl_compile(struct vmctx *ctx, int (*fwrite_section)(FILE *), uint64_t offset) { struct basl_fio io[2]; static char iaslbuf[3*MAXPATHLEN + 10]; - char *fmt; + const char *fmt; int err; err = basl_start(&io[0], &io[1]); diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 4d4c9578b9..90260a4ac9 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -252,6 +252,9 @@ usage(int code) " %*s [-s <pci>] [-U uuid] vmname\n" #endif " -a: local apic is in xAPIC mode (deprecated)\n" +#ifndef __FreeBSD__ + " -B type,key=value,...: set SMBIOS information\n" +#endif " -C: include guest memory in core file\n" " -c: number of cpus and/or topology specification\n" " -D: destroy on power-off\n" @@ -1740,11 +1743,9 @@ main(int argc, char *argv[]) } } -#ifndef __FreeBSD__ - smbios_apply(); -#endif error = smbios_build(ctx); - assert(error == 0); + if (error != 0) + exit(4); if (get_config_bool("acpi_tables")) { error = acpi_build(ctx, guest_ncpus); diff --git a/usr/src/cmd/bhyve/block_if.c 
b/usr/src/cmd/bhyve/block_if.c index 48948b4f23..dfbb9df85e 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -45,9 +45,9 @@ __FBSDID("$FreeBSD$"); #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/disk.h> +#ifndef __FreeBSD__ #include <sys/limits.h> #include <sys/uio.h> -#ifndef __FreeBSD__ #include <sys/dkio.h> #endif @@ -246,6 +246,29 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); } +static int +blockif_flush_bc(struct blockif_ctxt *bc) +{ +#ifdef __FreeBSD__ + if (bc->bc_ischr) { + if (ioctl(bc->bc_fd, DIOCGFLUSH)) + return (errno); + } else if (fsync(bc->bc_fd)) + return (errno); +#else + /* + * This fsync() should be adequate to flush the cache of a file + * or device. In VFS, the VOP_SYNC operation is converted to + * the appropriate ioctl in both sdev (for real devices) and + * zfs (for zvols). + */ + if (fsync(bc->bc_fd)) + return (errno); +#endif + + return (0); +} + static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { @@ -255,6 +278,9 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) #endif ssize_t clen, len, off, boff, voff; int i, err; +#ifdef __FreeBSD__ + struct spacectl_range range; +#endif br = be->be_req; if (br->br_iovcnt <= 1) @@ -338,22 +364,7 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) } break; case BOP_FLUSH: -#ifdef __FreeBSD__ - if (bc->bc_ischr) { - if (ioctl(bc->bc_fd, DIOCGFLUSH)) - err = errno; - } else if (fsync(bc->bc_fd)) - err = errno; -#else - /* - * This fsync() should be adequate to flush the cache of a file - * or device. In VFS, the VOP_SYNC operation is converted to - * the appropriate ioctl in both sdev (for real devices) and - * zfs (for zvols). 
- */ - if (fsync(bc->bc_fd)) - err = errno; -#endif + err = blockif_flush_bc(bc); break; case BOP_DELETE: if (!bc->bc_candelete) @@ -425,6 +436,12 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) (*br->br_callback)(br, err); } +static inline bool +blockif_empty(const struct blockif_ctxt *bc) +{ + return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq)); +} + static void * blockif_thr(void *arg) { @@ -451,6 +468,7 @@ blockif_thr(void *arg) /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; + pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); @@ -915,7 +933,6 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } @@ -923,7 +940,6 @@ blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } @@ -931,7 +947,6 @@ blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } @@ -939,7 +954,6 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } @@ -1107,7 +1121,6 @@ blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) off_t blockif_size(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } @@ -1115,7 +1128,6 @@ blockif_size(struct blockif_ctxt *bc) int blockif_sectsz(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); 
} @@ -1123,7 +1135,6 @@ blockif_sectsz(struct blockif_ctxt *bc) void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { - assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; @@ -1132,7 +1143,6 @@ blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) int blockif_queuesz(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } @@ -1140,7 +1150,6 @@ blockif_queuesz(struct blockif_ctxt *bc) int blockif_is_ro(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } @@ -1148,7 +1157,6 @@ blockif_is_ro(struct blockif_ctxt *bc) int blockif_candelete(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } diff --git a/usr/src/cmd/bhyve/config.c b/usr/src/cmd/bhyve/config.c index f8aace3b73..5d6f2c0170 100644 --- a/usr/src/cmd/bhyve/config.c +++ b/usr/src/cmd/bhyve/config.c @@ -312,7 +312,7 @@ _expand_config_value(const char *value, int depth) return (valbuf); } -const char * +static const char * expand_config_value(const char *value) { static char *valbuf; @@ -354,7 +354,7 @@ get_config_value_node(const nvlist_t *parent, const char *name) return (expand_config_value(nvlist_get_string(parent, name))); } -bool +static bool _bool_value(const char *name, const char *value) { diff --git a/usr/src/cmd/bhyve/fwctl.c b/usr/src/cmd/bhyve/fwctl.c index f0f9aa3aff..7027e34a77 100644 --- a/usr/src/cmd/bhyve/fwctl.c +++ b/usr/src/cmd/bhyve/fwctl.c @@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$"); /* * Back-end state-machine */ -enum state { +static enum state { DORMANT, IDENT_WAIT, IDENT_SEND, diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c index 1d1203e138..5dda1737b3 100644 --- a/usr/src/cmd/bhyve/gdb.c +++ b/usr/src/cmd/bhyve/gdb.c @@ -139,7 +139,7 @@ static struct vcpu_state *vcpu_state; static int cur_vcpu, stopped_vcpu; static bool gdb_active = false; -const int gdb_regset[] = { +static const int gdb_regset[] 
= { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, @@ -166,7 +166,7 @@ const int gdb_regset[] = { VM_REG_GUEST_GS }; -const int gdb_regsize[] = { +static const int gdb_regsize[] = { 8, 8, 8, @@ -1838,7 +1838,7 @@ new_connection(int fd, enum ev_type event, void *arg) } #ifndef WITHOUT_CAPSICUM -void +static void limit_gdb_socket(int s) { cap_rights_t rights; diff --git a/usr/src/cmd/bhyve/hda_codec.c b/usr/src/cmd/bhyve/hda_codec.c index 7a6ba345d8..2cc875d3d2 100644 --- a/usr/src/cmd/bhyve/hda_codec.c +++ b/usr/src/cmd/bhyve/hda_codec.c @@ -845,17 +845,15 @@ hda_codec_audio_inout_nid(struct hda_codec_stream *st, uint16_t verb, return (res); } -struct hda_codec_class hda_codec = { +static const struct hda_codec_class hda_codec = { .name = "hda_codec", .init = hda_codec_init, .reset = hda_codec_reset, .command = hda_codec_command, .notify = hda_codec_notify, }; - HDA_EMUL_SET(hda_codec); - /* * HDA Audio Context module function definitions */ diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c index 74ce0103a3..08756161a4 100644 --- a/usr/src/cmd/bhyve/mem.c +++ b/usr/src/cmd/bhyve/mem.c @@ -72,7 +72,7 @@ struct mmio_rb_range { struct mmio_rb_tree; RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); -RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; +static RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; /* * Per-vCPU cache. 
Since most accesses from a vCPU will be to diff --git a/usr/src/cmd/bhyve/net_backends.c b/usr/src/cmd/bhyve/net_backends.c index 329405964e..b870f4e865 100644 --- a/usr/src/cmd/bhyve/net_backends.c +++ b/usr/src/cmd/bhyve/net_backends.c @@ -196,7 +196,7 @@ SET_DECLARE(net_backend_set, struct net_backend); */ #if defined(INET6) || defined(INET) -const int pf_list[] = { +static const int pf_list[] = { #if defined(INET6) PF_INET6, #endif @@ -1316,6 +1316,7 @@ netbe_legacy_config(nvlist_t *nvl, const char *opts) /* Default to the 'dlpi' backend - can still be overridden by opts */ set_config_value_node(nvl, "backend", "dlpi"); + set_config_value_node(nvl, "type", "dlpi"); config = tofree = strdup(opts); if (config == NULL) @@ -1352,7 +1353,7 @@ netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { struct net_backend **pbe, *nbe, *tbe = NULL; - const char *value; + const char *value, *type; char *devname; int err; @@ -1363,11 +1364,19 @@ netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb, devname = strdup(value); /* + * Use the type given by configuration if exists; otherwise + * use the prefix of the backend as the type. + */ + type = get_config_value_node(nvl, "type"); + if (type == NULL) + type = devname; + + /* * Find the network backend that matches the user-provided * device name. net_backend_set is built using a linker set. 
*/ SET_FOREACH(pbe, net_backend_set) { - if (strncmp(devname, (*pbe)->prefix, + if (strncmp(type, (*pbe)->prefix, strlen((*pbe)->prefix)) == 0) { tbe = *pbe; assert(tbe->init != NULL); diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c index f973b8964f..2a4b97fbbf 100644 --- a/usr/src/cmd/bhyve/pci_ahci.c +++ b/usr/src/cmd/bhyve/pci_ahci.c @@ -2570,7 +2570,7 @@ open_fail: /* * Use separate emulation names to distinguish drive and atapi devices */ -struct pci_devemu pci_de_ahci = { +static const struct pci_devemu pci_de_ahci = { .pe_emu = "ahci", .pe_init = pci_ahci_init, .pe_legacy_config = pci_ahci_legacy_config, @@ -2579,14 +2579,14 @@ struct pci_devemu pci_de_ahci = { }; PCI_EMUL_SET(pci_de_ahci); -struct pci_devemu pci_de_ahci_hd = { +static const struct pci_devemu pci_de_ahci_hd = { .pe_emu = "ahci-hd", .pe_legacy_config = pci_ahci_hd_legacy_config, .pe_alias = "ahci", }; PCI_EMUL_SET(pci_de_ahci_hd); -struct pci_devemu pci_de_ahci_cd = { +static const struct pci_devemu pci_de_ahci_cd = { .pe_emu = "ahci-cd", .pe_legacy_config = pci_ahci_cd_legacy_config, .pe_alias = "ahci", diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index f4eaa0c93b..363e203692 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -232,7 +232,7 @@ struct ck_info { * Debug printf */ static int e82545_debug = 0; -#define WPRINTF(msg,params...) PRINTLN("e82545: " msg, params) +#define WPRINTF(msg,params...) PRINTLN("e82545: " msg, ##params) #define DPRINTF(msg,params...) if (e82545_debug) WPRINTF(msg, params) #define MIN(a,b) (((a)<(b))?(a):(b)) @@ -811,7 +811,7 @@ e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) sc->esc_TCTL = val & ~0xFE800005; } -int +static int e82545_bufsz(uint32_t rctl) { @@ -1025,7 +1025,7 @@ e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) /* * Return the transmit descriptor type. 
*/ -int +static int e82545_txdesc_type(uint32_t lower) { int type; @@ -1089,15 +1089,18 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, union e1000_tx_udesc *dsc; int desc, dtype, len, ntype, iovcnt, tcp, tso; int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; - unsigned hdrlen, vlen; + unsigned hdrlen, vlen, pktlen; uint32_t tcpsum, tcpseq; uint16_t ipcs, tcpcs, ipid, ohead; + bool invalid; ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0; iovcnt = 0; ntype = 0; tso = 0; + pktlen = 0; ohead = head; + invalid = false; /* iovb[0/1] may be used for writable copy of headers. */ iov = &iovb[2]; @@ -1147,17 +1150,23 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length : dsc->dd.lower.data & 0xFFFFF; - if (len > 0) { - /* Strip checksum supplied by guest. */ - if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && - (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) + /* Strip checksum supplied by guest. 
*/ + if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && + (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) { + if (len <= 2) { + WPRINTF("final descriptor too short (%d) -- dropped", + len); + invalid = true; + } else len -= 2; - if (iovcnt < I82545_MAX_TXSEGS) { - iov[iovcnt].iov_base = paddr_guest2host( - sc->esc_ctx, dsc->td.buffer_addr, len); - iov[iovcnt].iov_len = len; - } + } + + if (len > 0 && iovcnt < I82545_MAX_TXSEGS) { + iov[iovcnt].iov_base = paddr_guest2host(sc->esc_ctx, + dsc->td.buffer_addr, len); + iov[iovcnt].iov_len = len; iovcnt++; + pktlen += len; } /* @@ -1205,6 +1214,9 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } } + if (invalid) + goto done; + if (iovcnt > I82545_MAX_TXSEGS) { WPRINTF("tx too many descriptors (%d > %d) -- dropped", iovcnt, I82545_MAX_TXSEGS); @@ -1296,10 +1308,20 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, "(%d) -- dropped", hdrlen); goto done; } + if (ckinfo[1].ck_valid && hdrlen < ckinfo[1].ck_off + 2) { + WPRINTF("TSO hdrlen too small for TCP/UDP fields " + "(%d) -- dropped", hdrlen); + goto done; + } + } + + if (pktlen < hdrlen + vlen) { + WPRINTF("packet too small for writable header"); + goto done; } /* Allocate, fill and prepend writable header vector. 
*/ - if (hdrlen != 0) { + if (hdrlen + vlen != 0) { hdr = __builtin_alloca(hdrlen + vlen); hdr += vlen; for (left = hdrlen, hdrp = hdr; left > 0; @@ -2371,7 +2393,7 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) return (0); } -struct pci_devemu pci_de_e82545 = { +static const struct pci_devemu pci_de_e82545 = { .pe_emu = "e1000", .pe_init = e82545_init, .pe_legacy_config = netbe_legacy_config, @@ -2379,4 +2401,3 @@ struct pci_devemu pci_de_e82545 = { .pe_barread = e82545_read, }; PCI_EMUL_SET(pci_de_e82545); - diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c index 413633411d..ccb1ce9c4d 100644 --- a/usr/src/cmd/bhyve/pci_emul.c +++ b/usr/src/cmd/bhyve/pci_emul.c @@ -178,6 +178,18 @@ CFGREAD(struct pci_devinst *pi, int coff, int bytes) return (pci_get_cfgdata32(pi, coff)); } +static int +is_pcir_bar(int coff) +{ + return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)); +} + +static int +is_pcir_bios(int coff) +{ + return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4); +} + /* * I/O access */ @@ -321,7 +333,7 @@ done: } void -pci_print_supported_devices() +pci_print_supported_devices(void) { struct pci_devemu **pdpp, *pdp; @@ -815,9 +827,6 @@ pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, limit = 0; mask = PCIM_BIOS_ADDR_MASK; lobits = 0; -#ifndef __FreeBSD__ - addr = 0; -#endif break; default: printf("pci_emul_alloc_base: invalid bar type %d\n", type); @@ -832,6 +841,8 @@ pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, error = pci_emul_alloc_resource(baseptr, limit, size, &addr); if (error != 0) return (error); + } else { + addr = 0; } pdi->pi_bar[idx].type = type; @@ -2132,27 +2143,23 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, /* * Special handling for write to BAR and ROM registers */ - if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) || - (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) { + if (is_pcir_bar(coff) || 
is_pcir_bios(coff)) { /* * Ignore writes to BAR registers that are not * 4-byte aligned. */ if (bytes != 4 || (coff & 0x3) != 0) return; -#ifndef __FreeBSD__ - if (coff < PCIR_BIOS) { + + if (is_pcir_bar(coff)) { idx = (coff - PCIR_BAR(0)) / 4; - } else { + } else if (is_pcir_bios(coff)) { idx = PCI_ROM_IDX; - } -#else - if (coff != PCIR_BIOS) { - idx = (coff - PCIR_BAR(0)) / 4; } else { - idx = PCI_ROM_IDX; + errx(4, "%s: invalid BAR offset %d", __func__, + coff); } -#endif + mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { case PCIBAR_NONE: @@ -2451,7 +2458,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, return (value); } -struct pci_devemu pci_dummy = { +static const struct pci_devemu pci_dummy = { .pe_emu = "dummy", .pe_init = pci_emul_dinit, .pe_barwrite = pci_emul_diow, diff --git a/usr/src/cmd/bhyve/pci_emul.h b/usr/src/cmd/bhyve/pci_emul.h index 2929e0c307..c19b6d2fac 100644 --- a/usr/src/cmd/bhyve/pci_emul.h +++ b/usr/src/cmd/bhyve/pci_emul.h @@ -53,7 +53,7 @@ struct pci_devinst; struct memory_region; struct pci_devemu { - char *pe_emu; /* Name of device emulation */ + const char *pe_emu; /* Name of device emulation */ /* instance creation */ int (*pe_init)(struct vmctx *, struct pci_devinst *, @@ -255,7 +255,7 @@ int pci_msix_pba_bar(struct pci_devinst *pi); int pci_msi_maxmsgnum(struct pci_devinst *pi); int pci_parse_legacy_config(nvlist_t *nvl, const char *opt); int pci_parse_slot(char *opt); -void pci_print_supported_devices(); +void pci_print_supported_devices(void); void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum); int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, diff --git a/usr/src/cmd/bhyve/pci_fbuf.c b/usr/src/cmd/bhyve/pci_fbuf.c index 5a17b1e618..35764f253f 100644 --- a/usr/src/cmd/bhyve/pci_fbuf.c +++ b/usr/src/cmd/bhyve/pci_fbuf.c @@ -174,7 +174,7 @@ 
pci_fbuf_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, } } -uint64_t +static uint64_t pci_fbuf_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { @@ -356,7 +356,7 @@ pci_fbuf_parse_config(struct pci_fbuf_softc *sc, nvlist_t *nvl) extern void vga_render(struct bhyvegc *gc, void *arg); -void +static void pci_fbuf_render(struct bhyvegc *gc, void *arg) { struct pci_fbuf_softc *sc; @@ -479,7 +479,7 @@ done: return (error); } -struct pci_devemu pci_fbuf = { +static const struct pci_devemu pci_fbuf = { .pe_emu = "fbuf", .pe_init = pci_fbuf_init, .pe_barwrite = pci_fbuf_write, diff --git a/usr/src/cmd/bhyve/pci_hda.c b/usr/src/cmd/bhyve/pci_hda.c index 7491944fed..1a2a3844ab 100644 --- a/usr/src/cmd/bhyve/pci_hda.c +++ b/usr/src/cmd/bhyve/pci_hda.c @@ -94,7 +94,7 @@ struct hda_bdle_desc { }; struct hda_codec_cmd_ctl { - char *name; + const char *name; void *dma_vaddr; uint8_t run; uint16_t rp; @@ -270,13 +270,12 @@ static struct hda_ops hops = { .transfer = hda_transfer, }; -struct pci_devemu pci_de_hda = { +static const struct pci_devemu pci_de_hda = { .pe_emu = "hda", .pe_init = pci_hda_init, .pe_barwrite = pci_hda_write, .pe_barread = pci_hda_read }; - PCI_EMUL_SET(pci_de_hda); SET_DECLARE(hda_codec_class_set, struct hda_codec_class); @@ -730,7 +729,7 @@ static inline void hda_print_cmd_ctl_data(struct hda_codec_cmd_ctl *p) { #if DEBUG_HDA == 1 - char *name = p->name; + const char *name = p->name; #endif DPRINTF("%s size: %d", name, p->size); DPRINTF("%s dma_vaddr: %p", name, p->dma_vaddr); diff --git a/usr/src/cmd/bhyve/pci_hda.h b/usr/src/cmd/bhyve/pci_hda.h index a34366dedc..e868671921 100644 --- a/usr/src/cmd/bhyve/pci_hda.h +++ b/usr/src/cmd/bhyve/pci_hda.h @@ -70,7 +70,7 @@ struct hda_codec_inst { }; struct hda_codec_class { - char *name; + const char *name; int (*init)(struct hda_codec_inst *hci, const char *play, const char *rec); int (*reset)(struct hda_codec_inst *hci); diff --git 
a/usr/src/cmd/bhyve/pci_hostbridge.c b/usr/src/cmd/bhyve/pci_hostbridge.c index d35bdcf640..db7690e4b6 100644 --- a/usr/src/cmd/bhyve/pci_hostbridge.c +++ b/usr/src/cmd/bhyve/pci_hostbridge.c @@ -156,14 +156,14 @@ pci_amd_hostbridge_legacy_config(nvlist_t *nvl, const char *opts) return (0); } -struct pci_devemu pci_de_amd_hostbridge = { +static const struct pci_devemu pci_de_amd_hostbridge = { .pe_emu = "amd_hostbridge", .pe_legacy_config = pci_amd_hostbridge_legacy_config, .pe_alias = "hostbridge", }; PCI_EMUL_SET(pci_de_amd_hostbridge); -struct pci_devemu pci_de_hostbridge = { +static const struct pci_devemu pci_de_hostbridge = { .pe_emu = "hostbridge", .pe_init = pci_hostbridge_init, }; diff --git a/usr/src/cmd/bhyve/pci_lpc.c b/usr/src/cmd/bhyve/pci_lpc.c index 27cf8004b4..2702e0fdca 100644 --- a/usr/src/cmd/bhyve/pci_lpc.c +++ b/usr/src/cmd/bhyve/pci_lpc.c @@ -146,7 +146,7 @@ done: } void -lpc_print_supported_devices() +lpc_print_supported_devices(void) { size_t i; @@ -521,7 +521,7 @@ lpc_pirq_routed(void) pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); } -struct pci_devemu pci_de_lpc = { +static const struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, diff --git a/usr/src/cmd/bhyve/pci_lpc.h b/usr/src/cmd/bhyve/pci_lpc.h index 9041f79c50..611b025d43 100644 --- a/usr/src/cmd/bhyve/pci_lpc.h +++ b/usr/src/cmd/bhyve/pci_lpc.h @@ -68,7 +68,7 @@ struct lpc_sysres { #define SYSRES_MEM(base, length) LPC_SYSRES(LPC_SYSRES_MEM, base, length) int lpc_device_parse(const char *opt); -void lpc_print_supported_devices(); +void lpc_print_supported_devices(void); char *lpc_pirq_name(int pin); void lpc_pirq_routed(void); const char *lpc_bootrom(void); diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index 488c7d4770..717d400bc0 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -401,6 +401,7 @@ static void pci_nvme_io_done(struct blockif_req *, int); 
((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) +#define NVME_CSTS_CFS (1 << NVME_CSTS_REG_CFS_SHIFT) /* Completion Queue status word utils */ #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) @@ -548,16 +549,24 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) cd->aerl = 4; /* Advertise 1, Read-only firmware slot */ - cd->frmw = NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK | + cd->frmw = NVMEB(NVME_CTRLR_DATA_FRMW_SLOT1_RO) | (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT); cd->lpa = 0; /* TODO: support some simple things like SMART */ cd->elpe = 0; /* max error log page entries */ - cd->npss = 1; /* number of power states support */ + /* + * Report a single power state (zero-based value) + * power_state[] values are left as zero to indicate "Not reported" + */ + cd->npss = 0; /* Warning Composite Temperature Threshold */ cd->wctemp = 0x0157; cd->cctemp = 0x0157; + /* SANICAP must not be 0 for Revision 1.4 and later NVMe Controllers */ + cd->sanicap = (NVME_CTRLR_DATA_SANICAP_NODMMAS_NO << + NVME_CTRLR_DATA_SANICAP_NODMMAS_SHIFT); + cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | @@ -581,8 +590,6 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT; cd->vwc = NVME_CTRLR_DATA_VWC_ALL_NO << NVME_CTRLR_DATA_VWC_ALL_SHIFT; - - cd->power_state[0].mp = 10; } /* @@ -698,6 +705,11 @@ pci_nvme_init_logpages(struct pci_nvme_softc *sc) sc->health_log.temperature = NVME_TEMPERATURE; sc->health_log.available_spare = 100; sc->health_log.available_spare_threshold = 10; + + /* Set Active Firmware Info to slot 1 */ + sc->fw_log.afi = (1 << NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT); + memcpy(&sc->fw_log.revision[0], sc->ctrldata.fr, + sizeof(sc->fw_log.revision[0])); } static void @@ -1026,7 +1038,7 @@ aen_thr(void *arg) pci_nvme_aen_process(sc); pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx); } -#ifdef 
__FreeBSD__ +#ifdef __FreeBSD__ /* Smatch spots unreachable code */ pthread_mutex_unlock(&sc->aen_mtx); pthread_exit(NULL); @@ -1090,30 +1102,61 @@ pci_nvme_reset(struct pci_nvme_softc *sc) pthread_mutex_unlock(&sc->mtx); } -static void +static int pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) { uint16_t acqs, asqs; DPRINTF("%s", __func__); - asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; + /* + * NVMe 2.0 states that "enabling a controller while this field is + * cleared to 0h produces undefined results" for both ACQS and + * ASQS. If zero, set CFS and do not become ready. + */ + asqs = ONE_BASED(sc->regs.aqa & NVME_AQA_REG_ASQS_MASK); + if (asqs < 2) { + EPRINTLN("%s: illegal ASQS value %#x (aqa=%#x)", __func__, + asqs - 1, sc->regs.aqa); + sc->regs.csts |= NVME_CSTS_CFS; + return (-1); + } sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); + if (sc->submit_queues[0].qbase == NULL) { + EPRINTLN("%s: ASQ vm_map_gpa(%lx) failed", __func__, + sc->regs.asq); + sc->regs.csts |= NVME_CSTS_CFS; + return (-1); + } DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase); - acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & - NVME_AQA_REG_ACQS_MASK) + 1; + acqs = ONE_BASED((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & + NVME_AQA_REG_ACQS_MASK); + if (acqs < 2) { + EPRINTLN("%s: illegal ACQS value %#x (aqa=%#x)", __func__, + acqs - 1, sc->regs.aqa); + sc->regs.csts |= NVME_CSTS_CFS; + return (-1); + } sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); + if (sc->compl_queues[0].qbase == NULL) { + EPRINTLN("%s: ACQ vm_map_gpa(%lx) failed", __func__, + sc->regs.acq); + sc->regs.csts |= NVME_CSTS_CFS; + return (-1); + } sc->compl_queues[0].intr_en = NVME_CQ_INTEN; DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p", __func__, sc->regs.acq, 
sc->compl_queues[0].qbase); + + return (0); } static int @@ -1395,13 +1438,7 @@ nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, { uint64_t logoff; uint32_t logsize; - uint8_t logpage = command->cdw10 & 0xFF; - -#ifndef __FreeBSD__ - logsize = 0; -#endif - - DPRINTF("%s log page %u len %u", __func__, logpage, logsize); + uint8_t logpage; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); @@ -1409,10 +1446,13 @@ nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, * Command specifies the number of dwords to return in fields NUMDU * and NUMDL. This is a zero-based value. */ + logpage = command->cdw10 & 0xFF; logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1; logsize *= sizeof(uint32_t); logoff = ((uint64_t)(command->cdw13) << 32) | command->cdw12; + DPRINTF("%s log page %u len %u", __func__, logpage, logsize); + switch (logpage) { case NVME_LOG_ERROR: if (logoff >= sizeof(sc->err_log)) { @@ -1523,7 +1563,7 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All unused entries shall be zero */ - bzero(dest, sizeof(uint32_t) * 1024); + memset(dest, 0, sizeof(uint32_t) * 1024); ((uint32_t *)dest)[0] = 1; break; case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ @@ -1535,12 +1575,21 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All bytes after the descriptor shall be zero */ - bzero(dest, sizeof(uint32_t) * 1024); + memset(dest, 0, sizeof(uint32_t) * 1024); /* Return NIDT=1 (i.e. 
EUI64) descriptor */ ((uint8_t *)dest)[0] = 1; ((uint8_t *)dest)[1] = sizeof(uint64_t); - bcopy(sc->nsdata.eui64, ((uint8_t *)dest) + 4, sizeof(uint64_t)); + memcpy(((uint8_t *)dest) + 4, sc->nsdata.eui64, sizeof(uint64_t)); + break; + case 0x13: + /* + * Controller list is optional but used by UNH tests. Return + * a valid but empty list. + */ + dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, + sizeof(uint16_t) * 2048); + memset(dest, 0, sizeof(uint16_t) * 2048); break; default: DPRINTF("%s unsupported identify command requested 0x%x", @@ -1655,18 +1704,17 @@ nvme_fid_to_name(uint8_t fid) } static void -nvme_feature_invalid_cb(struct pci_nvme_softc *sc, - struct nvme_feature_obj *feat, - struct nvme_command *command, +nvme_feature_invalid_cb(struct pci_nvme_softc *sc __unused, + struct nvme_feature_obj *feat __unused, + struct nvme_command *command __unused, struct nvme_completion *compl) { - pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } static void nvme_feature_iv_config(struct pci_nvme_softc *sc, - struct nvme_feature_obj *feat, + struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { @@ -1698,12 +1746,11 @@ nvme_feature_iv_config(struct pci_nvme_softc *sc, #define NVME_ASYNC_EVENT_ENDURANCE_GROUP (0x4000) static void -nvme_feature_async_event(struct pci_nvme_softc *sc, - struct nvme_feature_obj *feat, +nvme_feature_async_event(struct pci_nvme_softc *sc __unused, + struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { - if (command->cdw11 & NVME_ASYNC_EVENT_ENDURANCE_GROUP) pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } @@ -1712,7 +1759,7 @@ nvme_feature_async_event(struct pci_nvme_softc *sc, #define NVME_TEMP_THRESH_UNDER 1 static void nvme_feature_temperature(struct pci_nvme_softc *sc, - struct nvme_feature_obj *feat, + struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { @@ 
-1757,7 +1804,7 @@ nvme_feature_temperature(struct pci_nvme_softc *sc, static void nvme_feature_num_queues(struct pci_nvme_softc *sc, - struct nvme_feature_obj *feat, + struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { @@ -1955,8 +2002,8 @@ nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command, } static int -nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, - struct nvme_completion* compl) +nvme_opc_abort(struct pci_nvme_softc *sc __unused, struct nvme_command *command, + struct nvme_completion *compl) { DPRINTF("%s submission queue %u, command ID 0x%x", __func__, command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF); @@ -2185,9 +2232,10 @@ pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba, static int pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, - uint64_t gpaddr, size_t size, int do_write, uint64_t lba) + uint64_t gpaddr, size_t size, int do_write, uint64_t offset) { int iovidx; + bool range_is_contiguous; if (req == NULL) return (-1); @@ -2196,13 +2244,24 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, return (-1); } - /* concatenate contig block-iovs to minimize number of iovs */ - if ((req->prev_gpaddr + req->prev_size) == gpaddr) { + /* + * Minimize the number of IOVs by concatenating contiguous address + * ranges. If the IOV count is zero, there is no previous range to + * concatenate. 
+ */ + if (req->io_req.br_iovcnt == 0) + range_is_contiguous = false; + else + range_is_contiguous = (req->prev_gpaddr + req->prev_size) == gpaddr; + + if (range_is_contiguous) { iovidx = req->io_req.br_iovcnt - 1; req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, req->prev_gpaddr, size); + if (req->io_req.br_iov[iovidx].iov_base == NULL) + return (-1); req->prev_size += size; req->io_req.br_resid += size; @@ -2211,7 +2270,7 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, } else { iovidx = req->io_req.br_iovcnt; if (iovidx == 0) { - req->io_req.br_offset = lba; + req->io_req.br_offset = offset; req->io_req.br_resid = 0; req->io_req.br_param = req; } @@ -2219,6 +2278,8 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, gpaddr, size); + if (req->io_req.br_iov[iovidx].iov_base == NULL) + return (-1); req->io_req.br_iov[iovidx].iov_len = size; @@ -2234,8 +2295,7 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, static void pci_nvme_set_completion(struct pci_nvme_softc *sc, - struct nvme_submission_queue *sq, int sqid, uint16_t cid, - uint32_t cdw0, uint16_t status) + struct nvme_submission_queue *sq, int sqid, uint16_t cid, uint16_t status) { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; @@ -2243,11 +2303,7 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc, __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status)); - pci_nvme_cq_update(sc, cq, - 0, /* CDW0 */ - cid, - sqid, - status); + pci_nvme_cq_update(sc, cq, 0, cid, sqid, status); if (cq->head != cq->tail) { if (cq->intr_en & NVME_CQ_INTEN) { @@ -2326,7 +2382,7 @@ pci_nvme_io_done(struct blockif_req *br, int err) code = err ? 
NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); - pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status); + pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, status); pci_nvme_stats_write_read_update(req->sc, req->opc, req->bytes, status); pci_nvme_release_ioreq(req->sc, req); @@ -2341,8 +2397,8 @@ pci_nvme_io_done(struct blockif_req *br, int err) * not supported (i.e. RAM or as indicated by the blockif). */ static bool -nvme_opc_flush(struct pci_nvme_softc *sc, - struct nvme_command *cmd, +nvme_opc_flush(struct pci_nvme_softc *sc __unused, + struct nvme_command *cmd __unused, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) @@ -2417,8 +2473,7 @@ nvme_write_read_blockif(struct pci_nvme_softc *sc, size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes); if (pci_nvme_append_iov_req(sc, req, prp1, size, is_write, offset)) { - pci_nvme_status_genc(&status, - NVME_SC_DATA_TRANSFER_ERROR); + err = -1; goto out; } @@ -2431,8 +2486,7 @@ nvme_write_read_blockif(struct pci_nvme_softc *sc, size = bytes; if (pci_nvme_append_iov_req(sc, req, prp2, size, is_write, offset)) { - pci_nvme_status_genc(&status, - NVME_SC_DATA_TRANSFER_ERROR); + err = -1; goto out; } } else { @@ -2448,6 +2502,10 @@ nvme_write_read_blockif(struct pci_nvme_softc *sc, prp_list = paddr_guest2host(vmctx, prp, PAGE_SIZE - (prp % PAGE_SIZE)); + if (prp_list == NULL) { + err = -1; + goto out; + } last = prp_list + (NVME_PRP2_ITEMS - 1); } @@ -2455,8 +2513,7 @@ nvme_write_read_blockif(struct pci_nvme_softc *sc, if (pci_nvme_append_iov_req(sc, req, *prp_list, size, is_write, offset)) { - pci_nvme_status_genc(&status, - NVME_SC_DATA_TRANSFER_ERROR); + err = -1; goto out; } @@ -2471,10 +2528,10 @@ nvme_write_read_blockif(struct pci_nvme_softc *sc, err = blockif_write(nvstore->ctx, &req->io_req); else err = blockif_read(nvstore->ctx, &req->io_req); - +out: if (err) pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); -out: 
+ return (status); } @@ -2490,12 +2547,14 @@ nvme_opc_write_read(struct pci_nvme_softc *sc, bool is_write = cmd->opc == NVME_OPC_WRITE; bool pending = false; -#ifndef __FreeBSD__ - bytes = 0; -#endif - lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; nblocks = (cmd->cdw12 & 0xFFFF) + 1; + bytes = nblocks << nvstore->sectsz_bits; + if (bytes > NVME_MAX_DATA_SIZE) { + WPRINTF("%s command would exceed MDTS", __func__); + pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); + goto out; + } if (pci_nvme_out_of_range(nvstore, lba, nblocks)) { WPRINTF("%s command would exceed LBA range(slba=%#lx nblocks=%#lx)", @@ -2504,13 +2563,6 @@ nvme_opc_write_read(struct pci_nvme_softc *sc, goto out; } - bytes = nblocks << nvstore->sectsz_bits; - if (bytes > NVME_MAX_DATA_SIZE) { - WPRINTF("%s command would exceed MDTS", __func__); - pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); - goto out; - } - offset = lba << nvstore->sectsz_bits; req->bytes = bytes; @@ -2570,8 +2622,8 @@ pci_nvme_dealloc_sm(struct blockif_req *br, int err) } if (done) { - pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, - req->cid, 0, status); + pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, req->cid, + status); pci_nvme_release_ioreq(sc, req); } } @@ -2583,15 +2635,11 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, uint16_t *status) { - struct nvme_dsm_range *range; + struct nvme_dsm_range *range = NULL; uint32_t nr, r, non_zero, dr; int err; bool pending = false; -#ifndef __FreeBSD__ - range = NULL; -#endif - if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE); goto out; @@ -2781,8 +2829,7 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) } complete: if (!pending) { - pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, - status); + pci_nvme_set_completion(sc, sq, idx, cmd->cid, status); if (req != NULL) pci_nvme_release_ioreq(sc, req); } @@ -2794,7 +2841,7 @@ complete: } static void 
-pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, +pci_nvme_handle_doorbell(struct vmctx *ctx __unused, struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { DPRINTF("nvme doorbell %lu, %s, val 0x%lx", @@ -2898,6 +2945,12 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t idx = belloffset / 8; /* door bell size = 2*int */ int is_sq = (belloffset % 8) < 4; + if ((sc->regs.csts & NVME_CSTS_RDY) == 0) { + WPRINTF("doorbell write prior to RDY (offset=%#lx)\n", + offset); + return; + } + if (belloffset > ((sc->max_queues+1) * 8 - 4)) { WPRINTF("guest attempted an overflow write offset " "0x%lx, val 0x%lx in %s", @@ -2905,6 +2958,12 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, return; } + if (is_sq) { + if (sc->submit_queues[idx].qbase == NULL) + return; + } else if (sc->compl_queues[idx].qbase == NULL) + return; + pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value); return; } @@ -2971,7 +3030,8 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK; sc->regs.csts &= ~NVME_CSTS_RDY; - } else if (sc->pending_ios == 0) { + } else if ((sc->pending_ios == 0) && + !(sc->regs.csts & NVME_CSTS_CFS)) { sc->regs.csts |= NVME_CSTS_RDY; } break; @@ -3007,8 +3067,8 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, } static void -pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) +pci_nvme_write(struct vmctx *ctx, int vcpu __unused, struct pci_devinst *pi, + int baridx, uint64_t offset, int size, uint64_t value) { struct pci_nvme_softc* sc = pi->pi_arg; @@ -3070,8 +3130,8 @@ static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, static uint64_t -pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size) +pci_nvme_read(struct vmctx *ctx __unused, 
int vcpu __unused, + struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_nvme_softc* sc = pi->pi_arg; @@ -3198,7 +3258,8 @@ pci_nvme_parse_config(struct pci_nvme_softc *sc, nvlist_t *nvl) } static void -pci_nvme_resized(struct blockif_ctxt *bctxt, void *arg, size_t new_size) +pci_nvme_resized(struct blockif_ctxt *bctxt __unused, void *arg, + size_t new_size) { struct pci_nvme_softc *sc; struct pci_nvme_blockstore *nvstore; @@ -3220,7 +3281,7 @@ pci_nvme_resized(struct blockif_ctxt *bctxt, void *arg, size_t new_size) } static int -pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) +pci_nvme_init(struct vmctx *ctx __unused, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_nvme_softc *sc; uint32_t pci_membar_sz; @@ -3329,7 +3390,7 @@ pci_nvme_legacy_config(nvlist_t *nvl, const char *opts) return (blockif_legacy_config(nvl, opts)); } -struct pci_devemu pci_de_nvme = { +static const struct pci_devemu pci_de_nvme = { .pe_emu = "nvme", .pe_init = pci_nvme_init, .pe_legacy_config = pci_nvme_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index 75176d6fe6..8b5f3d05ab 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -1087,7 +1087,7 @@ passthru_addr_rom(struct pci_devinst *const pi, const int idx, } else { if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM, pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) { - errx(4, "%s: mnmap_memseg @ [%016lx - %016lx] failed", + errx(4, "%s: mmap_memseg @ [%016lx - %016lx] failed", __func__, addr, addr + size); } } @@ -1117,7 +1117,7 @@ passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, } } -struct pci_devemu passthru = { +static const struct pci_devemu passthru = { .pe_emu = "passthru", .pe_init = passthru_init, .pe_legacy_config = passthru_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_uart.c b/usr/src/cmd/bhyve/pci_uart.c index 25ef1ed662..3064d6fe62 100644 --- 
a/usr/src/cmd/bhyve/pci_uart.c +++ b/usr/src/cmd/bhyve/pci_uart.c @@ -125,7 +125,7 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) return (0); } -struct pci_devemu pci_de_com = { +static const struct pci_devemu pci_de_com = { .pe_emu = "uart", .pe_init = pci_uart_init, .pe_legacy_config = pci_uart_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_virtio_9p.c b/usr/src/cmd/bhyve/pci_virtio_9p.c index 9808fee46d..2e169dc7cb 100644 --- a/usr/src/cmd/bhyve/pci_virtio_9p.c +++ b/usr/src/cmd/bhyve/pci_virtio_9p.c @@ -389,7 +389,7 @@ pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) return (0); } -struct pci_devemu pci_de_v9p = { +static const struct pci_devemu pci_de_v9p = { .pe_emu = "virtio-9p", .pe_legacy_config = pci_vt9p_legacy_config, .pe_init = pci_vt9p_init, diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index 30998161f0..7a0667e7b0 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -585,7 +585,7 @@ pci_vtblk_apply_feats(void *vsc, uint64_t caps) } #endif /* __FreeBSD__ */ -struct pci_devemu pci_de_vblk = { +static const struct pci_devemu pci_de_vblk = { .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, .pe_legacy_config = blockif_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_virtio_console.c b/usr/src/cmd/bhyve/pci_virtio_console.c index 59099e688e..51b7acb1ba 100644 --- a/usr/src/cmd/bhyve/pci_virtio_console.c +++ b/usr/src/cmd/bhyve/pci_virtio_console.c @@ -661,7 +661,6 @@ pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq) } } -#ifdef __FreeBSD__ /* * Each console device has a "port" node which contains nodes for * each port. Ports are numbered starting at 0. 
@@ -710,7 +709,6 @@ pci_vtcon_legacy_config(nvlist_t *nvl, const char *opts) free(tofree); return (error); } -#endif static int pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) @@ -779,10 +777,11 @@ pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) return (0); } -struct pci_devemu pci_de_vcon = { +static const struct pci_devemu pci_de_vcon = { .pe_emu = "virtio-console", .pe_init = pci_vtcon_init, .pe_barwrite = vi_pci_write, - .pe_barread = vi_pci_read + .pe_barread = vi_pci_read, + .pe_legacy_config = pci_vtcon_legacy_config, }; PCI_EMUL_SET(pci_de_vcon); diff --git a/usr/src/cmd/bhyve/pci_virtio_input.c b/usr/src/cmd/bhyve/pci_virtio_input.c index 4517333b16..404213d907 100644 --- a/usr/src/cmd/bhyve/pci_virtio_input.c +++ b/usr/src/cmd/bhyve/pci_virtio_input.c @@ -772,7 +772,7 @@ failed: return (-1); } -struct pci_devemu pci_de_vinput = { +static const struct pci_devemu pci_de_vinput = { .pe_emu = "virtio-input", .pe_init = pci_vtinput_init, .pe_legacy_config = pci_vtinput_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index b7094484aa..a496414809 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -765,7 +765,7 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) pthread_mutex_unlock(&sc->rx_mtx); } -static struct pci_devemu pci_de_vnet = { +static const struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_legacy_config = netbe_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_virtio_rnd.c b/usr/src/cmd/bhyve/pci_virtio_rnd.c index 4aff95a34e..4fbedc639c 100644 --- a/usr/src/cmd/bhyve/pci_virtio_rnd.c +++ b/usr/src/cmd/bhyve/pci_virtio_rnd.c @@ -204,7 +204,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) } -struct pci_devemu pci_de_vrnd = { +static const struct pci_devemu pci_de_vrnd = { .pe_emu = "virtio-rnd", .pe_init = pci_vtrnd_init, 
.pe_barwrite = vi_pci_write, diff --git a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c index 6f00f58811..6f7dceb05d 100644 --- a/usr/src/cmd/bhyve/pci_virtio_scsi.c +++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c @@ -735,7 +735,7 @@ pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) } -struct pci_devemu pci_de_vscsi = { +static const struct pci_devemu pci_de_vscsi = { .pe_emu = "virtio-scsi", .pe_init = pci_vtscsi_init, .pe_legacy_config = pci_vtscsi_legacy_config, diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index 0b9fde3208..4caa32e981 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -167,6 +167,13 @@ struct pci_xhci_dev_ep { #define ep_tr _ep_trbsctx._epu_tr #define ep_sctx _ep_trbsctx._epu_sctx + /* + * Caches the value of MaxPStreams from the endpoint context + * when an endpoint is initialized and is used to validate the + * use of ep_ringaddr vs ep_sctx_trbs[] as well as the length + * of ep_sctx_trbs[]. 
+ */ + uint32_t ep_MaxPStreams; union { struct pci_xhci_trb_ring _epu_trb; struct pci_xhci_trb_ring *_epu_sctx_trbs; @@ -565,7 +572,7 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, } } -struct xhci_dev_ctx * +static struct xhci_dev_ctx * pci_xhci_get_dev_ctx(struct pci_xhci_softc *sc, uint32_t slot) { uint64_t devctx_addr; @@ -589,7 +596,7 @@ pci_xhci_get_dev_ctx(struct pci_xhci_softc *sc, uint32_t slot) return (devctx); } -struct xhci_trb * +static struct xhci_trb * pci_xhci_trb_next(struct pci_xhci_softc *sc, struct xhci_trb *curtrb, uint64_t *guestaddr) { @@ -674,6 +681,7 @@ pci_xhci_init_ep(struct pci_xhci_dev_emu *dev, int epid) devep->ep_tr = XHCI_GADDR(dev->xsc, devep->ep_ringaddr); DPRINTF(("init_ep tr DCS %x", devep->ep_ccs)); } + devep->ep_MaxPStreams = pstreams; if (devep->ep_xfer == NULL) { devep->ep_xfer = malloc(sizeof(struct usb_data_xfer)); @@ -695,9 +703,8 @@ pci_xhci_disable_ep(struct pci_xhci_dev_emu *dev, int epid) ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_DISABLED; devep = &dev->eps[epid]; - if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0 && - devep->ep_sctx_trbs != NULL) - free(devep->ep_sctx_trbs); + if (devep->ep_MaxPStreams > 0) + free(devep->ep_sctx_trbs); if (devep->ep_xfer != NULL) { free(devep->ep_xfer); @@ -1157,7 +1164,7 @@ pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; - if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) == 0) + if (devep->ep_MaxPStreams == 0) ep_ctx->qwEpCtx2 = devep->ep_ringaddr | devep->ep_ccs; DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x", @@ -1178,16 +1185,15 @@ done: static uint32_t pci_xhci_find_stream(struct pci_xhci_softc *sc, struct xhci_endp_ctx *ep, - uint32_t streamid, struct xhci_stream_ctx **osctx) + struct pci_xhci_dev_ep *devep, uint32_t streamid, + struct xhci_stream_ctx **osctx) { struct xhci_stream_ctx *sctx; - uint32_t maxpstreams; - maxpstreams = 
XHCI_EPCTX_0_MAXP_STREAMS_GET(ep->dwEpCtx0); - if (maxpstreams == 0) + if (devep->ep_MaxPStreams == 0) return (XHCI_TRB_ERROR_TRB); - if (maxpstreams > XHCI_STREAMS_MAX) + if (devep->ep_MaxPStreams > XHCI_STREAMS_MAX) return (XHCI_TRB_ERROR_INVALID_SID); if (XHCI_EPCTX_0_LSA_GET(ep->dwEpCtx0) == 0) { @@ -1196,7 +1202,7 @@ pci_xhci_find_stream(struct pci_xhci_softc *sc, struct xhci_endp_ctx *ep, } /* only support primary stream */ - if (streamid > maxpstreams) + if (streamid > devep->ep_MaxPStreams) return (XHCI_TRB_ERROR_STREAM_TYPE); sctx = XHCI_GADDR(sc, ep->qwEpCtx2 & ~0xFUL) + streamid; @@ -1258,11 +1264,12 @@ pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, } streamid = XHCI_TRB_2_STREAM_GET(trb->dwTrb2); - if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0) { + if (devep->ep_MaxPStreams > 0) { struct xhci_stream_ctx *sctx; sctx = NULL; - cmderr = pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); + cmderr = pci_xhci_find_stream(sc, ep_ctx, devep, streamid, + &sctx); if (sctx != NULL) { assert(devep->ep_sctx != NULL); @@ -1632,7 +1639,7 @@ pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, uint32_t streamid, uint64_t ringaddr, int ccs) { - if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { + if (devep->ep_MaxPStreams != 0) { devep->ep_sctx[streamid].qwSctx0 = (ringaddr & ~0xFUL) | (ccs & 0x1); @@ -1959,7 +1966,7 @@ pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, } /* get next trb work item */ - if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { + if (devep->ep_MaxPStreams != 0) { struct xhci_stream_ctx *sctx; /* @@ -1972,7 +1979,7 @@ pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, } sctx = NULL; - pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); + pci_xhci_find_stream(sc, ep_ctx, devep, streamid, &sctx); if (sctx == NULL) { DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; @@ -2950,7 +2957,7 @@ done: return (error); } -struct pci_devemu 
pci_de_xhci = { +static const struct pci_devemu pci_de_xhci = { .pe_emu = "xhci", .pe_init = pci_xhci_init, .pe_legacy_config = pci_xhci_legacy_config, diff --git a/usr/src/cmd/bhyve/smbiostbl.c b/usr/src/cmd/bhyve/smbiostbl.c index 8c3cd6332d..fc0d69e9fc 100644 --- a/usr/src/cmd/bhyve/smbiostbl.c +++ b/usr/src/cmd/bhyve/smbiostbl.c @@ -43,6 +43,10 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <vmmapi.h> +#ifndef __FreeBSD__ +#include <sys/sysmacros.h> +#endif + #include "bhyverun.h" #include "config.h" #include "debug.h" @@ -77,13 +81,18 @@ struct smbios_structure { uint16_t handle; } __packed; +struct smbios_string { + const char *node; + const char *value; +}; + typedef int (*initializer_func_t)(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_template_entry { struct smbios_structure *entry; - const char **strings; + struct smbios_string *strings; initializer_func_t initializer; }; @@ -351,11 +360,11 @@ struct smbios_table_type0 smbios_type0_template = { 0xff /* embedded controller firmware minor release */ }; -const char *smbios_type0_strings[] = { - "BHYVE", /* vendor string */ - FIRMWARE_VERSION, /* bios version string */ - FIRMWARE_RELEASE_DATE, /* bios release date string */ - NULL +struct smbios_string smbios_type0_strings[] = { + { "bios.vendor", "BHYVE" }, /* vendor string */ + { "bios.version", FIRMWARE_VERSION }, /* bios version string */ + { "bios.release_date", FIRMWARE_RELEASE_DATE }, /* bios release date string */ + { 0 } }; struct smbios_table_type1 smbios_type1_template = { @@ -371,17 +380,17 @@ struct smbios_table_type1 smbios_type1_template = { }; static int smbios_type1_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, 
uint16_t *n, uint16_t *size); -const char *smbios_type1_strings[] = { - "illumos", /* manufacturer string */ - "BHYVE", /* product name string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* sku string */ - "Virtual Machine", /* family name string */ - NULL +struct smbios_string smbios_type1_strings[] = { + { "system.manufacturer", "illumos" }, /* manufacturer string */ + { "system.product_name", "BHYVE" }, /* product string */ + { "system.version", "1.0" }, /* version string */ + { "system.serial_number", "None" }, /* serial number string */ + { "system.sku", "None" }, /* sku string */ + { "system.family_name", "Virtual Machine" }, /* family string */ + { 0 } }; struct smbios_table_type2 smbios_type2_template = { @@ -398,14 +407,14 @@ struct smbios_table_type2 smbios_type2_template = { 0 }; -const char *smbios_type2_strings[] = { - "illumos", /* manufacturer string */ - "BHYVE", /* product name string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* location string */ - NULL +struct smbios_string smbios_type2_strings[] = { + { "board.manufacturer", "illumos" }, /* manufacturer string */ + { "board.product_name", "BHYVE" }, /* product name string */ + { "board.version", "1.0" }, /* version string */ + { "board.serial_number", "None" }, /* serial number string */ + { "board.asset_tag", "None" }, /* asset tag string */ + { "board.location", "None" }, /* location string */ + { 0 } }; struct smbios_table_type3 smbios_type3_template = { @@ -427,13 +436,13 @@ struct smbios_table_type3 smbios_type3_template = { 5 /* sku number string */ }; -const char *smbios_type3_strings[] = { - "illumos", /* manufacturer string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* sku number string */ - NULL +struct smbios_string smbios_type3_strings[] = { + { "chassis.manufacturer", "illumos" }, /* manufacturer string */ + 
{ "chassis.version", "1.0" }, /* version string */ + { "chassis.serial_number", "None" }, /* serial number string */ + { "chassis.asset_tag", "None" }, /* asset tag string */ + { "chassis.sku", "None" }, /* sku number string */ + { 0 } }; struct smbios_table_type4 smbios_type4_template = { @@ -463,18 +472,18 @@ struct smbios_table_type4 smbios_type4_template = { SMBIOS_PRF_OTHER }; -const char *smbios_type4_strings[] = { - " ", /* socket designation string */ - " ", /* manufacturer string */ - " ", /* version string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* part number string */ - NULL +struct smbios_string smbios_type4_strings[] = { + { NULL, " " }, /* socket designation string */ + { NULL, " " }, /* manufacturer string */ + { NULL, " " }, /* version string */ + { NULL, "None" }, /* serial number string */ + { NULL, "None" }, /* asset tag string */ + { NULL, "None" }, /* part number string */ + { 0 } }; static int smbios_type4_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type16 smbios_type16_template = { @@ -489,7 +498,7 @@ struct smbios_table_type16 smbios_type16_template = { }; static int smbios_type16_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type17 smbios_type17_template = { @@ -518,18 +527,18 @@ struct smbios_table_type17 smbios_type17_template = { 0 /* configured voltage in mv (0=unknown) */ }; -const char *smbios_type17_strings[] = { - " ", /* device locator string */ - " ", /* physical bank locator string */ - " ", /* manufacturer string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* part 
number string */ - NULL +struct smbios_string smbios_type17_strings[] = { + { NULL, " " }, /* device locator string */ + { NULL, " " }, /* physical bank locator string */ + { NULL, " " }, /* manufacturer string */ + { NULL, "None" }, /* serial number string */ + { NULL, "None" }, /* asset tag string */ + { NULL, "None" }, /* part number string */ + { 0 } }; static int smbios_type17_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type19 smbios_type19_template = { @@ -543,7 +552,7 @@ struct smbios_table_type19 smbios_type19_template = { }; static int smbios_type19_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type32 smbios_type32_template = { @@ -557,7 +566,7 @@ struct smbios_table_type127 smbios_type127_template = { }; static int smbios_generic_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); static struct smbios_template_entry smbios_template[] = { @@ -599,7 +608,7 @@ static uint16_t type16_handle; static int smbios_generic_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_structure *entry; @@ -611,11 +620,20 @@ smbios_generic_initializer(struct smbios_structure *template_entry, if (template_strings != NULL) { int i; - for (i = 0; template_strings[i] != NULL; i++) { + for (i = 0; template_strings[i].value != NULL; i++) { const char 
*string; int len; - string = template_strings[i]; + if (template_strings[i].node == NULL) { + string = template_strings[i].value; + } else { + set_config_value_if_unset( + template_strings[i].node, + template_strings[i].value); + string = get_config_value( + template_strings[i].node); + } + len = strlen(string) + 1; memcpy(curaddr, string, len); curaddr += len; @@ -637,7 +655,7 @@ smbios_generic_initializer(struct smbios_structure *template_entry, static int smbios_type1_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type1 *type1; @@ -653,8 +671,10 @@ smbios_type1_initializer(struct smbios_structure *template_entry, uint32_t status; uuid_from_string(guest_uuid_str, &uuid, &status); - if (status != uuid_s_ok) + if (status != uuid_s_ok) { + EPRINTLN("Invalid UUID"); return (-1); + } uuid_enc_le(&type1->uuid, &uuid); } else { @@ -693,7 +713,7 @@ smbios_type1_initializer(struct smbios_structure *template_entry, static int smbios_type4_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { int i; @@ -735,7 +755,7 @@ smbios_type4_initializer(struct smbios_structure *template_entry, static int smbios_type16_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type16 *type16; @@ -752,7 +772,7 @@ smbios_type16_initializer(struct smbios_structure *template_entry, static int smbios_type17_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string 
*template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type17 *type17; @@ -796,7 +816,7 @@ smbios_type17_initializer(struct smbios_structure *template_entry, static int smbios_type19_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, + struct smbios_string *template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type19 *type19; @@ -862,6 +882,26 @@ smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len, smbios_ep->echecksum = checksum; } +#ifndef __FreeBSD__ +/* + * bhyve on illumos previously used configuration keys starting with 'smbios.' + * to control type 1 SMBIOS information. Since these may still be present in + * bhyve configuration files, the following table is used to translate them + * to their new key names. + */ +static struct { + const char *oldkey; + const char *newkey; +} smbios_legacy_config_map[] = { + { "smbios.manufacturer", "system.manufacturer" }, + { "smbios.family", "system.family_name" }, + { "smbios.product", "system.product_name" }, + { "smbios.serial", "system.serial_number" }, + { "smbios.sku", "system.sku" }, + { "smbios.version", "system.version" }, +}; +#endif + int smbios_build(struct vmctx *ctx) { @@ -881,6 +921,19 @@ smbios_build(struct vmctx *ctx) return (ENOMEM); } +#ifndef __FreeBSD__ + /* Translate legacy illumos configuration keys */ + for (uint_t i = 0; i < ARRAY_SIZE(smbios_legacy_config_map); i++) { + const char *v; + + v = get_config_value(smbios_legacy_config_map[i].oldkey); + if (v != NULL) { + set_config_value_if_unset( + smbios_legacy_config_map[i].newkey, v); + } + } +#endif + curaddr = startaddr; smbios_ep = (struct smbios_entry_point *)curaddr; @@ -893,7 +946,7 @@ smbios_build(struct vmctx *ctx) maxssize = 0; for (i = 0; smbios_template[i].entry != NULL; i++) { struct smbios_structure *entry; - const char **strings; + struct smbios_string 
*strings; initializer_func_t initializer; char *endaddr; uint16_t size; @@ -920,42 +973,39 @@ smbios_build(struct vmctx *ctx) } #ifndef __FreeBSD__ -struct { +static struct { + uint_t type; const char *key; - const char **targetp; -} type1_map[] = { - { "manufacturer", &smbios_type1_strings[0] }, - { "product", &smbios_type1_strings[1] }, - { "version", &smbios_type1_strings[2] }, - { "serial", &smbios_type1_strings[3] }, - { "sku", &smbios_type1_strings[4] }, - { "family", &smbios_type1_strings[5] }, - { 0 } + char *val; +} smbios_legacy_map[] = { + { 1, "product", "product_name" }, + { 1, "serial", "serial_number" }, + { 1, "family", "family_name" }, }; -void -smbios_apply(void) -{ - nvlist_t *nvl; - - nvl = find_config_node("smbios"); - if (nvl == NULL) - return; - - for (uint_t i = 0; type1_map[i].key != NULL; i++) { - const char *value; - - value = get_config_value_node(nvl, type1_map[i].key); - if (value != NULL) - *type1_map[i].targetp = value; - } -} +static struct smbios_string *smbios_tbl_map[] = { + smbios_type0_strings, + smbios_type1_strings, + smbios_type2_strings, + smbios_type3_strings, +}; +/* + * This function accepts an option of the form + * type,[key=value][,key=value]... + * and sets smbios data for the given type. Keys for type X are defined in the + * smbios_typeX_strings tables above, but for type 1 there are also some + * legacy values which were accepted in earlier versions of bhyve on illumos + * which need to be mapped. 
+ */ int smbios_parse(const char *opts) { - char *buf, *lasts, *token, *end; + char *buf, *lasts, *token, *typekey = NULL; + const char *errstr; + struct smbios_string *tbl; nvlist_t *nvl; + uint_t i; long type; if ((buf = strdup(opts)) == NULL) { @@ -968,59 +1018,79 @@ smbios_parse(const char *opts) goto fail; } - errno = 0; - type = strtol(token, &end, 10); - if (errno != 0 || *end != '\0') { - (void) fprintf(stderr, "first token '%s' is not an integer\n", - token); + type = strtonum(token, 0, 3, &errstr); + if (errstr != NULL) { + fprintf(stderr, "First token (type) is %s\n", errstr); goto fail; } - /* For now, only type 1 is supported. */ - if (type != 1) { - (void) fprintf(stderr, "unsupported type %d\n", type); + tbl = smbios_tbl_map[type]; + + /* Extract the config key for this type */ + typekey = strdup(tbl[0].node); + if (typekey == NULL) { + (void) fprintf(stderr, "out of memory\n"); goto fail; } - nvl = create_config_node("smbios"); + token = strchr(typekey, '.'); + assert(token != NULL); + *token = '\0'; + + nvl = create_config_node(typekey); if (nvl == NULL) { (void) fprintf(stderr, "out of memory\n"); - return (-1); + goto fail; } while ((token = strtok_r(NULL, ",", &lasts)) != NULL) { char *val; - uint_t i; - if ((val = strchr(token, '=')) == NULL) { + if ((val = strchr(token, '=')) == NULL || val[1] == '\0') { (void) fprintf(stderr, "invalid key=value: '%s'\n", token); goto fail; } - *val = '\0'; - val++; + *val++ = '\0'; + /* UUID is a top-level config item, but -U takes priority */ if (strcmp(token, "uuid") == 0) { - set_config_value_node(nvl, token, val); + set_config_value_if_unset(token, val); continue; } - for (i = 0; type1_map[i].key != NULL; i++) { - if (strcmp(token, type1_map[i].key) == 0) { + /* Translate legacy keys */ + for (i = 0; i < ARRAY_SIZE(smbios_legacy_map); i++) { + if (type == smbios_legacy_map[i].type && + strcmp(token, smbios_legacy_map[i].key) == 0) { + token = smbios_legacy_map[i].val; break; } } - if (type1_map[i].key == 
NULL) { - (void) fprintf(stderr, "invalid key '%s'\n", token); + + for (i = 0; tbl[i].value != NULL; i++) { + if (strcmp(tbl[i].node + strlen(typekey) + 1, + token) == 0) { + /* Found match */ + break; + } + } + + if (tbl[i].value == NULL) { + (void) fprintf(stderr, + "Unknown SMBIOS key %s for type %d\n", token, type); goto fail; } + set_config_value_node(nvl, token, val); } + free(typekey); return (0); fail: free(buf); + free(typekey); return (-1); } #endif diff --git a/usr/src/contrib/bhyve/dev/nvme/nvme.h b/usr/src/contrib/bhyve/dev/nvme/nvme.h index 2f393ea1d1..5820908f3c 100644 --- a/usr/src/contrib/bhyve/dev/nvme/nvme.h +++ b/usr/src/contrib/bhyve/dev/nvme/nvme.h @@ -73,8 +73,11 @@ */ #define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF) -/* Cap transfers by the maximum addressable by page-sized PRP (4KB -> 2MB). */ -#define NVME_MAX_XFER_SIZE MIN(maxphys, (PAGE_SIZE/8*PAGE_SIZE)) +/* Host memory buffer sizes are always in 4096 byte chunks */ +#define NVME_HMB_UNITS 4096 + +/* Many items are expressed in terms of power of two times MPS */ +#define NVME_MPS_SHIFT 12 /* Register field definitions */ #define NVME_CAP_LO_REG_MQES_SHIFT (0) @@ -215,6 +218,7 @@ #define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK) #define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK) #define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK) +#define NVME_STATUS_GET_CRD(st) (((st) >> NVME_STATUS_CRD_SHIFT) & NVME_STATUS_CRD_MASK) #define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK) #define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK) diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index a3c00d616b..3d47b70214 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -60,6 +60,9 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> 
#include <errno.h> +#ifdef __FreeBSD__ +#include <stdbool.h> +#endif #include <stdio.h> #include <stdlib.h> #include <assert.h> @@ -69,6 +72,9 @@ __FBSDID("$FreeBSD$"); #include <libutil.h> +#ifdef __FreeBSD__ +#include <vm/vm.h> +#endif #include <machine/vmm.h> #include <machine/vmm_dev.h> @@ -111,7 +117,27 @@ struct vmctx { #ifdef __FreeBSD__ #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) +#endif + +static int +vm_device_open(const char *name) +{ + int fd, len; + char *vmfile; + len = strlen("/dev/vmm/") + strlen(name) + 1; + vmfile = malloc(len); + assert(vmfile != NULL); + snprintf(vmfile, len, "/dev/vmm/%s", name); + + /* Open the device file */ + fd = open(vmfile, O_RDWR, 0); + + free(vmfile); + return (fd); +} + +#ifdef __FreeBSD__ int vm_create(const char *name) { @@ -120,19 +146,6 @@ vm_create(const char *name) kldload("vmm"); return (CREATE(name)); } - -void -vm_destroy(struct vmctx *vm) -{ - assert(vm != NULL); - - if (vm->fd >= 0) - close(vm->fd); - DESTROY(vm->name); - - free(vm); -} - #else static int vm_do_ctl(int cmd, void *req) @@ -167,51 +180,8 @@ vm_create(const char *name, uint64_t flags) return (vm_do_ctl(VMM_CREATE_VM, &req)); } - -void -vm_close(struct vmctx *vm) -{ - assert(vm != NULL); - assert(vm->fd >= 0); - - (void) close(vm->fd); - - free(vm); -} - -void -vm_destroy(struct vmctx *vm) -{ - assert(vm != NULL); - - if (vm->fd >= 0) { - (void) ioctl(vm->fd, VM_DESTROY_SELF, 0); - (void) close(vm->fd); - vm->fd = -1; - } - - free(vm); -} #endif -static int -vm_device_open(const char *name) -{ - int fd, len; - char *vmfile; - - len = strlen("/dev/vmm/") + strlen(name) + 1; - vmfile = malloc(len); - assert(vmfile != NULL); - snprintf(vmfile, len, "/dev/vmm/%s", name); - - /* Open the device file */ - fd = open(vmfile, O_RDWR, 0); - - free(vmfile); - return (fd); -} - struct vmctx * vm_open(const char *name) { @@ -238,6 +208,53 @@ err: 
return (NULL); } +#ifdef __FreeBSD__ +void +vm_close(struct vmctx *vm) +{ + assert(vm != NULL); + + close(vm->fd); + free(vm); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + if (vm->fd >= 0) + close(vm->fd); + DESTROY(vm->name); + + free(vm); +} +#else +void +vm_close(struct vmctx *vm) +{ + assert(vm != NULL); + assert(vm->fd >= 0); + + (void) close(vm->fd); + + free(vm); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + if (vm->fd >= 0) { + (void) ioctl(vm->fd, VM_DESTROY_SELF, 0); + (void) close(vm->fd); + vm->fd = -1; + } + + free(vm); +} +#endif int vm_parse_memsize(const char *opt, size_t *ret_memsize) @@ -330,6 +347,19 @@ vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off, return (error); } +#ifdef __FreeBSD__ +int +vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr, + size_t *lowmem_size, size_t *highmem_size) +{ + + *guest_baseaddr = ctx->baseaddr; + *lowmem_size = ctx->lowmem; + *highmem_size = ctx->highmem; + return (0); +} +#endif + int vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len) { @@ -594,6 +624,33 @@ vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) return (NULL); } +#ifdef __FreeBSD__ +vm_paddr_t +vm_rev_map_gpa(struct vmctx *ctx, void *addr) +{ + vm_paddr_t offaddr; + + offaddr = (char *)addr - ctx->baseaddr; + + if (ctx->lowmem > 0) + if (offaddr <= ctx->lowmem) + return (offaddr); + + if (ctx->highmem > 0) + if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem) + return (offaddr); + + return ((vm_paddr_t)-1); +} + +const char * +vm_get_name(struct vmctx *ctx) +{ + + return (ctx->name); +} +#endif /* __FreeBSD__ */ + size_t vm_get_lowmem_size(struct vmctx *ctx) { @@ -800,6 +857,21 @@ vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count, return (error); } +#ifdef __FreeBSD__ +int +vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) +{ + int error; + struct vm_run vmrun; + + bzero(&vmrun, sizeof(vmrun)); + 
vmrun.cpuid = vcpu; + + error = ioctl(ctx->fd, VM_RUN, &vmrun); + bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); + return (error); +} +#else int vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry, struct vm_exit *vm_exit) @@ -812,6 +884,7 @@ vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry, return (ioctl(ctx->fd, VM_RUN, &entry)); } +#endif int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) @@ -823,22 +896,22 @@ vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend)); } -#ifndef __FreeBSD__ +#ifdef __FreeBSD__ int -vm_reinit(struct vmctx *ctx, uint64_t flags) +vm_reinit(struct vmctx *ctx) { - struct vm_reinit reinit = { - .flags = flags - }; - return (ioctl(ctx->fd, VM_REINIT, &reinit)); + return (ioctl(ctx->fd, VM_REINIT, 0)); } #else int -vm_reinit(struct vmctx *ctx) +vm_reinit(struct vmctx *ctx, uint64_t flags) { + struct vm_reinit reinit = { + .flags = flags + }; - return (ioctl(ctx->fd, VM_REINIT, 0)); + return (ioctl(ctx->fd, VM_REINIT, &reinit)); } #endif @@ -1201,26 +1274,6 @@ vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, } int -vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, - int *msi_limit, int *msix_limit) -{ - struct vm_pptdev_limits pptlimits; - int error; - - bzero(&pptlimits, sizeof (pptlimits)); - pptlimits.bus = bus; - pptlimits.slot = slot; - pptlimits.func = func; - - error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits); - - *msi_limit = pptlimits.msi_limit; - *msix_limit = pptlimits.msix_limit; - - return (error); -} - -int vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev ppt; @@ -1370,16 +1423,15 @@ vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, if (vmstats.num_entries != nitems(vmstats.statbuf)) break; } - if (have_stats) { if (ret_entries) *ret_entries = count; if (ret_tv) *ret_tv = vmstats.tv; return (stats_buf); + } else { + 
return (NULL); } - - return (NULL); } const char * @@ -1458,13 +1510,21 @@ vcpu_reset(struct vmctx *vmctx, int vcpu) if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) goto done; + /* + * According to Intel's Software Developer's Manual, CR0 should be + * initialized with CR0_ET | CR0_NW | CR0_CD but that crashes some + * guests like Windows. + */ cr0 = CR0_NE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR2, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) goto done; - + cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; @@ -1527,6 +1587,9 @@ vcpu_reset(struct vmctx *vmctx, int vcpu) if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, zero)) != 0) + goto done; + /* General purpose registers */ rdx = 0xf00; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) @@ -1545,6 +1608,22 @@ vcpu_reset(struct vmctx *vmctx, int vcpu) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R8, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R9, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R10, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R11, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R12, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R13, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R14, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_R15, zero)) != 0) + goto done; /* GDTR, IDTR */ desc_base = 0; @@ -1585,7 
+1664,16 @@ vcpu_reset(struct vmctx *vmctx, int vcpu) if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) goto done; - /* XXX cr2, debug registers */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DR6, + 0xffff0ff0)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DR7, 0x400)) != + 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_INTR_SHADOW, + zero)) != 0) + goto done; error = 0; done: diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index 9d5470f729..4dffd7d755 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -94,6 +94,10 @@ enum { VM_PCIROM, }; +#ifdef __cplusplus +extern "C" { +#endif + /* * Get the length and name of the memory segment identified by 'segid'. * Note that system memory segments are identified with a nul name. @@ -142,9 +146,7 @@ int vm_create(const char *name); #endif /* __FreeBSD__ */ int vm_get_device_fd(struct vmctx *ctx); struct vmctx *vm_open(const char *name); -#ifndef __FreeBSD__ void vm_close(struct vmctx *ctx); -#endif void vm_destroy(struct vmctx *ctx); int vm_parse_memsize(const char *optarg, size_t *memsize); int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); @@ -339,4 +341,9 @@ int vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu, uint32_t esp); void vm_setup_freebsd_gdt(uint64_t *gdtr); #endif + +#ifdef __cplusplus +} +#endif + #endif /* _VMMAPI_H_ */ diff --git a/usr/src/man/man5/bhyve_config.5 b/usr/src/man/man5/bhyve_config.5 index f031165f89..3a5c0a2615 100644 --- a/usr/src/man/man5/bhyve_config.5 +++ b/usr/src/man/man5/bhyve_config.5 @@ -25,7 +25,7 @@ .\" .\" Portions Copyright 2022 OmniOS Community Edition (OmniOSce) Association. .\" -.Dd April 6, 2022 +.Dd June 1, 2022 .Dt BHYVE_CONFIG 5 .Os .Sh NAME @@ -174,6 +174,96 @@ Enable debug messages relating to the emulated XHCI controller. 
These messages are sent to .Dv stderr . +.It Va bios.vendor Ta string Ta BHYVE Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va bios.version Ta string Ta 14.0 Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va bios.release_date Ta string Ta 10/17/2021 Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.family_name Ta string Ta Virtual Machine Ta +Family the computer belongs to. +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.manufacturer Ta string Ta illumos Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.product_name Ta string Ta BHYVE Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.serial_number Ta string Ta None Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.sku Ta string Ta None Ta +Stock keeping unit of the computer. +It's also called product ID or purchase order number. +This value is used for the guest's System Management BIOS System Information +structure. +.It Va system.version Ta string Ta 1.0 Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va board.manufacturer Ta string Ta illumos Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va board.product_name Ta string Ta BHYVE Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va board.version Ta string Ta 1.0 Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va board.serial_number Ta string Ta None Ta +This value is used for the guest's System Management BIOS System Information +structure. 
+.It Va board.asset_tag Ta string Ta None Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va board.location Ta string Ta None Ta +Describes the board's location within the chassis. +This value is used for the guest's System Management BIOS System Information +structure. +.It Va chassis.manufacturer Ta string Ta illumos Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va chassis.version Ta string Ta 1.0 Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va chassis.serial_number Ta string Ta None Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va chassis.asset_tag Ta string Ta None Ta +This value is used for the guest's System Management BIOS System Information +structure. +.It Va chassis.sku Ta string Ta None Ta +Stock keeping unit of the chassis. +It's also called product ID or purchase order number. +This value is used for the guest's System Management BIOS System Information +structure. +.It Va smbios.family Ta string Ta Virtual Machine Ta +Legacy alias for +.Va system.family_name , +do not use in new configurations. +.It Va smbios.manufacturer Ta string Ta illumos Ta +Legacy alias for +.Va system.manufacturer , +do not use in new configurations. +.It Va smbios.product Ta string Ta BHYVE Ta +Legacy alias for +.Va system.product_name , +do not use in new configurations. +.It Va smbios.serial Ta string Ta None Ta +Legacy alias for +.Va system.serial_number , +do not use in new configurations. +.It Va smbios.sku Ta string Ta None Ta +Legacy alias for +.Va system.sku , +do not use in new configurations. +.It Va smbios.version Ta string Ta 1.0 Ta +Legacy alias for +.Va system.version , +do not use in new configurations. 
.El .Ss x86-Specific Settings .Bl -column "x86.vmexit_on_pause" "integer" "Default" diff --git a/usr/src/man/man8/bhyve.8 b/usr/src/man/man8/bhyve.8 index a862668b8c..cb5551a33b 100644 --- a/usr/src/man/man8/bhyve.8 +++ b/usr/src/man/man8/bhyve.8 @@ -24,7 +24,7 @@ .\" .\" Portions Copyright 2022 OmniOS Community Edition (OmniOSce) Association. .\" -.Dd February 26, 2022 +.Dd June 1, 2022 .Dt BHYVE 8 .Os .Sh NAME @@ -115,17 +115,28 @@ It will be deprecated in a future version. .Xc Configure smbios data. .Ar type -must be set to -.Sy 1 . -Supported keys are: -.Sy manufacturer , -.Sy product , -.Sy version , -.Sy serial , -.Sy sku , -.Sy family -and -.Sy uuid . +must be set to one of +.Cm 0 , 1 , 2 +or +.Cm 3 . +Supported keys for each type are: +.Bl -tag -width XXX -offset XXX +.It Cm 0 +.Cm vendor , version , release_date . +.It Cm 1 +.Cm manufacturer , +.Cm product_name Pq or Cm product , +.Cm version , +.Cm serial_number Pq or Cm serial , +.Cm sku , +.Cm family_name Pq or Cm family , +.Cm uuid . +.It Cm 2 +.Cm manufacturer , product_name , version , serial_number , asset_tag , +.Cm location . +.It Cm 3 +.Cm manufacturer , version , serial_number , asset_tag , sku . 
+.El .It Xo Fl c .Sm off .Op Oo Cm cpus= Oc Ar numcpus diff --git a/usr/src/uts/intel/io/vmm/amd/amdvi_hw.c b/usr/src/uts/intel/io/vmm/amd/amdvi_hw.c index 33a2557492..c217b19112 100644 --- a/usr/src/uts/intel/io/vmm/amd/amdvi_hw.c +++ b/usr/src/uts/intel/io/vmm/amd/amdvi_hw.c @@ -418,7 +418,7 @@ amdvi_cmd_inv_intr_map(struct amdvi_softc *softc, static void amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id) { - struct amdvi_cmd *cmd; + struct amdvi_cmd *cmd __diagused; cmd = amdvi_get_cmd_tail(softc); KASSERT(cmd != NULL, ("Cmd is NULL")); @@ -439,13 +439,14 @@ amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id) static bool amdvi_cmp_wait(struct amdvi_softc *softc) { - struct amdvi_ctrl *ctrl; +#ifdef AMDVI_DEBUG_CMD + struct amdvi_ctrl *ctrl = softc->ctrl; +#endif const uint64_t VERIFY = 0xA5A5; volatile uint64_t *read; int i; bool status; - ctrl = softc->ctrl; read = &softc->cmp_data; *read = 0; amdvi_cmd_cmp(softc, VERIFY); diff --git a/usr/src/uts/intel/io/vmm/amd/svm.c b/usr/src/uts/intel/io/vmm/amd/svm.c index f4f01ea4b6..a20a844030 100644 --- a/usr/src/uts/intel/io/vmm/amd/svm.c +++ b/usr/src/uts/intel/io/vmm/amd/svm.c @@ -378,6 +378,10 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_STGI); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_CLGI); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_SKINIT); + if (vcpu_trap_wbinvd(sc->vm, vcpu) != 0) { + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, + VMCB_INTCPT_WBINVD); + } /* * The ASID will be set to a non-zero value just before VMRUN. 
@@ -1448,7 +1452,6 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) (void) vm_suspend(svm_sc->vm, VM_SUSPEND_TRIPLEFAULT); handled = 1; break; - case VMCB_EXIT_INVD: case VMCB_EXIT_INVLPGA: /* privileged invalidation instructions */ vm_inject_ud(svm_sc->vm, vcpu); @@ -1464,6 +1467,11 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) vm_inject_ud(svm_sc->vm, vcpu); handled = 1; break; + case VMCB_EXIT_INVD: + case VMCB_EXIT_WBINVD: + /* ignore exit */ + handled = 1; + break; case VMCB_EXIT_VMMCALL: /* No handlers make use of VMMCALL for now */ vm_inject_ud(svm_sc->vm, vcpu); diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.h b/usr/src/uts/intel/io/vmm/amd/vmcb.h index 7a57979d56..91e00193bf 100644 --- a/usr/src/uts/intel/io/vmm/amd/vmcb.h +++ b/usr/src/uts/intel/io/vmm/amd/vmcb.h @@ -172,6 +172,7 @@ struct svm_softc; #define VMCB_EXIT_STGI 0x84 #define VMCB_EXIT_CLGI 0x85 #define VMCB_EXIT_SKINIT 0x86 +#define VMCB_EXIT_WBINVD 0x89 #define VMCB_EXIT_MONITOR 0x8A #define VMCB_EXIT_MWAIT 0x8B #define VMCB_EXIT_NPF 0x400 diff --git a/usr/src/uts/intel/io/vmm/intel/vmx.c b/usr/src/uts/intel/io/vmm/intel/vmx.c index c16fe1f1d4..e42455a0f3 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmx.c +++ b/usr/src/uts/intel/io/vmm/intel/vmx.c @@ -188,6 +188,9 @@ static int vmx_initialized; /* PAUSE triggers a VM-exit */ static int cap_pause_exit; +/* WBINVD triggers a VM-exit */ +static int cap_wbinvd_exit; + /* Monitor trap flag */ static int cap_monitor_trap; @@ -548,6 +551,11 @@ vmx_init(void) PROCBASED_PAUSE_EXITING, 0, &tmp) == 0); + cap_wbinvd_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, + MSR_VMX_PROCBASED_CTLS2, + PROCBASED2_WBINVD_EXITING, 0, + &tmp) == 0); + cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, &tmp) == 0); @@ -819,7 +827,12 @@ vmx_vminit(struct vm *vm) vmcs_write(VMCS_EPTP, vmx->eptp); vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); 
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); + + uint32_t use_proc2_ctls = proc2_ctls; + if (cap_wbinvd_exit && vcpu_trap_wbinvd(vm, i) != 0) + use_proc2_ctls |= PROCBASED2_WBINVD_EXITING; + vmcs_write(VMCS_SEC_PROC_BASED_CTLS, use_proc2_ctls); + vmcs_write(VMCS_EXIT_CTLS, exit_ctls); vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); @@ -2530,6 +2543,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) SDT_PROBE3(vmm, vmx, exit, vminsn, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_VMINSN; break; + case EXIT_REASON_INVD: + case EXIT_REASON_WBINVD: + /* ignore exit */ + handled = HANDLED; + break; default: SDT_PROBE4(vmm, vmx, exit, unknown, vmx, vcpu, vmexit, reason); diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index c5c7d7889e..1ef2d48adf 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -358,6 +358,7 @@ void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, struct vm_copyinfo *copyinfo, size_t len); int vcpu_trace_exceptions(struct vm *vm, int vcpuid); +int vcpu_trap_wbinvd(struct vm *vm, int vcpuid); void vm_inject_ud(struct vm *vm, int vcpuid); void vm_inject_gp(struct vm *vm, int vcpuid); diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 44f1ee5ca2..136c38c5ab 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ b/usr/src/uts/intel/io/vmm/vmm.c @@ -286,10 +286,13 @@ SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, * Halt the guest if all vcpus are executing a HLT instruction with * interrupts disabled. 
*/ -static int halt_detection_enabled = 1; +int halt_detection_enabled = 1; /* Trap into hypervisor on all guest exceptions and reflect them back */ -static int trace_guest_exceptions; +int trace_guest_exceptions; + +/* Trap WBINVD and ignore it */ +int trap_wbinvd = 1; static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); @@ -403,10 +406,15 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) int vcpu_trace_exceptions(struct vm *vm, int vcpuid) { - return (trace_guest_exceptions); } +int +vcpu_trap_wbinvd(struct vm *vm, int vcpuid) +{ + return (trap_wbinvd); +} + struct vm_exit * vm_exitinfo(struct vm *vm, int cpuid) { |