diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2019-06-04 21:27:31 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2019-06-14 16:28:50 +0000 |
commit | c3cbf49c8d881ba57710e7e8b70aa6275bc5fc44 (patch) | |
tree | 549f85b42dcc7444792f88104bc0fd374f0b15ef /usr | |
parent | de37b27aa9332f8b33b7b4041b3bdaa6894dcde9 (diff) | |
download | illumos-joyent-c3cbf49c8d881ba57710e7e8b70aa6275bc5fc44.tar.gz |
OS-7819 bhyve upstream sync 2019 June
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Reviewed by: Mike Gerdts <mike.gerdts@joyent.com>
Approved by: John Levon <john.levon@joyent.com>
Diffstat (limited to 'usr')
33 files changed, 811 insertions, 344 deletions
diff --git a/usr/contrib/freebsd/x86/specialreg.h b/usr/contrib/freebsd/x86/specialreg.h index ef78e9a0af..f528bad55c 100644 --- a/usr/contrib/freebsd/x86/specialreg.h +++ b/usr/contrib/freebsd/x86/specialreg.h @@ -433,29 +433,41 @@ /* * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info */ -#define CPUID_STDEXT2_PREFETCHWT1 0x00000001 -#define CPUID_STDEXT2_UMIP 0x00000004 -#define CPUID_STDEXT2_PKU 0x00000008 -#define CPUID_STDEXT2_OSPKE 0x00000010 -#define CPUID_STDEXT2_WAITPKG 0x00000020 -#define CPUID_STDEXT2_GFNI 0x00000100 -#define CPUID_STDEXT2_RDPID 0x00400000 -#define CPUID_STDEXT2_CLDEMOTE 0x02000000 -#define CPUID_STDEXT2_MOVDIRI 0x08000000 +#define CPUID_STDEXT2_PREFETCHWT1 0x00000001 +#define CPUID_STDEXT2_AVX512VBMI 0x00000002 +#define CPUID_STDEXT2_UMIP 0x00000004 +#define CPUID_STDEXT2_PKU 0x00000008 +#define CPUID_STDEXT2_OSPKE 0x00000010 +#define CPUID_STDEXT2_WAITPKG 0x00000020 +#define CPUID_STDEXT2_AVX512VBMI2 0x00000040 +#define CPUID_STDEXT2_GFNI 0x00000100 +#define CPUID_STDEXT2_VAES 0x00000200 +#define CPUID_STDEXT2_VPCLMULQDQ 0x00000400 +#define CPUID_STDEXT2_AVX512VNNI 0x00000800 +#define CPUID_STDEXT2_AVX512BITALG 0x00001000 +#define CPUID_STDEXT2_AVX512VPOPCNTDQ 0x00004000 +#define CPUID_STDEXT2_RDPID 0x00400000 +#define CPUID_STDEXT2_CLDEMOTE 0x02000000 +#define CPUID_STDEXT2_MOVDIRI 0x08000000 #define CPUID_STDEXT2_MOVDIRI64B 0x10000000 -#define CPUID_STDEXT2_SGXLC 0x40000000 +#define CPUID_STDEXT2_ENQCMD 0x20000000 +#define CPUID_STDEXT2_SGXLC 0x40000000 /* * CPUID instruction 7 Structured Extended Features, leaf 0 edx info */ -#define CPUID_STDEXT3_MD_CLEAR 0x00000400 -#define CPUID_STDEXT3_TSXFA 0x00002000 -#define CPUID_STDEXT3_IBPB 0x04000000 -#define CPUID_STDEXT3_STIBP 0x08000000 -#define CPUID_STDEXT3_L1D_FLUSH 0x10000000 -#define CPUID_STDEXT3_ARCH_CAP 0x20000000 -#define CPUID_STDEXT3_CORE_CAP 0x40000000 -#define CPUID_STDEXT3_SSBD 0x80000000 +#define CPUID_STDEXT3_AVX5124VNNIW 0x00000004 +#define 
CPUID_STDEXT3_AVX5124FMAPS 0x00000008 +#define CPUID_STDEXT3_AVX512VP2INTERSECT 0x00000100 +#define CPUID_STDEXT3_MD_CLEAR 0x00000400 +#define CPUID_STDEXT3_TSXFA 0x00002000 +#define CPUID_STDEXT3_PCONFIG 0x00040000 +#define CPUID_STDEXT3_IBPB 0x04000000 +#define CPUID_STDEXT3_STIBP 0x08000000 +#define CPUID_STDEXT3_L1D_FLUSH 0x10000000 +#define CPUID_STDEXT3_ARCH_CAP 0x20000000 +#define CPUID_STDEXT3_CORE_CAP 0x40000000 +#define CPUID_STDEXT3_SSBD 0x80000000 /* MSR IA32_ARCH_CAP(ABILITIES) bits */ #define IA32_ARCH_CAP_RDCL_NO 0x00000001 @@ -944,6 +956,16 @@ #define MC_MISC_AMD_PTR_MASK 0x00000000ff000000 /* Pointer to additional registers */ #define MC_MISC_AMD_PTR_SHIFT 24 +/* AMD Scalable MCA */ +#define MSR_SMCA_MC0_CTL 0xc0002000 +#define MSR_SMCA_MC0_STATUS 0xc0002001 +#define MSR_SMCA_MC0_ADDR 0xc0002002 +#define MSR_SMCA_MC0_MISC0 0xc0002003 +#define MSR_SMCA_MC_CTL(x) (MSR_SMCA_MC0_CTL + 0x10 * (x)) +#define MSR_SMCA_MC_STATUS(x) (MSR_SMCA_MC0_STATUS + 0x10 * (x)) +#define MSR_SMCA_MC_ADDR(x) (MSR_SMCA_MC0_ADDR + 0x10 * (x)) +#define MSR_SMCA_MC_MISC(x) (MSR_SMCA_MC0_MISC0 + 0x10 * (x)) + /* * The following four 3-byte registers control the non-cacheable regions. * These registers must be written as three separate bytes. @@ -1076,6 +1098,7 @@ #define MSR_VM_HSAVE_PA 0xc0010117 /* SVM: host save area address */ #define MSR_AMD_CPUID07 0xc0011002 /* CPUID 07 %ebx override */ #define MSR_EXTFEATURES 0xc0011005 /* Extended CPUID Features override */ +#define MSR_LS_CFG 0xc0011020 #define MSR_IC_CFG 0xc0011021 /* Instruction Cache Configuration */ /* MSR_VM_CR related */ diff --git a/usr/src/cmd/bhyve/acpi.c b/usr/src/cmd/bhyve/acpi.c index 309ba98a11..1ed1ab6c60 100644 --- a/usr/src/cmd/bhyve/acpi.c +++ b/usr/src/cmd/bhyve/acpi.c @@ -39,7 +39,9 @@ * The tables are placed in the guest's ROM area just below 1MB physical, * above the MPTable. 
* - * Layout + * Layout (No longer correct at FADT and beyond due to properly + * calculating the size of the MADT to allow for changes to + * VM_MAXCPU above 21 which overflows this layout.) * ------ * RSDP -> 0xf2400 (36 bytes fixed) * RSDT -> 0xf2440 (36 bytes + 4*7 table addrs, 4 used) @@ -74,18 +76,31 @@ __FBSDID("$FreeBSD$"); #include "pci_emul.h" /* - * Define the base address of the ACPI tables, and the offsets to - * the individual tables + * Define the base address of the ACPI tables, the sizes of some tables, + * and the offsets to the individual tables. */ #define BHYVE_ACPI_BASE 0xf2400 #define RSDT_OFFSET 0x040 #define XSDT_OFFSET 0x080 #define MADT_OFFSET 0x100 -#define FADT_OFFSET 0x200 -#define HPET_OFFSET 0x340 -#define MCFG_OFFSET 0x380 -#define FACS_OFFSET 0x3C0 -#define DSDT_OFFSET 0x400 +/* + * The MADT consists of: + * 44 Fixed Header + * 8 * maxcpu Processor Local APIC entries + * 12 I/O APIC entry + * 2 * 10 Interrupt Source Override entries + * 6 Local APIC NMI entry + */ +#define MADT_SIZE (44 + VM_MAXCPU*8 + 12 + 2*10 + 6) +#define FADT_OFFSET (MADT_OFFSET + MADT_SIZE) +#define FADT_SIZE 0x140 +#define HPET_OFFSET (FADT_OFFSET + FADT_SIZE) +#define HPET_SIZE 0x40 +#define MCFG_OFFSET (HPET_OFFSET + HPET_SIZE) +#define MCFG_SIZE 0x40 +#define FACS_OFFSET (MCFG_OFFSET + MCFG_SIZE) +#define FACS_SIZE 0x40 +#define DSDT_OFFSET (FACS_OFFSET + FACS_SIZE) #define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" #define BHYVE_ASL_SUFFIX ".aml" @@ -256,6 +271,7 @@ basl_fwrite_madt(FILE *fp) EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02x\n", i); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n"); + EFPRINTF(fp, "\t\t\tRuntime Online Capable : 0\n"); EFPRINTF(fp, "\n"); } diff --git a/usr/src/cmd/bhyve/bhyverun.h b/usr/src/cmd/bhyve/bhyverun.h index cdde04862c..8df8e01a73 100644 --- a/usr/src/cmd/bhyve/bhyverun.h +++ b/usr/src/cmd/bhyve/bhyverun.h @@ -48,6 +48,7 @@ struct vmctx; extern int guest_ncpus; 
+extern uint16_t cores, sockets, threads; extern char *guest_uuid_str; extern char *vmname; #ifndef __FreeBSD__ diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index fcb4149b62..72c5b02a0d 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -77,12 +77,11 @@ __FBSDID("$FreeBSD$"); #ifdef __FreeBSD__ #define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) #else /* Enlarge to keep pace with the virtio-block ring size */ #define BLOCKIF_NUMTHR 16 -#define BLOCKIF_MAXREQ (128 + BLOCKIF_NUMTHR) #endif +#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) enum blockop { BOP_READ, @@ -705,13 +704,7 @@ blockif_open(const char *optstr, const char *ident) err: if (fd >= 0) close(fd); -#ifdef __FreeBSD__ - free(cp); - free(xopts); - free(nopt); -#else free(nopt); -#endif return (NULL); } diff --git a/usr/src/cmd/bhyve/block_if.h b/usr/src/cmd/bhyve/block_if.h index 8401cd9529..bff2b42768 100644 --- a/usr/src/cmd/bhyve/block_if.h +++ b/usr/src/cmd/bhyve/block_if.h @@ -41,16 +41,13 @@ #include <sys/uio.h> #include <sys/unistd.h> -#ifdef __FreeBSD__ -#define BLOCKIF_IOV_MAX 33 /* not practical to be IOV_MAX */ -#else /* - * Upstream is in the process of bumping this up to 128 for several reasons, - * including Windows compatibility. For the sake of our Windows support, we - * will use the higher value now. + * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in + * a single request. BLOCKIF_RING_MAX is the maximum number of + * pending requests that can be queued. */ -#define BLOCKIF_IOV_MAX 128 -#endif +#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */ +#define BLOCKIF_RING_MAX 128 struct blockif_req { int br_iovcnt; diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c index 69bcf53c31..71cb780544 100644 --- a/usr/src/cmd/bhyve/gdb.c +++ b/usr/src/cmd/bhyve/gdb.c @@ -2,7 +2,6 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017-2018 John H. 
Baldwin <jhb@FreeBSD.org> - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -784,15 +783,24 @@ gdb_read_mem(const uint8_t *data, size_t len) bool started; int error; + /* Skip 'm' */ + data += 1; + len -= 1; + + /* Parse and consume address. */ cp = memchr(data, ',', len); - if (cp == NULL) { + if (cp == NULL || cp == data) { send_error(EINVAL); return; } - gva = parse_integer(data + 1, cp - (data + 1)); - resid = parse_integer(cp + 1, len - (cp + 1 - data)); - started = false; + gva = parse_integer(data, cp - data); + len -= (cp - data) + 1; + data += (cp - data) + 1; + /* Parse length. */ + resid = parse_integer(data, len); + + started = false; while (resid > 0) { error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); if (error == -1) { @@ -878,6 +886,119 @@ gdb_read_mem(const uint8_t *data, size_t len) finish_packet(); } +static void +gdb_write_mem(const uint8_t *data, size_t len) +{ + uint64_t gpa, gva, val; + uint8_t *cp; + size_t resid, todo, bytes; + int error; + + /* Skip 'M' */ + data += 1; + len -= 1; + + /* Parse and consume address. */ + cp = memchr(data, ',', len); + if (cp == NULL || cp == data) { + send_error(EINVAL); + return; + } + gva = parse_integer(data, cp - data); + len -= (cp - data) + 1; + data += (cp - data) + 1; + + /* Parse and consume length. */ + cp = memchr(data, ':', len); + if (cp == NULL || cp == data) { + send_error(EINVAL); + return; + } + resid = parse_integer(data, cp - data); + len -= (cp - data) + 1; + data += (cp - data) + 1; + + /* Verify the available bytes match the length. */ + if (len != resid * 2) { + send_error(EINVAL); + return; + } + + while (resid > 0) { + error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); + if (error == -1) { + send_error(errno); + return; + } + if (error == 0) { + send_error(EFAULT); + return; + } + + /* Write bytes to current page. 
*/ + todo = getpagesize() - gpa % getpagesize(); + if (todo > resid) + todo = resid; + + cp = paddr_guest2host(ctx, gpa, todo); + if (cp != NULL) { + /* + * If this page is guest RAM, write it a byte + * at a time. + */ + while (todo > 0) { + assert(len >= 2); + *cp = parse_byte(data); + data += 2; + len -= 2; + cp++; + gpa++; + gva++; + resid--; + todo--; + } + } else { + /* + * If this page isn't guest RAM, try to handle + * it via MMIO. For MMIO requests, use + * aligned writes of words when possible. + */ + while (todo > 0) { + if (gpa & 1 || todo == 1) { + bytes = 1; + val = parse_byte(data); + } else if (gpa & 2 || todo == 2) { + bytes = 2; + val = parse_byte(data) | + (parse_byte(data + 2) << 8); + } else { + bytes = 4; + val = parse_byte(data) | + (parse_byte(data + 2) << 8) | + (parse_byte(data + 4) << 16) | + (parse_byte(data + 6) << 24); + } + error = write_mem(ctx, cur_vcpu, gpa, val, + bytes); + if (error == 0) { + gpa += bytes; + gva += bytes; + resid -= bytes; + todo -= bytes; + data += 2 * bytes; + len -= 2 * bytes; + } else { + send_error(EFAULT); + return; + } + } + } + assert(resid == 0 || gpa % getpagesize() == 0); + } + assert(len == 0); + send_ok(); +} + static bool command_equals(const uint8_t *data, size_t len, const char *cmd) { @@ -888,13 +1009,81 @@ command_equals(const uint8_t *data, size_t len, const char *cmd) } static void +check_features(const uint8_t *data, size_t len) +{ + char *feature, *next_feature, *str, *value; + bool supported; + + str = malloc(len + 1); + memcpy(str, data, len); + str[len] = '\0'; + next_feature = str; + + while ((feature = strsep(&next_feature, ";")) != NULL) { + /* + * Null features shouldn't exist, but skip if they + * do. + */ + if (strcmp(feature, "") == 0) + continue; + + /* + * Look for the value or supported / not supported + * flag. 
+ */ + value = strchr(feature, '='); + if (value != NULL) { + *value = '\0'; + value++; + supported = true; + } else { + value = feature + strlen(feature) - 1; + switch (*value) { + case '+': + supported = true; + break; + case '-': + supported = false; + break; + default: + /* + * This is really a protocol error, + * but we just ignore malformed + * features for ease of + * implementation. + */ + continue; + } + value = NULL; + } + + /* No currently supported features. */ +#ifndef __FreeBSD__ + /* + * The compiler dislikes 'supported' being set but never used. + * Make it happy here. + */ + if (supported) { + debug("feature '%s' supported\n", feature); + } +#endif /* __FreeBSD__ */ + } + free(str); + + start_packet(); + + /* This is an arbitrary limit. */ + append_string("PacketSize=4096"); + finish_packet(); +} + +static void gdb_query(const uint8_t *data, size_t len) { /* * TODO: * - qSearch - * - qSupported */ if (command_equals(data, len, "qAttached")) { start_packet(); @@ -932,6 +1121,10 @@ gdb_query(const uint8_t *data, size_t len) start_packet(); append_char('l'); finish_packet(); + } else if (command_equals(data, len, "qSupported")) { + data += strlen("qSupported"); + len -= strlen("qSupported"); + check_features(data, len); } else if (command_equals(data, len, "qThreadExtraInfo")) { char buf[16]; int tid; @@ -1017,6 +1210,9 @@ handle_command(const uint8_t *data, size_t len) case 'm': gdb_read_mem(data, len); break; + case 'M': + gdb_write_mem(data, len); + break; case 'T': { int tid; @@ -1052,7 +1248,6 @@ handle_command(const uint8_t *data, size_t len) finish_packet(); break; case 'G': /* TODO */ - case 'M': /* TODO */ case 'v': /* Handle 'vCont' */ /* 'vCtrlC' */ diff --git a/usr/src/cmd/bhyve/gdb.h b/usr/src/cmd/bhyve/gdb.h index fa2184df16..09ebc34f24 100644 --- a/usr/src/cmd/bhyve/gdb.h +++ b/usr/src/cmd/bhyve/gdb.h @@ -2,7 +2,6 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 John H. 
Baldwin <jhb@FreeBSD.org> - * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c index 85e56af10b..90aefe45c8 100644 --- a/usr/src/cmd/bhyve/mem.c +++ b/usr/src/cmd/bhyve/mem.c @@ -251,30 +251,43 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema)); } -struct read_mem_args { - uint64_t *rval; +struct rw_mem_args { + uint64_t *val; int size; + int operation; }; static int -read_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, +rw_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, void *arg) { - struct read_mem_args *rma; + struct rw_mem_args *rma; rma = arg; - return (mr->handler(ctx, vcpu, MEM_F_READ, paddr, rma->size, - rma->rval, mr->arg1, mr->arg2)); + return (mr->handler(ctx, vcpu, rma->operation, paddr, rma->size, + rma->val, mr->arg1, mr->arg2)); } int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size) { - struct read_mem_args rma; + struct rw_mem_args rma; - rma.rval = rval; + rma.val = rval; rma.size = size; - return (access_memory(ctx, vcpu, gpa, read_mem_cb, &rma)); + rma.operation = MEM_F_READ; + return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma)); +} + +int +write_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size) +{ + struct rw_mem_args rma; + + rma.val = &wval; + rma.size = size; + rma.operation = MEM_F_WRITE; + return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma)); } static int diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h index 596c0b0cf3..38d773c43f 100644 --- a/usr/src/cmd/bhyve/mem.h +++ b/usr/src/cmd/bhyve/mem.h @@ -61,5 +61,7 @@ int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int register_mem(struct mem_range *memp); int register_mem_fallback(struct 
mem_range *memp); int unregister_mem(struct mem_range *memp); +int write_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t wval, + int size); #endif /* _MEM_H_ */ diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c index 03db632e37..a71cc528aa 100644 --- a/usr/src/cmd/bhyve/pci_emul.c +++ b/usr/src/cmd/bhyve/pci_emul.c @@ -69,8 +69,8 @@ __FBSDID("$FreeBSD$"); #include "pci_irq.h" #include "pci_lpc.h" -#define CONF1_ADDR_PORT 0x0cf8 -#define CONF1_DATA_PORT 0x0cfc +#define CONF1_ADDR_PORT 0x0cf8 +#define CONF1_DATA_PORT 0x0cfc #define CONF1_ENABLE 0x80000000ul @@ -492,7 +492,7 @@ modify_bar_registration(struct pci_devinst *pi, int idx, int registration) iop.handler = pci_emul_io_handler; iop.arg = pi; error = register_inout(&iop); - } else + } else error = unregister_inout(&iop); break; case PCIBAR_MEM32: @@ -560,7 +560,7 @@ memen(struct pci_devinst *pi) * the address range decoded by the BAR register. */ static void -update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) +update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) { int decode; @@ -689,7 +689,7 @@ pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); } - + register_bar(pdi, idx); return (0); @@ -862,7 +862,7 @@ pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); - + tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; /* Align table size to nearest 4K */ @@ -1112,7 +1112,7 @@ init_pci(struct vmctx *ctx) for (bus = 0; bus < MAXBUSES; bus++) { if ((bi = pci_businfo[bus]) == NULL) continue; - /* + /* * Keep track of the i/o and memory resources allocated to * this bus. 
*/ @@ -1750,9 +1750,9 @@ pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) else unregister_bar(pi, i); } - break; + break; default: - assert(0); + assert(0); } } @@ -1969,7 +1969,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); #define DIOSZ 8 #define DMEMSZ 4096 struct pci_emul_dsoftc { - uint8_t ioregs[DIOSZ]; + uint8_t ioregs[DIOSZ]; uint8_t memregs[2][DMEMSZ]; }; @@ -2061,7 +2061,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, } else { printf("diow: memw unknown size %d\n", size); } - + /* * magic interrupt ?? */ @@ -2088,6 +2088,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, return (0); } + value = 0; if (size == 1) { value = sc->ioregs[offset]; } else if (size == 2) { @@ -2105,7 +2106,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, offset, size); return (0); } - + i = baridx - 1; /* 'memregs' index */ if (size == 1) { diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index 387611c888..a56c1d6959 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -85,6 +85,9 @@ static int nvme_debug = 0; #define NVME_IOSLOTS 8 +/* The NVMe spec defines bits 13:4 in BAR0 as reserved */ +#define NVME_MMIO_SPACE_MIN (1 << 14) + #define NVME_QUEUES 16 #define NVME_MAX_QENTRIES 2048 @@ -199,6 +202,9 @@ struct pci_nvme_softc { struct nvme_namespace_data nsdata; struct nvme_controller_data ctrldata; + struct nvme_error_information_entry err_log; + struct nvme_health_information_page health_log; + struct nvme_firmware_page fw_log; struct pci_nvme_blockstore nvstore; @@ -358,7 +364,7 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc) nd->nuse = nd->nsze; /* Get LBA and backstore information from backing store */ - nd->nlbaf = 1; + nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 
1 LBA Format) */ /* LBA data-sz = 2^lbads */ nd->lbaf[0] = sc->nvstore.sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; @@ -366,6 +372,15 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc) } static void +pci_nvme_init_logpages(struct pci_nvme_softc *sc) +{ + + memset(&sc->err_log, 0, sizeof(sc->err_log)); + memset(&sc->health_log, 0, sizeof(sc->health_log)); + memset(&sc->fw_log, 0, sizeof(sc->fw_log)); +} + +static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { DPRINTF(("%s\r\n", __func__)); @@ -455,6 +470,47 @@ pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) } static int +nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src, + size_t len) +{ + uint8_t *dst; + size_t bytes; + + if (len > (8 * 1024)) { + return (-1); + } + + /* Copy from the start of prp1 to the end of the physical page */ + bytes = PAGE_SIZE - (prp1 & PAGE_MASK); + bytes = MIN(bytes, len); + + dst = vm_map_gpa(ctx, prp1, bytes); + if (dst == NULL) { + return (-1); + } + + memcpy(dst, src, bytes); + + src += bytes; + + len -= bytes; + if (len == 0) { + return (0); + } + + len = MIN(len, PAGE_SIZE); + + dst = vm_map_gpa(ctx, prp2, len); + if (dst == NULL) { + return (-1); + } + + memcpy(dst, src, len); + + return (0); +} + +static int nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { @@ -587,31 +643,24 @@ nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, { uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2; uint8_t logpage = command->cdw10 & 0xFF; -#ifdef __FreeBSD__ - void *data; -#else - /* Our compiler grumbles about this, despite it being OK */ - void *data = NULL; -#endif DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize)); - if (logpage >= 1 && logpage <= 3) - data = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, - PAGE_SIZE); - pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); switch (logpage) { - case 0x01: /* Error 
information */ - memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); + case NVME_LOG_ERROR: + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, + command->prp2, (uint8_t *)&sc->err_log, logsize); break; - case 0x02: /* SMART/Health information */ + case NVME_LOG_HEALTH_INFORMATION: /* TODO: present some smart info */ - memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, + command->prp2, (uint8_t *)&sc->health_log, logsize); break; - case 0x03: /* Firmware slot information */ - memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); + case NVME_LOG_FIRMWARE_SLOT: + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, + command->prp2, (uint8_t *)&sc->fw_log, logsize); break; default: WPRINTF(("%s get log page %x command not supported\r\n", @@ -635,14 +684,13 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ - dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, - sizeof(sc->nsdata)); - memcpy(dest, &sc->nsdata, sizeof(sc->nsdata)); + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, + command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata)); break; case 0x01: /* return Identify Controller data structure */ - dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, - sizeof(sc->ctrldata)); - memcpy(dest, &sc->ctrldata, sizeof(sc->ctrldata)); + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, + command->prp2, (uint8_t *)&sc->ctrldata, + sizeof(sc->ctrldata)); break; case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, @@ -1856,9 +1904,16 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0); - /* allocate size of nvme registers + doorbell space for all queues */ + /* + * Allocate size of NVMe registers + doorbell space 
for all queues. + * + * The specification requires a minimum memory I/O window size of 16K. + * The Windows driver will refuse to start a device with a smaller + * window. + */ pci_membar_sz = sizeof(struct nvme_registers) + - 2*sizeof(uint32_t)*(sc->max_queues + 1); + 2 * sizeof(uint32_t) * (sc->max_queues + 1); + pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); @@ -1880,6 +1935,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_nvme_reset(sc); pci_nvme_init_ctrldata(sc); pci_nvme_init_nsdata(sc); + pci_nvme_init_logpages(sc); pci_lintr_request(pi); diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index b0c3b06187..f9da14ce89 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -3,6 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,12 +69,9 @@ __FBSDID("$FreeBSD$"); #include "virtio.h" #include "block_if.h" -#ifdef __FreeBSD__ -#define VTBLK_RINGSZ 64 -#else -/* Enlarge to match bigger BLOCKIF_IOV_MAX */ #define VTBLK_RINGSZ 128 -#endif + +_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request"); #define VTBLK_S_OK 0 #define VTBLK_S_IOERR 1 @@ -398,9 +396,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* setup virtio block config space */ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ -#ifdef __FreeBSD__ - sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX; -#else + /* * If Linux is presented with a seg_max greater than the virtio queue * size, it can stumble into situations where it violates its own @@ -409,7 +405,6 @@ pci_vtblk_init(struct vmctx *ctx, struct 
pci_devinst *pi, char *opts) * of a request. */ sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX); -#endif sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ sc->vbsc_cfg.vbc_geometry.heads = 0; sc->vbsc_cfg.vbc_geometry.sectors = 0; diff --git a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c index 238f07398b..38e7d918a0 100644 --- a/usr/src/cmd/bhyve/pci_virtio_scsi.c +++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c @@ -634,7 +634,7 @@ pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, struct pci_vtscsi_queue *queue, int num) { struct pci_vtscsi_worker *worker; - char threadname[16]; + char tname[MAXCOMLEN + 1]; int i; queue->vsq_sc = sc; @@ -653,8 +653,8 @@ pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, (void *)worker); - sprintf(threadname, "virtio-scsi:%d-%d", num, i); - pthread_set_name_np(worker->vsw_thread, threadname); + snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i); + pthread_set_name_np(worker->vsw_thread, tname); LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); } diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index 988e6933cc..29d56ec32c 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -2640,13 +2640,10 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) struct pci_xhci_dev_emu *dev; struct usb_devemu *ue; void *devsc; -#ifdef __FreeBSD__ char *uopt, *xopts, *config; -#else - char *uopt = NULL, *xopts, *config; -#endif int usb3_port, usb2_port, i; + uopt = NULL; usb3_port = sc->usb3_port_start - 1; usb2_port = sc->usb2_port_start - 1; devices = NULL; @@ -2721,10 +2718,6 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) sc->ndevices++; } -#ifdef __FreeBSD__ - if (uopt != NULL) - free(uopt); -#endif portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); diff --git a/usr/src/cmd/bhyve/smbiostbl.c b/usr/src/cmd/bhyve/smbiostbl.c 
index 35a41a0855..da227f813a 100644 --- a/usr/src/cmd/bhyve/smbiostbl.c +++ b/usr/src/cmd/bhyve/smbiostbl.c @@ -637,7 +637,7 @@ smbios_type4_initializer(struct smbios_structure *template_entry, { int i; - for (i = 0; i < guest_ncpus; i++) { + for (i = 0; i < sockets; i++) { struct smbios_table_type4 *type4; char *p; int nstrings, len; @@ -656,6 +656,16 @@ smbios_type4_initializer(struct smbios_structure *template_entry, *(*endaddr) = '\0'; (*endaddr)++; type4->socket = nstrings + 1; + /* Revise cores and threads after update to smbios 3.0 */ + if (cores > 254) + type4->cores = 0; + else + type4->cores = cores; + /* This threads is total threads in a socket */ + if ((cores * threads) > 254) + type4->threads = 0; + else + type4->threads = (cores * threads); curaddr = *endaddr; } diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c index 79d8c64c7a..c0fff61d00 100644 --- a/usr/src/cmd/bhyve/uart_emul.c +++ b/usr/src/cmd/bhyve/uart_emul.c @@ -117,8 +117,8 @@ struct fifo { struct ttyfd { bool opened; - int fd; /* tty device file descriptor */ - struct termios tio_orig, tio_new; /* I/O Terminals */ + int rfd; /* fd for reading */ + int wfd; /* fd for writing, may be == rfd */ }; struct uart_softc { @@ -167,16 +167,15 @@ ttyclose(void) static void ttyopen(struct ttyfd *tf) { - - tcgetattr(tf->fd, &tf->tio_orig); - - tf->tio_new = tf->tio_orig; - cfmakeraw(&tf->tio_new); - tf->tio_new.c_cflag |= CLOCAL; - tcsetattr(tf->fd, TCSANOW, &tf->tio_new); - - if (tf->fd == STDIN_FILENO) { - tio_stdio_orig = tf->tio_orig; + struct termios orig, new; + + tcgetattr(tf->rfd, &orig); + new = orig; + cfmakeraw(&new); + new.c_cflag |= CLOCAL; + tcsetattr(tf->rfd, TCSANOW, &new); + if (uart_stdio) { + tio_stdio_orig = orig; atexit(ttyclose); } } @@ -186,7 +185,7 @@ ttyread(struct ttyfd *tf) { unsigned char rb; - if (read(tf->fd, &rb, 1) == 1) + if (read(tf->rfd, &rb, 1) == 1) return (rb); else return (-1); @@ -196,7 +195,7 @@ static void ttywrite(struct ttyfd *tf, 
unsigned char wb) { - (void)write(tf->fd, &wb, 1); + (void)write(tf->wfd, &wb, 1); } #ifndef __FreeBSD__ @@ -224,7 +223,7 @@ rxfifo_reset(struct uart_softc *sc, int size) * Flush any unread input from the tty buffer. */ while (1) { - nread = read(sc->tty.fd, flushbuf, sizeof(flushbuf)); + nread = read(sc->tty.rfd, flushbuf, sizeof(flushbuf)); if (nread != sizeof(flushbuf)) break; } @@ -337,8 +336,9 @@ rxfifo_numchars(struct uart_softc *sc) static void uart_opentty(struct uart_softc *sc) { + ttyopen(&sc->tty); - sc->mev = mevent_add(sc->tty.fd, EVF_READ, uart_drain, sc); + sc->mev = mevent_add(sc->tty.rfd, EVF_READ, uart_drain, sc); assert(sc->mev != NULL); } @@ -439,7 +439,7 @@ uart_drain(int fd, enum ev_type ev, void *arg) sc = arg; - assert(fd == sc->tty.fd); + assert(fd == sc->tty.rfd); assert(ev == EVF_READ); /* @@ -500,91 +500,84 @@ uart_write(struct uart_softc *sc, int offset, uint8_t value) sc->thre_int_pending = true; break; case REG_IER: -#ifndef __FreeBSD__ - /* - * Assert an interrupt if re-enabling the THRE intr, since we - * always report THRE as active in the status register. - */ - if ((sc->ier & IER_ETXRDY) == 0 && - (value & IER_ETXRDY) != 0) { + /* Set pending when IER_ETXRDY is raised (edge-triggered). */ + if ((sc->ier & IER_ETXRDY) == 0 && (value & IER_ETXRDY) != 0) sc->thre_int_pending = true; - } -#endif /* * Apply mask so that bits 4-7 are 0 * Also enables bits 0-3 only if they're 1 */ sc->ier = value & 0x0F; break; - case REG_FCR: - /* - * When moving from FIFO and 16450 mode and vice versa, - * the FIFO contents are reset. - */ - if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { - fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1; - rxfifo_reset(sc, fifosz); - } + case REG_FCR: + /* + * When moving from FIFO and 16450 mode and vice versa, + * the FIFO contents are reset. + */ + if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { + fifosz = (value & FCR_ENABLE) ? 
FIFOSZ : 1; + rxfifo_reset(sc, fifosz); + } - /* - * The FCR_ENABLE bit must be '1' for the programming - * of other FCR bits to be effective. - */ - if ((value & FCR_ENABLE) == 0) { - sc->fcr = 0; - } else { - if ((value & FCR_RCV_RST) != 0) - rxfifo_reset(sc, FIFOSZ); - - sc->fcr = value & - (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); - } - break; - case REG_LCR: - sc->lcr = value; - break; - case REG_MCR: - /* Apply mask so that bits 5-7 are 0 */ - sc->mcr = value & 0x1F; - msr = modem_status(sc->mcr); - - /* - * Detect if there has been any change between the - * previous and the new value of MSR. If there is - * then assert the appropriate MSR delta bit. - */ - if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) - sc->msr |= MSR_DCTS; - if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) - sc->msr |= MSR_DDSR; - if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) - sc->msr |= MSR_DDCD; - if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) - sc->msr |= MSR_TERI; - - /* - * Update the value of MSR while retaining the delta - * bits. - */ - sc->msr &= MSR_DELTA_MASK; - sc->msr |= msr; - break; - case REG_LSR: - /* - * Line status register is not meant to be written to - * during normal operation. - */ - break; - case REG_MSR: - /* - * As far as I can tell MSR is a read-only register. - */ - break; - case REG_SCR: - sc->scr = value; - break; - default: - break; + /* + * The FCR_ENABLE bit must be '1' for the programming + * of other FCR bits to be effective. + */ + if ((value & FCR_ENABLE) == 0) { + sc->fcr = 0; + } else { + if ((value & FCR_RCV_RST) != 0) + rxfifo_reset(sc, FIFOSZ); + + sc->fcr = value & + (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); + } + break; + case REG_LCR: + sc->lcr = value; + break; + case REG_MCR: + /* Apply mask so that bits 5-7 are 0 */ + sc->mcr = value & 0x1F; + msr = modem_status(sc->mcr); + + /* + * Detect if there has been any change between the + * previous and the new value of MSR. If there is + * then assert the appropriate MSR delta bit. 
+ */ + if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) + sc->msr |= MSR_DCTS; + if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) + sc->msr |= MSR_DDSR; + if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) + sc->msr |= MSR_DDCD; + if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) + sc->msr |= MSR_TERI; + + /* + * Update the value of MSR while retaining the delta + * bits. + */ + sc->msr &= MSR_DELTA_MASK; + sc->msr |= msr; + break; + case REG_LSR: + /* + * Line status register is not meant to be written to + * during normal operation. + */ + break; + case REG_MSR: + /* + * As far as I can tell MSR is a read-only register. + */ + break; + case REG_SCR: + sc->scr = value; + break; + default: + break; } done: @@ -841,24 +834,6 @@ uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, return (sc); } -static int -uart_tty_backend(struct uart_softc *sc, const char *opts) -{ - int fd; - int retval; - - retval = -1; - - fd = open(opts, O_RDWR | O_NONBLOCK); - if (fd > 0 && isatty(fd)) { - sc->tty.fd = fd; - sc->tty.opened = true; - retval = 0; - } - - return (retval); -} - #ifndef __FreeBSD__ static int uart_sock_backend(struct uart_softc *sc, const char *inopts) @@ -896,7 +871,7 @@ uart_sock_backend(struct uart_softc *sc, const char *inopts) return (-1); } sc->sock = true; - sc->tty.fd = -1; + sc->tty.rfd = sc->tty.wfd = -1; sc->usc_sock.servmev = mevent_add(sc->usc_sock.servfd, EVF_READ, uart_sock_accept, sc); assert(sc->usc_sock.servmev != NULL); @@ -905,55 +880,84 @@ uart_sock_backend(struct uart_softc *sc, const char *inopts) } #endif /* not __FreeBSD__ */ -int -uart_set_backend(struct uart_softc *sc, const char *opts) +static int +uart_stdio_backend(struct uart_softc *sc) { - int retval; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif - retval = -1; + if (uart_stdio) + return (-1); - if (opts == NULL) - return (0); + sc->tty.rfd = STDIN_FILENO; + sc->tty.wfd = STDOUT_FILENO; + sc->tty.opened = true; - if 
(strcmp("stdio", opts) == 0) { - if (!uart_stdio) { - sc->tty.fd = STDIN_FILENO; - sc->tty.opened = true; - uart_stdio = true; - retval = 0; - } -#ifndef __FreeBSD__ - } else if (strncmp("socket,", opts, 7) == 0) { - return (uart_sock_backend(sc, opts)); + if (fcntl(sc->tty.rfd, F_SETFL, O_NONBLOCK) != 0) + return (-1); + if (fcntl(sc->tty.wfd, F_SETFL, O_NONBLOCK) != 0) + return (-1); + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ); + if (caph_rights_limit(sc->tty.rfd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (caph_ioctls_limit(sc->tty.rfd, cmds, nitems(cmds)) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif - } else if (uart_tty_backend(sc, opts) == 0) { - retval = 0; - } - /* Make the backend file descriptor non-blocking */ - if (retval == 0 && sc->tty.fd != -1) - retval = fcntl(sc->tty.fd, F_SETFL, O_NONBLOCK); + uart_stdio = true; - if (retval == 0) { + return (0); +} + +static int +uart_tty_backend(struct uart_softc *sc, const char *opts) +{ #ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, - CAP_WRITE); - if (caph_rights_limit(sc->tty.fd, &rights) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (caph_ioctls_limit(sc->tty.fd, cmds, nitems(cmds)) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (!uart_stdio) { - if (caph_limit_stdin() == -1) - errx(EX_OSERR, - "Unable to apply rights for sandbox"); - } + cap_rights_t rights; + cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; +#endif + int fd; + + fd = open(opts, O_RDWR | O_NONBLOCK); + if (fd < 0 || !isatty(fd)) + return (-1); + + sc->tty.rfd = sc->tty.wfd = fd; + sc->tty.opened = true; + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); + if (caph_rights_limit(fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) 
+ errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + return (0); +} + +int +uart_set_backend(struct uart_softc *sc, const char *opts) +{ + int retval; + + if (opts == NULL) + return (0); + +#ifndef __FreeBSD__ + if (strncmp("socket,", opts, 7) == 0) + return (uart_sock_backend(sc, opts)); #endif + if (strcmp("stdio", opts) == 0) + retval = uart_stdio_backend(sc); + else + retval = uart_tty_backend(sc, opts); + if (retval == 0) uart_opentty(sc); - } return (retval); } diff --git a/usr/src/cmd/bhyve/virtio.c b/usr/src/cmd/bhyve/virtio.c index d3ff5e3951..47a3ed29ba 100644 --- a/usr/src/cmd/bhyve/virtio.c +++ b/usr/src/cmd/bhyve/virtio.c @@ -3,6 +3,7 @@ * * Copyright (c) 2013 Chris Torek <torek @ torek net> * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -32,6 +33,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/uio.h> +#include <machine/atomic.h> + #include <stdio.h> #include <stdint.h> #include <pthread.h> @@ -422,13 +425,12 @@ vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) vue = &vuh->vu_ring[uidx++ & mask]; vue->vu_idx = idx; vue->vu_tlen = iolen; -#ifndef __FreeBSD__ + /* * Ensure the used descriptor is visible before updating the index. * This is necessary on ISAs with memory ordering less strict than x86. */ - wmb(); -#endif + atomic_thread_fence_rel(); vuh->vu_idx = uidx; } @@ -466,14 +468,13 @@ vq_endchains(struct vqueue_info *vq, int used_all_avail) vs = vq->vq_vs; old_idx = vq->vq_save_used; vq->vq_save_used = new_idx = vq->vq_used->vu_idx; -#ifndef __FreeBSD__ + /* * Use full memory barrier between vu_idx store from preceding * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or * va_flags below. 
*/ - mb(); -#endif + atomic_thread_fence_seq_cst(); if (used_all_avail && (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) intr = 1; diff --git a/usr/src/cmd/bhyve/xmsr.c b/usr/src/cmd/bhyve/xmsr.c index 3278ea591c..7fe4804a2e 100644 --- a/usr/src/cmd/bhyve/xmsr.c +++ b/usr/src/cmd/bhyve/xmsr.c @@ -79,6 +79,7 @@ emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val) return (0); case MSR_NB_CFG1: + case MSR_LS_CFG: case MSR_IC_CFG: return (0); /* Ignore writes */ @@ -148,6 +149,7 @@ emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val) break; case MSR_NB_CFG1: + case MSR_LS_CFG: case MSR_IC_CFG: /* * The reset value is processor family dependent so diff --git a/usr/src/compat/freebsd/amd64/machine/atomic.h b/usr/src/compat/freebsd/amd64/machine/atomic.h index d8e8131840..1da9724b7d 100644 --- a/usr/src/compat/freebsd/amd64/machine/atomic.h +++ b/usr/src/compat/freebsd/amd64/machine/atomic.h @@ -241,6 +241,20 @@ atomic_swap_long(volatile u_long *p, u_long v) /* Needed for the membar functions */ #include_next <sys/atomic.h> +static __inline void +atomic_thread_fence_rel(void) +{ + /* Equivalent to their __compiler_membar() */ + __asm __volatile(" " : : : "memory"); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + /* Equivalent to their !KERNEL storeload_barrer() */ + __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); +} + #define mb() membar_enter() #define rmb() membar_consumer() #define wmb() membar_producer() diff --git a/usr/src/compat/freebsd/sys/eventhandler.h b/usr/src/compat/freebsd/sys/eventhandler.h new file mode 100644 index 0000000000..133aa664f0 --- /dev/null +++ b/usr/src/compat/freebsd/sys/eventhandler.h @@ -0,0 +1,19 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. 
+ * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#ifndef _COMPAT_FREEBSD_SYS_EVENTHANDLER_H_ +#define _COMPAT_FREEBSD_SYS_EVENTHANDLER_H_ + +#endif /* _COMPAT_FREEBSD_SYS_EVENTHANDLER_H_ */ diff --git a/usr/src/compat/freebsd/vm/vm_param.h b/usr/src/compat/freebsd/vm/vm_param.h index 8affac9d7e..fd76b62a37 100644 --- a/usr/src/compat/freebsd/vm/vm_param.h +++ b/usr/src/compat/freebsd/vm/vm_param.h @@ -5,6 +5,9 @@ #define KERN_SUCCESS 0 +/* Not a direct correlation, but the primary necessity is being non-zero */ +#define KERN_RESOURCE_SHORTAGE ENOMEM + /* * The VM_MAXUSER_ADDRESS is used to determine the upper limit size limit of a * vmspace, their 'struct as' equivalent. The compat value is sized well below diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index 676fdd3a9d..1cddfd829e 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -2,12 +2,17 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit 6b1bb0edb4792cc3d4e6b71c4a80e99438081d5d -Author: imp <imp@FreeBSD.org> -Date: Tue Feb 12 19:05:09 2019 +0000 +commit 3b9cb80b242682690203709aaff4eafae41c138f +Author: jhb <jhb@FreeBSD.org> +Date: Mon Jun 3 23:17:35 2019 +0000 - Revert r343077 until the license issues surrounding it can be resolved. + Emulate the AMD MSR_LS_CFG MSR used for various Ryzen errata. - Approved by: core@ + Writes are ignored and reads always return zero. 
-Which corresponds to SVN revision: 344057 + Submitted by: José Albornoz <jojo@eljojo.net> (write-only version) + Reviewed by: Patrick Mooney, cem + MFC after: 2 weeks + Differential Revision: https://reviews.freebsd.org/D19506 + +Which corresponds to SVN revision: 348592 diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index ca9ed9e4e1..114eec365a 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -574,6 +574,7 @@ svm_vminit(struct vm *vm, pmap_t pmap) struct svm_vcpu *vcpu; vm_paddr_t msrpm_pa, iopm_pa, pml4_pa; int i; + uint16_t maxcpus; svm_sc = malloc(sizeof (*svm_sc), M_SVM, M_WAITOK | M_ZERO); if (((uintptr_t)svm_sc & PAGE_MASK) != 0) @@ -627,7 +628,8 @@ svm_vminit(struct vm *vm, pmap_t pmap) iopm_pa = vtophys(svm_sc->iopm_bitmap); msrpm_pa = vtophys(svm_sc->msr_bitmap); pml4_pa = svm_sc->nptp; - for (i = 0; i < VM_MAXCPU; i++) { + maxcpus = vm_get_maxcpus(svm_sc->vm); + for (i = 0; i < maxcpus; i++) { vcpu = svm_get_vcpu(svm_sc, i); vcpu->nextrip = ~0; vcpu->lastcpu = NOCPU; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index 4a636dcea0..cf6b0e69c3 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -3,6 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2018 Joyent, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1044,6 +1045,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) struct vmx *vmx; struct vmcs *vmcs; uint32_t exc_bitmap; + uint16_t maxcpus; vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); if ((uintptr_t)vmx & PAGE_MASK) { @@ -1105,7 +1107,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap) KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); } - for (i = 0; i < VM_MAXCPU; i++) { + maxcpus = vm_get_maxcpus(vm); + for (i = 0; i < maxcpus; i++) { #ifndef __FreeBSD__ /* * Cache physical address lookups for various components which @@ -3472,11 +3475,13 @@ vmx_vmcleanup(void *arg) { int i; struct vmx *vmx = arg; + uint16_t maxcpus; if (apic_access_virtualization(vmx, 0)) vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); - for (i = 0; i < VM_MAXCPU; i++) + maxcpus = vm_get_maxcpus(vmx->vm); + for (i = 0; i < maxcpus; i++) vpid_free(vmx->state[i].vpid); free(vmx, M_VMX); @@ -3873,7 +3878,7 @@ struct vlapic_vtx { struct vlapic vlapic; struct pir_desc *pir_desc; struct vmx *vmx; - uint_t pending_prio; + u_int pending_prio; }; #define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4)) @@ -3935,8 +3940,8 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level) notify = 1; vlapic_vtx->pending_prio = 0; } else { - const uint_t old_prio = vlapic_vtx->pending_prio; - const uint_t prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT); + const u_int old_prio = vlapic_vtx->pending_prio; + const u_int prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT); if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) { atomic_set_int(&vlapic_vtx->pending_prio, prio_bit); @@ -4014,6 +4019,7 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) break; } } + /* * If the highest-priority pending interrupt falls short of the * processor priority of this vCPU, ensure that 'pending_prio' does not @@ -4021,8 +4027,8 @@ vmx_pending_intr(struct vlapic *vlapic, 
int *vecptr) * from incurring a notification later. */ if (vpr <= ppr) { - const uint_t prio_bit = VPR_PRIO_BIT(vpr); - const uint_t old = vlapic_vtx->pending_prio; + const u_int prio_bit = VPR_PRIO_BIT(vpr); + const u_int old = vlapic_vtx->pending_prio; if (old > prio_bit && (old & prio_bit) == 0) { vlapic_vtx->pending_prio = prio_bit; diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c index 5f686d3c62..b949573fe2 100644 --- a/usr/src/uts/i86pc/io/vmm/io/iommu.c +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -32,10 +32,10 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> -#include <sys/types.h> -#include <sys/systm.h> #include <sys/bus.h> +#include <sys/eventhandler.h> #include <sys/sysctl.h> +#include <sys/systm.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index 6a5fcc5d4c..98dfc6ee56 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -853,7 +853,7 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, */ CPU_ZERO(dmask); vcpuid = vm_apicid2vcpuid(vm, dest); - if (vcpuid < VM_MAXCPU) + if (vcpuid < vm_get_maxcpus(vm)) CPU_SET(vcpuid, dmask); } else { /* @@ -980,6 +980,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) struct vlapic *vlapic2; struct vm_exit *vmexit; struct LAPIC *lapic; + uint16_t maxcpus; lapic = vlapic->apic_page; lapic->icr_lo &= ~APIC_DELSTAT_PEND; @@ -1041,11 +1042,12 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) return (0); /* handled completely in the kernel */ } + maxcpus = vm_get_maxcpus(vlapic->vm); if (mode == APIC_DELMODE_INIT) { if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) return (0); - if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) { + if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { vlapic2 = vm_lapic(vlapic->vm, dest); /* move from INIT to waiting-for-SIPI state */ @@ 
-1058,7 +1060,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) } if (mode == APIC_DELMODE_STARTUP) { - if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) { + if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) { vlapic2 = vm_lapic(vlapic->vm, dest); /* @@ -1467,7 +1469,8 @@ void vlapic_init(struct vlapic *vlapic) { KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); - KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU, + KASSERT(vlapic->vcpuid >= 0 && + vlapic->vcpuid < vm_get_maxcpus(vlapic->vm), ("vlapic_init: vcpuid is not initialized")); KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " "initialized")); diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 14e2fc4e60..11915220d2 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -334,7 +334,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) { struct vcpu *vcpu; - KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, + KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_init: invalid vcpu %d", vcpu_id)); vcpu = &vm->vcpu[vcpu_id]; @@ -378,7 +378,7 @@ vm_exitinfo(struct vm *vm, int cpuid) { struct vcpu *vcpu; - if (cpuid < 0 || cpuid >= VM_MAXCPU) + if (cpuid < 0 || cpuid >= vm->maxcpus) panic("vm_exitinfo: invalid cpuid %d", cpuid); vcpu = &vm->vcpu[cpuid]; @@ -546,12 +546,12 @@ vm_init(struct vm *vm, bool create) vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); - for (i = 0; i < VM_MAXCPU; i++) + for (i = 0; i < vm->maxcpus; i++) vcpu_init(vm, i, create); #ifndef __FreeBSD__ tsc_off = (uint64_t)(-(int64_t)rdtsc()); - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { vm->vcpu[i].tsc_offset = tsc_off; } #endif /* __FreeBSD__ */ @@ -591,7 +591,7 @@ vm_create(const char *name, struct vm **retvm) vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ vm->threads = threads_per_core; /* XXX backwards compatibility */ - vm->maxcpus = 0; /* 
XXX not implemented */ + vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ vm_init(vm, true); @@ -609,19 +609,25 @@ vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, *maxcpus = vm->maxcpus; } +uint16_t +vm_get_maxcpus(struct vm *vm) +{ + return (vm->maxcpus); +} + int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) { if (maxcpus != 0) return (EINVAL); /* XXX remove when supported */ - if ((sockets * cores * threads) > VM_MAXCPU) + if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); /* XXX need to check sockets * cores * threads == vCPU, how? */ vm->sockets = sockets; vm->cores = cores; vm->threads = threads; - vm->maxcpus = maxcpus; + vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ return(0); } @@ -646,7 +652,7 @@ vm_cleanup(struct vm *vm, bool destroy) vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); - for (i = 0; i < VM_MAXCPU; i++) + for (i = 0; i < vm->maxcpus; i++) vcpu_cleanup(vm, i, destroy); VMCLEANUP(vm->cookie); @@ -918,7 +924,8 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); - return (EFAULT); + return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : + EFAULT); } } @@ -1156,9 +1163,9 @@ vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, * guaranteed if at least one vcpu is in the VCPU_FROZEN state. 
*/ int state; - KASSERT(vcpuid >= -1 && vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d", + KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d", __func__, vcpuid)); - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { if (vcpuid != -1 && vcpuid != i) continue; state = vcpu_get_state(vm, i, NULL); @@ -1204,7 +1211,7 @@ int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) { - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) @@ -1219,7 +1226,7 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) struct vcpu *vcpu; int error; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) @@ -1273,7 +1280,7 @@ vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm->maxcpus) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) @@ -1286,7 +1293,7 @@ int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm->maxcpus) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) @@ -1478,7 +1485,7 @@ static void vm_handle_rendezvous(struct vm *vm, int vcpuid) { - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), + KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); mtx_lock(&vm->rendezvous_mtx); @@ -1813,7 +1820,7 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) /* * Wakeup the other sleeping vcpus and return to userspace. 
*/ - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm, i, false); } @@ -1873,7 +1880,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how) /* * Notify all active vcpus that they are now suspended. */ - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm, i, false); } @@ -2068,7 +2075,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun) vcpuid = vmrun->cpuid; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (!CPU_ISSET(vcpuid, &vm->active_cpus)) @@ -2241,7 +2248,7 @@ vm_restart_instruction(void *arg, int vcpuid) int error; vm = arg; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; @@ -2280,7 +2287,7 @@ vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) struct vcpu *vcpu; int type, vector; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; @@ -2430,7 +2437,8 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) uint64_t info1, info2; int valid; - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); + KASSERT(vcpuid >= 0 && + vcpuid < vm->maxcpus, ("invalid vcpu %d", vcpuid)); vcpu = &vm->vcpu[vcpuid]; @@ -2470,7 +2478,7 @@ vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; @@ -2487,7 +2495,7 @@ vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid, uint64_t regval; int error; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (vector < 0 || vector >= 32) @@ -2578,7 +2586,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid) { 
struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; @@ -2593,7 +2601,7 @@ vm_nmi_pending(struct vm *vm, int vcpuid) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2606,7 +2614,7 @@ vm_nmi_clear(struct vm *vm, int vcpuid) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2625,7 +2633,7 @@ vm_inject_extint(struct vm *vm, int vcpuid) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; @@ -2640,7 +2648,7 @@ vm_extint_pending(struct vm *vm, int vcpuid) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_extint_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2653,7 +2661,7 @@ vm_extint_clear(struct vm *vm, int vcpuid) { struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_extint_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2668,7 +2676,7 @@ vm_extint_clear(struct vm *vm, int vcpuid) int vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) { - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm->maxcpus) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) @@ -2680,7 +2688,7 @@ vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) int vm_set_capability(struct vm *vm, int vcpu, int type, int val) { - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm->maxcpus) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) @@ -2767,7 +2775,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 
int error; struct vcpu *vcpu; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_set_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2785,7 +2793,7 @@ vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) struct vcpu *vcpu; enum vcpu_state state; - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) panic("vm_get_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; @@ -2811,7 +2819,7 @@ int vm_activate_cpu(struct vm *vm, int vcpuid) { - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->active_cpus)) @@ -2827,12 +2835,12 @@ vm_suspend_cpu(struct vm *vm, int vcpuid) { int i; - if (vcpuid < -1 || vcpuid >= VM_MAXCPU) + if (vcpuid < -1 || vcpuid >= vm->maxcpus) return (EINVAL); if (vcpuid == -1) { vm->debug_cpus = vm->active_cpus; - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm, i, false); } @@ -2850,7 +2858,7 @@ int vm_resume_cpu(struct vm *vm, int vcpuid) { - if (vcpuid < -1 || vcpuid >= VM_MAXCPU) + if (vcpuid < -1 || vcpuid >= vm->maxcpus) return (EINVAL); if (vcpuid == -1) { @@ -2902,7 +2910,7 @@ vcpu_stats(struct vm *vm, int vcpuid) int vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) { - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); *state = vm->vcpu[vcpuid].x2apic_state; @@ -2913,7 +2921,7 @@ vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) int vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) { - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (state >= X2APIC_STATE_LAST) @@ -3005,7 +3013,7 @@ vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, * Enforce that this function is called without any locks */ 
WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), + KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); restart: @@ -3035,7 +3043,7 @@ restart: * Wake up any sleeping vcpus and trigger a VM-exit in any running * vcpus so they handle the rendezvous as soon as possible. */ - for (i = 0; i < VM_MAXCPU; i++) { + for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &dest)) vcpu_notify_event(vm, i, false); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index d276944800..ea96cd8db0 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -90,6 +90,7 @@ enum { VIE_OP_TYPE_STOS, VIE_OP_TYPE_BITTEST, VIE_OP_TYPE_TWOB_GRP15, + VIE_OP_TYPE_ADD, VIE_OP_TYPE_LAST }; @@ -126,6 +127,10 @@ static const struct vie_op two_byte_opcodes[256] = { }; static const struct vie_op one_byte_opcodes[256] = { + [0x03] = { + .op_byte = 0x03, + .op_type = VIE_OP_TYPE_ADD, + }, [0x0F] = { .op_byte = 0x0F, .op_type = VIE_OP_TYPE_TWO_BYTE @@ -425,6 +430,41 @@ getcc(int opsize, uint64_t x, uint64_t y) return (getcc64(x, y)); } +/* + * Macro creation of functions getaddflags{8,16,32,64} + */ +#define GETADDFLAGS(sz) \ +static u_long \ +getaddflags##sz(uint##sz##_t x, uint##sz##_t y) \ +{ \ + u_long rflags; \ + \ + __asm __volatile("add %2,%1; pushfq; popq %0" : \ + "=r" (rflags), "+r" (x) : "m" (y)); \ + return (rflags); \ +} struct __hack + +GETADDFLAGS(8); +GETADDFLAGS(16); +GETADDFLAGS(32); +GETADDFLAGS(64); + +static u_long +getaddflags(int opsize, uint64_t x, uint64_t y) +{ + KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, + ("getaddflags: invalid operand size %d", opsize)); + + if (opsize == 1) + return (getaddflags8(x, y)); + else if (opsize == 2) + return (getaddflags16(x, y)); + else if (opsize == 4) + return 
(getaddflags32(x, y)); + else + return (getaddflags64(x, y)); +} + static int emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) @@ -1194,6 +1234,62 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int +emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +{ + int error, size; + uint64_t nval, rflags, rflags2, val1, val2; + enum vm_reg_name reg; + + size = vie->opsize; + error = EINVAL; + + switch (vie->op.op_byte) { + case 0x03: + /* + * ADD r/m to r and store the result in r + * + * 03/r ADD r16, r/m16 + * 03/r ADD r32, r/m32 + * REX.W + 03/r ADD r64, r/m64 + */ + + /* get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vm, vcpuid, reg, &val1); + if (error) + break; + + /* get the second operand */ + error = memread(vm, vcpuid, gpa, &val2, size, arg); + if (error) + break; + + /* perform the operation and write the result */ + nval = val1 + val2; + error = vie_update_register(vm, vcpuid, reg, nval, size); + break; + default: + break; + } + + if (!error) { + rflags2 = getaddflags(size, val1, val2); + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + &rflags); + if (error) + return (error); + + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & RFLAGS_STATUS_BITS; + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + rflags, 8); + } + + return (error); +} + +static int emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1558,6 +1654,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_twob_group15(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_ADD: + error = emulate_add(vm, vcpuid, gpa, vie, memread, + memwrite, memarg); + break; default: error = EINVAL; break; diff 
--git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c index 7cafc0755e..43b2bebe97 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c @@ -68,7 +68,7 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) { struct vlapic *vlapic; - if (cpu < 0 || cpu >= VM_MAXCPU) + if (cpu < 0 || cpu >= vm_get_maxcpus(vm)) return (EINVAL); /* @@ -91,7 +91,7 @@ lapic_set_local_intr(struct vm *vm, int cpu, int vector) cpuset_t dmask; int error; - if (cpu < -1 || cpu >= VM_MAXCPU) + if (cpu < -1 || cpu >= vm_get_maxcpus(vm)) return (EINVAL); if (cpu == -1) diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index af775d4d7b..d20732ee1e 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -230,7 +230,7 @@ vcpu_lock_one(vmm_softc_t *sc, int vcpu) { int error; - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) return (EINVAL); error = vcpu_set_state(sc->vmm_vm, vcpu, VCPU_FROZEN, true); @@ -254,9 +254,11 @@ vcpu_unlock_one(vmm_softc_t *sc, int vcpu) static int vcpu_lock_all(vmm_softc_t *sc) { - int error, vcpu; + int error = 0, vcpu; + uint16_t maxcpus; - for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { + maxcpus = vm_get_maxcpus(sc->vmm_vm); + for (vcpu = 0; vcpu < maxcpus; vcpu++) { error = vcpu_lock_one(sc, vcpu); if (error) break; @@ -274,8 +276,10 @@ static void vcpu_unlock_all(vmm_softc_t *sc) { int vcpu; + uint16_t maxcpus; - for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) + maxcpus = vm_get_maxcpus(sc->vmm_vm); + for (vcpu = 0; vcpu < maxcpus; vcpu++) vcpu_unlock_one(sc, vcpu); } @@ -320,7 +324,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) { return (EFAULT); } - if (vcpu < 0 || vcpu >= VM_MAXCPU) { + if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) { error = EINVAL; goto done; } @@ -356,7 +360,7 @@ 
vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, * Lock a vcpu to make sure that the memory map cannot be * modified while it is being inspected. */ - vcpu = VM_MAXCPU - 1; + vcpu = vm_get_maxcpus(sc->vmm_vm) - 1; error = vcpu_lock_one(sc, vcpu); if (error) goto done; @@ -976,7 +980,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = EFAULT; break; } - if (vcpu < -1 || vcpu >= VM_MAXCPU) { + if (vcpu < -1 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) { error = EINVAL; break; } @@ -989,7 +993,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = EFAULT; break; } - if (vcpu < -1 || vcpu >= VM_MAXCPU) { + if (vcpu < -1 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) { error = EINVAL; break; } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c index c2c2cfe77c..f61272c49c 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c @@ -88,7 +88,7 @@ vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf) uint64_t *stats; int i; - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpu < 0 || vcpu >= vm_get_maxcpus(vm)) return (EINVAL); /* Let stats functions update their counters */ diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 2ffa80b335..d74f866013 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -451,6 +451,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 | CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM | CPUID_STDEXT_AVX512F | + CPUID_STDEXT_RDSEED | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index e5e5460211..d6313469a5 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -209,6 +209,7 @@ int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const 
char *vm_name(struct vm *vm); +uint16_t vm_get_maxcpus(struct vm *vm); void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus); int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, |