diff options
| author | Patrick Mooney <pmooney@pfmooney.com> | 2019-02-13 21:18:37 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@pfmooney.com> | 2019-02-20 21:35:38 +0000 |
| commit | c6a22c86f2d0254060157d591af12a83e37b9a50 (patch) | |
| tree | 7ca57323cc911ea806e535b1de7909873f35f1b6 /usr/src/uts | |
| parent | d7e5de8ade719deb02b214bb5901ab33c0406f0f (diff) | |
| download | illumos-joyent-c6a22c86f2d0254060157d591af12a83e37b9a50.tar.gz | |
OS-7580 bhyve upstream sync 2019 Feb
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src/uts')
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/README.sync | 19 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/svm.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/svm_msr.c | 5 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmcs.h | 8 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.c | 71 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c | 40 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/x86.c | 155 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/x86.h | 2 |
8 files changed, 250 insertions, 63 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index e9a2479b13..676fdd3a9d 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,18 +1,13 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit f81459bd8363602ed5e436f10288320419e80ccf -Author: andrew <andrew@FreeBSD.org> -Date: Thu Sep 27 11:16:19 2018 +0000 - Handle a guest executing a vm instruction by trapping and raising an - undefined instruction exception. Previously we would exit the guest, - however an unprivileged user could execute these. +commit 6b1bb0edb4792cc3d4e6b71c4a80e99438081d5d +Author: imp <imp@FreeBSD.org> +Date: Tue Feb 12 19:05:09 2019 +0000 - Found with: syzkaller - Reviewed by: araujo, tychon (previous version) - Approved by: re (kib) - MFC after: 1 week - Differential Revision: https://reviews.freebsd.org/D17192 + Revert r343077 until the license issues surrounding it can be resolved. -Which corresponds to SVN revision: 338957 + Approved by: core@ + +Which corresponds to SVN revision: 344057 diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 9c22fc2532..e921383d22 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -1965,6 +1965,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm *vm; uint64_t vmcb_pa; int handled; + uint16_t ldt_sel; svm_sc = arg; vm = svm_sc->vm; @@ -2049,6 +2050,15 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, break; } + /* + * #VMEXIT resumes the host with the guest LDTR, so + * save the current LDT selector so it can be restored + * after an exit. The userspace hypervisor probably + * doesn't use a LDT, but save and restore it to be + * safe. + */ + ldt_sel = sldt(); + svm_inj_interrupts(svm_sc, vcpu, vlapic); /* Activate the nested pmap on 'curcpu' */ @@ -2083,6 +2093,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, */ restore_host_tss(); + /* Restore host LDTR. */ + lldt(ldt_sel); + /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index 0417983233..67c43100f1 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -122,9 +122,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_SYSCFG: - *result = 0; - break; case MSR_AMDK8_IPM: + case MSR_EXTFEATURES: *result = 0; break; default: @@ -163,6 +162,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) * Ignore writes to microcode update register. */ break; + case MSR_EXTFEATURES: + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index 28c5e6b15b..edde5c6dd5 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -419,6 +419,14 @@ VMPTRLD(struct vmcs *vmcs) #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 +#define EXIT_REASON_PM_LOG_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 /* * NMI unblocking due to IRET. diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index d33ec7e4db..a723be0d28 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -104,7 +104,7 @@ __FBSDID("$FreeBSD$"); PROCBASED_NMI_WINDOW_EXITING) #ifdef __FreeBSD__ -#define PROCBASED_CTLS_ONE_SETTING \ +#define PROCBASED_CTLS_ONE_SETTING \ (PROCBASED_SECONDARY_CONTROLS | \ PROCBASED_MWAIT_EXITING | \ PROCBASED_MONITOR_EXITING | \ @@ -471,7 +471,7 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i); - + for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i); @@ -631,6 +631,7 @@ vmx_disable(void *arg __unused) static int vmx_cleanup(void) { + if (pirvec >= 0) lapic_ipi_free(pirvec); @@ -902,7 +903,8 @@ vmx_init(int ipinum) } #ifdef __FreeBSD__ - guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0; + guest_l1d_flush = (cpu_ia32_arch_caps & + IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); /* @@ -1231,7 +1233,7 @@ vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) { #ifdef __FreeBSD__ int handled, func; - + func = vmxctx->guest_rax; #else int handled; @@ -3229,6 +3231,10 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; +#ifdef __FreeBSD__ + struct region_descriptor gdtr, idtr; + uint16_t ldt_sel; +#endif vmx = arg; vm = vmx->vm; @@ -3358,17 +3364,56 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * re-VMLAUNCH as opposed to VMRESUME. */ launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0; + /* + * Restoration of the GDT limit is taken care of by + * vmx_savectx(). Since the maximum practical index for the + * IDT is 255, restoring its limits from the post-VMX-exit + * default of 0xffff is not a concern. + * + * Only 64-bit hypervisor callers are allowed, which forgoes + * the need to restore any LDT descriptor. Toss an error to + * anyone attempting to break that rule. + */ + if (curproc->p_model != DATAMODEL_LP64) { + ht_release(); + enable_intr(); + bzero(vmexit, sizeof (*vmexit)); + vmexit->rip = rip; + vmexit->exitcode = VM_EXITCODE_VMX; + vmexit->u.vmx.status = VM_FAIL_INVALID; + handled = UNHANDLED; + break; + } +#else + /* + * VM exits restore the base address but not the + * limits of GDTR and IDTR. The VMCS only stores the + * base address, so VM exits set the limits to 0xffff. + * Save and restore the full GDTR and IDTR to restore + * the limits. + * + * The VMCS does not save the LDTR at all, and VM + * exits clear LDTR as if a NULL selector were loaded. + * The userspace hypervisor probably doesn't use a + * LDT, but save and restore it to be safe. + */ + sgdt(&gdtr); + sidt(&idtr); + ldt_sel = sldt(); #endif + vmx_run_trace(vmx, vcpu); vmx_dr_enter_guest(vmxctx); rc = vmx_enter_guest(vmxctx, vmx, launched); vmx_dr_leave_guest(vmxctx); + #ifndef __FreeBSD__ vmx->vmcs_state[vcpu] |= VS_LAUNCHED; -#endif - -#ifndef __FreeBSD__ ht_release(); +#else + bare_lgdt(&gdtr); + lidt(&idtr); + lldt(ldt_sel); #endif /* Collect some information for VM exit processing */ @@ -3522,7 +3567,7 @@ vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) uint64_t gi; int error; - error = vmcs_getreg(&vmx->vmcs[vcpu], running, + error = vmcs_getreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; return (error); @@ -3566,8 +3611,8 @@ vmx_shadow_reg(int reg) switch (reg) { case VM_REG_GUEST_CR0: shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: + break; + case VM_REG_GUEST_CR4: shreg = VMCS_CR4_SHADOW; break; default: @@ -3638,7 +3683,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) if (shadow > 0) { /* * Store the unmodified value in the shadow - */ + */ error = vmcs_setreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(shadow), val); } @@ -3821,7 +3866,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } } - return (retval); + return (retval); } struct vlapic_vtx { @@ -4174,7 +4219,7 @@ vmx_vlapic_init(void *arg, int vcpuid) struct vmx *vmx; struct vlapic *vlapic; struct vlapic_vtx *vlapic_vtx; - + vmx = arg; vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 1a2f493dd1..d276944800 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -89,6 +89,7 @@ enum { VIE_OP_TYPE_GROUP1, VIE_OP_TYPE_STOS, VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, VIE_OP_TYPE_LAST }; @@ -101,6 +102,10 @@ enum { #ifdef _KERNEL static const struct vie_op two_byte_opcodes[256] = { + [0xAE] = { + .op_byte = 0xAE, + .op_type = VIE_OP_TYPE_TWOB_GRP15, + }, [0xB6] = { .op_byte = 0xB6, .op_type = VIE_OP_TYPE_MOVZX, @@ -1458,6 +1463,37 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (0); } +static int +emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +{ + int error; + uint64_t buf; + + switch (vie->reg & 7) { + case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */ + if (vie->mod == 0x3) { + /* + * SFENCE. Ignore it, VM exit provides enough + * barriers on its own. + */ + error = 0; + } else { + /* + * CLFLUSH, CLFLUSHOPT. Only check for access + * rights. + */ + error = memread(vm, vcpuid, gpa, &buf, 1, memarg); + } + break; + default: + error = EINVAL; + break; + } + + return (error); +} + int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, @@ -1518,6 +1554,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_bittest(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_TWOB_GRP15: + error = emulate_twob_group15(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 5a6d7f9dd7..b02142e7e5 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -141,17 +141,30 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); if (vmm_is_amd()) { /* - * XXX this might appear silly because AMD - * cpus don't have threads. - * - * However this matches the logical cpus as - * advertised by leaf 0x1 and will work even - * if threads is set incorrectly on an AMD host. + * As on Intel (0000_0007:0, EDX), mask out + * unsupported or unsafe AMD extended features + * (8000_0008 EBX). */ + regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF | + AMDFEID_XSAVEERPTR); + vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); - logical_cpus = threads * cores; - regs[2] = logical_cpus - 1; + /* + * Here, width is ApicIdCoreIdSize, present on + * at least Family 15h and newer. It + * represents the "number of bits in the + * initial apicid that indicate thread id + * within a package." + * + * Our topo_probe_amd() uses it for + * pkg_id_shift and other OSes may rely on it. + */ + width = MIN(0xF, log2(threads * cores)); + if (width < 0x4) + width = 0; + logical_cpus = MIN(0xFF, threads * cores - 1); + regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus; } break; @@ -159,9 +172,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); /* - * Hide SVM and Topology Extension features from guest. + * Hide SVM from guest. */ - regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY); + regs[2] &= ~AMDID2_SVM; /* * Don't advertise extended performance counter MSRs @@ -226,6 +239,68 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, #endif /* __FreeBSD__ */ break; + case CPUID_8000_001D: + /* AMD Cache topology, like 0000_0004 for Intel. */ + if (!vmm_is_amd()) + goto default_leaf; + + /* + * Similar to Intel, generate a ficticious cache + * topology for the guest with L3 shared by the + * package, and L1 and L2 local to a core. + */ + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + switch (*ecx) { + case 0: + logical_cpus = threads; + level = 1; + func = 1; /* data cache */ + break; + case 1: + logical_cpus = threads; + level = 2; + func = 3; /* unified cache */ + break; + case 2: + logical_cpus = threads * cores; + level = 3; + func = 3; /* unified cache */ + break; + default: + logical_cpus = 0; + level = 0; + func = 0; + break; + } + + logical_cpus = MIN(0xfff, logical_cpus - 1); + regs[0] = (logical_cpus << 14) | (1 << 8) | + (level << 5) | func; + regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0; + regs[2] = 0; + regs[3] = 0; + break; + + case CPUID_8000_001E: + /* AMD Family 16h+ additional identifiers */ + if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16) + goto default_leaf; + + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + regs[0] = vcpu_id; + threads = MIN(0xFF, threads - 1); + regs[1] = (threads << 8) | + (vcpu_id >> log2(threads + 1)); + /* + * XXX Bhyve topology cannot yet represent >1 node per + * processor. + */ + regs[2] = 0; + regs[3] = 0; + break; + case CPUID_0000_0001: do_cpuid(1, regs); @@ -366,7 +441,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | - CPUID_STDEXT_AVX512CD); + CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); regs[2] = 0; regs[3] = 0; @@ -398,35 +473,42 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, case CPUID_0000_000B: /* - * Processor topology enumeration + * Intel processor topology enumeration */ - vm_get_topology(vm, &sockets, &cores, &threads, - &maxcpus); - if (*ecx == 0) { - logical_cpus = threads; - width = log2(logical_cpus); - level = CPUID_TYPE_SMT; - x2apic_id = vcpu_id; - } + if (vmm_is_intel()) { + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + if (*ecx == 0) { + logical_cpus = threads; + width = log2(logical_cpus); + level = CPUID_TYPE_SMT; + x2apic_id = vcpu_id; + } - if (*ecx == 1) { - logical_cpus = threads * cores; - width = log2(logical_cpus); - level = CPUID_TYPE_CORE; - x2apic_id = vcpu_id; - } + if (*ecx == 1) { + logical_cpus = threads * cores; + width = log2(logical_cpus); + level = CPUID_TYPE_CORE; + x2apic_id = vcpu_id; + } - if (!cpuid_leaf_b || *ecx >= 2) { - width = 0; - logical_cpus = 0; - level = 0; - x2apic_id = 0; - } + if (!cpuid_leaf_b || *ecx >= 2) { + width = 0; + logical_cpus = 0; + level = 0; + x2apic_id = 0; + } - regs[0] = width & 0x1f; - regs[1] = logical_cpus & 0xffff; - regs[2] = (level << 8) | (*ecx & 0xff); - regs[3] = x2apic_id; + regs[0] = width & 0x1f; + regs[1] = logical_cpus & 0xffff; + regs[2] = (level << 8) | (*ecx & 0xff); + regs[3] = x2apic_id; + } else { + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + } break; case CPUID_0000_000D: @@ -488,6 +570,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, break; default: +default_leaf: /* * The leaf value has already been clamped so * simply pass this through, keeping count of diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h index 3a8e043852..0d70c04fd8 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.h +++ b/usr/src/uts/i86pc/io/vmm/x86.h @@ -49,6 +49,8 @@ #define CPUID_8000_0006 (0x80000006) #define CPUID_8000_0007 (0x80000007) #define CPUID_8000_0008 (0x80000008) +#define CPUID_8000_001D (0x8000001D) +#define CPUID_8000_001E (0x8000001E) /* * CPUID instruction Fn0000_0001: |
