summaryrefslogtreecommitdiff
path: root/usr/src/uts
diff options
context:
space:
mode:
author: Patrick Mooney <pmooney@pfmooney.com> 2019-02-13 21:18:37 +0000
committer: Patrick Mooney <pmooney@pfmooney.com> 2019-02-20 21:35:38 +0000
commit: c6a22c86f2d0254060157d591af12a83e37b9a50 (patch)
tree: 7ca57323cc911ea806e535b1de7909873f35f1b6 /usr/src/uts
parent: d7e5de8ade719deb02b214bb5901ab33c0406f0f (diff)
download: illumos-joyent-c6a22c86f2d0254060157d591af12a83e37b9a50.tar.gz
OS-7580 bhyve upstream sync 2019 Feb
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- usr/src/uts/i86pc/io/vmm/README.sync | 19
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/svm.c | 13
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/svm_msr.c | 5
-rw-r--r-- usr/src/uts/i86pc/io/vmm/intel/vmcs.h | 8
-rw-r--r-- usr/src/uts/i86pc/io/vmm/intel/vmx.c | 71
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c | 40
-rw-r--r-- usr/src/uts/i86pc/io/vmm/x86.c | 155
-rw-r--r-- usr/src/uts/i86pc/io/vmm/x86.h | 2
8 files changed, 250 insertions, 63 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync
index e9a2479b13..676fdd3a9d 100644
--- a/usr/src/uts/i86pc/io/vmm/README.sync
+++ b/usr/src/uts/i86pc/io/vmm/README.sync
@@ -1,18 +1,13 @@
The bhyve kernel module and its associated userland consumers have been updated
to the latest upstream FreeBSD sources as of:
-commit f81459bd8363602ed5e436f10288320419e80ccf
-Author: andrew <andrew@FreeBSD.org>
-Date: Thu Sep 27 11:16:19 2018 +0000
- Handle a guest executing a vm instruction by trapping and raising an
- undefined instruction exception. Previously we would exit the guest,
- however an unprivileged user could execute these.
+commit 6b1bb0edb4792cc3d4e6b71c4a80e99438081d5d
+Author: imp <imp@FreeBSD.org>
+Date: Tue Feb 12 19:05:09 2019 +0000
- Found with: syzkaller
- Reviewed by: araujo, tychon (previous version)
- Approved by: re (kib)
- MFC after: 1 week
- Differential Revision: https://reviews.freebsd.org/D17192
+ Revert r343077 until the license issues surrounding it can be resolved.
-Which corresponds to SVN revision: 338957
+ Approved by: core@
+
+Which corresponds to SVN revision: 344057
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 9c22fc2532..e921383d22 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -1965,6 +1965,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vm *vm;
uint64_t vmcb_pa;
int handled;
+ uint16_t ldt_sel;
svm_sc = arg;
vm = svm_sc->vm;
@@ -2049,6 +2050,15 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
break;
}
+ /*
+ * #VMEXIT resumes the host with the guest LDTR, so
+ * save the current LDT selector so it can be restored
+ * after an exit. The userspace hypervisor probably
+ * doesn't use an LDT, but save and restore it to be
+ * safe.
+ */
+ ldt_sel = sldt();
+
svm_inj_interrupts(svm_sc, vcpu, vlapic);
/* Activate the nested pmap on 'curcpu' */
@@ -2083,6 +2093,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
*/
restore_host_tss();
+ /* Restore host LDTR. */
+ lldt(ldt_sel);
+
/* #VMEXIT disables interrupts so re-enable them here. */
enable_gintr();
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
index 0417983233..67c43100f1 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
@@ -122,9 +122,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result,
case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
case MSR_MTRR64kBase:
case MSR_SYSCFG:
- *result = 0;
- break;
case MSR_AMDK8_IPM:
+ case MSR_EXTFEATURES:
*result = 0;
break;
default:
@@ -163,6 +162,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu)
* Ignore writes to microcode update register.
*/
break;
+ case MSR_EXTFEATURES:
+ break;
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
index 28c5e6b15b..edde5c6dd5 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
@@ -419,6 +419,14 @@ VMPTRLD(struct vmcs *vmcs)
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_RDRAND 57
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_VMFUNC 59
+#define EXIT_REASON_ENCLS 60
+#define EXIT_REASON_RDSEED 61
+#define EXIT_REASON_PM_LOG_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
/*
* NMI unblocking due to IRET.
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index d33ec7e4db..a723be0d28 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -104,7 +104,7 @@ __FBSDID("$FreeBSD$");
PROCBASED_NMI_WINDOW_EXITING)
#ifdef __FreeBSD__
-#define PROCBASED_CTLS_ONE_SETTING \
+#define PROCBASED_CTLS_ONE_SETTING \
(PROCBASED_SECONDARY_CONTROLS | \
PROCBASED_MWAIT_EXITING | \
PROCBASED_MONITOR_EXITING | \
@@ -471,7 +471,7 @@ vmx_allow_x2apic_msrs(struct vmx *vmx)
for (i = 0; i < 8; i++)
error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i);
-
+
for (i = 0; i < 8; i++)
error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i);
@@ -631,6 +631,7 @@ vmx_disable(void *arg __unused)
static int
vmx_cleanup(void)
{
+
if (pirvec >= 0)
lapic_ipi_free(pirvec);
@@ -902,7 +903,8 @@ vmx_init(int ipinum)
}
#ifdef __FreeBSD__
- guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
+ guest_l1d_flush = (cpu_ia32_arch_caps &
+ IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0;
TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
/*
@@ -1231,7 +1233,7 @@ vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
{
#ifdef __FreeBSD__
int handled, func;
-
+
func = vmxctx->guest_rax;
#else
int handled;
@@ -3229,6 +3231,10 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vm_exit *vmexit;
struct vlapic *vlapic;
uint32_t exit_reason;
+#ifdef __FreeBSD__
+ struct region_descriptor gdtr, idtr;
+ uint16_t ldt_sel;
+#endif
vmx = arg;
vm = vmx->vm;
@@ -3358,17 +3364,56 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
* re-VMLAUNCH as opposed to VMRESUME.
*/
launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0;
+ /*
+ * Restoration of the GDT limit is taken care of by
+ * vmx_savectx(). Since the maximum practical index for the
+ * IDT is 255, restoring its limits from the post-VMX-exit
+ * default of 0xffff is not a concern.
+ *
+ * Only 64-bit hypervisor callers are allowed, which forgoes
+ * the need to restore any LDT descriptor. Toss an error to
+ * anyone attempting to break that rule.
+ */
+ if (curproc->p_model != DATAMODEL_LP64) {
+ ht_release();
+ enable_intr();
+ bzero(vmexit, sizeof (*vmexit));
+ vmexit->rip = rip;
+ vmexit->exitcode = VM_EXITCODE_VMX;
+ vmexit->u.vmx.status = VM_FAIL_INVALID;
+ handled = UNHANDLED;
+ break;
+ }
+#else
+ /*
+ * VM exits restore the base address but not the
+ * limits of GDTR and IDTR. The VMCS only stores the
+ * base address, so VM exits set the limits to 0xffff.
+ * Save and restore the full GDTR and IDTR to restore
+ * the limits.
+ *
+ * The VMCS does not save the LDTR at all, and VM
+ * exits clear LDTR as if a NULL selector were loaded.
+ * The userspace hypervisor probably doesn't use an
+ * LDT, but save and restore it to be safe.
+ */
+ sgdt(&gdtr);
+ sidt(&idtr);
+ ldt_sel = sldt();
#endif
+
vmx_run_trace(vmx, vcpu);
vmx_dr_enter_guest(vmxctx);
rc = vmx_enter_guest(vmxctx, vmx, launched);
vmx_dr_leave_guest(vmxctx);
+
#ifndef __FreeBSD__
vmx->vmcs_state[vcpu] |= VS_LAUNCHED;
-#endif
-
-#ifndef __FreeBSD__
ht_release();
+#else
+ bare_lgdt(&gdtr);
+ lidt(&idtr);
+ lldt(ldt_sel);
#endif
/* Collect some information for VM exit processing */
@@ -3522,7 +3567,7 @@ vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
uint64_t gi;
int error;
- error = vmcs_getreg(&vmx->vmcs[vcpu], running,
+ error = vmcs_getreg(&vmx->vmcs[vcpu], running,
VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
*retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
return (error);
@@ -3566,8 +3611,8 @@ vmx_shadow_reg(int reg)
switch (reg) {
case VM_REG_GUEST_CR0:
shreg = VMCS_CR0_SHADOW;
- break;
- case VM_REG_GUEST_CR4:
+ break;
+ case VM_REG_GUEST_CR4:
shreg = VMCS_CR4_SHADOW;
break;
default:
@@ -3638,7 +3683,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
if (shadow > 0) {
/*
* Store the unmodified value in the shadow
- */
+ */
error = vmcs_setreg(&vmx->vmcs[vcpu], running,
VMCS_IDENT(shadow), val);
}
@@ -3821,7 +3866,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
}
}
- return (retval);
+ return (retval);
}
struct vlapic_vtx {
@@ -4174,7 +4219,7 @@ vmx_vlapic_init(void *arg, int vcpuid)
struct vmx *vmx;
struct vlapic *vlapic;
struct vlapic_vtx *vlapic_vtx;
-
+
vmx = arg;
vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
index 1a2f493dd1..d276944800 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -89,6 +89,7 @@ enum {
VIE_OP_TYPE_GROUP1,
VIE_OP_TYPE_STOS,
VIE_OP_TYPE_BITTEST,
+ VIE_OP_TYPE_TWOB_GRP15,
VIE_OP_TYPE_LAST
};
@@ -101,6 +102,10 @@ enum {
#ifdef _KERNEL
static const struct vie_op two_byte_opcodes[256] = {
+ [0xAE] = {
+ .op_byte = 0xAE,
+ .op_type = VIE_OP_TYPE_TWOB_GRP15,
+ },
[0xB6] = {
.op_byte = 0xB6,
.op_type = VIE_OP_TYPE_MOVZX,
@@ -1458,6 +1463,37 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (0);
}
+static int
+emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+ int error;
+ uint64_t buf;
+
+ switch (vie->reg & 7) {
+ case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */
+ if (vie->mod == 0x3) {
+ /*
+ * SFENCE. Ignore it, VM exit provides enough
+ * barriers on its own.
+ */
+ error = 0;
+ } else {
+ /*
+ * CLFLUSH, CLFLUSHOPT. Only check for access
+ * rights.
+ */
+ error = memread(vm, vcpuid, gpa, &buf, 1, memarg);
+ }
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
@@ -1518,6 +1554,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_bittest(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_TWOB_GRP15:
+ error = emulate_twob_group15(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c
index 5a6d7f9dd7..b02142e7e5 100644
--- a/usr/src/uts/i86pc/io/vmm/x86.c
+++ b/usr/src/uts/i86pc/io/vmm/x86.c
@@ -141,17 +141,30 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
cpuid_count(*eax, *ecx, regs);
if (vmm_is_amd()) {
/*
- * XXX this might appear silly because AMD
- * cpus don't have threads.
- *
- * However this matches the logical cpus as
- * advertised by leaf 0x1 and will work even
- * if threads is set incorrectly on an AMD host.
+ * As on Intel (0000_0007:0, EDX), mask out
+ * unsupported or unsafe AMD extended features
+ * (8000_0008 EBX).
*/
+ regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
+ AMDFEID_XSAVEERPTR);
+
vm_get_topology(vm, &sockets, &cores, &threads,
&maxcpus);
- logical_cpus = threads * cores;
- regs[2] = logical_cpus - 1;
+ /*
+ * Here, width is ApicIdCoreIdSize, present on
+ * at least Family 15h and newer. It
+ * represents the "number of bits in the
+ * initial apicid that indicate thread id
+ * within a package."
+ *
+ * Our topo_probe_amd() uses it for
+ * pkg_id_shift and other OSes may rely on it.
+ */
+ width = MIN(0xF, log2(threads * cores));
+ if (width < 0x4)
+ width = 0;
+ logical_cpus = MIN(0xFF, threads * cores - 1);
+ regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
}
break;
@@ -159,9 +172,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
cpuid_count(*eax, *ecx, regs);
/*
- * Hide SVM and Topology Extension features from guest.
+ * Hide SVM from guest.
*/
- regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY);
+ regs[2] &= ~AMDID2_SVM;
/*
* Don't advertise extended performance counter MSRs
@@ -226,6 +239,68 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
#endif /* __FreeBSD__ */
break;
+ case CPUID_8000_001D:
+ /* AMD Cache topology, like 0000_0004 for Intel. */
+ if (!vmm_is_amd())
+ goto default_leaf;
+
+ /*
+ * Similar to Intel, generate a fictitious cache
+ * topology for the guest with L3 shared by the
+ * package, and L1 and L2 local to a core.
+ */
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
+ switch (*ecx) {
+ case 0:
+ logical_cpus = threads;
+ level = 1;
+ func = 1; /* data cache */
+ break;
+ case 1:
+ logical_cpus = threads;
+ level = 2;
+ func = 3; /* unified cache */
+ break;
+ case 2:
+ logical_cpus = threads * cores;
+ level = 3;
+ func = 3; /* unified cache */
+ break;
+ default:
+ logical_cpus = 0;
+ level = 0;
+ func = 0;
+ break;
+ }
+
+ logical_cpus = MIN(0xfff, logical_cpus - 1);
+ regs[0] = (logical_cpus << 14) | (1 << 8) |
+ (level << 5) | func;
+ regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
+ regs[2] = 0;
+ regs[3] = 0;
+ break;
+
+ case CPUID_8000_001E:
+ /* AMD Family 16h+ additional identifiers */
+ if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16)
+ goto default_leaf;
+
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
+ regs[0] = vcpu_id;
+ threads = MIN(0xFF, threads - 1);
+ regs[1] = (threads << 8) |
+ (vcpu_id >> log2(threads + 1));
+ /*
+ * XXX Bhyve topology cannot yet represent >1 node per
+ * processor.
+ */
+ regs[2] = 0;
+ regs[3] = 0;
+ break;
+
case CPUID_0000_0001:
do_cpuid(1, regs);
@@ -366,7 +441,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
CPUID_STDEXT_AVX512F |
CPUID_STDEXT_AVX512PF |
CPUID_STDEXT_AVX512ER |
- CPUID_STDEXT_AVX512CD);
+ CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
regs[2] = 0;
regs[3] = 0;
@@ -398,35 +473,42 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
case CPUID_0000_000B:
/*
- * Processor topology enumeration
+ * Intel processor topology enumeration
*/
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- if (*ecx == 0) {
- logical_cpus = threads;
- width = log2(logical_cpus);
- level = CPUID_TYPE_SMT;
- x2apic_id = vcpu_id;
- }
+ if (vmm_is_intel()) {
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
+ if (*ecx == 0) {
+ logical_cpus = threads;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_SMT;
+ x2apic_id = vcpu_id;
+ }
- if (*ecx == 1) {
- logical_cpus = threads * cores;
- width = log2(logical_cpus);
- level = CPUID_TYPE_CORE;
- x2apic_id = vcpu_id;
- }
+ if (*ecx == 1) {
+ logical_cpus = threads * cores;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_CORE;
+ x2apic_id = vcpu_id;
+ }
- if (!cpuid_leaf_b || *ecx >= 2) {
- width = 0;
- logical_cpus = 0;
- level = 0;
- x2apic_id = 0;
- }
+ if (!cpuid_leaf_b || *ecx >= 2) {
+ width = 0;
+ logical_cpus = 0;
+ level = 0;
+ x2apic_id = 0;
+ }
- regs[0] = width & 0x1f;
- regs[1] = logical_cpus & 0xffff;
- regs[2] = (level << 8) | (*ecx & 0xff);
- regs[3] = x2apic_id;
+ regs[0] = width & 0x1f;
+ regs[1] = logical_cpus & 0xffff;
+ regs[2] = (level << 8) | (*ecx & 0xff);
+ regs[3] = x2apic_id;
+ } else {
+ regs[0] = 0;
+ regs[1] = 0;
+ regs[2] = 0;
+ regs[3] = 0;
+ }
break;
case CPUID_0000_000D:
@@ -488,6 +570,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
break;
default:
+default_leaf:
/*
* The leaf value has already been clamped so
* simply pass this through, keeping count of
diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h
index 3a8e043852..0d70c04fd8 100644
--- a/usr/src/uts/i86pc/io/vmm/x86.h
+++ b/usr/src/uts/i86pc/io/vmm/x86.h
@@ -49,6 +49,8 @@
#define CPUID_8000_0006 (0x80000006)
#define CPUID_8000_0007 (0x80000007)
#define CPUID_8000_0008 (0x80000008)
+#define CPUID_8000_001D (0x8000001D)
+#define CPUID_8000_001E (0x8000001E)
/*
* CPUID instruction Fn0000_0001: