author    Patrick Mooney <pmooney@pfmooney.com>    2018-12-14 23:15:07 +0000
committer Patrick Mooney <pmooney@pfmooney.com>    2019-03-15 15:01:23 +0000
commit    2453029c010976e95241a5f5244e86d44dc6194c (patch)
tree      ee57e43a87769a4489b772635f14c4e153205ac5 /usr
parent    7afbf32617941421fcf18c3c37cbce5f7d30a8fc (diff)
download  illumos-joyent-2453029c010976e95241a5f5244e86d44dc6194c.tar.gz
OS-7170 bhyve should support AMD
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr')
-rw-r--r--  usr/src/uts/i86pc/Makefile.files             |   2
-rw-r--r--  usr/src/uts/i86pc/Makefile.rules             |   3
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm.c           | 122
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm_msr.c       |  26
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm_softc.h     |  15
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm_support.s   | 188
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c         |   6
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.h         |   3
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_support.s |  40
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vm/vm_glue.h        |   1
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c       |  15
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c       | 297
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c        |  16
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_support.s       |  54
-rw-r--r--  usr/src/uts/i86pc/io/vmm/x86.c               |  12
-rw-r--r--  usr/src/uts/i86pc/os/hma.c                   | 273
-rw-r--r--  usr/src/uts/i86pc/sys/hma.h                  |   9
-rw-r--r--  usr/src/uts/i86pc/sys/vmm.h                  |   4
-rw-r--r--  usr/src/uts/intel/sys/controlregs.h          |  12
-rw-r--r--  usr/src/uts/intel/sys/x86_archext.h          |  12
20 files changed, 942 insertions, 168 deletions
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 8eadfcb56f..3979ddaef7 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -279,6 +279,8 @@ VMM_OBJS += vmm.o \
vmm_sol_vm.o \
vmm_sol_glue.o \
vmm_sol_ept.o \
+ vmm_sol_rvi.o \
+ vmm_support.o \
vmm_zsd.o
VIONA_OBJS += viona.o
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index 3ea69978ce..0e3ea556ea 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -232,6 +232,9 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/vmm/io/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/vmm/%.s
+ $(COMPILE.s) -o $@ $<
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/vmm/intel/%.s
$(COMPILE.s) -o $@ $<
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index e921383d22..ca9ed9e4e1 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#ifndef __FreeBSD__
#include <sys/x86_archext.h>
+#include <sys/trap.h>
#endif
#include <vm/vm.h>
@@ -123,6 +124,7 @@ static int disable_npf_assist;
SYSCTL_INT(_hw_vmm_svm, OID_AUTO, disable_npf_assist, CTLFLAG_RWTUN,
&disable_npf_assist, 0, NULL);
+#ifdef __FreeBSD__
/* Maximum ASIDs supported by the processor */
static uint32_t nasid;
SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0,
@@ -135,6 +137,7 @@ static struct asid asid[MAXCPU];
* SVM host state saved area of size 4KB for each core.
*/
static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
+#endif /* __FreeBSD__ */
static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery");
static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry");
@@ -156,6 +159,7 @@ decode_assist(void)
return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST);
}
+#ifdef __FreeBSD__
static void
svm_disable(void *arg __unused)
{
@@ -298,6 +302,31 @@ svm_restore(void)
svm_enable(NULL);
}
+#else /* __FreeBSD__ */
+static int
+svm_cleanup(void)
+{
+ /* This is taken care of by the hma registration */
+ return (0);
+}
+
+static int
+svm_init(int ipinum)
+{
+ vmcb_clean &= VMCB_CACHE_DEFAULT;
+
+ svm_msr_init();
+ svm_npt_init(ipinum);
+
+ return (0);
+}
+
+static void
+svm_restore(void)
+{
+ /* No-op on illumos */
+}
+#endif /* __FreeBSD__ */
/* Pentium compatible MSRs */
#define MSR_PENTIUM_START 0
@@ -1309,7 +1338,11 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
struct svm_regctx *ctx;
uint64_t code, info1, info2, val;
uint32_t eax, ecx, edx;
+#ifdef __FreeBSD__
int error, errcode_valid, handled, idtvec, reflect;
+#else
+ int error, errcode_valid = 0, handled, idtvec, reflect;
+#endif
bool retu;
ctx = svm_get_guest_regctx(svm_sc, vcpu);
@@ -1380,8 +1413,11 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
*/
reflect = 0;
VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler");
- /* XXXJOY: we will need equivalent of vmx_call_trap */
+#ifdef __FreeBSD__
__asm __volatile("int $18");
+#else
+ vmm_call_trap(T_MCE);
+#endif
break;
case IDT_PF:
error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2,
@@ -1772,11 +1808,14 @@ restore_host_tss(void)
tss_sd->sd_type = SDT_SYSTSS;
ltr(GSEL(GPROC0_SEL, SEL_KPL));
#else
- /* XXXJOY: Add logic to restore TSS for us */
- panic("SVM Restore system TSS");
+ system_desc_t *tss = (system_desc_t *)&CPU->cpu_gdt[GDT_KTSS];
+
+ tss->ssd_type = SDT_SYSTSS;
+ wr_tsr(KTSS_SEL);
#endif
}
+#ifdef __FreeBSD__
static void
check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
{
@@ -1879,6 +1918,27 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
KASSERT(ctrl->asid == vcpustate->asid.num,
("ASID mismatch: %u/%u", ctrl->asid, vcpustate->asid.num));
}
+#else /* __FreeBSD__ */
+static void
+check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
+{
+ struct svm_vcpu *vcpustate = svm_get_vcpu(sc, vcpuid);
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
+ long eptgen;
+ uint8_t flush;
+
+ eptgen = pmap->pm_eptgen;
+ flush = hma_svm_asid_update(&vcpustate->hma_asid, flush_by_asid(),
+ vcpustate->eptgen == eptgen);
+
+ if (flush != VMCB_TLB_FLUSH_NOTHING) {
+ ctrl->asid = vcpustate->hma_asid.hsa_asid;
+ svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
+ }
+ ctrl->tlb_ctrl = flush;
+ vcpustate->eptgen = eptgen;
+}
+#endif /* __FreeBSD__ */
static __inline void
disable_gintr(void)
@@ -1983,7 +2043,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/*
* Force new ASID allocation by invalidating the generation.
*/
+#ifdef __FreeBSD__
vcpustate->asid.gen = 0;
+#else
+ vcpustate->hma_asid.hsa_gen = 0;
+#endif
/*
* Invalidate the VMCB state cache by marking all fields dirty.
@@ -2006,10 +2070,25 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
svm_msr_guest_enter(svm_sc, vcpu);
+#ifndef __FreeBSD__
+ VERIFY(!vcpustate->loaded && curthread->t_preempt != 0);
+ vcpustate->loaded = B_TRUE;
+#endif
+
/* Update Guest RIP */
state->rip = rip;
do {
+#ifndef __FreeBSD__
+ /*
+ * Interrupt injection may involve mutex contention which, on
+ * illumos bhyve, are blocking/non-spin. Doing so with global
+ * interrupts disabled is a recipe for deadlock, so it is
+ * performed here.
+ */
+ svm_inj_interrupts(svm_sc, vcpu, vlapic);
+#endif
+
/*
* Disable global interrupts to guarantee atomicity during
* loading of guest state. This includes not only the state
@@ -2059,7 +2138,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
*/
ldt_sel = sldt();
+#ifdef __FreeBSD__
svm_inj_interrupts(svm_sc, vcpu, vlapic);
+#endif
/* Activate the nested pmap on 'curcpu' */
CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active);
@@ -2108,6 +2189,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
svm_msr_guest_exit(svm_sc, vcpu);
+#ifndef __FreeBSD__
+ VERIFY(vcpustate->loaded && curthread->t_preempt != 0);
+ vcpustate->loaded = B_FALSE;
+#endif
+
return (0);
}
@@ -2309,6 +2395,28 @@ svm_vlapic_cleanup(void *arg, struct vlapic *vlapic)
free(vlapic, M_SVM_VLAPIC);
}
+#ifndef __FreeBSD__
+static void
+svm_savectx(void *arg, int vcpu)
+{
+ struct svm_softc *sc = arg;
+
+ if (sc->vcpu[vcpu].loaded) {
+ svm_msr_guest_exit(sc, vcpu);
+ }
+}
+
+static void
+svm_restorectx(void *arg, int vcpu)
+{
+ struct svm_softc *sc = arg;
+
+ if (sc->vcpu[vcpu].loaded) {
+ svm_msr_guest_enter(sc, vcpu);
+ }
+}
+#endif /* __FreeBSD__ */
+
struct vmm_ops vmm_ops_amd = {
svm_init,
svm_cleanup,
@@ -2328,11 +2436,7 @@ struct vmm_ops vmm_ops_amd = {
svm_vlapic_cleanup,
#ifndef __FreeBSD__
- /*
- * When SVM support is wired up and tested, it is likely to require
- * savectx/restorectx functions similar to VMX.
- */
- NULL,
- NULL,
+ svm_savectx,
+ svm_restorectx,
#endif
};
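
[Note on the svm.c changes above] The two previously-NULL slots at the tail of vmm_ops_amd are now filled by svm_savectx()/svm_restorectx(), which illumos invokes when the host thread carrying a vCPU is switched off of or back onto a CPU; they only swap guest MSR state when the 'loaded' flag maintained around svm_vmrun() says the vCPU is active. A minimal user-space sketch of that guard pattern, assuming hypothetical demo_* names (the real hooks are the ones in the diff above):

/*
 * Sketch of the savectx/restorectx guard added to vmm_ops_amd: context-switch
 * hooks act only while the vCPU is "loaded" (between the enter/exit points of
 * svm_vmrun). All demo_* names are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_vcpu {
	bool loaded;	/* set around VMRUN while preemption is disabled */
};

/* Stand-ins for svm_msr_guest_exit()/svm_msr_guest_enter(). */
static void demo_msr_guest_exit(struct demo_vcpu *v)  { printf("save guest MSRs (%p)\n", (void *)v); }
static void demo_msr_guest_enter(struct demo_vcpu *v) { printf("restore guest MSRs (%p)\n", (void *)v); }

/* Host thread carrying this vCPU is being switched off-CPU. */
static void
demo_savectx(struct demo_vcpu *v)
{
	if (v->loaded)
		demo_msr_guest_exit(v);
}

/* Host thread carrying this vCPU is being switched back on-CPU. */
static void
demo_restorectx(struct demo_vcpu *v)
{
	if (v->loaded)
		demo_msr_guest_enter(v);
}

int
main(void)
{
	struct demo_vcpu v = { .loaded = false };

	demo_savectx(&v);	/* no-op: vCPU not currently loaded */
	v.loaded = true;
	demo_savectx(&v);	/* guest MSRs parked for the host */
	demo_restorectx(&v);	/* guest MSRs reloaded before resuming */
	return (0);
}
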
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
index 67c43100f1..0c1ce0e4e0 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
@@ -54,6 +54,7 @@ enum {
HOST_MSR_NUM /* must be the last enumeration */
};
+#ifdef __FreeBSD__
static uint64_t host_msrs[HOST_MSR_NUM];
void
@@ -68,6 +69,19 @@ svm_msr_init(void)
host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
}
+#else
+
+CTASSERT(HOST_MSR_NUM == SVM_HOST_MSR_NUM);
+
+void
+svm_msr_init(void)
+{
+ /*
+ * These MSRs do vary between CPUs on illumos, so saving system-wide
+ * values for them serves no purpose.
+ */
+}
+#endif /* __FreeBSD__ */
void
svm_msr_guest_init(struct svm_softc *sc, int vcpu)
@@ -89,11 +103,23 @@ svm_msr_guest_enter(struct svm_softc *sc, int vcpu)
/*
* Save host MSRs (if any) and restore guest MSRs (if any).
*/
+#ifndef __FreeBSD__
+ uint64_t *host_msrs = sc->host_msrs[vcpu];
+
+ /* Save host MSRs */
+ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+#endif /* __FreeBSD__ */
}
void
svm_msr_guest_exit(struct svm_softc *sc, int vcpu)
{
+#ifndef __FreeBSD__
+ uint64_t *host_msrs = sc->host_msrs[vcpu];
+#endif
/*
* Save guest MSRs (if any) and restore host MSRs.
*/
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
index 8735353bb4..b5ac1903e7 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
@@ -34,10 +34,17 @@
#define SVM_IO_BITMAP_SIZE (3 * PAGE_SIZE)
#define SVM_MSR_BITMAP_SIZE (2 * PAGE_SIZE)
+#ifdef __FreeBSD__
struct asid {
uint64_t gen; /* range is [1, ~0UL] */
uint32_t num; /* range is [1, nasid - 1] */
};
+#else
+#include <sys/hma.h>
+
+/* This must match HOST_MSR_NUM in svm_msr.c (where it is CTASSERTed) */
+#define SVM_HOST_MSR_NUM 4
+#endif /* __FreeBSD__ */
/*
* XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space
@@ -51,7 +58,12 @@ struct svm_vcpu {
int lastcpu; /* host cpu that the vcpu last ran on */
uint32_t dirty; /* state cache bits that must be cleared */
long eptgen; /* pmap->pm_eptgen when the vcpu last ran */
+#ifdef __FreeBSD__
struct asid asid;
+#else
+ hma_svm_asid_t hma_asid;
+ boolean_t loaded;
+#endif
} __aligned(PAGE_SIZE);
/*
@@ -64,6 +76,9 @@ struct svm_softc {
uint8_t *iopm_bitmap; /* shared by all vcpus */
uint8_t *msr_bitmap; /* shared by all vcpus */
struct vm *vm;
+#ifndef __FreeBSD__
+ uint64_t host_msrs[VM_MAXCPU][SVM_HOST_MSR_NUM];
+#endif
};
CTASSERT((offsetof(struct svm_softc, nptp) & PAGE_MASK) == 0);
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_support.s b/usr/src/uts/i86pc/io/vmm/amd/svm_support.s
index 4258c95d70..fad994b09c 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_support.s
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_support.s
@@ -25,7 +25,12 @@
*
* $FreeBSD$
*/
-#include <machine/asmacros.h>
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
#include "svm_assym.h"
@@ -34,115 +39,126 @@
#if defined(lint)
struct svm_regctx;
-struct pcpu;
+struct cpu;
/*ARGSUSED*/
void
-svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu)
+svm_launch(uint64_t pa, struct svm_regctx *gctx, struct cpu *cpu)
{}
#else /* lint */
-/*
- * Be friendly to DTrace FBT's prologue/epilogue pattern matching.
- *
- * They are also responsible for saving/restoring the host %rbp across VMRUN.
- */
-#define VENTER push %rbp ; mov %rsp,%rbp
-#define VLEAVE pop %rbp
-
#define VMLOAD .byte 0x0f, 0x01, 0xda
#define VMRUN .byte 0x0f, 0x01, 0xd8
#define VMSAVE .byte 0x0f, 0x01, 0xdb
+
+/*
+ * Flush scratch registers to avoid lingering guest state being used for
+ * Spectre v1 attacks when returning from guest entry.
+ */
+#define SVM_GUEST_FLUSH_SCRATCH \
+ xorl %edi, %edi; \
+ xorl %esi, %esi; \
+ xorl %edx, %edx; \
+ xorl %ecx, %ecx; \
+ xorl %r8d, %r8d; \
+ xorl %r9d, %r9d; \
+ xorl %r10d, %r10d; \
+ xorl %r11d, %r11d;
+
+/* Stack layout (offset from %rsp) for svm_launch */
+#define SVMSTK_R15 0x00 /* callee saved %r15 */
+#define SVMSTK_R14 0x08 /* callee saved %r14 */
+#define SVMSTK_R13 0x10 /* callee saved %r13 */
+#define SVMSTK_R12 0x18 /* callee saved %r12 */
+#define SVMSTK_RBX 0x20 /* callee saved %rbx */
+#define SVMSTK_RDX 0x28 /* save-args %rdx (struct cpu *) */
+#define SVMSTK_RSI 0x30 /* save-args %rsi (struct svm_regctx *) */
+#define SVMSTK_RDI 0x38 /* save-args %rdi (uint64_t vmcb_pa) */
+#define SVMSTK_FP 0x40 /* frame pointer %rbp */
+#define SVMSTKSIZE SVMSTK_FP
+
/*
* svm_launch(uint64_t vmcb, struct svm_regctx *gctx, struct pcpu *pcpu)
* %rdi: physical address of VMCB
* %rsi: pointer to guest context
* %rdx: pointer to the pcpu data
*/
-ENTRY(svm_launch)
- VENTER
-
- /* save pointer to the pcpu data */
- push %rdx
-
- /*
- * Host register state saved across a VMRUN.
- *
- * All "callee saved registers" except:
- * %rsp: because it is preserved by the processor across VMRUN.
- * %rbp: because it is saved/restored by the function prologue/epilogue.
- */
- push %rbx
- push %r12
- push %r13
- push %r14
- push %r15
-
- /* Save the physical address of the VMCB in %rax */
- movq %rdi, %rax
-
- push %rsi /* push guest context pointer on the stack */
-
- /*
- * Restore guest state.
- */
- movq SCTX_R8(%rsi), %r8
- movq SCTX_R9(%rsi), %r9
- movq SCTX_R10(%rsi), %r10
- movq SCTX_R11(%rsi), %r11
- movq SCTX_R12(%rsi), %r12
- movq SCTX_R13(%rsi), %r13
- movq SCTX_R14(%rsi), %r14
- movq SCTX_R15(%rsi), %r15
- movq SCTX_RBP(%rsi), %rbp
- movq SCTX_RBX(%rsi), %rbx
- movq SCTX_RCX(%rsi), %rcx
- movq SCTX_RDX(%rsi), %rdx
- movq SCTX_RDI(%rsi), %rdi
- movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */
+ENTRY_NP(svm_launch)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $SVMSTKSIZE, %rsp
+ movq %r15, SVMSTK_R15(%rsp)
+ movq %r14, SVMSTK_R14(%rsp)
+ movq %r13, SVMSTK_R13(%rsp)
+ movq %r12, SVMSTK_R12(%rsp)
+ movq %rbx, SVMSTK_RBX(%rsp)
+ movq %rdx, SVMSTK_RDX(%rsp)
+ movq %rsi, SVMSTK_RSI(%rsp)
+ movq %rdi, SVMSTK_RDI(%rsp)
+
+ /* VMLOAD and VMRUN expect the VMCB physaddr in %rax */
+ movq %rdi, %rax
+
+ /* Restore guest state. */
+ movq SCTX_R8(%rsi), %r8
+ movq SCTX_R9(%rsi), %r9
+ movq SCTX_R10(%rsi), %r10
+ movq SCTX_R11(%rsi), %r11
+ movq SCTX_R12(%rsi), %r12
+ movq SCTX_R13(%rsi), %r13
+ movq SCTX_R14(%rsi), %r14
+ movq SCTX_R15(%rsi), %r15
+ movq SCTX_RBP(%rsi), %rbp
+ movq SCTX_RBX(%rsi), %rbx
+ movq SCTX_RCX(%rsi), %rcx
+ movq SCTX_RDX(%rsi), %rdx
+ movq SCTX_RDI(%rsi), %rdi
+ movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */
VMLOAD
VMRUN
VMSAVE
- pop %rax /* pop guest context pointer from the stack */
-
- /*
- * Save guest state.
- */
- movq %r8, SCTX_R8(%rax)
- movq %r9, SCTX_R9(%rax)
- movq %r10, SCTX_R10(%rax)
- movq %r11, SCTX_R11(%rax)
- movq %r12, SCTX_R12(%rax)
- movq %r13, SCTX_R13(%rax)
- movq %r14, SCTX_R14(%rax)
- movq %r15, SCTX_R15(%rax)
- movq %rbp, SCTX_RBP(%rax)
- movq %rbx, SCTX_RBX(%rax)
- movq %rcx, SCTX_RCX(%rax)
- movq %rdx, SCTX_RDX(%rax)
- movq %rdi, SCTX_RDI(%rax)
- movq %rsi, SCTX_RSI(%rax)
-
- /* Restore host state */
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
-
- /* Restore %GS.base to point to the host's pcpu data */
- pop %rdx
- mov %edx, %eax
- shr $32, %rdx
- mov $MSR_GSBASE, %ecx
+ /* Grab the svm_regctx pointer */
+ movq SVMSTK_RSI(%rsp), %rax
+
+ /* Save guest state. */
+ movq %r8, SCTX_R8(%rax)
+ movq %r9, SCTX_R9(%rax)
+ movq %r10, SCTX_R10(%rax)
+ movq %r11, SCTX_R11(%rax)
+ movq %r12, SCTX_R12(%rax)
+ movq %r13, SCTX_R13(%rax)
+ movq %r14, SCTX_R14(%rax)
+ movq %r15, SCTX_R15(%rax)
+ movq %rbp, SCTX_RBP(%rax)
+ movq %rbx, SCTX_RBX(%rax)
+ movq %rcx, SCTX_RCX(%rax)
+ movq %rdx, SCTX_RDX(%rax)
+ movq %rdi, SCTX_RDI(%rax)
+ movq %rsi, SCTX_RSI(%rax)
+
+ /* Restore callee-saved registers */
+ movq SVMSTK_R15(%rsp), %r15
+ movq SVMSTK_R14(%rsp), %r14
+ movq SVMSTK_R13(%rsp), %r13
+ movq SVMSTK_R12(%rsp), %r12
+ movq SVMSTK_RBX(%rsp), %rbx
+
+ /* Fix %gsbase to point back to the correct 'struct cpu *' */
+ movq SVMSTK_RDX(%rsp), %rdx
+ movl %edx, %eax
+ shrq $32, %rdx
+ movl $MSR_GSBASE, %ecx
wrmsr
- VLEAVE
+ SVM_GUEST_FLUSH_SCRATCH
+
+ addq $SVMSTKSIZE, %rsp
+ popq %rbp
ret
-END(svm_launch)
+SET_SIZE(svm_launch)
#endif /* lint */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index a723be0d28..a039455be7 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -2640,7 +2640,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
#ifdef __FreeBSD__
__asm __volatile("int $18");
#else
- vmx_call_trap(T_MCE);
+ vmm_call_trap(T_MCE);
#endif
return (1);
}
@@ -2929,7 +2929,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
#ifdef __FreeBSD__
__asm __volatile("int $18");
#else
- vmx_call_trap(T_MCE);
+ vmm_call_trap(T_MCE);
#endif
return (1);
}
@@ -3147,7 +3147,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
#ifdef __FreeBSD__
__asm __volatile("int $2");
#else
- vmx_call_trap(T_NMIFLT);
+ vmm_call_trap(T_NMIFLT);
#endif
}
}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index 9766e6b749..2d16799bdd 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -164,9 +164,6 @@ CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
#define VMX_VMWRITE_ERROR 4
int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched);
void vmx_call_isr(uintptr_t entry);
-#ifndef __FreeBSD__
-void vmx_call_trap(uint64_t);
-#endif
u_long vmx_fix_cr0(u_long cr0);
u_long vmx_fix_cr4(u_long cr4);
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s
index a2375e3a6c..0130f88dd6 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s
@@ -381,44 +381,4 @@ ENTRY_NP(vmx_call_isr)
ret
SET_SIZE(vmx_call_isr)
-/*
- * %rdi = trapno
- *
- * This variant is for any explicit exception injection that we need: in this
- * case, we can't just, for example, do a direct "int $2", as that will then
- * trash our %cr3 via tr_nmiint due to KPTI. So we have to fake a trap frame in
- * a similar fashion to vmx_call_isr(). Both NMIs and MCEs don't push an 'err'
- * into the frame.
- */
-ENTRY_NP(vmx_call_trap)
- pushq %rbp
- movq %rsp, %rbp
- movq %rsp, %r11
- andq $~0xf, %rsp /* align stack */
- pushq $KDS_SEL /* %ss */
- pushq %r11 /* %rsp */
- pushfq /* %rflags */
- pushq $KCS_SEL /* %cs */
- leaq .trap_iret_dest(%rip), %rcx
- pushq %rcx /* %rip */
- cli
- cmpq $T_NMIFLT, %rdi
- je nmiint
- cmpq $T_MCE, %rdi
- je mcetrap
-
- pushq %rdi /* save our bad trapno... */
- leaq __vmx_call_bad_trap(%rip), %rdi
- xorl %eax, %eax
- call panic
- /*NOTREACHED*/
-
-.trap_iret_dest:
- popq %rbp
- ret
-SET_SIZE(vmx_call_trap)
-
-__vmx_call_bad_trap:
- .string "bad trapno for vmx_call_trap()"
-
#endif /* lint */
diff --git a/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h b/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
index 7ac745f509..600872c321 100644
--- a/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
+++ b/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
@@ -93,6 +93,7 @@ struct vmm_pt_ops {
};
extern struct vmm_pt_ops ept_ops;
+extern struct vmm_pt_ops rvi_ops;
#endif /* _VM_GLUE_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 2fa0267f72..4b759b44e9 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -1628,10 +1628,19 @@ vmm_is_supported(intptr_t arg)
int r;
const char *msg;
- if (!vmm_is_intel())
- return (ENXIO);
+ if (vmm_is_intel()) {
+ r = vmx_x86_supported(&msg);
+ } else if (vmm_is_amd()) {
+ /*
+ * HMA already ensured that the features necessary for SVM
+ * operation were present and online during vmm_attach().
+ */
+ r = 0;
+ } else {
+ r = ENXIO;
+ msg = "Unsupported CPU vendor";
+ }
- r = vmx_x86_supported(&msg);
if (r != 0 && arg != NULL) {
if (copyoutstr(msg, (char *)arg, strlen(msg), NULL) != 0)
return (EFAULT);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c
new file mode 100644
index 0000000000..d630d32630
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c
@@ -0,0 +1,297 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/machsystm.h>
+#include <sys/x86_archext.h>
+
+#include <sys/gipt.h>
+#include <vm/vm_glue.h>
+
+
+struct rvi_map {
+ gipt_map_t rm_gipt;
+ uint64_t rm_wired_page_count;
+};
+typedef struct rvi_map rvi_map_t;
+
+#define RVI_LOCK(m) (&(m)->rm_gipt.giptm_lock)
+
+#define RVI_MAX_LEVELS 4
+
+CTASSERT(RVI_MAX_LEVELS <= GIPT_MAX_LEVELS);
+
+#define RVI_PRESENT PT_VALID
+#define RVI_WRITABLE PT_WRITABLE
+#define RVI_ACCESSED PT_REF
+#define RVI_DIRTY PT_MOD
+#define RVI_LGPG PT_PAGESIZE
+#define RVI_NX PT_NX
+#define RVI_USER PT_USER
+#define RVI_PWT PT_WRITETHRU
+#define RVI_PCD PT_NOCACHE
+
+#define RVI_PA_MASK PT_PADDR
+
+#define RVI_PAT(attr) rvi_attr_to_pat(attr)
+#define RVI_PADDR(addr) ((addr) & RVI_PA_MASK)
+#define RVI_PROT(prot) \
+ ((((prot) & PROT_WRITE) != 0 ? RVI_WRITABLE : 0) | \
+ (((prot) & PROT_EXEC) == 0 ? RVI_NX : 0))
+
+#define RVI_IS_ABSENT(pte) (((pte) & RVI_PRESENT) == 0)
+#define RVI_PTE_PFN(pte) mmu_btop(RVI_PADDR(pte))
+#define RVI_MAPS_PAGE(pte, lvl) \
+ (!RVI_IS_ABSENT(pte) && (((pte) & RVI_LGPG) != 0 || (lvl) == 0))
+#define RVI_PTE_PROT(pte) \
+ (RVI_IS_ABSENT(pte) ? 0 : ( \
+ PROT_READ | \
+ (((pte) & RVI_NX) == 0 ? PROT_EXEC : 0) | \
+ (((pte) & RVI_WRITABLE) != 0 ? PROT_WRITE : 0)))
+
+#define RVI_PTE_ASSIGN_PAGE(lvl, pfn, prot, attr) \
+ (RVI_PADDR(pfn_to_pa(pfn)) | \
+ (((lvl) != 0) ? RVI_LGPG : 0) | \
+ RVI_USER | RVI_ACCESSED | RVI_PRESENT | \
+ RVI_PAT(attr) | \
+ RVI_PROT(prot))
+
+#define RVI_PTE_ASSIGN_TABLE(pfn) \
+ (RVI_PADDR(pfn_to_pa(pfn)) | \
+ RVI_USER | RVI_ACCESSED | RVI_PRESENT | \
+ RVI_PAT(MTRR_TYPE_WB) | \
+ RVI_PROT(PROT_READ | PROT_WRITE | PROT_EXEC))
+
+
+/* Make sure that PAT indexes line up as expected */
+CTASSERT((PAT_DEFAULT_ATTRIBUTE & 0xf) == MTRR_TYPE_WB);
+CTASSERT(((PAT_DEFAULT_ATTRIBUTE >> 24) & 0xf) == MTRR_TYPE_UC);
+
+static inline uint64_t
+rvi_attr_to_pat(const uint8_t attr)
+{
+ if (attr == MTRR_TYPE_UC) {
+ /* !PAT + PCD + PWT -> PAT3 -> MTRR_TYPE_UC */
+ return (RVI_PCD|RVI_PWT);
+ } else if (attr == MTRR_TYPE_WB) {
+ /* !PAT + !PCD + !PWT -> PAT0 -> MTRR_TYPE_WB */
+ return (0);
+ }
+
+ panic("unexpected memattr %x", attr);
+ return (0);
+}
+
+static gipt_pte_type_t
+rvi_pte_type(uint64_t pte, uint_t level)
+{
+ if (RVI_IS_ABSENT(pte)) {
+ return (PTET_EMPTY);
+ } else if (RVI_MAPS_PAGE(pte, level)) {
+ return (PTET_PAGE);
+ } else {
+ return (PTET_LINK);
+ }
+}
+
+static uint64_t
+rvi_pte_map(uint64_t pfn)
+{
+ return (RVI_PTE_ASSIGN_TABLE(pfn));
+}
+
+static void *
+rvi_create(uintptr_t *pml4_kaddr)
+{
+ rvi_map_t *rmap;
+ gipt_map_t *map;
+ gipt_t *root;
+ struct gipt_cbs cbs = {
+ .giptc_pte_type = rvi_pte_type,
+ .giptc_pte_map = rvi_pte_map,
+ };
+
+ rmap = kmem_zalloc(sizeof (*rmap), KM_SLEEP);
+ map = &rmap->rm_gipt;
+ root = gipt_alloc();
+ root->gipt_level = RVI_MAX_LEVELS - 1;
+ gipt_map_init(map, RVI_MAX_LEVELS, GIPT_HASH_SIZE_DEFAULT, &cbs, root);
+
+ *pml4_kaddr = (uintptr_t)root->gipt_kva;
+ return (rmap);
+}
+
+static void
+rvi_destroy(void *arg)
+{
+ rvi_map_t *rmap = arg;
+
+ if (rmap != NULL) {
+ gipt_map_t *map = &rmap->rm_gipt;
+
+ gipt_map_fini(map);
+ kmem_free(rmap, sizeof (*rmap));
+ }
+}
+
+static uint64_t
+rvi_wired_count(void *arg)
+{
+ rvi_map_t *rmap = arg;
+ uint64_t res;
+
+ mutex_enter(RVI_LOCK(rmap));
+ res = rmap->rm_wired_page_count;
+ mutex_exit(RVI_LOCK(rmap));
+
+ return (res);
+}
+
+static int
+rvi_is_wired(void *arg, uint64_t va, uint_t *protp)
+{
+ rvi_map_t *rmap = arg;
+ gipt_t *pt;
+ int rv = -1;
+
+ mutex_enter(RVI_LOCK(rmap));
+ pt = gipt_map_lookup_deepest(&rmap->rm_gipt, va);
+ if (pt != NULL) {
+ const uint64_t pte = GIPT_VA2PTE(pt, va);
+
+ if (RVI_MAPS_PAGE(pte, pt->gipt_level)) {
+ *protp = RVI_PTE_PROT(pte);
+ rv = 0;
+ }
+ }
+ mutex_exit(RVI_LOCK(rmap));
+
+ return (rv);
+}
+
+static int
+rvi_map(void *arg, uint64_t va, pfn_t pfn, uint_t lvl, uint_t prot,
+ uint8_t attr)
+{
+ rvi_map_t *rmap = arg;
+ gipt_map_t *map = &rmap->rm_gipt;
+ gipt_t *pt;
+ uint64_t *ptep, pte;
+
+ ASSERT((prot & PROT_READ) != 0);
+ ASSERT3U((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)), ==, 0);
+ ASSERT3U(lvl, <, RVI_MAX_LEVELS);
+
+ mutex_enter(RVI_LOCK(rmap));
+ pt = gipt_map_lookup(map, va, lvl);
+ if (pt == NULL) {
+ /*
+ * A table at the appropriate VA/level that would house this
+ * mapping does not currently exist. Try to walk down to that
+ * point, creating any necessary parent(s).
+ */
+ pt = gipt_map_create_parents(map, va, lvl);
+
+ /*
+ * There was a large page mapping in the way of creating the
+ * necessary parent table(s).
+ */
+ if (pt == NULL) {
+ panic("unexpected large page @ %08lx", va);
+ }
+ }
+ ptep = GIPT_VA2PTEP(pt, va);
+
+ pte = *ptep;
+ if (!RVI_IS_ABSENT(pte)) {
+ if (!RVI_MAPS_PAGE(pte, lvl)) {
+ panic("unexpected PT link @ %08lx in %p", va, pt);
+ } else {
+ panic("unexpected page mapped @ %08lx in %p", va, pt);
+ }
+ }
+
+ pte = RVI_PTE_ASSIGN_PAGE(lvl, pfn, prot, attr);
+ *ptep = pte;
+ pt->gipt_valid_cnt++;
+ rmap->rm_wired_page_count += gipt_level_count[lvl];
+
+ mutex_exit(RVI_LOCK(rmap));
+ return (0);
+}
+
+static uint64_t
+rvi_unmap(void *arg, uint64_t va, uint64_t end_va)
+{
+ rvi_map_t *rmap = arg;
+ gipt_map_t *map = &rmap->rm_gipt;
+ gipt_t *pt;
+ uint64_t cur_va = va;
+ uint64_t unmapped = 0;
+
+ mutex_enter(RVI_LOCK(rmap));
+
+ pt = gipt_map_lookup_deepest(map, cur_va);
+ if (pt == NULL) {
+ mutex_exit(RVI_LOCK(rmap));
+ return (0);
+ }
+ if (!RVI_MAPS_PAGE(GIPT_VA2PTE(pt, cur_va), pt->gipt_level)) {
+ cur_va = gipt_map_next_page(map, cur_va, end_va, &pt);
+ if (cur_va == 0) {
+ mutex_exit(RVI_LOCK(rmap));
+ return (0);
+ }
+ }
+
+ while (cur_va < end_va) {
+ uint64_t *ptep = GIPT_VA2PTEP(pt, cur_va);
+ const uint_t lvl = pt->gipt_level;
+
+ ASSERT(RVI_MAPS_PAGE(*ptep, lvl));
+ *ptep = 0;
+ pt->gipt_valid_cnt--;
+ unmapped += gipt_level_count[pt->gipt_level];
+
+ gipt_t *next_pt = pt;
+ uint64_t next_va;
+ next_va = gipt_map_next_page(map, cur_va, end_va, &next_pt);
+
+ if (pt->gipt_valid_cnt == 0) {
+ gipt_map_clean_parents(map, pt);
+ }
+ if (next_va == 0) {
+ break;
+ }
+ pt = next_pt;
+ cur_va = next_va;
+ }
+ rmap->rm_wired_page_count -= unmapped;
+
+ mutex_exit(RVI_LOCK(rmap));
+
+ return (unmapped);
+}
+
+struct vmm_pt_ops rvi_ops = {
+ .vpo_init = rvi_create,
+ .vpo_free = rvi_destroy,
+ .vpo_wired_cnt = rvi_wired_count,
+ .vpo_is_wired = rvi_is_wired,
+ .vpo_map = rvi_map,
+ .vpo_unmap = rvi_unmap,
+};
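
[Note on vmm_sol_rvi.c above] RVI/NPT walks the ordinary x86-64 long-mode page-table format, so the new rvi_ops backend encodes guest mappings with RVI_PTE_ASSIGN_PAGE (present/user/accessed bits, the large-page bit for non-leaf levels, the PAT attribute, and the protection bits) and decodes protection back out with RVI_PTE_PROT. A standalone sketch of that encode/decode round trip; the bit positions below are the standard x86-64 ones and are an assumption for illustration (the driver uses the kernel's PT_* definitions), and PAT handling is omitted:

/*
 * Standalone sketch of the PTE encode/decode used by rvi_map()/rvi_is_wired().
 * Bit positions are the standard x86-64 long-mode ones, assumed here; PAT
 * handling (RVI_PAT) is omitted for brevity.
 */
#include <inttypes.h>
#include <stdio.h>

#define PTE_PRESENT	(1ULL << 0)
#define PTE_WRITABLE	(1ULL << 1)
#define PTE_USER	(1ULL << 2)
#define PTE_ACCESSED	(1ULL << 5)
#define PTE_LGPG	(1ULL << 7)
#define PTE_NX		(1ULL << 63)
#define PTE_PADDR	0x000ffffffffff000ULL

#define PROT_READ	0x1
#define PROT_WRITE	0x2
#define PROT_EXEC	0x4

/* Analogous to RVI_PTE_ASSIGN_PAGE: build a leaf PTE for a page mapping. */
static uint64_t
pte_assign_page(unsigned int level, uint64_t pa, unsigned int prot)
{
	uint64_t pte = (pa & PTE_PADDR) | PTE_USER | PTE_ACCESSED | PTE_PRESENT;

	if (level != 0)
		pte |= PTE_LGPG;	/* large page above the 4K level */
	if (prot & PROT_WRITE)
		pte |= PTE_WRITABLE;
	if ((prot & PROT_EXEC) == 0)
		pte |= PTE_NX;
	return (pte);
}

/* Analogous to RVI_PTE_PROT: recover the protection bits from a PTE. */
static unsigned int
pte_prot(uint64_t pte)
{
	if ((pte & PTE_PRESENT) == 0)
		return (0);
	return (PROT_READ |
	    ((pte & PTE_NX) == 0 ? PROT_EXEC : 0) |
	    ((pte & PTE_WRITABLE) != 0 ? PROT_WRITE : 0));
}

int
main(void)
{
	uint64_t pte = pte_assign_page(0, 0x123456000ULL, PROT_READ | PROT_WRITE);

	printf("pte=%016" PRIx64 " prot=%x\n", pte, pte_prot(pte));
	return (0);
}
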
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
index 8d5051144c..58a62586a1 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
@@ -24,6 +24,7 @@
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/malloc.h>
+#include <sys/x86_archext.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
@@ -208,8 +209,17 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type type, int flags)
pmap->pm_pml4 = pml4;
return (1);
}
- case PT_RVI:
- /* RVI support not yet implemented */
+ case PT_RVI: {
+ struct vmm_pt_ops *ops = &rvi_ops;
+ void *pml4, *pmi;
+
+ pmi = ops->vpo_init((uintptr_t *)&pml4);
+
+ pmap->pm_ops = ops;
+ pmap->pm_impl = pmi;
+ pmap->pm_pml4 = pml4;
+ return (1);
+ }
default:
panic("unsupported pmap type: %x", type);
break;
@@ -537,6 +547,8 @@ vm_object_deallocate(vm_object_t vmo)
kmem_free(vmo, sizeof (*vmo));
}
+CTASSERT(VM_MEMATTR_UNCACHEABLE == MTRR_TYPE_UC);
+CTASSERT(VM_MEMATTR_WRITE_BACK == MTRR_TYPE_WB);
int
vm_object_set_memattr(vm_object_t vmo, vm_memattr_t attr)
{
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_support.s b/usr/src/uts/i86pc/io/vmm/vmm_support.s
new file mode 100644
index 0000000000..5777d46959
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_support.s
@@ -0,0 +1,54 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/segments.h>
+
+/*
+ * %rdi = trapno
+ *
+ * This variant is for any explicit exception injection that we need: in this
+ * case, we can't just, for example, do a direct "int $2", as that will then
+ * trash our %cr3 via tr_nmiint due to KPTI, so we have to fake a trap frame.
+ * Both NMIs and MCEs don't push an 'err' into the frame.
+ */
+ENTRY_NP(vmm_call_trap)
+ pushq %rbp
+ movq %rsp, %rbp
+ movq %rsp, %r11
+ andq $~0xf, %rsp /* align stack */
+ pushq $KDS_SEL /* %ss */
+ pushq %r11 /* %rsp */
+ pushfq /* %rflags */
+ pushq $KCS_SEL /* %cs */
+ leaq .trap_iret_dest(%rip), %rcx
+ pushq %rcx /* %rip */
+ cli
+ cmpq $T_NMIFLT, %rdi
+ je nmiint
+ cmpq $T_MCE, %rdi
+ je mcetrap
+
+ pushq %rdi /* save our bad trapno... */
+ leaq __vmm_call_bad_trap(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+ /*NOTREACHED*/
+
+.trap_iret_dest:
+ popq %rbp
+ ret
+SET_SIZE(vmm_call_trap)
+
+__vmm_call_bad_trap:
+ .string "bad trapno for vmm_call_trap()"
diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c
index b02142e7e5..b126e96f2c 100644
--- a/usr/src/uts/i86pc/io/vmm/x86.c
+++ b/usr/src/uts/i86pc/io/vmm/x86.c
@@ -198,6 +198,18 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/* Hide mwaitx/monitorx capability from the guest */
regs[2] &= ~AMDID2_MWAITX;
+#ifndef __FreeBSD__
+ /*
+ * Detection routines for TCE and FFXSR are missing
+ * from our vm_cpuid_capability() detection logic
+ * today. Mask them out until that is remedied.
+ * They do not appear to be in common usage, so their
+ * absence should not cause undue trouble.
+ */
+ regs[2] &= ~AMDID2_TCE;
+ regs[3] &= ~AMDID_FFXSR;
+#endif
+
/*
* Hide rdtscp/ia32_tsc_aux until we know how
* to deal with them.
diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c
index 34ae85086d..ecaf9c17cb 100644
--- a/usr/src/uts/i86pc/os/hma.c
+++ b/usr/src/uts/i86pc/os/hma.c
@@ -37,23 +37,33 @@ static boolean_t hma_vmx_ready = B_FALSE;
static const char *hma_vmx_error = NULL;
static id_space_t *hma_vmx_vpid;
-typedef enum vmx_cpu_state {
- VCS_UNINITIALIZED = 0,
- VCS_READY,
- VCS_ERROR
-} vmx_cpu_state_t;
-
/*
- * The bulk of VMX-related HMA state is protected by cpu_lock, rather than a
+ * The bulk of HMA state (VMX & SVM) is protected by cpu_lock, rather than a
* mutex specific to the module. It (cpu_lock) is already required for the
* state needed to perform setup on all CPUs, so it was a natural fit to
* protect this data too.
*/
+typedef enum hma_cpu_state {
+ HCS_UNINITIALIZED = 0,
+ HCS_READY,
+ HCS_ERROR
+} hma_cpu_state_t;
+static hma_cpu_state_t hma_cpu_status[NCPU];
+
static void *hma_vmx_vmxon_page[NCPU];
static uintptr_t hma_vmx_vmxon_pa[NCPU];
-static vmx_cpu_state_t hma_vmx_status[NCPU];
static uint32_t hma_vmx_revision;
+static boolean_t hma_svm_ready = B_FALSE;
+static const char *hma_svm_error = NULL;
+static uint32_t hma_svm_features;
+static uint32_t hma_svm_max_asid;
+
+static void *hma_svm_hsave_page[NCPU];
+static uintptr_t hma_svm_hsave_pa[NCPU];
+
+static hma_svm_asid_t hma_svm_cpu_asid[NCPU];
+
static int hma_vmx_init(void);
static int hma_svm_init(void);
@@ -94,8 +104,7 @@ hma_register(const char *name)
is_ready = hma_vmx_ready;
break;
case X86_VENDOR_AMD:
- /* Punt on SVM support for now */
- is_ready = B_FALSE;
+ is_ready = hma_svm_ready;
break;
default:
is_ready = B_FALSE;
@@ -156,9 +165,9 @@ hma_vmx_vpid_free(uint16_t vpid)
extern int hma_vmx_vmxon(uintptr_t);
-/* ARGSUSED */
static int
-hma_vmx_cpu_vmxon(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
+hma_vmx_cpu_vmxon(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
+ xc_arg_t arg3 __unused)
{
uint64_t fctrl;
processorid_t id = CPU->cpu_seqid;
@@ -181,9 +190,9 @@ hma_vmx_cpu_vmxon(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
setcr4(getcr4() | CR4_VMXE);
if (hma_vmx_vmxon(vmxon_pa) == 0) {
- hma_vmx_status[id] = VCS_READY;
+ hma_cpu_status[id] = HCS_READY;
} else {
- hma_vmx_status[id] = VCS_ERROR;
+ hma_cpu_status[id] = HCS_ERROR;
/*
* If VMX has already been marked active and available for the
@@ -198,9 +207,8 @@ hma_vmx_cpu_vmxon(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
return (0);
}
-/* ARGSUSED2 */
static int
-hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg)
+hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg __unused)
{
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(id >= 0 && id < NCPU);
@@ -223,8 +231,8 @@ hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg)
}
/* Perform initialization if it has not been previously attempted. */
- if (hma_vmx_status[id] != VCS_UNINITIALIZED) {
- return ((hma_vmx_status[id] == VCS_READY) ? 0 : -1);
+ if (hma_cpu_status[id] != HCS_UNINITIALIZED) {
+ return ((hma_cpu_status[id] == HCS_READY) ? 0 : -1);
}
/* Allocate the VMXON page for this CPU */
@@ -265,7 +273,7 @@ hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg)
xc_sync(0, 0, 0, CPUSET2BV(set), hma_vmx_cpu_vmxon);
}
- return (hma_vmx_status[id] != VCS_READY);
+ return (hma_cpu_status[id] != HCS_READY);
}
static int
@@ -329,10 +337,233 @@ bail:
return (-1);
}
+#define VMCB_FLUSH_NOTHING 0x0
+#define VMCB_FLUSH_ALL 0x1
+#define VMCB_FLUSH_ASID 0x3
+
+void
+hma_svm_asid_init(hma_svm_asid_t *vcp)
+{
+ /*
+ * Initialize the generation to 0, forcing an ASID allocation on first
+ * entry. Leave the ASID at 0, so if the host forgoes the call to
+ * hma_svm_asid_update(), SVM will bail on the invalid vcpu state.
+ */
+ vcp->hsa_gen = 0;
+ vcp->hsa_asid = 0;
+}
+
+uint8_t
+hma_svm_asid_update(hma_svm_asid_t *vcp, boolean_t flush_by_asid,
+ boolean_t npt_flush)
+{
+ hma_svm_asid_t *hcp = &hma_svm_cpu_asid[CPU->cpu_seqid];
+
+ ASSERT(curthread->t_preempt != 0);
+
+ /*
+ * If NPT changes dictate a TLB flush and by-ASID flushing is not
+ * supported/used, force a fresh ASID allocation.
+ */
+ if (npt_flush && !flush_by_asid) {
+ vcp->hsa_gen = 0;
+ }
+
+ if (vcp->hsa_gen != hcp->hsa_gen) {
+ hcp->hsa_asid++;
+
+ if (hcp->hsa_asid >= hma_svm_max_asid) {
+ /* Keep the ASID properly constrained */
+ hcp->hsa_asid = 1;
+ hcp->hsa_gen++;
+ if (hcp->hsa_gen == 0) {
+ /*
+ * Stay clear of the '0' sentinel value for
+ * generation, if wrapping around.
+ */
+ hcp->hsa_gen = 1;
+ }
+ }
+ vcp->hsa_gen = hcp->hsa_gen;
+ vcp->hsa_asid = hcp->hsa_asid;
+
+ ASSERT(vcp->hsa_asid != 0);
+ ASSERT3U(vcp->hsa_asid, <, hma_svm_max_asid);
+
+ if (flush_by_asid) {
+ return (VMCB_FLUSH_ASID);
+ }
+ return (VMCB_FLUSH_ALL);
+ } else if (npt_flush) {
+ ASSERT(flush_by_asid);
+ return (VMCB_FLUSH_ASID);
+ }
+ return (VMCB_FLUSH_NOTHING);
+}
+
+static int
+hma_svm_cpu_activate(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
+ xc_arg_t arg3 __unused)
+{
+ const processorid_t id = CPU->cpu_seqid;
+ const uintptr_t hsave_pa = hma_svm_hsave_pa[id];
+ uint64_t efer;
+
+ VERIFY(hsave_pa != 0);
+
+ /* Enable SVM via EFER */
+ efer = rdmsr(MSR_AMD_EFER);
+ efer |= AMD_EFER_SVME;
+ wrmsr(MSR_AMD_EFER, efer);
+
+ /* Setup hsave area */
+ wrmsr(MSR_AMD_VM_HSAVE_PA, hsave_pa);
+
+ hma_cpu_status[id] = HCS_READY;
+ return (0);
+}
+
+static int
+hma_svm_cpu_setup(cpu_setup_t what, int id, void *arg __unused)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(id >= 0 && id < NCPU);
+
+ switch (what) {
+ case CPU_CONFIG:
+ case CPU_ON:
+ case CPU_INIT:
+ break;
+ default:
+ /*
+ * Other events, such as CPU offlining, are of no interest.
+ * Letting the SVM state linger should not cause any harm.
+ *
+ * This logic assumes that any offlining activity is strictly
+ * administrative in nature and will not alter any existing
+ * configuration (such as EFER bits previously set).
+ */
+ return (0);
+ }
+
+ /* Perform initialization if it has not been previously attempted. */
+ if (hma_cpu_status[id] != HCS_UNINITIALIZED) {
+ return ((hma_cpu_status[id] == HCS_READY) ? 0 : -1);
+ }
+
+ /* Allocate the hsave page for this CPU */
+ if (hma_svm_hsave_page[id] == NULL) {
+ caddr_t va;
+ pfn_t pfn;
+
+ va = kmem_alloc(PAGESIZE, KM_SLEEP);
+ VERIFY0((uintptr_t)va & PAGEOFFSET);
+ hma_svm_hsave_page[id] = va;
+
+ /*
+ * Cache the physical address of the hsave page rather than
+ * looking it up later when the potential blocking of
+ * hat_getpfnum would be less acceptable.
+ */
+ pfn = hat_getpfnum(kas.a_hat, va);
+ hma_svm_hsave_pa[id] = (pfn << PAGESHIFT);
+ } else {
+ VERIFY(hma_svm_hsave_pa[id] != 0);
+ }
+
+ kpreempt_disable();
+ if (CPU->cpu_seqid == id) {
+ /* Perform svm setup directly if this CPU is the target */
+ (void) hma_svm_cpu_activate(0, 0, 0);
+ kpreempt_enable();
+ } else {
+ cpuset_t set;
+
+ /* Use a cross-call if a remote CPU is the target */
+ kpreempt_enable();
+ cpuset_zero(&set);
+ cpuset_add(&set, id);
+ xc_sync(0, 0, 0, CPUSET2BV(set), hma_svm_cpu_activate);
+ }
+
+ return (hma_cpu_status[id] != HCS_READY);
+}
static int
hma_svm_init(void)
{
- /* punt on AMD for now */
- return (ENOTSUP);
+ uint64_t msr;
+ const char *msg = NULL;
+ struct cpuid_regs regs;
+ cpu_t *cp;
+
+ if (!is_x86_feature(x86_featureset, X86FSET_SVM)) {
+ msg = "CPU does not support SVM";
+ goto bail;
+ }
+
+ msr = rdmsr(MSR_AMD_VM_CR);
+ if ((msr & AMD_VM_CR_SVMDIS) != 0) {
+ msg = "SVM disabled by BIOS";
+ goto bail;
+ }
+
+ regs.cp_eax = 0x8000000a;
+ (void) cpuid_insn(NULL, &regs);
+ const uint32_t nasid = regs.cp_ebx;
+ const uint32_t feat = regs.cp_edx;
+
+ if (nasid == 0) {
+ msg = "Not enough ASIDs for guests";
+ goto bail;
+ }
+ if ((feat & CPUID_AMD_EDX_NESTED_PAGING) == 0) {
+ msg = "CPU does not support nested paging";
+ goto bail;
+ }
+ if ((feat & CPUID_AMD_EDX_NRIPS) == 0) {
+ msg = "CPU does not support NRIP save";
+ goto bail;
+ }
+
+ hma_svm_features = feat;
+ hma_svm_max_asid = nasid;
+
+ mutex_enter(&cpu_lock);
+ /* Perform SVM configuration for already-online CPUs. */
+ cp = cpu_active;
+ do {
+ int err = hma_svm_cpu_setup(CPU_ON, cp->cpu_seqid, NULL);
+ if (err != 0) {
+ msg = "failure during SVM setup";
+ mutex_exit(&cpu_lock);
+ goto bail;
+ }
+ } while ((cp = cp->cpu_next_onln) != cpu_active);
+
+ /*
+ * Register callback for later-onlined CPUs and perform other remaining
+ * resource allocation.
+ */
+ register_cpu_setup_func(hma_svm_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ /* Initialize per-CPU ASID state. */
+ for (uint_t i = 0; i < NCPU; i++) {
+ /*
+ * Skip past sentinel 0 value for generation. Doing so for
+ * ASID is unneeded, since it will be incremented during the
+ * first allocation.
+ */
+ hma_svm_cpu_asid[i].hsa_gen = 1;
+ hma_svm_cpu_asid[i].hsa_asid = 0;
+ }
+
+ hma_svm_ready = B_TRUE;
+ return (0);
+
+bail:
+ hma_svm_error = msg;
+ cmn_err(CE_NOTE, "hma_svm_init: %s", msg);
+ return (-1);
}
diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h
index 0c6161fdfc..86099b79e1 100644
--- a/usr/src/uts/i86pc/sys/hma.h
+++ b/usr/src/uts/i86pc/sys/hma.h
@@ -49,6 +49,15 @@ extern void hma_unregister(hma_reg_t *);
extern uint16_t hma_vmx_vpid_alloc(void);
extern void hma_vmx_vpid_free(uint16_t);
+struct hma_svm_asid {
+ uint64_t hsa_gen;
+ uint32_t hsa_asid;
+};
+typedef struct hma_svm_asid hma_svm_asid_t;
+
+extern void hma_svm_asid_init(hma_svm_asid_t *);
+extern uint8_t hma_svm_asid_update(hma_svm_asid_t *, boolean_t, boolean_t);
+
/*
* FPU related management. These functions provide a set of APIs to manage the
* FPU state and switch between host and guest management of this state.
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 163c0781cf..e5e5460211 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -38,7 +38,7 @@
* http://www.illumos.org/license/CDDL.
*
* Copyright 2015 Pluribus Networks Inc.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _VMM_H_
@@ -741,6 +741,8 @@ void vmm_sol_glue_cleanup(void);
int vmm_mod_load(void);
int vmm_mod_unload(void);
+void vmm_call_trap(uint64_t);
+
/*
* Because of tangled headers, these are mirrored by vmm_drv.h to present the
* interface to driver consumers.
diff --git a/usr/src/uts/intel/sys/controlregs.h b/usr/src/uts/intel/sys/controlregs.h
index babf036e0b..0be7b3b650 100644
--- a/usr/src/uts/intel/sys/controlregs.h
+++ b/usr/src/uts/intel/sys/controlregs.h
@@ -200,6 +200,18 @@ extern "C" {
#define MSR_AMD_KGSBASE 0xc0000102 /* swapgs swaps this with gsbase */
#define MSR_AMD_TSCAUX 0xc0000103 /* %ecx value on rdtscp insn */
+
+/* AMD's SVM MSRs */
+
+#define MSR_AMD_VM_CR 0xc0010114 /* SVM global control */
+#define MSR_AMD_VM_HSAVE_PA 0xc0010117 /* SVM host save area address */
+
+#define AMD_VM_CR_DPD (1 << 0)
+#define AMD_VM_CR_R_INIT (1 << 1)
+#define AMD_VM_CR_DIS_A20M (1 << 2)
+#define AMD_VM_CR_LOCK (1 << 3)
+#define AMD_VM_CR_SVMDIS (1 << 4)
+
/* AMD's configuration MSRs, weakly documented in the revision guide */
#define MSR_AMD_DC_CFG 0xc0011022
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 943bdd8203..59a974dfd2 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -210,6 +210,18 @@ extern "C" {
#define CPUID_AMD_EBX_SSB_NO 0x004000000 /* AMD: SSB Fixed */
/*
+ * AMD SVM features (extended function 0x8000000A).
+ */
+#define CPUID_AMD_EDX_NESTED_PAGING 0x000000001 /* AMD: SVM NP */
+#define CPUID_AMD_EDX_LBR_VIRT 0x000000002 /* AMD: LBR virt. */
+#define CPUID_AMD_EDX_SVML 0x000000004 /* AMD: SVM lock */
+#define CPUID_AMD_EDX_NRIPS 0x000000008 /* AMD: NRIP save */
+#define CPUID_AMD_EDX_TSC_RATE_MSR 0x000000010 /* AMD: MSR TSC ctrl */
+#define CPUID_AMD_EDX_VMCB_CLEAN 0x000000020 /* AMD: VMCB clean bits */
+#define CPUID_AMD_EDX_FLUSH_ASID 0x000000040 /* AMD: flush by ASID */
+#define CPUID_AMD_EDX_DECODE_ASSISTS 0x000000080 /* AMD: decode assists */
+
+/*
* Intel now seems to have claimed part of the "extended" function
* space that we previously for non-Intel implementors to use.
* More excitingly still, they've claimed bit 20 to mean LAHF/SAHF
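
[Note on the hma.c/x86_archext.h additions above] hma_svm_init() keys off CPUID extended leaf 0x8000000A: %ebx reports the number of ASIDs and %edx carries the feature bits defined above, with nested paging and NRIP-save treated as hard requirements and flush-by-ASID/decode assists as optional. A small user-space sketch performing the same probe, assuming a GCC/Clang toolchain for <cpuid.h>/__get_cpuid (that header is not part of this change):

/*
 * User-space sketch of the CPUID 0x8000000A probe done by hma_svm_init().
 * Bit values mirror the CPUID_AMD_EDX_* definitions added above; __get_cpuid
 * is a GCC/Clang convenience and is an assumption of this sketch.
 */
#include <cpuid.h>
#include <stdio.h>

#define EDX_NESTED_PAGING	0x00000001
#define EDX_NRIPS		0x00000008
#define EDX_FLUSH_ASID		0x00000040
#define EDX_DECODE_ASSISTS	0x00000080

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000000a, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000000A not available (no SVM)");
		return (1);
	}
	printf("ASIDs supported: %u\n", ebx);
	printf("nested paging:   %s\n", (edx & EDX_NESTED_PAGING) ? "yes" : "no");
	printf("NRIP save:       %s\n", (edx & EDX_NRIPS) ? "yes" : "no");
	printf("flush by ASID:   %s\n", (edx & EDX_FLUSH_ASID) ? "yes" : "no");
	printf("decode assists:  %s\n", (edx & EDX_DECODE_ASSISTS) ? "yes" : "no");

	if (ebx == 0 || (edx & EDX_NESTED_PAGING) == 0 ||
	    (edx & EDX_NRIPS) == 0) {
		puts("requirements checked by hma_svm_init() are not met");
		return (1);
	}
	return (0);
}
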