author     Patrick Mooney <pmooney@pfmooney.com>     2020-11-03 02:16:48 +0000
committer  Patrick Mooney <pmooney@oxide.computer>   2021-01-08 22:12:27 +0000
commit     2606939d92dd3044a9851b2930ebf533c3c03892 (patch)
tree       0732de0c5fa81e77230ed8909d0c6a2bfd42dcca
parent     78f846c0ab4f41678386d3e1b49c16cc8db07a8b (diff)
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
-rw-r--r--  usr/src/cmd/bhyve/bhyverun.c                 42
-rw-r--r--  usr/src/cmd/bhyve/bhyverun.h                  3
-rw-r--r--  usr/src/cmd/bhyve/spinup_ap.c                29
-rw-r--r--  usr/src/cmd/bhyve/spinup_ap.h                 4
-rw-r--r--  usr/src/lib/libvmmapi/common/mapfile-vers     2
-rw-r--r--  usr/src/lib/libvmmapi/common/vmmapi.c        46
-rw-r--r--  usr/src/lib/libvmmapi/common/vmmapi.h         4
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm.c           35
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c         52
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.c        203
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.h          3
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h     7
-rw-r--r--  usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h    48
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm.c              616
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c       42
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_stat.c           3
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_stat.h           3
-rw-r--r--  usr/src/uts/i86pc/sys/vmm.h                  24
-rw-r--r--  usr/src/uts/i86pc/sys/vmm_dev.h              25
19 files changed, 759 insertions(+), 432 deletions(-)
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 941dfae2f5..8522d85bd9 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -514,13 +514,14 @@ void
fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
#else
void
-fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
- bool suspend)
+fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend)
#endif
{
int error;
+#ifdef __FreeBSD__
assert(fromcpu == BSP);
+#endif
/*
* The 'newcpu' must be activated in the context of 'fromcpu'. If
@@ -573,7 +574,7 @@ vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
assert(entry->cmd == VEC_DEFAULT);
- entry->cmd = VEC_COMPLETE_MMIO;
+ entry->cmd = VEC_FULFILL_MMIO;
mmio->bytes = bytes;
mmio->read = 1;
mmio->gpa = gpa;
@@ -588,7 +589,7 @@ vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
assert(entry->cmd == VEC_DEFAULT);
- entry->cmd = VEC_COMPLETE_MMIO;
+ entry->cmd = VEC_FULFILL_MMIO;
mmio->bytes = bytes;
mmio->read = 0;
mmio->gpa = gpa;
@@ -603,7 +604,7 @@ vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
assert(entry->cmd == VEC_DEFAULT);
- entry->cmd = VEC_COMPLETE_INOUT;
+ entry->cmd = VEC_FULFILL_INOUT;
inout->bytes = bytes;
inout->flags = INOUT_IN;
inout->port = port;
@@ -618,7 +619,7 @@ vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
assert(entry->cmd == VEC_DEFAULT);
- entry->cmd = VEC_COMPLETE_INOUT;
+ entry->cmd = VEC_FULFILL_INOUT;
inout->bytes = bytes;
inout->flags = 0;
inout->port = port;
@@ -727,6 +728,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
return (VMEXIT_CONTINUE);
}
+#ifdef __FreeBSD__
static int
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
@@ -736,6 +738,18 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
return (VMEXIT_CONTINUE);
}
+#else
+static int
+vmexit_run_state(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+ /*
+ * Run-state transitions (INIT, SIPI, etc) are handled in-kernel, so an
+ * exit to userspace with that code is not expected.
+ */
+ fprintf(stderr, "unexpected run-state VM exit");
+ return (VMEXIT_ABORT);
+}
+#endif /* __FreeBSD__ */
#ifdef __FreeBSD__
#define DEBUG_EPT_MISCONFIG
@@ -1013,7 +1027,11 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+#ifdef __FreeBSD__
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+#else
+ [VM_EXITCODE_RUN_STATE] = vmexit_run_state,
+#endif
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
[VM_EXITCODE_DEBUG] = vmexit_debug,
@@ -1521,13 +1539,21 @@ main(int argc, char *argv[])
errx(EX_OSERR, "cap_enter() failed");
#endif
+#ifdef __FreeBSD__
/*
* Add CPU 0
*/
-#ifdef __FreeBSD__
fbsdrun_addcpu(ctx, BSP, BSP, rip);
#else
- fbsdrun_addcpu(ctx, BSP, BSP, rip, suspend);
+ /* Set BSP to run (unlike the APs which wait for INIT) */
+ error = vm_set_run_state(ctx, BSP, VRS_RUN, 0);
+ assert(error == 0);
+ fbsdrun_addcpu(ctx, BSP, rip, suspend);
+
+ /* Add subsequent CPUs, which will wait until INIT/SIPI-ed */
+ for (uint_t i = 1; i < guest_ncpus; i++) {
+ spinup_halted_ap(ctx, i);
+ }
#endif
/*
* Head off to the main event dispatch loop
diff --git a/usr/src/cmd/bhyve/bhyverun.h b/usr/src/cmd/bhyve/bhyverun.h
index 99ce739f70..f2582512ea 100644
--- a/usr/src/cmd/bhyve/bhyverun.h
+++ b/usr/src/cmd/bhyve/bhyverun.h
@@ -58,8 +58,7 @@ void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
#ifdef __FreeBSD__
void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
#else
-void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
- bool suspend);
+void fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend);
#endif
int fbsdrun_muxed(void);
int fbsdrun_vmexit_on_hlt(void);
diff --git a/usr/src/cmd/bhyve/spinup_ap.c b/usr/src/cmd/bhyve/spinup_ap.c
index 80caafe78e..96eef44bcf 100644
--- a/usr/src/cmd/bhyve/spinup_ap.c
+++ b/usr/src/cmd/bhyve/spinup_ap.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include "bhyverun.h"
#include "spinup_ap.h"
+#ifdef __FreeBSD__
static void
spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip)
{
@@ -101,7 +102,6 @@ spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
fbsdrun_set_capabilities(ctx, newcpu);
-#ifdef __FreeBSD__
/*
* Enable the 'unrestricted guest' mode for 'newcpu'.
*
@@ -110,17 +110,30 @@ spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
*/
error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
assert(error == 0);
-#else
- /* Unrestricted Guest is always enabled on illumos */
-#endif
spinup_ap_realmode(ctx, newcpu, &rip);
-#ifdef __FreeBSD__
fbsdrun_addcpu(ctx, vcpu, newcpu, rip);
-#else
- fbsdrun_addcpu(ctx, vcpu, newcpu, rip, false);
-#endif
return (newcpu);
}
+#else /* __FreeBSD__ */
+void
+spinup_halted_ap(struct vmctx *ctx, int newcpu)
+{
+ int error;
+
+ assert(newcpu != 0);
+ assert(newcpu < guest_ncpus);
+
+ error = vcpu_reset(ctx, newcpu);
+ assert(error == 0);
+
+ fbsdrun_set_capabilities(ctx, newcpu);
+
+ error = vm_set_run_state(ctx, newcpu, VRS_HALT, 0);
+ assert(error == 0);
+
+ fbsdrun_addcpu(ctx, newcpu, 0, false);
+}
+#endif /* __FreeBSD__ */
diff --git a/usr/src/cmd/bhyve/spinup_ap.h b/usr/src/cmd/bhyve/spinup_ap.h
index 226542f6c3..bce4f3878c 100644
--- a/usr/src/cmd/bhyve/spinup_ap.h
+++ b/usr/src/cmd/bhyve/spinup_ap.h
@@ -31,6 +31,10 @@
#ifndef _SPINUP_AP_H_
#define _SPINUP_AP_H_
+#ifdef __FreeBSD__
int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip);
+#else
+void spinup_halted_ap(struct vmctx *ctx, int newcpu);
+#endif /* __FreeBSD__ */
#endif
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
index 9e972b8d60..26cfd15426 100644
--- a/usr/src/lib/libvmmapi/common/mapfile-vers
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -123,6 +123,8 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_unassign_pptdev;
vm_pmtmr_set_location;
vm_wrlock_cycle;
+ vm_get_run_state;
+ vm_set_run_state;
local:
*;
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 5b2cb4c235..0b22ca7522 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -1302,6 +1302,18 @@ vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
return (error);
}
+#ifndef __FreeBSD__
+int
+vcpu_reset(struct vmctx *vmctx, int vcpu)
+{
+ struct vm_vcpu_reset vvr;
+
+ vvr.vcpuid = vcpu;
+ vvr.kind = VRK_RESET;
+
+ return (ioctl(vmctx->fd, VM_RESET_CPU, &vvr));
+}
+#else /* __FreeBSD__ */
/*
* From Intel Vol 3a:
* Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
@@ -1458,6 +1470,7 @@ vcpu_reset(struct vmctx *vmctx, int vcpu)
done:
return (error);
}
+#endif /* __FreeBSD__ */
int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
@@ -1839,6 +1852,39 @@ vm_wrlock_cycle(struct vmctx *ctx)
}
return (0);
}
+
+int
+vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state,
+ uint8_t *sipi_vector)
+{
+ struct vm_run_state data;
+
+ data.vcpuid = vcpu;
+ if (ioctl(ctx->fd, VM_GET_RUN_STATE, &data) != 0) {
+ return (errno);
+ }
+
+ *state = data.state;
+ *sipi_vector = data.sipi_vector;
+ return (0);
+}
+
+int
+vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
+ uint8_t sipi_vector)
+{
+ struct vm_run_state data;
+
+ data.vcpuid = vcpu;
+ data.state = state;
+ data.sipi_vector = sipi_vector;
+ if (ioctl(ctx->fd, VM_SET_RUN_STATE, &data) != 0) {
+ return (errno);
+ }
+
+ return (0);
+}
+
#endif /* __FreeBSD__ */
#ifdef __FreeBSD__
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index 96102ec925..f7aaa02087 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -304,6 +304,10 @@ int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
/* illumos-specific APIs */
int vm_pmtmr_set_location(struct vmctx *ctx, uint16_t ioport);
int vm_wrlock_cycle(struct vmctx *ctx);
+int vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state,
+ uint8_t *sipi_vector);
+int vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
+ uint8_t sipi_vector);
#endif /* __FreeBSD__ */
#ifdef __FreeBSD__
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index aaa19d4bab..02926b6b12 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -1917,8 +1917,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx)
* Start vcpu with specified RIP.
*/
static int
-svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
- struct vm_eventinfo *evinfo)
+svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap)
{
struct svm_regctx *gctx;
struct svm_softc *svm_sc;
@@ -2010,34 +2009,18 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
inject_state = svm_inject_vlapic(svm_sc, vcpu, vlapic,
inject_state);
- if (vcpu_suspended(evinfo)) {
- enable_gintr();
- vm_exit_suspended(vm, vcpu, state->rip);
- break;
- }
-
- if (vcpu_runblocked(evinfo)) {
- enable_gintr();
- vm_exit_runblock(vm, vcpu, state->rip);
- break;
- }
-
- if (vcpu_reqidle(evinfo)) {
- enable_gintr();
- vm_exit_reqidle(vm, vcpu, state->rip);
- break;
- }
-
- /* We are asked to give the cpu by scheduler. */
- if (vcpu_should_yield(vm, vcpu)) {
+ /*
+ * Check for vCPU bail-out conditions. This must be done after
+ * svm_inject_events() to detect a triple-fault condition.
+ */
+ if (vcpu_entry_bailout_checks(vm, vcpu, state->rip)) {
enable_gintr();
- vm_exit_astpending(vm, vcpu, state->rip);
break;
}
- if (vcpu_debugged(vm, vcpu)) {
+ if (vcpu_run_state_pending(vm, vcpu)) {
enable_gintr();
- vm_exit_debug(vm, vcpu, state->rip);
+ vm_exit_run_state(vm, vcpu, state->rip);
break;
}
@@ -2303,7 +2286,7 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
}
static int
-svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+svm_setdesc(void *arg, int vcpu, int reg, const struct seg_desc *desc)
{
struct vmcb *vmcb;
struct svm_softc *sc;
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index 54a03b1d3e..b5c713891c 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -2738,8 +2738,7 @@ vmx_dr_leave_guest(struct vmxctx *vmxctx)
}
static int
-vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
- struct vm_eventinfo *evinfo)
+vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap)
{
int rc, handled, launched;
struct vmx *vmx;
@@ -2834,39 +2833,17 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
}
/*
- * Check for vcpu suspension after injecting events because
- * vmx_inject_events() can suspend the vcpu due to a
- * triple fault.
+ * Check for vCPU bail-out conditions. This must be done after
+ * vmx_inject_events() to detect a triple-fault condition.
*/
- if (vcpu_suspended(evinfo)) {
+ if (vcpu_entry_bailout_checks(vmx->vm, vcpu, rip)) {
enable_intr();
- vm_exit_suspended(vmx->vm, vcpu, rip);
break;
}
- if (vcpu_runblocked(evinfo)) {
+ if (vcpu_run_state_pending(vm, vcpu)) {
enable_intr();
- vm_exit_runblock(vmx->vm, vcpu, rip);
- break;
- }
-
- if (vcpu_reqidle(evinfo)) {
- enable_intr();
- vm_exit_reqidle(vmx->vm, vcpu, rip);
- break;
- }
-
- if (vcpu_should_yield(vm, vcpu)) {
- enable_intr();
- vm_exit_astpending(vmx->vm, vcpu, rip);
- vmx_astpending_trace(vmx, vcpu, rip);
- handled = HANDLED;
- break;
- }
-
- if (vcpu_debugged(vm, vcpu)) {
- enable_intr();
- vm_exit_debug(vmx->vm, vcpu, rip);
+ vm_exit_run_state(vmx->vm, vcpu, rip);
break;
}
@@ -2985,19 +2962,12 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
rip = vmexit->rip;
} while (handled);
- /*
- * If a VM exit has been handled then the exitcode must be BOGUS
- * If a VM exit is not handled then the exitcode must not be BOGUS
- */
- if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) ||
- (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) {
- panic("Mismatch between handled (%d) and exitcode (%d)",
- handled, vmexit->exitcode);
+ /* If a VM exit has been handled then the exitcode must be BOGUS */
+ if (handled && vmexit->exitcode != VM_EXITCODE_BOGUS) {
+ panic("Non-BOGUS exitcode (%d) unexpected for handled VM exit",
+ vmexit->exitcode);
}
- if (!handled)
- vmm_stat_incr(vm, vcpu, VMEXIT_USERSPACE, 1);
-
VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
vmexit->exitcode);
@@ -3261,7 +3231,7 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
}
static int
-vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
+vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc)
{
int hostcpu, running;
struct vmx *vmx = arg;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index e84520de46..8c054a52fb 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -992,13 +992,10 @@ int
vlapic_icrlo_write_handler(struct vlapic *vlapic)
{
int i;
- bool phys;
cpuset_t dmask;
uint64_t icrval;
- uint32_t dest, vec, mode;
- struct vlapic *vlapic2;
+ uint32_t dest, vec, mode, dsh;
struct LAPIC *lapic;
- uint16_t maxcpus;
lapic = vlapic->apic_page;
lapic->icr_lo &= ~APIC_DELSTAT_PEND;
@@ -1010,93 +1007,79 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic)
dest = icrval >> (32 + 24);
vec = icrval & APIC_VECTOR_MASK;
mode = icrval & APIC_DELMODE_MASK;
+ dsh = icrval & APIC_DEST_MASK;
if (mode == APIC_DELMODE_FIXED && vec < 16) {
vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
- VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
return (0);
}
+ if (mode == APIC_DELMODE_INIT &&
+ (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
+ /* No work required to deassert INIT */
+ return (0);
+ }
+ if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
+ !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
+ /*
+ * While Intel makes no mention of restrictions for destination
+ * shorthand when sending INIT or SIPI, AMD requires either a
+ * specific destination or all-excluding self. Common use seems
+ * to be restricted to those two cases.
+ */
+ return (-1);
+ }
- VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
+ switch (dsh) {
+ case APIC_DEST_DESTFLD:
+ vlapic_calcdest(vlapic->vm, &dmask, dest,
+ (icrval & APIC_DESTMODE_LOG) == 0, false, x2apic(vlapic));
+ break;
+ case APIC_DEST_SELF:
+ CPU_SETOF(vlapic->vcpuid, &dmask);
+ break;
+ case APIC_DEST_ALLISELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ break;
+ case APIC_DEST_ALLESELF:
+ dmask = vm_active_cpus(vlapic->vm);
+ CPU_CLR(vlapic->vcpuid, &dmask);
+ break;
+ default:
+ /*
+ * All possible delivery notations are covered above.
+ * We should never end up here.
+ */
+ panic("unknown delivery shorthand: %x", dsh);
+ }
- if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
- switch (icrval & APIC_DEST_MASK) {
- case APIC_DEST_DESTFLD:
- phys = ((icrval & APIC_DESTMODE_LOG) == 0);
- vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
- x2apic(vlapic));
+ while ((i = CPU_FFS(&dmask)) != 0) {
+ i--;
+ CPU_CLR(i, &dmask);
+ switch (mode) {
+ case APIC_DELMODE_FIXED:
+ lapic_intr_edge(vlapic->vm, i, vec);
+ vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
+ VLAPIC_IPI_SEND, 1);
+ vmm_stat_incr(vlapic->vm, i,
+ VLAPIC_IPI_RECV, 1);
break;
- case APIC_DEST_SELF:
- CPU_SETOF(vlapic->vcpuid, &dmask);
+ case APIC_DELMODE_NMI:
+ vm_inject_nmi(vlapic->vm, i);
break;
- case APIC_DEST_ALLISELF:
- dmask = vm_active_cpus(vlapic->vm);
+ case APIC_DELMODE_INIT:
+ (void) vm_inject_init(vlapic->vm, i);
break;
- case APIC_DEST_ALLESELF:
- dmask = vm_active_cpus(vlapic->vm);
- CPU_CLR(vlapic->vcpuid, &dmask);
+ case APIC_DELMODE_STARTUP:
+ (void) vm_inject_sipi(vlapic->vm, i, vec);
break;
+ case APIC_DELMODE_LOWPRIO:
+ case APIC_DELMODE_SMI:
default:
- CPU_ZERO(&dmask); /* satisfy gcc */
+ /* Unhandled IPI modes (for now) */
break;
}
-
- while ((i = CPU_FFS(&dmask)) != 0) {
- i--;
- CPU_CLR(i, &dmask);
- if (mode == APIC_DELMODE_FIXED) {
- lapic_intr_edge(vlapic->vm, i, vec);
- vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
- VLAPIC_IPI_SEND, 1);
- vmm_stat_incr(vlapic->vm, i,
- VLAPIC_IPI_RECV, 1);
- VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
- "to vcpuid %d", vec, i);
- } else {
- vm_inject_nmi(vlapic->vm, i);
- VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
- "to vcpuid %d", i);
- }
- }
-
- return (0); /* handled completely in the kernel */
- }
-
- maxcpus = vm_get_maxcpus(vlapic->vm);
- if (mode == APIC_DELMODE_INIT) {
- if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
- return (0);
-
- if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
- vlapic2 = vm_lapic(vlapic->vm, dest);
-
- /* move from INIT to waiting-for-SIPI state */
- if (vlapic2->boot_state == BS_INIT) {
- vlapic2->boot_state = BS_SIPI;
- }
-
- return (0);
- }
- }
-
- if (mode == APIC_DELMODE_STARTUP) {
- if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
- vlapic2 = vm_lapic(vlapic->vm, dest);
-
- /*
- * Ignore SIPIs in any state other than wait-for-SIPI
- */
- if (vlapic2->boot_state != BS_SIPI)
- return (0);
-
- vlapic2->boot_state = BS_RUNNING;
- vm_req_spinup_ap(vlapic->vm, dest, vec << PAGE_SHIFT);
- return (0);
- }
}
-
- /* Return to userland. */
- return (-1);
+ return (0);
}
void
@@ -1450,30 +1433,72 @@ vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
return (retval);
}
-static void
+void
vlapic_reset(struct vlapic *vlapic)
{
- struct LAPIC *lapic;
+ struct LAPIC *lapic = vlapic->apic_page;
+ uint32_t *isrptr, *tmrptr, *irrptr;
- lapic = vlapic->apic_page;
- bzero(lapic, sizeof (struct LAPIC));
+ /* Reset any timer-related state first */
+ VLAPIC_TIMER_LOCK(vlapic);
+ callout_stop(&vlapic->callout);
+ lapic->icr_timer = 0;
+ lapic->ccr_timer = 0;
+ VLAPIC_TIMER_UNLOCK(vlapic);
+ lapic->dcr_timer = 0;
+ vlapic_dcr_write_handler(vlapic);
+
+ /*
+ * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
+ * it is not leftover after the reset. This is performed after the APIC
+ * timer has been stopped, in case it happened to fire just prior to
+ * being deactivated.
+ */
+ if (vlapic->ops.sync_state) {
+ (*vlapic->ops.sync_state)(vlapic);
+ }
lapic->id = vlapic_get_id(vlapic);
lapic->version = VLAPIC_VERSION;
lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
- lapic->dfr = 0xffffffff;
- lapic->svr = APIC_SVR_VECTOR;
- vlapic_mask_lvts(vlapic);
- lapic->dcr_timer = 0;
- vlapic_dcr_write_handler(vlapic);
+ lapic->tpr = 0;
+ lapic->apr = 0;
+ lapic->ppr = 0;
- if (vlapic->vcpuid == 0)
- vlapic->boot_state = BS_RUNNING; /* BSP */
- else
- vlapic->boot_state = BS_INIT; /* AP */
+#ifdef __ISRVEC_DEBUG
+ /* With the PPR cleared, the isrvec tracking should be reset too */
+ vlapic->isrvec_stk_top = 0;
+#endif
+ lapic->eoi = 0;
+ lapic->ldr = 0;
+ lapic->dfr = 0xffffffff;
+ lapic->svr = APIC_SVR_VECTOR;
vlapic->svr_last = lapic->svr;
+
+ isrptr = &lapic->isr0;
+ tmrptr = &lapic->tmr0;
+ irrptr = &lapic->irr0;
+ for (uint_t i = 0; i < 8; i++) {
+ atomic_store_rel_int(&isrptr[i * 4], 0);
+ atomic_store_rel_int(&tmrptr[i * 4], 0);
+ atomic_store_rel_int(&irrptr[i * 4], 0);
+ }
+
+ lapic->esr = 0;
+ vlapic->esr_pending = 0;
+ lapic->icr_lo = 0;
+ lapic->icr_hi = 0;
+
+ lapic->lvt_cmci = 0;
+ lapic->lvt_timer = 0;
+ lapic->lvt_thermal = 0;
+ lapic->lvt_pcint = 0;
+ lapic->lvt_lint0 = 0;
+ lapic->lvt_lint1 = 0;
+ lapic->lvt_error = 0;
+ vlapic_mask_lvts(vlapic);
}
void
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
index f34cf1ec4b..6072b46101 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
@@ -30,6 +30,7 @@
/*
* Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#ifndef _VLAPIC_H_
@@ -38,6 +39,8 @@
struct vm;
enum x2apic_state;
+void vlapic_reset(struct vlapic *vlapic);
+
int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
uint64_t data);
int vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 69daf3652c..8d739bcfcc 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -137,12 +137,6 @@ do { \
VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \
} while (0)
-enum boot_state {
- BS_INIT,
- BS_SIPI,
- BS_RUNNING
-};
-
/*
* 16 priority levels with at most one vector injected per level.
*/
@@ -182,7 +176,6 @@ struct vlapic {
struct mtx timer_mtx;
uint64_t msr_apicbase;
- enum boot_state boot_state;
/*
* Copies of some registers in the virtual APIC page. We do this for
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 863d31cfeb..a7dbf84061 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -64,18 +64,12 @@ struct vm_object;
struct vm_guest_paging;
struct pmap;
-struct vm_eventinfo {
- uint_t *rptr; /* runblock cookie */
- int *sptr; /* suspend cookie */
- int *iptr; /* reqidle cookie */
-};
-
typedef int (*vmm_init_func_t)(int ipinum);
typedef int (*vmm_cleanup_func_t)(void);
typedef void (*vmm_resume_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip,
- struct pmap *pmap, struct vm_eventinfo *info);
+ struct pmap *pmap);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
@@ -84,7 +78,7 @@ typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
struct seg_desc *desc);
typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
- struct seg_desc *desc);
+ const struct seg_desc *desc);
typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace *(*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
@@ -169,9 +163,13 @@ bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
- struct seg_desc *ret_desc);
+ struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
- struct seg_desc *desc);
+ const struct seg_desc *desc);
+int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state,
+ uint8_t *sipi_vec);
+int vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state,
+ uint8_t sipi_vec);
int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
@@ -180,6 +178,8 @@ void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
+int vm_inject_init(struct vm *vm, int vcpuid);
+int vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vec);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
@@ -195,14 +195,13 @@ struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
struct vie *vm_vie_ctx(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
-void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip);
int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
int rsize);
int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
int wsize);
-void vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip);
#ifdef _SYS__CPUSET_H_
cpuset_t vm_active_cpus(struct vm *vm);
@@ -210,28 +209,9 @@ cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
#endif /* _SYS__CPUSET_H_ */
-static __inline int
-vcpu_runblocked(struct vm_eventinfo *info)
-{
-
- return (*info->rptr != 0);
-}
-
-static __inline int
-vcpu_suspended(struct vm_eventinfo *info)
-{
-
- return (*info->sptr);
-}
-
-static __inline int
-vcpu_reqidle(struct vm_eventinfo *info)
-{
-
- return (*info->iptr);
-}
-
-int vcpu_debugged(struct vm *vm, int vcpuid);
+bool vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip);
+bool vcpu_run_state_pending(struct vm *vm, int vcpuid);
+int vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only);
/*
* Return true if device indicated by bus/slot/func is supposed to be a
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index ed7a97921a..8c9a284cbf 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -109,17 +109,15 @@ struct vlapic;
* (x) initialized before use
*/
struct vcpu {
- struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
+ /* (o) protects state, run_state, hostcpu, sipi_vector */
+ struct mtx mtx;
+
enum vcpu_state state; /* (o) vcpu state */
-#ifndef __FreeBSD__
+ enum vcpu_run_state run_state; /* (i) vcpu init/sipi/run state */
kcondvar_t vcpu_cv; /* (o) cpu waiter cv */
kcondvar_t state_cv; /* (o) IDLE-transition cv */
-#endif /* __FreeBSD__ */
int hostcpu; /* (o) vcpu's current host cpu */
-#ifndef __FreeBSD__
int lastloccpu; /* (o) last host cpu localized to */
-#endif
- uint_t runblock; /* (i) block vcpu from run state */
int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
@@ -130,6 +128,7 @@ struct vcpu {
int exc_vector; /* (x) exception collateral */
int exc_errcode_valid;
uint32_t exc_errcode;
+ uint8_t sipi_vector; /* (i) SIPI vector */
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
@@ -200,15 +199,6 @@ struct vm {
uint16_t maxcpus; /* (o) max pluggable cpus */
struct ioport_config ioports; /* (o) ioport handling */
-
- bool sipi_req; /* (i) SIPI requested */
- int sipi_req_vcpu; /* (i) SIPI destination */
- uint64_t sipi_req_rip; /* (i) SIPI start %rip */
-
- /* Miscellaneous VM-wide statistics and counters */
- struct vm_wide_stats {
- uint64_t sipi_supersede;
- } stats;
};
static int vmm_initialized;
@@ -249,8 +239,8 @@ static struct vmm_ops *ops = &vmm_ops_null;
#define VMM_RESUME() ((*ops->resume)())
#define VMINIT(vm, pmap) ((*ops->vminit)(vm, pmap))
-#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
- ((*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo))
+#define VMRUN(vmi, vcpu, rip, pmap) \
+ ((*ops->vmrun)(vmi, vcpu, rip, pmap))
#define VMCLEANUP(vmi) ((*ops->vmcleanup)(vmi))
#define VMSPACE_ALLOC(min, max) ((*ops->vmspace_alloc)(min, max))
#define VMSPACE_FREE(vmspace) ((*ops->vmspace_free)(vmspace))
@@ -292,6 +282,8 @@ static int trace_guest_exceptions;
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
+static bool vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid);
+static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector);
#ifndef __FreeBSD__
static void vm_clear_memseg(struct vm *, int);
@@ -370,9 +362,9 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
}
+ vcpu->run_state = VRS_HALT;
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
- vcpu->runblock = 0;
vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
@@ -1233,7 +1225,7 @@ vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
}
int
-vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
+vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
{
if (vcpu < 0 || vcpu >= vm->maxcpus)
return (EINVAL);
@@ -1244,6 +1236,49 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
+int
+vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
+ return (EINVAL);
+ }
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ *state = vcpu->run_state;
+ *sipi_vec = vcpu->sipi_vector;
+ vcpu_unlock(vcpu);
+
+ return (0);
+}
+
+int
+vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
+ return (EINVAL);
+ }
+ if (!VRS_IS_VALID(state)) {
+ return (EINVAL);
+ }
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ vcpu->run_state = state;
+ vcpu->sipi_vector = sipi_vec;
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+
+ return (0);
+}
+
+
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
@@ -1354,16 +1389,6 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
break;
}
- if (newstate == VCPU_RUNNING) {
- while (vcpu->runblock != 0) {
-#ifdef __FreeBSD__
- msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
-#else
- cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
-#endif
- }
- }
-
if (error)
return (EBUSY);
@@ -1376,8 +1401,7 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
else
vcpu->hostcpu = NOCPU;
- if (newstate == VCPU_IDLE ||
- (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
+ if (newstate == VCPU_IDLE) {
#ifdef __FreeBSD__
wakeup(&vcpu->state);
#else
@@ -1413,12 +1437,8 @@ static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
{
struct vcpu *vcpu;
-#ifdef __FreeBSD__
- const char *wmesg;
-#else
- const char *wmesg __unused;
-#endif
int t, vcpu_halted, vm_halted;
+ bool userspace_exit = false;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1429,18 +1449,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
vcpu_lock(vcpu);
while (1) {
/*
- * Do a final check for pending NMI or interrupts before
- * really putting this thread to sleep. Also check for
- * software events that would cause this vcpu to wakeup.
- *
- * These interrupts/events could have happened after the
- * vcpu returned from VMRUN() and before it acquired the
- * vcpu lock above.
+ * Do a final check for pending interrupts (including NMI and
+ * INIT) before putting this thread to sleep.
*/
- if (vm->suspend || vcpu->reqidle)
- break;
if (vm_nmi_pending(vm, vcpuid))
break;
+ if (vcpu_run_state_pending(vm, vcpuid))
+ break;
if (!intr_disabled) {
if (vm_extint_pending(vm, vcpuid) ||
vlapic_pending_intr(vcpu->vlapic, NULL)) {
@@ -1448,12 +1463,15 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
}
}
- /* Don't go to sleep if the vcpu thread needs to yield */
- if (vcpu_should_yield(vm, vcpuid))
- break;
-
- if (vcpu_debugged(vm, vcpuid))
+ /*
+ * Also check for software events which would cause a wake-up.
+ * This will set the appropriate exitcode directly, rather than
+ * requiring a trip through VM_RUN().
+ */
+ if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
+ userspace_exit = true;
break;
+ }
/*
* Some Linux guests implement "halt" by having all vcpus
@@ -1462,8 +1480,6 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
* vcpus enter the halted state the virtual machine is halted.
*/
if (intr_disabled) {
- wmesg = "vmhalt";
- VCPU_CTR0(vm, vcpuid, "Halted");
if (!vcpu_halted && halt_detection_enabled) {
vcpu_halted = 1;
CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
@@ -1472,25 +1488,11 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
vm_halted = 1;
break;
}
- } else {
- wmesg = "vmidle";
}
t = ticks;
vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
-#ifdef __FreeBSD__
- /*
- * XXX msleep_spin() cannot be interrupted by signals so
- * wake up periodically to check pending signals.
- */
- msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
-#else
- /*
- * Fortunately, cv_wait_sig can be interrupted by signals, so
- * there is no need to periodically wake up.
- */
(void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
-#endif
vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
@@ -1503,7 +1505,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
if (vm_halted)
vm_suspend(vm, VM_SUSPEND_HALT);
- return (0);
+ return (userspace_exit ? -1 : 0);
}
static int
@@ -1832,6 +1834,62 @@ vm_handle_reqidle(struct vm *vm, int vcpuid)
return (-1);
}
+static int
+vm_handle_run_state(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ bool handled = false;
+
+ vcpu_lock(vcpu);
+ while (1) {
+ if ((vcpu->run_state & VRS_PEND_INIT) != 0) {
+ vcpu_unlock(vcpu);
+ VERIFY0(vcpu_arch_reset(vm, vcpuid, true));
+ vcpu_lock(vcpu);
+
+ vcpu->run_state &= ~(VRS_RUN | VRS_PEND_INIT);
+ vcpu->run_state |= VRS_INIT;
+ }
+
+ if ((vcpu->run_state & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
+ (VRS_INIT | VRS_PEND_SIPI)) {
+ const uint8_t vector = vcpu->sipi_vector;
+
+ vcpu_unlock(vcpu);
+ VERIFY0(vcpu_vector_sipi(vm, vcpuid, vector));
+ vcpu_lock(vcpu);
+
+ vcpu->run_state &= ~VRS_PEND_SIPI;
+ vcpu->run_state |= VRS_RUN;
+ }
+
+ /*
+ * If the vCPU is now in the running state, there is no need to
+ * wait for anything prior to re-entry.
+ */
+ if ((vcpu->run_state & VRS_RUN) != 0) {
+ handled = true;
+ break;
+ }
+
+ /*
+ * Also check for software events which would cause a wake-up.
+ * This will set the appropriate exitcode directly, rather than
+ * requiring a trip through VM_RUN().
+ */
+ if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
+ break;
+ }
+
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+ (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ return (handled ? 0 : -1);
+}
+
#ifndef __FreeBSD__
static int
vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
@@ -1850,18 +1908,6 @@ vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
}
#endif /* __FreeBSD__ */
-void
-vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
-{
- if (vm->sipi_req) {
- /* This should never occur if userspace is doing its job. */
- vm->stats.sipi_supersede++;
- }
- vm->sipi_req = true;
- vm->sipi_req_vcpu = req_vcpuid;
- vm->sipi_req_rip = req_rip;
-}
-
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
@@ -1890,66 +1936,17 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how)
}
void
-vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
-{
- struct vm_exit *vmexit;
-
- KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
- ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_SUSPENDED;
- vmexit->u.suspended.how = vm->suspend;
-}
-
-void
-vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
+vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->rip = rip;
vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_DEBUG;
+ vmexit->exitcode = VM_EXITCODE_RUN_STATE;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_RUN_STATE, 1);
}
-void
-vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
-{
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
- vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
-}
-
-void
-vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
-{
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_REQIDLE;
- vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
-}
-
-void
-vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
-{
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_BOGUS;
- vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
-}
#ifndef __FreeBSD__
/*
@@ -2072,7 +2069,7 @@ vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry,
case VEC_DISCARD_INSTR:
vie_reset(vie);
return (0);
- case VEC_COMPLETE_MMIO:
+ case VEC_FULFILL_MMIO:
err = vie_fulfill_mmio(vie, &entry->u.mmio);
if (err == 0) {
err = vie_emulate_mmio(vie, vm, vcpuid);
@@ -2091,7 +2088,7 @@ vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry,
}
}
break;
- case VEC_COMPLETE_INOUT:
+ case VEC_FULFILL_INOUT:
err = vie_fulfill_inout(vie, &entry->u.inout);
if (err == 0) {
err = vie_emulate_inout(vie, vm, vcpuid);
@@ -2132,25 +2129,12 @@ vm_loop_checks(struct vm *vm, int vcpuid, struct vm_exit *vme)
return (-1);
}
- if (vcpuid == 0 && vm->sipi_req) {
- /* The boot vCPU has sent a SIPI to one of the other CPUs */
- vme->exitcode = VM_EXITCODE_SPINUP_AP;
- vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
- vme->u.spinup_ap.rip = vm->sipi_req_rip;
-
- vm->sipi_req = false;
- vm->sipi_req_vcpu = 0;
- vm->sipi_req_rip = 0;
- return (-1);
- }
-
return (0);
}
int
vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
{
- struct vm_eventinfo evinfo;
int error;
struct vcpu *vcpu;
#ifdef __FreeBSD__
@@ -2177,9 +2161,6 @@ vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- evinfo.rptr = &vcpu->runblock;
- evinfo.sptr = &vm->suspend;
- evinfo.iptr = &vcpu->reqidle;
#ifndef __FreeBSD__
vtc.vtc_vm = vm;
@@ -2242,7 +2223,7 @@ restart:
#endif
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
+ error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
#ifdef __FreeBSD__
@@ -2273,6 +2254,9 @@ restart:
case VM_EXITCODE_REQIDLE:
error = vm_handle_reqidle(vm, vcpuid);
break;
+ case VM_EXITCODE_RUN_STATE:
+ error = vm_handle_run_state(vm, vcpuid);
+ break;
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid);
break;
@@ -2280,8 +2264,6 @@ restart:
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
break;
- case VM_EXITCODE_RUNBLOCK:
- break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
error = vm_handle_hlt(vm, vcpuid, intr_disabled);
@@ -2792,6 +2774,196 @@ vm_extint_clear(struct vm *vm, int vcpuid)
}
int
+vm_inject_init(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu_lock(vcpu);
+ vcpu->run_state |= VRS_PEND_INIT;
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+ return (0);
+}
+
+int
+vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vector)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu_lock(vcpu);
+ vcpu->run_state |= VRS_PEND_SIPI;
+ vcpu->sipi_vector = vector;
+ /* SIPI is only actionable if the CPU is waiting in INIT state */
+ if ((vcpu->run_state & (VRS_INIT | VRS_RUN)) == VRS_INIT) {
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ }
+ vcpu_unlock(vcpu);
+ return (0);
+}
+
+bool
+vcpu_run_state_pending(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
+ vcpu = &vm->vcpu[vcpuid];
+
+ /* Of interest: vCPU not in running state or with pending INIT */
+ return ((vcpu->run_state & (VRS_RUN | VRS_PEND_INIT)) != VRS_RUN);
+}
+
+int
+vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
+{
+ struct seg_desc desc;
+ const enum vm_reg_name clear_regs[] = {
+ VM_REG_GUEST_CR2,
+ VM_REG_GUEST_CR3,
+ VM_REG_GUEST_CR4,
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15,
+ VM_REG_GUEST_DR0,
+ VM_REG_GUEST_DR1,
+ VM_REG_GUEST_DR2,
+ VM_REG_GUEST_DR3,
+ VM_REG_GUEST_EFER,
+ };
+ const enum vm_reg_name data_segs[] = {
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ };
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ for (uint_t i = 0; i < nitems(clear_regs); i++) {
+ VERIFY0(vm_set_register(vm, vcpuid, clear_regs[i], 0));
+ }
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 2));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0xfff0));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, 0x60000010));
+
+ /*
+ * The prescribed contents of %rdx differ slightly between the Intel and
+ * AMD architectural definitions. The former expects the Extended Model
+ * in bits 16-19 where the latter expects all the Family, Model, and
+ * Stepping to be there. Common boot ROMs appear to disregard this
+ * anyway, so we stick with a compromise value similar to what is
+ * spelled out in the Intel SDM.
+ */
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RDX, 0x600));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR6, 0xffff0ff0));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR7, 0x400));
+
+ /* CS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = 0xffff0000;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS, 0xf000));
+
+ /* SS, DS, ES, FS, GS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ for (uint_t i = 0; i < nitems(data_segs); i++) {
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, data_segs[i], &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, data_segs[i], 0));
+ }
+
+ /* GDTR, IDTR */
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_GDTR, &desc));
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_IDTR, &desc));
+
+ /* LDTR: Present, LDT */
+ desc.access = 0x0082;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_LDTR, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_LDTR, 0));
+
+ /* TR: Present, 32-bit TSS */
+ desc.access = 0x008b;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_TR, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_TR, 0));
+
+ vlapic_reset(vm_lapic(vm, vcpuid));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0));
+
+ vcpu->exitintinfo = 0;
+ vcpu->exception_pending = 0;
+ vcpu->nmi_pending = 0;
+ vcpu->extint_pending = 0;
+
+ /*
+ * A CPU reset caused by power-on or system reset clears more state than
+ * one which is triggered from an INIT IPI.
+ */
+ if (!init_only) {
+ vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
+ fpu_save_area_reset(vcpu->guestfpu);
+
+ /* XXX: clear MSRs and other pieces */
+ }
+
+ return (0);
+}
+
+static int
+vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector)
+{
+ struct seg_desc desc;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ /* CS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = (uint64_t)vector << 12;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS,
+ (uint64_t)vector << 8));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0));
+
+ return (0);
+}
+
+int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
if (vcpu < 0 || vcpu >= vm->maxcpus)
@@ -2894,7 +3066,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
- panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
+ panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
@@ -2912,7 +3084,7 @@ vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
- panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
+ panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
@@ -2925,54 +3097,6 @@ vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
return (state);
}
-void
-vcpu_block_run(struct vm *vm, int vcpuid)
-{
- struct vcpu *vcpu;
-
- if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
- panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
-
- vcpu = &vm->vcpu[vcpuid];
-
- vcpu_lock(vcpu);
- vcpu->runblock++;
- if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
- vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
- }
- while (vcpu->state == VCPU_RUNNING) {
-#ifdef __FreeBSD__
- msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
-#else
- cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
-#endif
- }
- vcpu_unlock(vcpu);
-}
-
-void
-vcpu_unblock_run(struct vm *vm, int vcpuid)
-{
- struct vcpu *vcpu;
-
- if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
- panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
-
- vcpu = &vm->vcpu[vcpuid];
-
- vcpu_lock(vcpu);
- KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
- vcpu->runblock--;
- if (vcpu->runblock == 0) {
-#ifdef __FreeBSD__
- wakeup(&vcpu->state);
-#else
- cv_broadcast(&vcpu->state_cv);
-#endif
- }
- vcpu_unlock(vcpu);
-}
-
#ifndef __FreeBSD__
uint64_t
vcpu_tsc_offset(struct vm *vm, int vcpuid)
@@ -3038,11 +3162,93 @@ vm_resume_cpu(struct vm *vm, int vcpuid)
return (0);
}
-int
-vcpu_debugged(struct vm *vm, int vcpuid)
+static bool
+vcpu_bailout_checks(struct vm *vm, int vcpuid, bool on_entry,
+ uint64_t entry_rip)
{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ struct vm_exit *vme = &vcpu->exitinfo;
+ bool bail = false;
- return (CPU_ISSET(vcpuid, &vm->debug_cpus));
+ ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
+
+ if (vm->suspend) {
+ if (on_entry) {
+ VERIFY(vm->suspend > VM_SUSPEND_NONE &&
+ vm->suspend < VM_SUSPEND_LAST);
+
+ vme->exitcode = VM_EXITCODE_SUSPENDED;
+ vme->u.suspended.how = vm->suspend;
+ } else {
+ /*
+ * Handling VM suspend is complicated, so if that
+ * condition is detected outside of VM-entry itself,
+ * just emit a BOGUS exitcode so we take a lap to pick
+ * up the event during an entry and are directed into
+ * the vm_handle_suspend() logic.
+ */
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ }
+ bail = true;
+ }
+ if (vcpu->reqidle) {
+ vme->exitcode = VM_EXITCODE_REQIDLE;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
+
+ if (!on_entry) {
+ /*
+ * A reqidle request detected outside of VM-entry can be
+ * handled directly by clearing the request (and taking
+ * a lap to userspace).
+ */
+ vcpu_assert_locked(vcpu);
+ vcpu->reqidle = 0;
+ }
+ bail = true;
+ }
+ if (vcpu_should_yield(vm, vcpuid)) {
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
+ bail = true;
+ }
+ if (CPU_ISSET(vcpuid, &vm->debug_cpus)) {
+ vme->exitcode = VM_EXITCODE_DEBUG;
+ bail = true;
+ }
+
+ if (bail) {
+ if (on_entry) {
+ /*
+ * If bailing out during VM-entry, the current %rip must
+ * be recorded in the exitinfo.
+ */
+ vme->rip = entry_rip;
+ }
+ vme->inst_length = 0;
+ }
+ return (bail);
+}
+
+static bool
+vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid)
+{
+ /*
+ * Bail-out checks done prior to sleeping (in vCPU contexts like HLT or
+ * wait-for-SIPI) expect that %rip is already populated in the vm_exit
+ * structure, and we would only modify the exitcode.
+ */
+ return (vcpu_bailout_checks(vm, vcpuid, false, 0));
+}
+
+bool
+vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip)
+{
+ /*
+ * Bail-out checks done as part of VM entry require an updated %rip to
+ * populate the vm_exit struct if any of the conditions of interest are
+ * matched in the check.
+ */
+ return (vcpu_bailout_checks(vm, vcpuid, true, rip));
}
cpuset_t
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index a2047c2934..cebcaf0fdb 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -443,6 +443,9 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_RESTART_INSTRUCTION:
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV:
+ case VM_RESET_CPU:
+ case VM_GET_RUN_STATE:
+ case VM_SET_RUN_STATE:
/*
* Copy in the ID of the vCPU chosen for this operation.
* Since a nefarious caller could update their struct between
@@ -989,6 +992,45 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
break;
}
+ case VM_RESET_CPU: {
+ struct vm_vcpu_reset vvr;
+
+ if (ddi_copyin(datap, &vvr, sizeof (vvr), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (vvr.kind != VRK_RESET && vvr.kind != VRK_INIT) {
+ error = EINVAL;
+ }
+
+ error = vcpu_arch_reset(sc->vmm_vm, vcpu, vvr.kind == VRK_INIT);
+ break;
+ }
+ case VM_GET_RUN_STATE: {
+ struct vm_run_state vrs;
+
+ bzero(&vrs, sizeof (vrs));
+ error = vm_get_run_state(sc->vmm_vm, vcpu, &vrs.state,
+ &vrs.sipi_vector);
+ if (error == 0) {
+ if (ddi_copyout(&vrs, datap, sizeof (vrs), md)) {
+ error = EFAULT;
+ break;
+ }
+ }
+ break;
+ }
+ case VM_SET_RUN_STATE: {
+ struct vm_run_state vrs;
+
+ if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
+ error = EFAULT;
+ break;
+ }
+ error = vm_set_run_state(sc->vmm_vm, vcpu, vrs.state,
+ vrs.sipi_vector);
+ break;
+ }
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV: {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
index 26e3573bc9..da38bb7de5 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
@@ -167,6 +167,5 @@ VMM_STAT(VMEXIT_MMIO_EMUL, "vm exits for mmio emulation");
VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
-VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
-VMM_STAT(VMEXIT_RUNBLOCK, "number of times runblock at exit");
VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
+VMM_STAT(VMEXIT_RUN_STATE, "number of vm exits due to run_state change");
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.h b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
index 68e43c7bfc..2975a4a914 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
@@ -165,8 +165,7 @@ VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
VMM_STAT_DECLARE(VMEXIT_MMIO_EMUL);
VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
-VMM_STAT_DECLARE(VMEXIT_USERSPACE);
-VMM_STAT_DECLARE(VMEXIT_RUNBLOCK);
VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
VMM_STAT_DECLARE(VMEXIT_REQIDLE);
+VMM_STAT_DECLARE(VMEXIT_RUN_STATE);
#endif
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index c6859a3c00..65fdb19349 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -217,9 +217,9 @@ enum vm_exitcode {
VM_EXITCODE_PAUSE,
VM_EXITCODE_PAGING,
VM_EXITCODE_INST_EMUL,
- VM_EXITCODE_SPINUP_AP,
+ VM_EXITCODE_RUN_STATE,
VM_EXITCODE_MMIO_EMUL,
- VM_EXITCODE_RUNBLOCK,
+ VM_EXITCODE_DEPRECATED, /* formerly RUNBLOCK */
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_MMIO,
@@ -287,6 +287,18 @@ struct vm_task_switch {
struct vm_guest_paging paging;
};
+enum vcpu_run_state {
+ VRS_HALT = 0,
+ VRS_INIT = (1 << 0),
+ VRS_RUN = (1 << 1),
+
+ VRS_PEND_INIT = (1 << 14),
+ VRS_PEND_SIPI = (1 << 15),
+};
+#define VRS_MASK_VALID(v) \
+ ((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
+#define VRS_IS_VALID(v) ((v) == VRS_MASK_VALID(v))
+
struct vm_exit {
enum vm_exitcode exitcode;
int inst_length; /* 0 means unknown */
@@ -348,10 +360,6 @@ struct vm_exit {
uint64_t wval;
} msr;
struct {
- int vcpu;
- uint64_t rip;
- } spinup_ap;
- struct {
uint64_t rflags;
} hlt;
struct {
@@ -367,8 +375,8 @@ struct vm_exit {
enum vm_entry_cmds {
VEC_DEFAULT = 0,
VEC_DISCARD_INSTR, /* discard inst emul state */
- VEC_COMPLETE_MMIO, /* entry includes result for mmio emul */
- VEC_COMPLETE_INOUT, /* entry includes result for inout emul */
+ VEC_FULFILL_MMIO, /* entry includes result for mmio emul */
+ VEC_FULFILL_INOUT, /* entry includes result for inout emul */
};
struct vm_entry {
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index c894c6aeb0..f5d031bfd4 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -259,6 +259,28 @@ struct vm_readwrite_kernemu_device {
};
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
+enum vcpu_reset_kind {
+ VRK_RESET = 0,
+ /*
+ * The reset performed by an INIT IPI clears much of the CPU state, but
+ * some portions are left untouched, unlike VRK_RESET, which represents
+ * a "full" reset as if the system was freshly powered on.
+ */
+ VRK_INIT = 1,
+};
+
+struct vm_vcpu_reset {
+ int vcpuid;
+ uint32_t kind; /* contains: enum vcpu_reset_kind */
+};
+
+struct vm_run_state {
+ int vcpuid;
+ uint32_t state; /* contains: enum vcpu_run_state */
+ uint8_t sipi_vector; /* vector of SIPI, if any */
+ uint8_t _pad[3];
+};
+
#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))
#define VMM_IOC_BASE (('v' << 16) | ('m' << 8))
#define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8))
@@ -291,6 +313,9 @@ _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
#define VM_RESTART_INSTRUCTION (VMM_CPU_IOC_BASE | 0x13)
#define VM_SET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x14)
#define VM_GET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x15)
+#define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16)
+#define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17)
+#define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18)
/* Operations requiring write-locking the VM */
#define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01)