Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/boot/Makefile.version                 3
-rw-r--r--  usr/src/boot/sys/boot/common/gfx_fb.c         5
-rw-r--r--  usr/src/boot/sys/boot/common/gfx_fb.h         6
-rw-r--r--  usr/src/common/ficl/emu/gfx_fb.h              6
-rw-r--r--  usr/src/common/ficl/loader.c                  7
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm.c          504
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/vmcb.h          12
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c        720
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.h          2
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vioapic.c       151
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.c        200
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.h          9
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h    20
-rw-r--r--  usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h    28
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm.c               50
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_lapic.c          5
-rw-r--r--  usr/src/uts/i86pc/sys/vmm.h                   1
17 files changed, 809 insertions(+), 920 deletions(-)
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version
index baade5b6c4..1d116dda12 100644
--- a/usr/src/boot/Makefile.version
+++ b/usr/src/boot/Makefile.version
@@ -11,6 +11,7 @@
#
# Copyright 2016 Toomas Soome <tsoome@me.com>
+# Copyright 2020 RackTop Systems, Inc.
#
#
@@ -33,4 +34,4 @@ LOADER_VERSION = 1.1
# Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
# The version is processed from left to right, the version number can only
# be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2020.11.14.1
+BOOT_VERSION = $(LOADER_VERSION)-2020.11.25.1
diff --git a/usr/src/boot/sys/boot/common/gfx_fb.c b/usr/src/boot/sys/boot/common/gfx_fb.c
index be50d384f7..56314566d6 100644
--- a/usr/src/boot/sys/boot/common/gfx_fb.c
+++ b/usr/src/boot/sys/boot/common/gfx_fb.c
@@ -12,6 +12,7 @@
/*
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1194,10 +1195,6 @@ gfx_term_drawrect(uint32_t ux1, uint32_t uy1, uint32_t ux2, uint32_t uy2)
gfx_fb_bezier(x1, y1 - i, x2 + i, y1 - i, x2 + i, y2, width-i);
}
-#define FL_PUTIMAGE_BORDER 0x1
-#define FL_PUTIMAGE_NOSCROLL 0x2
-#define FL_PUTIMAGE_DEBUG 0x80
-
int
gfx_fb_putimage(png_t *png, uint32_t ux1, uint32_t uy1, uint32_t ux2,
uint32_t uy2, uint32_t flags)
diff --git a/usr/src/boot/sys/boot/common/gfx_fb.h b/usr/src/boot/sys/boot/common/gfx_fb.h
index e242931a44..04bbd91121 100644
--- a/usr/src/boot/sys/boot/common/gfx_fb.h
+++ b/usr/src/boot/sys/boot/common/gfx_fb.h
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Toomas Soome <tsoome@me.com>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _GFX_FB_H
@@ -144,6 +145,11 @@ void gfx_fb_line(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_bezier(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t);
void plat_cons_update_mode(int);
+
+#define FL_PUTIMAGE_BORDER 0x1
+#define FL_PUTIMAGE_NOSCROLL 0x2
+#define FL_PUTIMAGE_DEBUG 0x80
+
int gfx_fb_putimage(png_t *, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
bool gfx_parse_mode_str(char *, int *, int *, int *);
diff --git a/usr/src/common/ficl/emu/gfx_fb.h b/usr/src/common/ficl/emu/gfx_fb.h
index 4dfb386f5b..8499bff455 100644
--- a/usr/src/common/ficl/emu/gfx_fb.h
+++ b/usr/src/common/ficl/emu/gfx_fb.h
@@ -11,6 +11,7 @@
/*
 * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _GFX_FB_H
@@ -59,6 +60,11 @@ void gfx_term_drawrect(uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_line(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_bezier(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t);
+
+#define FL_PUTIMAGE_BORDER 0x1
+#define FL_PUTIMAGE_NOSCROLL 0x2
+#define FL_PUTIMAGE_DEBUG 0x80
+
int gfx_fb_putimage(png_t *, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
#ifdef __cplusplus
diff --git a/usr/src/common/ficl/loader.c b/usr/src/common/ficl/loader.c
index f1a1827eb0..c41c86c7c2 100644
--- a/usr/src/common/ficl/loader.c
+++ b/usr/src/common/ficl/loader.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2000 Daniel Capo Sobral
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 RackTop Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,6 +80,7 @@ ficl_fb_putimage(ficlVm *pVM)
ficlInteger ret = FICL_FALSE;
uint32_t x1, y1, x2, y2, f;
png_t png;
+ int error;
FICL_STACK_CHECK(ficlVmGetDataStack(pVM), 7, 1);
@@ -96,7 +98,10 @@ ficl_fb_putimage(ficlVm *pVM)
(void) strncpy(name, namep, names);
name[names] = '\0';
- if (png_open(&png, name) == PNG_NO_ERROR) {
+ if ((error = png_open(&png, name)) != PNG_NO_ERROR) {
+ if (f & FL_PUTIMAGE_DEBUG)
+ printf("%s\n", png_error_string(error));
+ } else {
if (gfx_fb_putimage(&png, x1, y1, x2, y2, f) == 0)
ret = FICL_TRUE; /* success */
(void) png_close(&png);
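
The loader.c change above only reports a failed png_open() when the caller passed FL_PUTIMAGE_DEBUG, which is why the flag definitions had to move into the shared gfx_fb.h headers. Below is a minimal standalone sketch of that debug-gated error-reporting pattern; the png_open_stub()/png_error_string_stub() helpers are placeholders standing in for the real PNG routines, not the actual loader interfaces.

/* Standalone model of the debug-gated error reporting added in loader.c. */
#include <stdio.h>

#define	FL_PUTIMAGE_DEBUG	0x80
#define	PNG_NO_ERROR		0

/* Stubs standing in for the real png_open()/png_error_string(). */
static int
png_open_stub(const char *name)
{
	(void) name;
	return (1);		/* pretend the open failed */
}

static const char *
png_error_string_stub(int error)
{
	return (error == PNG_NO_ERROR ? "no error" : "file not found");
}

static void
putimage_example(const char *name, unsigned int flags)
{
	int error;

	if ((error = png_open_stub(name)) != PNG_NO_ERROR) {
		/* Only chatty when the caller asked for debug output. */
		if (flags & FL_PUTIMAGE_DEBUG)
			printf("%s\n", png_error_string_stub(error));
		return;
	}
	/* ... draw the image and png_close() here ... */
}

int
main(void)
{
	putimage_example("/boot/splash.png", FL_PUTIMAGE_DEBUG);
	return (0);
}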
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index dd9d5a55a8..8c12f4ba04 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -903,67 +903,6 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
-#ifdef KTR
-static const char *
-intrtype_to_str(int intr_type)
-{
- switch (intr_type) {
- case VMCB_EVENTINJ_TYPE_INTR:
- return ("hwintr");
- case VMCB_EVENTINJ_TYPE_NMI:
- return ("nmi");
- case VMCB_EVENTINJ_TYPE_INTn:
- return ("swintr");
- case VMCB_EVENTINJ_TYPE_EXCEPTION:
- return ("exception");
- default:
- panic("%s: unknown intr_type %d", __func__, intr_type);
- }
-}
-#endif
-
-/*
- * Inject an event to vcpu as described in section 15.20, "Event injection".
- */
-static void
-svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector,
- uint32_t error, bool ec_valid)
-{
- struct vmcb_ctrl *ctrl;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
-
- KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0,
- ("%s: event already pending %lx", __func__, ctrl->eventinj));
-
- KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d",
- __func__, vector));
-
- switch (intr_type) {
- case VMCB_EVENTINJ_TYPE_INTR:
- case VMCB_EVENTINJ_TYPE_NMI:
- case VMCB_EVENTINJ_TYPE_INTn:
- break;
- case VMCB_EVENTINJ_TYPE_EXCEPTION:
- if (vector >= 0 && vector <= 31 && vector != 2)
- break;
- /* FALLTHROUGH */
- default:
- panic("%s: invalid intr_type/vector: %d/%d", __func__,
- intr_type, vector);
- }
- ctrl->eventinj = vector | (intr_type << 8) | VMCB_EVENTINJ_VALID;
- if (ec_valid) {
- ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
- ctrl->eventinj |= (uint64_t)error << 32;
- VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %x",
- intrtype_to_str(intr_type), vector, error);
- } else {
- VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d",
- intrtype_to_str(intr_type), vector);
- }
-}
-
static void
svm_update_virqinfo(struct svm_softc *sc, int vcpu)
{
@@ -984,7 +923,7 @@ svm_update_virqinfo(struct svm_softc *sc, int vcpu)
}
static void
-svm_save_intinfo(struct svm_softc *svm_sc, int vcpu)
+svm_save_exitintinfo(struct svm_softc *svm_sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
uint64_t intinfo;
@@ -1014,12 +953,14 @@ vintr_intercept_enabled(struct svm_softc *sc, int vcpu)
VMCB_INTCPT_VINTR));
}
-static __inline void
-enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
+static void
+svm_enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
+ struct vmcb_state *state;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ state = svm_get_vmcb_state(sc, vcpu);
if ((ctrl->v_irq & V_IRQ) != 0 && ctrl->v_intr_vector == 0) {
KASSERT(ctrl->v_intr_prio & V_IGN_TPR,
@@ -1029,6 +970,17 @@ enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
return;
}
+ /*
+ * We use V_IRQ in conjunction with the VINTR intercept to trap into the
+ * hypervisor as soon as a virtual interrupt can be delivered.
+ *
+ * Since injected events are not subject to intercept checks we need to
+ * ensure that the V_IRQ is not actually going to be delivered on VM
+ * entry.
+ */
+ VERIFY((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
+ (state->rflags & PSL_I) == 0 || ctrl->intr_shadow);
+
VCPU_CTR0(sc->vm, vcpu, "Enable intr window exiting");
ctrl->v_irq |= V_IRQ;
ctrl->v_intr_prio |= V_IGN_TPR;
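
The VERIFY added above encodes the rule that V_IRQ may only be asserted as a window-exit trigger while the fake virtual interrupt cannot actually be consumed on VM entry: an event is already queued in EVENTINJ, the guest has interrupts masked, or an interrupt shadow is active. A small standalone model of that predicate follows; the struct and constants are simplified stand-ins for the VMCB state/control fields, with the EVENTINJ valid bit taken as bit 31 per the APM.

#include <stdbool.h>
#include <stdint.h>

#define	PSL_I			0x00000200ULL	/* rflags.IF */
#define	VMCB_EVENTINJ_VALID	(1ULL << 31)

/* Simplified stand-ins for the relevant VMCB state/control fields. */
struct vmcb_model {
	uint64_t eventinj;
	uint64_t rflags;
	uint64_t intr_shadow;
};

/*
 * True when it is safe to assert V_IRQ purely as a window-exit trigger:
 * the guest must not be able to consume the bogus vintr on VM entry.
 */
static bool
intr_window_arm_ok(const struct vmcb_model *v)
{
	return ((v->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
	    (v->rflags & PSL_I) == 0 ||
	    v->intr_shadow != 0);
}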
@@ -1037,8 +989,8 @@ enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
-static __inline void
-disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
+static void
+svm_disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
@@ -1063,30 +1015,18 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
* to track when the vcpu is done handling the NMI.
*/
static int
-nmi_blocked(struct svm_softc *sc, int vcpu)
+svm_nmi_blocked(struct svm_softc *sc, int vcpu)
{
- int blocked;
-
- blocked = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
- VMCB_INTCPT_IRET);
- return (blocked);
-}
-
-static void
-enable_nmi_blocking(struct svm_softc *sc, int vcpu)
-{
-
- KASSERT(!nmi_blocked(sc, vcpu), ("vNMI already blocked"));
- VCPU_CTR0(sc->vm, vcpu, "vNMI blocking enabled");
- svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
+ return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
+ VMCB_INTCPT_IRET));
}
static void
-clear_nmi_blocking(struct svm_softc *sc, int vcpu)
+svm_clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
- KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
+ KASSERT(svm_nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared");
/*
* When the IRET intercept is cleared the vcpu will attempt to execute
@@ -1102,13 +1042,80 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu)
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
/*
- * Set 'intr_shadow' to prevent an NMI from being injected on the
- * immediate VMRUN.
+ * Set an interrupt shadow to prevent an NMI from being immediately
+ * injected on the next VMRUN.
*/
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
ctrl->intr_shadow = 1;
}
+static void
+svm_inject_event(struct svm_softc *sc, int vcpu, uint64_t intinfo)
+{
+ struct vmcb_ctrl *ctrl;
+ uint8_t vector;
+ uint32_t evtype;
+
+ ASSERT(VMCB_EXITINTINFO_VALID(intinfo));
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ vector = VMCB_EXITINTINFO_VECTOR(intinfo);
+ evtype = VMCB_EXITINTINFO_TYPE(intinfo);
+
+ switch (evtype) {
+ case VMCB_EVENTINJ_TYPE_INTR:
+ case VMCB_EVENTINJ_TYPE_NMI:
+ case VMCB_EVENTINJ_TYPE_INTn:
+ break;
+ case VMCB_EVENTINJ_TYPE_EXCEPTION:
+ VERIFY(vector <= 31);
+ /*
+ * NMIs are expected to be injected with VMCB_EVENTINJ_TYPE_NMI,
+ * rather than as an exception with the NMI vector.
+ */
+ VERIFY(vector != 2);
+ break;
+ default:
+ panic("unexpected event type %x", evtype);
+ }
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | evtype | vector;
+ if (VMCB_EXITINTINFO_EC_VALID(intinfo)) {
+ ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
+ ctrl->eventinj |= (uint64_t)VMCB_EXITINTINFO_EC(intinfo) << 32;
+ }
+}
+
+static void
+svm_inject_nmi(struct svm_softc *sc, int vcpu)
+{
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ ASSERT(!svm_nmi_blocked(sc, vcpu));
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | VMCB_EVENTINJ_TYPE_NMI;
+ vm_nmi_clear(sc->vm, vcpu);
+
+ /*
+ * Virtual NMI blocking is now in effect.
+ *
+ * Not only does this block a subsequent NMI injection from taking
+ * place, it also configures an intercept on the IRET so we can track
+ * when the next injection can take place.
+ */
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
+}
+
+static void
+svm_inject_irq(struct svm_softc *sc, int vcpu, int vector)
+{
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ ASSERT(vector >= 0 && vector <= 255);
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | vector;
+}
+
#define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL
static int
@@ -1335,7 +1342,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
vmexit->inst_length, code, info1, info2));
svm_update_virqinfo(svm_sc, vcpu);
- svm_save_intinfo(svm_sc, vcpu);
+ svm_save_exitintinfo(svm_sc, vcpu);
switch (code) {
case VMCB_EXIT_IRET:
@@ -1343,11 +1350,12 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
* Restart execution at "iret" but with the intercept cleared.
*/
vmexit->inst_length = 0;
- clear_nmi_blocking(svm_sc, vcpu);
+ svm_clear_nmi_blocking(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR: /* interrupt window exiting */
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1);
+ svm_disable_intr_window_exiting(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_INTR: /* external interrupt */
@@ -1571,51 +1579,40 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
return (handled);
}
-static void
-svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu)
-{
- uint64_t intinfo;
-
- if (!vm_entry_intinfo(svm_sc->vm, vcpu, &intinfo))
- return;
-
- KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not "
- "valid: %lx", __func__, intinfo));
-
- svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo),
- VMCB_EXITINTINFO_VECTOR(intinfo),
- VMCB_EXITINTINFO_EC(intinfo),
- VMCB_EXITINTINFO_EC_VALID(intinfo));
- vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
- VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %lx", intinfo);
-}
-
/*
- * Inject event to virtual cpu.
+ * Inject exceptions, NMIs, and ExtINTs.
+ *
+ * The logic behind these is complicated and may involve mutex contention, so
+ * the injection is performed without the protection of host CPU interrupts
+ * being disabled. This means a racing notification could be "lost",
+ * necessitating a later call to svm_inject_recheck() to close that window
+ * of opportunity.
*/
-static void
-svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
+static enum event_inject_state
+svm_inject_events(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
struct svm_vcpu *vcpustate;
- uint8_t v_tpr;
- int vector, need_intr_window;
- int extint_pending;
+ uint64_t intinfo;
+ enum event_inject_state ev_state;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
vcpustate = svm_get_vcpu(sc, vcpu);
+ ev_state = EIS_CAN_INJECT;
- need_intr_window = 0;
-
- vlapic_tmr_update(vlapic);
-
+ /* Clear any interrupt shadow if guest %rip has changed */
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
- VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %lx/%lx",
- vcpustate->nextrip, state->rip);
+ }
+
+ /*
+ * An event is already pending for injection. This can occur when the
+ * vCPU exits prior to VM entry (like for an AST).
+ */
+ if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
+ return (EIS_EV_EXISTING | EIS_REQ_EXIT);
}
/*
@@ -1627,118 +1624,79 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
* An event might also be pending because an exception was injected
* by the hypervisor (e.g. #PF during instruction emulation).
*/
- svm_inj_intinfo(sc, vcpu);
+ if (vm_entry_intinfo(sc->vm, vcpu, &intinfo)) {
+ ASSERT(VMCB_EXITINTINFO_VALID(intinfo));
+
+ svm_inject_event(sc, vcpu, intinfo);
+ vmm_stat_incr(sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
+ ev_state = EIS_EV_INJECTED;
+ }
/* NMI event has priority over interrupts. */
- if (vm_nmi_pending(sc->vm, vcpu)) {
- if (nmi_blocked(sc, vcpu)) {
- /*
- * Can't inject another NMI if the guest has not
- * yet executed an "iret" after the last NMI.
- */
- VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due "
- "to NMI-blocking");
- } else if (ctrl->intr_shadow) {
- /*
- * Can't inject an NMI if the vcpu is in an intr_shadow.
- */
- VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due to "
- "interrupt shadow");
- need_intr_window = 1;
- goto done;
- } else if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
- /*
- * If there is already an exception/interrupt pending
- * then defer the NMI until after that.
- */
- VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to "
- "eventinj %lx", ctrl->eventinj);
+ if (vm_nmi_pending(sc->vm, vcpu) && !svm_nmi_blocked(sc, vcpu)) {
+ if (ev_state == EIS_CAN_INJECT) {
+ /* Can't inject NMI if vcpu is in an intr_shadow. */
+ if (ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
+ }
- /*
- * Use self-IPI to trigger a VM-exit as soon as
- * possible after the event injection is completed.
- *
- * This works only if the external interrupt exiting
- * is at a lower priority than the event injection.
- *
- * Although not explicitly specified in APMv2 the
- * relative priorities were verified empirically.
- */
- ipi_cpu(curcpu, IPI_AST); /* XXX vmm_ipinum? */
+ svm_inject_nmi(sc, vcpu);
+ ev_state = EIS_EV_INJECTED;
} else {
- vm_nmi_clear(sc->vm, vcpu);
+ return (ev_state | EIS_REQ_EXIT);
+ }
+ }
- /* Inject NMI, vector number is not used */
- svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_NMI,
- IDT_NMI, 0, false);
+ if (vm_extint_pending(sc->vm, vcpu)) {
+ int vector;
- /* virtual NMI blocking is now in effect */
- enable_nmi_blocking(sc, vcpu);
+ if (ev_state != EIS_CAN_INJECT) {
+ return (ev_state | EIS_REQ_EXIT);
+ }
- VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI");
+ /*
+ * If the guest has disabled interrupts or is in an interrupt
+ * shadow then we cannot inject the pending interrupt.
+ */
+ if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
}
- }
- extint_pending = vm_extint_pending(sc->vm, vcpu);
- if (!extint_pending) {
- if (!vlapic_pending_intr(vlapic, &vector))
- goto done;
- KASSERT(vector >= 16 && vector <= 255,
- ("invalid vector %d from local APIC", vector));
- } else {
/* Ask the legacy pic for a vector to inject */
vatpic_pending_intr(sc->vm, &vector);
KASSERT(vector >= 0 && vector <= 255,
("invalid vector %d from INTR", vector));
- }
- /*
- * If the guest has disabled interrupts or is in an interrupt shadow
- * then we cannot inject the pending interrupt.
- */
- if ((state->rflags & PSL_I) == 0) {
- VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %lx", vector, state->rflags);
- need_intr_window = 1;
- goto done;
- }
-
- if (ctrl->intr_shadow) {
- VCPU_CTR1(sc->vm, vcpu, "Cannot inject vector %d due to "
- "interrupt shadow", vector);
- need_intr_window = 1;
- goto done;
+ svm_inject_irq(sc, vcpu, vector);
+ vm_extint_clear(sc->vm, vcpu);
+ vatpic_intr_accepted(sc->vm, vector);
+ ev_state = EIS_EV_INJECTED;
}
- if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
- VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "eventinj %lx", vector, ctrl->eventinj);
- need_intr_window = 1;
- goto done;
- }
+ return (ev_state);
+}
- svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false);
+/*
+ * Synchronize vLAPIC state and inject any interrupts pending on it.
+ *
+ * This is done with host CPU interrupts disabled so notification IPIs will be
+ * queued on the host APIC and recognized when entering SVM guest context.
+ */
+static enum event_inject_state
+svm_inject_vlapic(struct svm_softc *sc, int vcpu, struct vlapic *vlapic,
+ enum event_inject_state ev_state)
+{
+ struct vmcb_ctrl *ctrl;
+ struct vmcb_state *state;
+ int vector;
+ uint8_t v_tpr;
- if (!extint_pending) {
- vlapic_intr_accepted(vlapic, vector);
- } else {
- vm_extint_clear(sc->vm, vcpu);
- vatpic_intr_accepted(sc->vm, vector);
- }
+ state = svm_get_vmcb_state(sc, vcpu);
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
/*
- * Force a VM-exit as soon as the vcpu is ready to accept another
- * interrupt. This is done because the PIC might have another vector
- * that it wants to inject. Also, if the APIC has a pending interrupt
- * that was preempted by the ExtInt then it allows us to inject the
- * APIC vector as soon as possible.
- */
- need_intr_window = 1;
-done:
- /*
- * The guest can modify the TPR by writing to %CR8. In guest mode
- * the processor reflects this write to V_TPR without hypervisor
- * intervention.
+ * The guest can modify the TPR by writing to %cr8. In guest mode the
+ * CPU reflects this write to V_TPR without hypervisor intervention.
*
* The guest can also modify the TPR by writing to it via the memory
* mapped APIC page. In this case, the write will be emulated by the
@@ -1748,33 +1706,88 @@ done:
v_tpr = vlapic_get_cr8(vlapic);
KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
- VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %x to %x",
- ctrl->v_tpr, v_tpr);
ctrl->v_tpr = v_tpr;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
}
- if (need_intr_window) {
+ /* If an event cannot otherwise be injected, we are done for now */
+ if (ev_state != EIS_CAN_INJECT) {
+ return (ev_state);
+ }
+
+ if (!vlapic_pending_intr(vlapic, &vector)) {
+ return (EIS_CAN_INJECT);
+ }
+ KASSERT(vector >= 16 && vector <= 255,
+ ("invalid vector %d from local APIC", vector));
+
+ /*
+ * If the guest has disabled interrupts or is in an interrupt shadow
+ * then we cannot inject the pending interrupt.
+ */
+ if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
+ }
+
+ svm_inject_irq(sc, vcpu, vector);
+ vlapic_intr_accepted(vlapic, vector);
+ return (EIS_EV_INJECTED);
+}
+
+/*
+ * Re-check for events to be injected.
+ *
+ * Once host CPU interrupts are disabled, check for the presence of any events
+ * which require injection processing. If an exit is required upon injection,
+ * or once the guest becomes interruptable, that will be configured too.
+ */
+static bool
+svm_inject_recheck(struct svm_softc *sc, int vcpu,
+ enum event_inject_state ev_state)
+{
+ struct vmcb_ctrl *ctrl;
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ if (ev_state == EIS_CAN_INJECT) {
/*
- * We use V_IRQ in conjunction with the VINTR intercept to
- * trap into the hypervisor as soon as a virtual interrupt
- * can be delivered.
- *
- * Since injected events are not subject to intercept checks
- * we need to ensure that the V_IRQ is not actually going to
- * be delivered on VM entry. The KASSERT below enforces this.
+ * An active interrupt shadow would preclude us from injecting
+ * any events picked up during a re-check.
*/
- KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
- (state->rflags & PSL_I) == 0 || ctrl->intr_shadow,
- ("Bogus intr_window_exiting: eventinj (%lx), "
- "intr_shadow (%lu), rflags (%lx)",
- ctrl->eventinj, ctrl->intr_shadow, state->rflags));
- enable_intr_window_exiting(sc, vcpu);
+ if (ctrl->intr_shadow != 0) {
+ return (false);
+ }
+
+ if (vm_nmi_pending(sc->vm, vcpu) &&
+ !svm_nmi_blocked(sc, vcpu)) {
+ /* queued NMI not blocked by NMI-window-exiting */
+ return (true);
+ }
+ if (vm_extint_pending(sc->vm, vcpu)) {
+ /* queued ExtINT not blocked by existing injection */
+ return (true);
+ }
} else {
- disable_intr_window_exiting(sc, vcpu);
+ if ((ev_state & EIS_REQ_EXIT) != 0) {
+ /*
+ * Use a self-IPI to force an immediate exit after
+ * event injection has occurred.
+ */
+ poke_cpu(CPU->cpu_id);
+ } else {
+ /*
+ * If any event is being injected, an exit immediately
+ * upon becoming interruptable again will allow pending
+ * or newly queued events to be injected in a timely
+ * manner.
+ */
+ svm_enable_intr_window_exiting(sc, vcpu);
+ }
}
+ return (false);
}
+
#ifdef __FreeBSD__
static void
check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
@@ -2039,15 +2052,15 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
state->rip = rip;
do {
-#ifndef __FreeBSD__
+ enum event_inject_state inject_state;
+
/*
- * Interrupt injection may involve mutex contention which, on
- * illumos bhyve, are blocking/non-spin. Doing so with global
- * interrupts disabled is a recipe for deadlock, so it is
- * performed here.
+ * Initial event injection is complex and may involve mutex
+ * contention, so it must be performed with global interrupts
+ * still enabled.
*/
- svm_inj_interrupts(svm_sc, vcpu, vlapic);
-#endif
+ inject_state = svm_inject_events(svm_sc, vcpu);
+ handled = 0;
/*
* Disable global interrupts to guarantee atomicity during
@@ -2058,6 +2071,13 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
*/
disable_gintr();
+ /*
+ * Synchronizing and injecting vlapic state is lock-free and is
+ * safe (and prudent) to perform with interrupts disabled.
+ */
+ inject_state = svm_inject_vlapic(svm_sc, vcpu, vlapic,
+ inject_state);
+
if (vcpu_suspended(evinfo)) {
enable_gintr();
vm_exit_suspended(vm, vcpu, state->rip);
@@ -2090,6 +2110,16 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
}
/*
+ * If subsequent activity queued events which require injection
+ * handling, take another lap to handle them.
+ */
+ if (svm_inject_recheck(svm_sc, vcpu, inject_state)) {
+ enable_gintr();
+ handled = 1;
+ continue;
+ }
+
+ /*
* #VMEXIT resumes the host with the guest LDTR, so
* save the current LDT selector so it can be restored
* after an exit. The userspace hypervisor probably
@@ -2098,10 +2128,6 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
*/
ldt_sel = sldt();
-#ifdef __FreeBSD__
- svm_inj_interrupts(svm_sc, vcpu, vlapic);
-#endif
-
/* Activate the nested pmap on 'curcpu' */
CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active);
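
The restructured svm_vmrun() loop threads an event_inject_state value from svm_inject_events() (run with host interrupts still enabled, since it may take mutexes) through svm_inject_vlapic() (run with them disabled) and finally into svm_inject_recheck(). The enum itself comes from the vmm_kernel.h change listed in the diffstat but not shown in this section; the sketch below is a plausible layout inferred only from how the values are combined above (EIS_REQ_EXIT is OR-ed onto the others, so it must be a separate flag bit), not the real header contents.

/*
 * Hypothetical layout for event_inject_state, inferred from its usage in
 * svm.c/vmx.c above; the actual definition lives in vmm_kernel.h.
 */
enum event_inject_state {
	EIS_CAN_INJECT	= 0,		/* nothing blocks further injection */
	EIS_EV_EXISTING	= 1,		/* an event was already pending */
	EIS_EV_INJECTED	= 2,		/* an event was injected this pass */
	EIS_GI_BLOCK	= 3,		/* guest interruptability blocks it */
	EIS_REQ_EXIT	= (1 << 15),	/* flag: force an immediate exit */
};

/*
 * Shape of the svm_vmrun() loop after this change (pseudocode summary of the
 * real functions above, not a runnable driver):
 *
 *	inject_state = svm_inject_events(sc, vcpu);	// mutexes OK here
 *	disable_gintr();
 *	inject_state = svm_inject_vlapic(sc, vcpu, vlapic, inject_state);
 *	if (svm_inject_recheck(sc, vcpu, inject_state)) {
 *		enable_gintr();
 *		continue;				// take another lap
 *	}
 *	// ... VMRUN ...
 */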
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 63b088253d..1c002aee7b 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -136,9 +136,9 @@ struct svm_softc;
/* Event types that can be injected */
#define VMCB_EVENTINJ_TYPE_INTR 0
-#define VMCB_EVENTINJ_TYPE_NMI 2
-#define VMCB_EVENTINJ_TYPE_EXCEPTION 3
-#define VMCB_EVENTINJ_TYPE_INTn 4
+#define VMCB_EVENTINJ_TYPE_NMI (2 << 8)
+#define VMCB_EVENTINJ_TYPE_EXCEPTION (3 << 8)
+#define VMCB_EVENTINJ_TYPE_INTn (4 << 8)
/* VMCB exit code, APM vol2 Appendix C */
#define VMCB_EXIT_MC 0x52
@@ -187,9 +187,9 @@ struct svm_softc;
* Section 15.7.2, Intercepts during IDT Interrupt Delivery.
*/
#define VMCB_EXITINTINFO_VECTOR(x) ((x) & 0xFF)
-#define VMCB_EXITINTINFO_TYPE(x) (((x) >> 8) & 0x7)
-#define VMCB_EXITINTINFO_EC_VALID(x) (((x) & BIT(11)) ? 1 : 0)
-#define VMCB_EXITINTINFO_VALID(x) (((x) & BIT(31)) ? 1 : 0)
+#define VMCB_EXITINTINFO_TYPE(x) ((x) & (0x7 << 8))
+#define VMCB_EXITINTINFO_EC_VALID(x) (((x) & BIT(11)) != 0)
+#define VMCB_EXITINTINFO_VALID(x) (((x) & BIT(31)) != 0)
#define VMCB_EXITINTINFO_EC(x) (((x) >> 32) & 0xFFFFFFFF)
/* Offset of various VMCB fields. */
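
With the event-type constants now pre-shifted into bits 10:8 and VMCB_EXITINTINFO_TYPE() returning the still-shifted field, svm_inject_event() can rebuild EVENTINJ by OR-ing the decoded pieces straight back together, with no re-shifting. A small standalone round-trip check of that encoding follows; BIT() and the example vector/error code are illustrative.

#include <assert.h>
#include <stdint.h>

#define	BIT(n)				(1UL << (n))

#define	VMCB_EVENTINJ_EC_VALID		BIT(11)
#define	VMCB_EVENTINJ_VALID		BIT(31)
#define	VMCB_EVENTINJ_TYPE_EXCEPTION	(3 << 8)

#define	VMCB_EXITINTINFO_VECTOR(x)	((x) & 0xFF)
#define	VMCB_EXITINTINFO_TYPE(x)	((x) & (0x7 << 8))
#define	VMCB_EXITINTINFO_EC_VALID(x)	(((x) & BIT(11)) != 0)
#define	VMCB_EXITINTINFO_VALID(x)	(((x) & BIT(31)) != 0)
#define	VMCB_EXITINTINFO_EC(x)		(((x) >> 32) & 0xFFFFFFFF)

int
main(void)
{
	/* #GP (vector 13) with error code 0x10, as EXITINTINFO reports it. */
	uint64_t intinfo = 13 | VMCB_EVENTINJ_TYPE_EXCEPTION |
	    VMCB_EVENTINJ_EC_VALID | VMCB_EVENTINJ_VALID |
	    ((uint64_t)0x10 << 32);
	uint64_t eventinj;

	assert(VMCB_EXITINTINFO_VALID(intinfo));

	/* Same construction svm_inject_event() performs. */
	eventinj = VMCB_EVENTINJ_VALID |
	    VMCB_EXITINTINFO_TYPE(intinfo) |
	    VMCB_EXITINTINFO_VECTOR(intinfo);
	if (VMCB_EXITINTINFO_EC_VALID(intinfo)) {
		eventinj |= VMCB_EVENTINJ_EC_VALID;
		eventinj |= VMCB_EXITINTINFO_EC(intinfo) << 32;
	}

	/* EXITINTINFO and EVENTINJ share this bit layout, so they match. */
	assert(eventinj == intinfo);
	return (0);
}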
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index a791197d17..1f670ef3b3 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -338,8 +338,10 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
-static void vmx_inject_pir(struct vlapic *vlapic);
static void vmx_apply_tsc_adjust(struct vmx *, int);
+static void vmx_apicv_sync_tmr(struct vlapic *vlapic);
+static void vmx_tpr_shadow_enter(struct vlapic *vlapic);
+static void vmx_tpr_shadow_exit(struct vlapic *vlapic);
#ifdef KTR
static const char *
@@ -1270,26 +1272,27 @@ vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting");
}
+static __inline bool
+vmx_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+ return ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0);
+}
+
static __inline void
vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu)
{
-
- if ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) == 0) {
+ if (!vmx_nmi_window_exiting(vmx, vcpu)) {
vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting");
}
}
static __inline void
vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
{
-
- KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0,
- ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls));
+ ASSERT(vmx_nmi_window_exiting(vmx, vcpu));
vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
}
/*
@@ -1319,60 +1322,46 @@ vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
#define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
-#ifndef __FreeBSD__
-static uint32_t
-vmx_inject_nmi(struct vmx *vmx, int vcpu)
-#else
static void
vmx_inject_nmi(struct vmx *vmx, int vcpu)
-#endif
{
- uint32_t gi, info;
-
- gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
- KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest "
- "interruptibility-state %x", gi));
-
- info = vmcs_read(VMCS_ENTRY_INTR_INFO);
- KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid "
- "VM-entry interruption information %x", info));
+ ASSERT0(vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & NMI_BLOCKING);
+ ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
/*
* Inject the virtual NMI. The vector must be the NMI IDT entry
* or the VMCS entry check will fail.
*/
- info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID;
- vmcs_write(VMCS_ENTRY_INTR_INFO, info);
-
- VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI");
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID);
/* Clear the request */
vm_nmi_clear(vmx->vm, vcpu);
-
-#ifndef __FreeBSD__
- return (info);
-#endif
}
-static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
- uint64_t guestrip)
+/*
+ * Inject exceptions, NMIs, and ExtINTs.
+ *
+ * The logic behind these is complicated and may involve mutex contention, so
+ * the injection is performed without the protection of host CPU interrupts
+ * being disabled. This means a racing notification could be "lost",
+ * necessitating a later call to vmx_inject_recheck() to close that window
+ * of opportunity.
+ */
+static enum event_inject_state
+vmx_inject_events(struct vmx *vmx, int vcpu, uint64_t rip)
{
- uint64_t entryinfo, rflags;
+ uint64_t entryinfo;
uint32_t gi, info;
int vector;
- boolean_t extint_pending = B_FALSE;
-
- vlapic_tmr_update(vlapic);
+ enum event_inject_state state;
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+ state = EIS_CAN_INJECT;
- if (vmx->state[vcpu].nextrip != guestrip &&
- (gi & HWINTR_BLOCKING) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %lx/%lx",
- vmx->state[vcpu].nextrip, guestrip);
+ /* Clear any interrupt blocking if the guest %rip has changed */
+ if (vmx->state[vcpu].nextrip != rip && (gi & HWINTR_BLOCKING) != 0) {
gi &= ~HWINTR_BLOCKING;
vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
}
@@ -1383,15 +1372,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
* such as an AST before a vm-entry delivered the injection.
*/
if ((info & VMCS_INTR_VALID) != 0) {
- goto cantinject;
+ return (EIS_EV_EXISTING | EIS_REQ_EXIT);
}
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
- KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
- "intinfo is not valid: %lx", __func__, entryinfo));
-
- KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
- "pending exception: %lx/%x", __func__, entryinfo, info));
+ ASSERT(entryinfo & VMCS_INTR_VALID);
info = entryinfo;
vector = info & 0xff;
@@ -1404,50 +1389,49 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
info |= VMCS_INTR_T_SWEXCEPTION;
}
- if (info & VMCS_INTR_DEL_ERRCODE)
+ if (info & VMCS_INTR_DEL_ERRCODE) {
vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
+ }
vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+ state = EIS_EV_INJECTED;
}
if (vm_nmi_pending(vmx->vm, vcpu)) {
- int need_nmi_exiting = 1;
-
/*
- * If there are no conditions blocking NMI injection then
- * inject it directly here otherwise enable "NMI window
- * exiting" to inject it as soon as we can.
+ * If there are no conditions blocking NMI injection then inject
+ * it directly here otherwise enable "NMI window exiting" to
+ * inject it as soon as we can.
*
- * We also check for STI_BLOCKING because some implementations
- * don't allow NMI injection in this case. If we are running
- * on a processor that doesn't have this restriction it will
- * immediately exit and the NMI will be injected in the
- * "NMI window exiting" handler.
+ * According to the Intel manual, some CPUs do not allow NMI
+ * injection when STI_BLOCKING is active. That check is
+ * enforced here, regardless of CPU capability. If running on a
+ * CPU without such a restriction it will immediately exit and
+ * the NMI will be injected in the "NMI window exiting" handler.
*/
if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
- if ((info & VMCS_INTR_VALID) == 0) {
- info = vmx_inject_nmi(vmx, vcpu);
- need_nmi_exiting = 0;
+ if (state == EIS_CAN_INJECT) {
+ vmx_inject_nmi(vmx, vcpu);
+ state = EIS_EV_INJECTED;
} else {
- VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI "
- "due to VM-entry intr info %x", info);
+ return (state | EIS_REQ_EXIT);
}
} else {
- VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to "
- "Guest Interruptibility-state %x", gi);
- }
-
- if (need_nmi_exiting) {
vmx_set_nmi_window_exiting(vmx, vcpu);
- return;
}
}
- /* Check the AT-PIC and APIC for interrupts. */
if (vm_extint_pending(vmx->vm, vcpu)) {
+ if (state != EIS_CAN_INJECT) {
+ return (state | EIS_REQ_EXIT);
+ }
+ if ((gi & HWINTR_BLOCKING) != 0 ||
+ (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
+ return (EIS_GI_BLOCK);
+ }
+
/* Ask the legacy pic for a vector to inject */
vatpic_pending_intr(vmx->vm, &vector);
- extint_pending = B_TRUE;
/*
* From the Intel SDM, Volume 3, Section "Maskable
@@ -1457,80 +1441,131 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
*/
KASSERT(vector >= 0 && vector <= 255,
("invalid vector %d from INTR", vector));
- } else if (!vmx_cap_en(vmx, VMX_CAP_APICV)) {
- /* Ask the local apic for a vector to inject */
- if (!vlapic_pending_intr(vlapic, &vector))
- return;
- /*
- * From the Intel SDM, Volume 3, Section "Maskable
- * Hardware Interrupts":
- * - maskable interrupt vectors [16,255] can be delivered
- * through the local APIC.
- */
- KASSERT(vector >= 16 && vector <= 255,
- ("invalid vector %d from local APIC", vector));
- } else {
- /* No futher injection needed */
- return;
- }
+ /* Inject the interrupt */
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
- /*
- * Verify that the guest is interruptable and the above logic has not
- * already queued an event for injection.
- */
- if ((gi & HWINTR_BLOCKING) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "Guest Interruptibility-state %x", vector, gi);
- goto cantinject;
- }
- if ((info & VMCS_INTR_VALID) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "VM-entry intr info %x", vector, info);
- goto cantinject;
+ vm_extint_clear(vmx->vm, vcpu);
+ vatpic_intr_accepted(vmx->vm, vector);
+ state = EIS_EV_INJECTED;
}
- rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- if ((rflags & PSL_I) == 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %lx", vector, rflags);
- goto cantinject;
+
+ return (state);
+}
+
+/*
+ * Inject any interrupts pending on the vLAPIC.
+ *
+ * This is done with host CPU interrupts disabled so notification IPIs, either
+ * from the standard vCPU notification or APICv posted interrupts, will be
+ * queued on the host APIC and recognized when entering VMX context.
+ */
+static enum event_inject_state
+vmx_inject_vlapic(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
+{
+ int vector;
+
+ if (!vlapic_pending_intr(vlapic, &vector)) {
+ return (EIS_CAN_INJECT);
}
- /* Inject the interrupt */
- info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID;
- info |= vector;
- vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+ /*
+ * From the Intel SDM, Volume 3, Section "Maskable
+ * Hardware Interrupts":
+ * - maskable interrupt vectors [16,255] can be delivered
+ * through the local APIC.
+ */
+ KASSERT(vector >= 16 && vector <= 255,
+ ("invalid vector %d from local APIC", vector));
- if (extint_pending) {
- vm_extint_clear(vmx->vm, vcpu);
- vatpic_intr_accepted(vmx->vm, vector);
+ if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
+ uint16_t status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
+ uint16_t status_new = (status_old & 0xff00) | vector;
/*
- * After we accepted the current ExtINT the PIC may
- * have posted another one. If that is the case, set
- * the Interrupt Window Exiting execution control so
- * we can inject that one too.
- *
- * Also, interrupt window exiting allows us to inject any
- * pending APIC vector that was preempted by the ExtINT
- * as soon as possible. This applies both for the software
- * emulated vlapic and the hardware assisted virtual APIC.
+ * The APICv state will have been synced into the vLAPIC
+ * as part of vlapic_pending_intr(). Prepare the VMCS
+ * for the to-be-injected pending interrupt.
*/
- vmx_set_int_window_exiting(vmx, vcpu);
- } else {
- /* Update the Local APIC ISR */
- vlapic_intr_accepted(vlapic, vector);
+ if (status_new > status_old) {
+ vmcs_write(VMCS_GUEST_INTR_STATUS, status_new);
+ VCPU_CTR2(vlapic->vm, vlapic->vcpuid,
+ "vmx_inject_interrupts: guest_intr_status "
+ "changed from 0x%04x to 0x%04x",
+ status_old, status_new);
+ }
+
+ /*
+ * Ensure VMCS state regarding EOI traps is kept in sync
+ * with the TMRs in the vlapic.
+ */
+ vmx_apicv_sync_tmr(vlapic);
+
+ /*
+ * The rest of the injection process for injecting the
+ * interrupt(s) is handled by APICv. It does not preclude other
+ * event injection from occurring.
+ */
+ return (EIS_CAN_INJECT);
}
- VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
- return;
+ ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
-cantinject:
- /*
- * Set the Interrupt Window Exiting execution control so we can inject
- * the interrupt as soon as blocking condition goes away.
- */
- vmx_set_int_window_exiting(vmx, vcpu);
+ /* Does guest interruptability block injection? */
+ if ((vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & HWINTR_BLOCKING) != 0 ||
+ (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
+ return (EIS_GI_BLOCK);
+ }
+
+ /* Inject the interrupt */
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
+
+ /* Update the Local APIC ISR */
+ vlapic_intr_accepted(vlapic, vector);
+
+ return (EIS_EV_INJECTED);
+}
+
+/*
+ * Re-check for events to be injected.
+ *
+ * Once host CPU interrupts are disabled, check for the presence of any events
+ * which require injection processing. If an exit is required upon injection,
+ * or once the guest becomes interruptable, that will be configured too.
+ */
+static bool
+vmx_inject_recheck(struct vmx *vmx, int vcpu, enum event_inject_state state)
+{
+ if (state == EIS_CAN_INJECT) {
+ if (vm_nmi_pending(vmx->vm, vcpu) &&
+ !vmx_nmi_window_exiting(vmx, vcpu)) {
+ /* queued NMI not blocked by NMI-window-exiting */
+ return (true);
+ }
+ if (vm_extint_pending(vmx->vm, vcpu)) {
+ /* queued ExtINT not blocked by existing injection */
+ return (true);
+ }
+ } else {
+ if ((state & EIS_REQ_EXIT) != 0) {
+ /*
+ * Use a self-IPI to force an immediate exit after
+ * event injection has occurred.
+ */
+ poke_cpu(CPU->cpu_id);
+ } else {
+ /*
+ * If any event is being injected, an exit immediately
+ * upon becoming interruptable again will allow pending
+ * or newly queued events to be injected in a timely
+ * manner.
+ */
+ vmx_set_int_window_exiting(vmx, vcpu);
+ }
+ }
+ return (false);
}
/*
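
On the VMX side, the "can this be injected right now?" question that SVM answers from intr_shadow and rflags is answered from the guest interruptibility-state field plus rflags.IF. The standalone model below mirrors the EIS_GI_BLOCK checks in vmx_inject_events() and vmx_inject_vlapic(); the struct is a stand-in for state normally read via vmcs_read(), and the STI/MOV-SS bit positions are taken from the Intel SDM rather than from this diff.

#include <stdbool.h>
#include <stdint.h>

#define	PSL_I					0x00000200ULL
#define	VMCS_INTERRUPTIBILITY_STI_BLOCKING	(1u << 0)
#define	VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING	(1u << 1)
#define	HWINTR_BLOCKING		(VMCS_INTERRUPTIBILITY_STI_BLOCKING | \
				VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)

/* Stand-in for the guest state normally read via vmcs_read(). */
struct guest_state_model {
	uint32_t interruptibility;
	uint64_t rflags;
};

/*
 * A maskable interrupt may only be queued when no interrupt shadow is
 * active and the guest has rflags.IF set; otherwise EIS_GI_BLOCK applies.
 */
static bool
hwintr_injectable(const struct guest_state_model *gs)
{
	return ((gs->interruptibility & HWINTR_BLOCKING) == 0 &&
	    (gs->rflags & PSL_I) != 0);
}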
@@ -2437,12 +2472,6 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vmexit->u.hlt.intr_status =
- vmcs_read(VMCS_GUEST_INTR_STATUS);
- } else {
- vmexit->u.hlt.intr_status = 0;
- }
break;
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -2871,6 +2900,7 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
struct region_descriptor gdtr, idtr;
uint16_t ldt_sel;
#endif
+ bool tpr_shadow_active;
vmx = arg;
vm = vmx->vm;
@@ -2879,6 +2909,9 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
vlapic = vm_lapic(vm, vcpu);
vmexit = vm_exitinfo(vm, vcpu);
launched = 0;
+ tpr_shadow_active = vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) &&
+ !vmx_cap_en(vmx, VMX_CAP_APICV) &&
+ (vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0;
KASSERT(vmxctx->pmap == pmap,
("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));
@@ -2905,10 +2938,19 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
+ enum event_inject_state inject_state;
+
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
"%lx/%lx", __func__, vmcs_guest_rip(), rip));
handled = UNHANDLED;
+
+ /*
+ * Perform initial event/exception/interrupt injection before
+ * host CPU interrupts are disabled.
+ */
+ inject_state = vmx_inject_events(vmx, vcpu, rip);
+
/*
* Interrupts are disabled from this point on until the
* guest starts executing. This is done for the following
@@ -2919,27 +2961,28 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
* will cause a VM exit due to external interrupt as soon as
* the guest state is loaded.
*
- * A posted interrupt after 'vmx_inject_interrupts()' will
- * not be "lost" because it will be held pending in the host
- * APIC because interrupts are disabled. The pending interrupt
- * will be recognized as soon as the guest state is loaded.
+ * A posted interrupt after vmx_inject_vlapic() will not be
+ * "lost" because it will be held pending in the host APIC
+ * because interrupts are disabled. The pending interrupt will
+ * be recognized as soon as the guest state is loaded.
*
* The same reasoning applies to the IPI generated by
* pmap_invalidate_ept().
- *
- * The bulk of guest interrupt injection is done without
- * interrupts disabled on the host CPU. This is necessary
- * since contended mutexes might force the thread to sleep.
*/
- vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
disable_intr();
- if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vmx_inject_pir(vlapic);
+
+ /*
+ * If not precluded by existing events, inject any interrupt
+ * pending on the vLAPIC. As a lock-less operation, it is safe
+ * (and prudent) to perform with host CPU interrupts disabled.
+ */
+ if (inject_state == EIS_CAN_INJECT) {
+ inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic);
}
/*
* Check for vcpu suspension after injecting events because
- * vmx_inject_interrupts() can suspend the vcpu due to a
+ * vmx_inject_events() can suspend the vcpu due to a
* triple fault.
*/
if (vcpu_suspended(evinfo)) {
@@ -2974,6 +3017,16 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
break;
}
+ /*
+ * If subsequent activity queued events which require injection
+ * handling, take another lap to handle them.
+ */
+ if (vmx_inject_recheck(vmx, vcpu, inject_state)) {
+ enable_intr();
+ handled = HANDLED;
+ continue;
+ }
+
#ifndef __FreeBSD__
if ((rc = smt_acquire()) != 1) {
enable_intr();
@@ -3032,17 +3085,8 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
ldt_sel = sldt();
#endif
- /*
- * If TPR Shadowing is enabled, the TPR Threshold must be
- * updated right before entering the guest.
- */
- if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) &&
- !vmx_cap_en(vmx, VMX_CAP_APICV)) {
- if ((vmx->cap[vcpu].proc_ctls &
- PROCBASED_USE_TPR_SHADOW) != 0) {
- vmcs_write(VMCS_TPR_THRESHOLD,
- vlapic_get_cr8(vlapic));
- }
+ if (tpr_shadow_active) {
+ vmx_tpr_shadow_enter(vlapic);
}
vmx_run_trace(vmx, vcpu);
@@ -3059,6 +3103,10 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
lldt(ldt_sel);
#endif
+ if (tpr_shadow_active) {
+ vmx_tpr_shadow_exit(vlapic);
+ }
+
/* Collect some information for VM exit processing */
vmexit->rip = rip = vmcs_guest_rip();
vmexit->inst_length = vmexit_instruction_length();
@@ -3524,47 +3572,73 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
struct vlapic_vtx {
struct vlapic vlapic;
+
+ /* Align to the nearest cacheline */
+ uint8_t _pad[64 - (sizeof (struct vlapic) % 64)];
+
+ /* TMR handling state for posted interrupts */
+ uint32_t tmr_active[8];
+ uint32_t pending_level[8];
+ uint32_t pending_edge[8];
+
struct pir_desc *pir_desc;
struct vmx *vmx;
u_int pending_prio;
+ boolean_t tmr_sync;
};
-#define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4))
+CTASSERT((offsetof (struct vlapic_vtx, tmr_active) & 63) == 0);
-#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \
-do { \
- VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \
- level ? "level" : "edge", vector); \
- VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \
- VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \
- VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \
- VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \
- VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
-} while (0)
+#define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4))
-/*
- * vlapic->ops handlers that utilize the APICv hardware assist described in
- * Chapter 29 of the Intel SDM.
- */
-static int
-vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
+static vcpu_notify_t
+vmx_apicv_set_ready(struct vlapic *vlapic, int vector, bool level)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
- uint64_t mask;
- int idx, notify = 0;
+ uint32_t mask, tmrval;
+ int idx;
+ vcpu_notify_t notify = VCPU_NOTIFY_NONE;
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = vlapic_vtx->pir_desc;
+ idx = vector / 32;
+ mask = 1UL << (vector % 32);
/*
- * Keep track of interrupt requests in the PIR descriptor. This is
- * because the virtual APIC page pointed to by the VMCS cannot be
- * modified if the vcpu is running.
+ * If the currently asserted TMRs do not match the state requested by
+ * the incoming interrupt, an exit will be required to reconcile those
+ * bits in the APIC page. This will keep the vLAPIC behavior in line
+ * with the architecturally defined expectations.
+ *
+ * If actors of mixed types (edge and level) are racing against the same
+ * vector (toggling its TMR bit back and forth), the results could be
+ * inconsistent. Such circumstances are considered a rare edge case and
+ * are never expected to be found in the wild.
*/
- idx = vector / 64;
- mask = 1UL << (vector % 64);
- atomic_set_long(&pir_desc->pir[idx], mask);
+ tmrval = atomic_load_acq_int(&vlapic_vtx->tmr_active[idx]);
+ if (!level) {
+ if ((tmrval & mask) != 0) {
+ /* Edge-triggered interrupt needs TMR de-asserted */
+ atomic_set_int(&vlapic_vtx->pending_edge[idx], mask);
+ atomic_store_rel_long(&pir_desc->pending, 1);
+ return (VCPU_NOTIFY_EXIT);
+ }
+ } else {
+ if ((tmrval & mask) == 0) {
+ /* Level-triggered interrupt needs TMR asserted */
+ atomic_set_int(&vlapic_vtx->pending_level[idx], mask);
+ atomic_store_rel_long(&pir_desc->pending, 1);
+ return (VCPU_NOTIFY_EXIT);
+ }
+ }
+
+ /*
+ * If the interrupt request does not require manipulation of the TMRs
+ * for delivery, set it in PIR descriptor. It cannot be inserted into
+ * the APIC page while the vCPU might be running.
+ */
+ atomic_set_int(&pir_desc->pir[idx], mask);
/*
* A notification is required whenever the 'pending' bit makes a
@@ -3585,7 +3659,7 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
* cleared whenever the 'pending' bit makes another 0->1 transition.
*/
if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) {
- notify = 1;
+ notify = VCPU_NOTIFY_APIC;
vlapic_vtx->pending_prio = 0;
} else {
const u_int old_prio = vlapic_vtx->pending_prio;
@@ -3593,113 +3667,44 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) {
atomic_set_int(&vlapic_vtx->pending_prio, prio_bit);
- notify = 1;
+ notify = VCPU_NOTIFY_APIC;
}
}
- VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
- level, "vmx_set_intr_ready");
return (notify);
}
-static int
-vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
+static void
+vmx_apicv_accepted(struct vlapic *vlapic, int vector)
{
- struct vlapic_vtx *vlapic_vtx;
- struct pir_desc *pir_desc;
- struct LAPIC *lapic;
- uint64_t pending, pirval;
- uint32_t ppr, vpr;
- int i;
-
- /*
- * This function is only expected to be called from the 'HLT' exit
- * handler which does not care about the vector that is pending.
- */
- KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));
-
- vlapic_vtx = (struct vlapic_vtx *)vlapic;
- pir_desc = vlapic_vtx->pir_desc;
-
- pending = atomic_load_acq_long(&pir_desc->pending);
- if (!pending) {
- /*
- * While a virtual interrupt may have already been
- * processed the actual delivery maybe pending the
- * interruptibility of the guest. Recognize a pending
- * interrupt by reevaluating virtual interrupts
- * following Section 29.2.1 in the Intel SDM Volume 3.
- */
- struct vm_exit *vmexit;
- uint8_t rvi, ppr;
-
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- rvi = vmexit->u.hlt.intr_status & APIC_TPR_INT;
- lapic = vlapic->apic_page;
- ppr = lapic->ppr & APIC_TPR_INT;
- if (rvi > ppr) {
- return (1);
- }
-
- return (0);
- }
-
/*
- * If there is an interrupt pending then it will be recognized only
- * if its priority is greater than the processor priority.
- *
- * Special case: if the processor priority is zero then any pending
- * interrupt will be recognized.
+ * When APICv is enabled for an instance, the traditional interrupt
+ * injection method (populating ENTRY_INTR_INFO in the VMCS) is not
+ * used and the CPU does the heavy lifting of virtual interrupt
+ * delivery. For that reason vmx_intr_accepted() should never be called
+ * when APICv is enabled.
*/
- lapic = vlapic->apic_page;
- ppr = lapic->ppr & APIC_TPR_INT;
- if (ppr == 0)
- return (1);
-
- VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
- lapic->ppr);
-
- vpr = 0;
- for (i = 3; i >= 0; i--) {
- pirval = pir_desc->pir[i];
- if (pirval != 0) {
- vpr = (i * 64 + flsl(pirval) - 1) & APIC_TPR_INT;
- break;
- }
- }
-
- /*
- * If the highest-priority pending interrupt falls short of the
- * processor priority of this vCPU, ensure that 'pending_prio' does not
- * have any stale bits which would preclude a higher-priority interrupt
- * from incurring a notification later.
- */
- if (vpr <= ppr) {
- const u_int prio_bit = VPR_PRIO_BIT(vpr);
- const u_int old = vlapic_vtx->pending_prio;
-
- if (old > prio_bit && (old & prio_bit) == 0) {
- vlapic_vtx->pending_prio = prio_bit;
- }
- return (0);
- }
- return (1);
+ panic("vmx_intr_accepted: not expected to be called");
}
static void
-vmx_intr_accepted(struct vlapic *vlapic, int vector)
+vmx_apicv_sync_tmr(struct vlapic *vlapic)
{
+ struct vlapic_vtx *vlapic_vtx;
+ const uint32_t *tmrs;
- panic("vmx_intr_accepted: not expected to be called");
-}
+ vlapic_vtx = (struct vlapic_vtx *)vlapic;
+ tmrs = &vlapic_vtx->tmr_active[0];
-static void
-vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks)
-{
- vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)masks[1] << 32) | masks[0]);
- vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)masks[3] << 32) | masks[2]);
- vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)masks[5] << 32) | masks[4]);
- vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)masks[7] << 32) | masks[6]);
+ if (!vlapic_vtx->tmr_sync) {
+ return;
+ }
+
+ vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)tmrs[1] << 32) | tmrs[0]);
+ vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)tmrs[3] << 32) | tmrs[2]);
+ vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)tmrs[5] << 32) | tmrs[4]);
+ vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)tmrs[7] << 32) | tmrs[6]);
+ vlapic_vtx->tmr_sync = B_FALSE;
}
static void
@@ -3765,107 +3770,99 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
}
static void
-vmx_post_intr(struct vlapic *vlapic, int hostcpu)
+vmx_apicv_notify(struct vlapic *vlapic, int hostcpu)
{
-#ifdef __FreeBSD__
- ipi_cpu(hostcpu, pirvec);
-#else
psm_send_pir_ipi(hostcpu);
-#endif
}
-/*
- * Transfer the pending interrupts in the PIR descriptor to the IRR
- * in the virtual APIC page.
- */
static void
-vmx_inject_pir(struct vlapic *vlapic)
+vmx_apicv_sync(struct vlapic *vlapic)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
struct LAPIC *lapic;
- uint64_t val, pirval;
- int rvi, pirbase = -1;
- uint16_t intr_status_old, intr_status_new;
+ uint_t i;
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = vlapic_vtx->pir_desc;
+ lapic = vlapic->apic_page;
+
if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
- VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
- "no posted interrupt pending");
return;
}
- pirval = 0;
- pirbase = -1;
- lapic = vlapic->apic_page;
+ vlapic_vtx->pending_prio = 0;
- val = atomic_readandclear_long(&pir_desc->pir[0]);
- if (val != 0) {
- lapic->irr0 |= val;
- lapic->irr1 |= val >> 32;
- pirbase = 0;
- pirval = val;
- }
+ /* Make sure the invalid (0-15) vectors are not set */
+ ASSERT0(vlapic_vtx->pending_level[0] & 0xffff);
+ ASSERT0(vlapic_vtx->pending_edge[0] & 0xffff);
+ ASSERT0(pir_desc->pir[0] & 0xffff);
- val = atomic_readandclear_long(&pir_desc->pir[1]);
- if (val != 0) {
- lapic->irr2 |= val;
- lapic->irr3 |= val >> 32;
- pirbase = 64;
- pirval = val;
- }
+ for (i = 0; i <= 7; i++) {
+ uint32_t *tmrp = &lapic->tmr0 + (i * 4);
+ uint32_t *irrp = &lapic->irr0 + (i * 4);
- val = atomic_readandclear_long(&pir_desc->pir[2]);
- if (val != 0) {
- lapic->irr4 |= val;
- lapic->irr5 |= val >> 32;
- pirbase = 128;
- pirval = val;
- }
+ const uint32_t pending_level =
+ atomic_readandclear_int(&vlapic_vtx->pending_level[i]);
+ const uint32_t pending_edge =
+ atomic_readandclear_int(&vlapic_vtx->pending_edge[i]);
+ const uint32_t pending_inject =
+ atomic_readandclear_int(&pir_desc->pir[i]);
+
+ if (pending_level != 0) {
+ /*
+ * Level-triggered interrupts assert their corresponding
+ * bit in the TMR when queued in IRR.
+ */
+ *tmrp |= pending_level;
+ *irrp |= pending_level;
+ }
+ if (pending_edge != 0) {
+ /*
+ * When queuing an edge-triggered interrupt in IRR, the
+ * corresponding bit in the TMR is cleared.
+ */
+ *tmrp &= ~pending_edge;
+ *irrp |= pending_edge;
+ }
+ if (pending_inject != 0) {
+ /*
+ * Interrupts which do not require a change to the TMR
+ * (because it already matches the necessary state) can
+ * simply be queued in IRR.
+ */
+ *irrp |= pending_inject;
+ }
- val = atomic_readandclear_long(&pir_desc->pir[3]);
- if (val != 0) {
- lapic->irr6 |= val;
- lapic->irr7 |= val >> 32;
- pirbase = 192;
- pirval = val;
+ if (*tmrp != vlapic_vtx->tmr_active[i]) {
+ /* Check if VMX EOI triggers require updating. */
+ vlapic_vtx->tmr_active[i] = *tmrp;
+ vlapic_vtx->tmr_sync = B_TRUE;
+ }
}
+}
- VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");
+static void
+vmx_tpr_shadow_enter(struct vlapic *vlapic)
+{
+ /*
+ * When TPR shadowing is enabled, VMX will initiate a guest exit if its
+ * TPR falls below a threshold priority. That threshold is set to the
+ * current TPR priority, since guest interrupt status should be
+ * re-evaluated if its TPR is set lower.
+ */
+ vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic));
+}
+static void
+vmx_tpr_shadow_exit(struct vlapic *vlapic)
+{
/*
- * Update RVI so the processor can evaluate pending virtual
- * interrupts on VM-entry.
- *
- * It is possible for pirval to be 0 here, even though the
- * pending bit has been set. The scenario is:
- * CPU-Y is sending a posted interrupt to CPU-X, which
- * is running a guest and processing posted interrupts in h/w.
- * CPU-X will eventually exit and the state seen in s/w is
- * the pending bit set, but no PIR bits set.
- *
- * CPU-X CPU-Y
- * (vm running) (host running)
- * rx posted interrupt
- * CLEAR pending bit
- * SET PIR bit
- * READ/CLEAR PIR bits
- * SET pending bit
- * (vm exit)
- * pending bit set, PIR 0
+ * Unlike full APICv, where changes to the TPR are reflected in the PPR,
+ * with TPR shadowing, that duty is relegated to the VMM. Upon exit,
+ * the PPR is updated to reflect any change in the TPR here.
*/
- if (pirval != 0) {
- rvi = pirbase + flsl(pirval) - 1;
- intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
- intr_status_new = (intr_status_old & 0xFF00) | rvi;
- if (intr_status_new > intr_status_old) {
- vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new);
- VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
- "guest_intr_status changed from 0x%04x to 0x%04x",
- intr_status_old, intr_status_new);
- }
- }
+ vlapic_sync_tpr(vlapic);
}
static struct vlapic *
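
vmx_tpr_shadow_enter() programs VMCS_TPR_THRESHOLD with the guest's current TPR priority class (vlapic_get_cr8()), so any guest write that lowers the TPR below that class forces an exit and a fresh interrupt evaluation, and vmx_tpr_shadow_exit() then reconciles the PPR. A toy model of that comparison follows; the priority-class encoding matches the SDM description, but the helper names are mine, not part of this change.

#include <stdbool.h>
#include <stdint.h>

/* Priority class is the upper nibble of the 8-bit TPR (CR8 holds it directly). */
static inline uint8_t
tpr_prio_class(uint8_t tpr)
{
	return (tpr >> 4);
}

/*
 * With TPR shadowing, the CPU exits when the class of the virtualized TPR
 * drops below the programmed threshold.  Setting the threshold to the
 * current class (as vmx_tpr_shadow_enter() does) means any TPR decrease by
 * the guest is re-evaluated by the VMM.
 */
static inline bool
tpr_write_causes_exit(uint8_t new_tpr, uint8_t threshold_class)
{
	return (tpr_prio_class(new_tpr) < threshold_class);
}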
@@ -3890,14 +3887,13 @@ vmx_vlapic_init(void *arg, int vcpuid)
vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts;
}
if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vlapic->ops.set_intr_ready = vmx_set_intr_ready;
- vlapic->ops.pending_intr = vmx_pending_intr;
- vlapic->ops.intr_accepted = vmx_intr_accepted;
- vlapic->ops.set_tmr = vmx_set_tmr;
+ vlapic->ops.set_intr_ready = vmx_apicv_set_ready;
+ vlapic->ops.sync_state = vmx_apicv_sync;
+ vlapic->ops.intr_accepted = vmx_apicv_accepted;
vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid;
if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
- vlapic->ops.post_intr = vmx_post_intr;
+ vlapic->ops.post_intr = vmx_apicv_notify;
}
}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index 7943c1fd0e..b78f146755 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -110,7 +110,7 @@ CTASSERT(sizeof(struct apic_page) == PAGE_SIZE);
/* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */
struct pir_desc {
- uint64_t pir[4];
+ uint32_t pir[8];
uint64_t pending;
uint64_t unused[3];
} __aligned(64);
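Reworking pir[] from four 64-bit words into eight 32-bit words lets it line up with the 32-bit IRR/TMR banks handled in the sync loop. A sketch of the vector-to-bit mapping under the new layout; the helper below is illustrative only, and the kernel performs the update with atomic_set_int():

#include <stdint.h>

static void
pir_set(uint32_t pir[8], int vector)
{
	pir[vector / 32] |= 1u << (vector % 32);
}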
diff --git a/usr/src/uts/i86pc/io/vmm/io/vioapic.c b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
index 1e8ee1fa7a..89d3bf79df 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vioapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
@@ -237,141 +237,6 @@ vioapic_pulse_irq(struct vm *vm, int irq)
return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE));
}
-#define REDIR_IS_PHYS(reg) (((reg) & IOART_DESTMOD) == IOART_DESTPHY)
-#define REDIR_IS_LOWPRIO(reg) (((reg) & IOART_DELMOD) == IOART_DELLOPRI)
-/* Level-triggered interrupts only valid in fixed and low-priority modes */
-#define REDIR_IS_LVLTRIG(reg) \
- (((reg) & IOART_TRGRLVL) != 0 && \
- (((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg)))
-#define REDIR_DEST(reg) ((reg) >> (32 + APIC_ID_SHIFT))
-#define REDIR_VECTOR(reg) ((reg) & IOART_INTVEC)
-
-/*
- * Given a redirection entry, determine which vCPUs would be targeted.
- */
-static void
-vioapic_calcdest(struct vioapic *vioapic, uint64_t redir_ent, cpuset_t *dmask)
-{
-
- /*
- * When calculating interrupt destinations with vlapic_calcdest(), the
- * legacy xAPIC format is assumed, since the system lacks interrupt
- * redirection hardware.
- * See vlapic_deliver_intr() for more details.
- */
- vlapic_calcdest(vioapic->vm, dmask, REDIR_DEST(redir_ent),
- REDIR_IS_PHYS(redir_ent), REDIR_IS_LOWPRIO(redir_ent), false);
-}
-
-/*
- * Across all redirection entries utilizing a specified vector, determine the
- * set of vCPUs which would be targeted by a level-triggered interrupt.
- */
-static void
-vioapic_tmr_active(struct vioapic *vioapic, uint8_t vec, cpuset_t *result)
-{
- u_int i;
-
- CPU_ZERO(result);
- if (vec == 0) {
- return;
- }
-
- for (i = 0; i < REDIR_ENTRIES; i++) {
- cpuset_t dest;
- const uint64_t val = vioapic->rtbl[i].reg;
-
- if (!REDIR_IS_LVLTRIG(val) || REDIR_VECTOR(val) != vec) {
- continue;
- }
-
- CPU_ZERO(&dest);
- vioapic_calcdest(vioapic, val, &dest);
- CPU_OR(result, &dest);
- }
-}
-
-/*
- * Update TMR state in vLAPICs after changes to vIOAPIC pin configuration
- */
-static void
-vioapic_update_tmrs(struct vioapic *vioapic, int vcpuid, uint64_t oldval,
- uint64_t newval)
-{
- cpuset_t active, allset, newset, oldset;
- struct vm *vm;
- uint8_t newvec, oldvec;
-
- vm = vioapic->vm;
- CPU_ZERO(&allset);
- CPU_ZERO(&newset);
- CPU_ZERO(&oldset);
- newvec = oldvec = 0;
-
- if (REDIR_IS_LVLTRIG(oldval)) {
- vioapic_calcdest(vioapic, oldval, &oldset);
- CPU_OR(&allset, &oldset);
- oldvec = REDIR_VECTOR(oldval);
- }
-
- if (REDIR_IS_LVLTRIG(newval)) {
- vioapic_calcdest(vioapic, newval, &newset);
- CPU_OR(&allset, &newset);
- newvec = REDIR_VECTOR(newval);
- }
-
- if (CPU_EMPTY(&allset) ||
- (CPU_CMP(&oldset, &newset) == 0 && oldvec == newvec)) {
- return;
- }
-
- /*
- * Since the write to the redirection table has already occurred, a
- * scan of level-triggered entries referencing the old vector will find
- * only entries which are now currently valid.
- */
- vioapic_tmr_active(vioapic, oldvec, &active);
-
- while (!CPU_EMPTY(&allset)) {
- struct vlapic *vlapic;
- u_int i;
-
- i = CPU_FFS(&allset) - 1;
- CPU_CLR(i, &allset);
-
- if (oldvec == newvec &&
- CPU_ISSET(i, &oldset) && CPU_ISSET(i, &newset)) {
- continue;
- }
-
- if (i != vcpuid) {
- vcpu_block_run(vm, i);
- }
-
- vlapic = vm_lapic(vm, i);
- if (CPU_ISSET(i, &oldset)) {
- /*
- * Perform the deassertion if no other level-triggered
- * IOAPIC entries target this vCPU with the old vector
- *
- * Note: Sharing of vectors like that should be
- * extremely rare in modern operating systems and was
- * previously unsupported by the bhyve vIOAPIC.
- */
- if (!CPU_ISSET(i, &active)) {
- vlapic_tmr_set(vlapic, oldvec, false);
- }
- }
- if (CPU_ISSET(i, &newset)) {
- vlapic_tmr_set(vlapic, newvec, true);
- }
-
- if (i != vcpuid) {
- vcpu_unblock_run(vm, i);
- }
- }
-}
-
static uint32_t
vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr)
{
@@ -411,7 +276,6 @@ static void
vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
{
uint64_t data64, mask64;
- uint64_t last, changed;
int regnum, pin, lshift;
regnum = addr & 0xff;
@@ -436,8 +300,6 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
else
lshift = 0;
- last = vioapic->rtbl[pin].reg;
-
data64 = (uint64_t)data << lshift;
mask64 = (uint64_t)0xffffffff << lshift;
vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS;
@@ -447,19 +309,6 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
pin, vioapic->rtbl[pin].reg);
/*
- * If any fields in the redirection table entry (except mask
- * or polarity) have changed then update the trigger-mode
- * registers on all the vlapics.
- */
- changed = last ^ vioapic->rtbl[pin].reg;
- if (changed & ~(IOART_INTMASK | IOART_INTPOL)) {
- VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate "
- "vlapic trigger-mode register", pin);
- vioapic_update_tmrs(vioapic, vcpuid, last,
- vioapic->rtbl[pin].reg);
- }
-
- /*
* Generate an interrupt if the following conditions are met:
* - pin is not masked
* - previous interrupt has been EOIed
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index 038c17ca78..8af77a387b 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -70,7 +70,13 @@ __FBSDID("$FreeBSD$");
#include "vlapic_priv.h"
#include "vioapic.h"
-#define PRIO(x) ((x) >> 4)
+
+/*
+ * The 4 high bits of a given interrupt vector represent its priority. The same
+ * is true for the contents of the TPR when it is used to calculate the ultimate
+ * PPR of an APIC - the 4 high bits hold the priority.
+ */
+#define PRIO(x) ((x) & 0xf0)
#define VLAPIC_VERSION (16)
@@ -94,7 +100,6 @@ __FBSDID("$FreeBSD$");
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
-static void vlapic_tmr_reset(struct vlapic *);
#ifdef __ISRVEC_DEBUG
static void vlapic_isrstk_accept(struct vlapic *, int);
@@ -289,52 +294,60 @@ vlapic_esr_write_handler(struct vlapic *vlapic)
vlapic->esr_pending = 0;
}
-int
+vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
struct LAPIC *lapic;
- uint32_t *irrptr, *tmrptr, mask;
+ uint32_t *irrptr, *tmrptr, mask, tmr;
int idx;
KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
lapic = vlapic->apic_page;
if (!(lapic->svr & APIC_SVR_ENABLE)) {
- VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
- "interrupt %d", vector);
- return (0);
+ /* ignore interrupt on software-disabled APIC */
+ return (VCPU_NOTIFY_NONE);
}
if (vector < 16) {
vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
false);
- VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
- vector);
- return (1);
+
+ /*
+ * If the error LVT is configured to interrupt the vCPU, it will
+ * have delivered a notification through that mechanism.
+ */
+ return (VCPU_NOTIFY_NONE);
}
- if (vlapic->ops.set_intr_ready)
+ if (vlapic->ops.set_intr_ready) {
return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
+ }
idx = (vector / 32) * 4;
mask = 1 << (vector % 32);
-
+ tmrptr = &lapic->tmr0;
irrptr = &lapic->irr0;
- atomic_set_int(&irrptr[idx], mask);
/*
- * Verify that the trigger-mode of the interrupt matches with
- * the vlapic TMR registers.
+ * Update TMR for requested vector, if necessary.
+ * This must be done prior to asserting the bit in IRR so that the
+ * proper TMR state is always visible before the to-be-queued interrupt
+ * can be injected.
*/
- tmrptr = &lapic->tmr0;
- if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
- VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
- "interrupt is %s-triggered", idx / 4, tmrptr[idx],
- level ? "level" : "edge");
+ tmr = atomic_load_acq_32(&tmrptr[idx]);
+ if ((tmr & mask) != (level ? mask : 0)) {
+ if (level) {
+ atomic_set_int(&tmrptr[idx], mask);
+ } else {
+ atomic_clear_int(&tmrptr[idx], mask);
+ }
}
- VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
- return (1);
+ /* Now set the bit in IRR */
+ atomic_set_int(&irrptr[idx], mask);
+
+ return (VCPU_NOTIFY_EXIT);
}
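A minimal sketch of the ordering rule described in the comment above, written with C11 atomics instead of the machine/atomic.h routines the kernel uses; the point is only that the TMR bit settles before the IRR bit becomes visible:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static void
queue_vector(_Atomic uint32_t *tmr, _Atomic uint32_t *irr, int vector,
    bool level)
{
	const uint32_t mask = 1u << (vector % 32);

	/* Publish the trigger mode first... */
	if (level)
		atomic_fetch_or(tmr, mask);
	else
		atomic_fetch_and(tmr, ~mask);
	/* ...and only then raise the bit in IRR. */
	atomic_fetch_or(irr, mask);
}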
static __inline uint32_t *
@@ -472,6 +485,7 @@ static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
uint32_t mode, reg, vec;
+ vcpu_notify_t notify;
reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);
@@ -487,8 +501,8 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
lvt == APIC_LVT_ERROR);
return (0);
}
- if (vlapic_set_intr_ready(vlapic, vec, false))
- vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
+ notify = vlapic_set_intr_ready(vlapic, vec, false);
+ vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
break;
case APIC_LVT_DM_NMI:
vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
@@ -532,8 +546,8 @@ vlapic_active_isr(struct vlapic *vlapic)
}
/*
- * Algorithm adopted from section "Interrupt, Task and Processor Priority"
- * in Intel Architecture Manual Vol 3a.
+ * After events which might arbitrarily change the value of PPR, such as a TPR
+ * write or an EOI, calculate that new PPR value and store it in the APIC page.
*/
static void
vlapic_update_ppr(struct vlapic *vlapic)
@@ -543,19 +557,44 @@ vlapic_update_ppr(struct vlapic *vlapic)
isrvec = vlapic_active_isr(vlapic);
tpr = vlapic->apic_page->tpr;
-#ifdef __ISRVEC_DEBUG
- vlapic_isrstk_verify(vlapic);
-#endif
-
- if (PRIO(tpr) >= PRIO(isrvec))
+ /*
+ * Algorithm adopted from section "Interrupt, Task and Processor
+ * Priority" in Intel Architecture Manual Vol 3a.
+ */
+ if (PRIO(tpr) >= PRIO(isrvec)) {
ppr = tpr;
- else
- ppr = isrvec & 0xf0;
+ } else {
+ ppr = PRIO(isrvec);
+ }
vlapic->apic_page->ppr = ppr;
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
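A worked sketch of that rule with the reworked PRIO() macro, which now keeps the priority class in the high nibble rather than shifting it down, so the result can be stored into the PPR as-is:

#include <stdint.h>

#define PRIO(x)	((x) & 0xf0)

static uint8_t
calc_ppr(uint8_t tpr, uint8_t isrvec)
{
	/* e.g. TPR 0x35 vs. in-service vector 0x41: 0x30 < 0x40, so PPR is 0x40 */
	return (PRIO(tpr) >= PRIO(isrvec) ? tpr : PRIO(isrvec));
}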
+/*
+ * When a vector is asserted in ISR as in-service, the PPR must be raised to the
+ * priority of that vector, as the vCPU would have been at a lower priority in
+ * order for the vector to be accepted.
+ */
+static void
+vlapic_raise_ppr(struct vlapic *vlapic, int vec)
+{
+ struct LAPIC *lapic = vlapic->apic_page;
+ int ppr;
+
+ ppr = PRIO(vec);
+
+#ifdef __ISRVEC_DEBUG
+ KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
+ KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
+ KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
+ KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
+#endif /* __ISRVEC_DEBUG */
+
+ lapic->ppr = ppr;
+	VLAPIC_CTR1(vlapic, "vlapic_raise_ppr 0x%02x", ppr);
+}
+
void
vlapic_sync_tpr(struct vlapic *vlapic)
{
@@ -1087,10 +1126,9 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
int idx, i, bitpos, vector;
uint32_t *irrptr, val;
- vlapic_update_ppr(vlapic);
-
- if (vlapic->ops.pending_intr)
- return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
+ if (vlapic->ops.sync_state) {
+ (*vlapic->ops.sync_state)(vlapic);
+ }
irrptr = &lapic->irr0;
@@ -1119,6 +1157,8 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
uint32_t *irrptr, *isrptr;
int idx;
+ KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));
+
if (vlapic->ops.intr_accepted)
return ((*vlapic->ops.intr_accepted)(vlapic, vector));
@@ -1136,6 +1176,13 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
isrptr[idx] |= 1 << (vector % 32);
VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
+ /*
+ * The only way a fresh vector could be accepted into ISR is if it was
+ * of a higher priority than the current PPR. With that vector now
+ * in-service, the PPR must be raised.
+ */
+ vlapic_raise_ppr(vlapic, vector);
+
#ifdef __ISRVEC_DEBUG
vlapic_isrstk_accept(vlapic, vector);
#endif
@@ -1425,7 +1472,6 @@ vlapic_reset(struct vlapic *vlapic)
lapic->dfr = 0xffffffff;
lapic->svr = APIC_SVR_VECTOR;
vlapic_mask_lvts(vlapic);
- vlapic_tmr_reset(vlapic);
lapic->dcr_timer = 0;
vlapic_dcr_write_handler(vlapic);
@@ -1592,82 +1638,6 @@ vlapic_enabled(struct vlapic *vlapic)
return (false);
}
-static void
-vlapic_tmr_reset(struct vlapic *vlapic)
-{
- struct LAPIC *lapic;
-
- lapic = vlapic->apic_page;
- lapic->tmr0 = lapic->tmr1 = lapic->tmr2 = lapic->tmr3 = 0;
- lapic->tmr4 = lapic->tmr5 = lapic->tmr6 = lapic->tmr7 = 0;
- vlapic->tmr_pending = 1;
-}
-
-/*
- * Synchronize TMR designations into the LAPIC state.
- * The vCPU must be in the VCPU_RUNNING state.
- */
-void
-vlapic_tmr_update(struct vlapic *vlapic)
-{
- struct LAPIC *lapic;
- uint32_t *tmrptr;
- uint32_t result[VLAPIC_TMR_CNT];
- u_int i, tmr_idx;
-
- if (vlapic->tmr_pending == 0) {
- return;
- }
-
- lapic = vlapic->apic_page;
- tmrptr = &lapic->tmr0;
-
- VLAPIC_CTR0(vlapic, "synchronizing TMR");
- for (i = 0; i < VLAPIC_TMR_CNT; i++) {
- tmr_idx = i * 4;
-
- tmrptr[tmr_idx] &= ~vlapic->tmr_vec_deassert[i];
- tmrptr[tmr_idx] |= vlapic->tmr_vec_assert[i];
- vlapic->tmr_vec_deassert[i] = 0;
- vlapic->tmr_vec_assert[i] = 0;
- result[i] = tmrptr[tmr_idx];
- }
- vlapic->tmr_pending = 0;
-
- if (vlapic->ops.set_tmr != NULL) {
- (*vlapic->ops.set_tmr)(vlapic, result);
- }
-}
-
-/*
- * Designate the TMR state for a given interrupt vector.
- * The caller must hold the vIOAPIC lock and prevent the vCPU corresponding to
- * this vLAPIC instance from being-in or entering the VCPU_RUNNING state.
- */
-void
-vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active)
-{
- const uint32_t idx = vector / 32;
- const uint32_t mask = 1 << (vector % 32);
-
- VLAPIC_CTR2(vlapic, "TMR for vector %u %sasserted", vector,
- active ? "" : "de");
- if (active) {
- vlapic->tmr_vec_assert[idx] |= mask;
- vlapic->tmr_vec_deassert[idx] &= ~mask;
- } else {
- vlapic->tmr_vec_deassert[idx] |= mask;
- vlapic->tmr_vec_assert[idx] &= ~mask;
- }
-
- /*
- * Track the number of TMR changes between calls to vlapic_tmr_update.
- * While a simple boolean would suffice, this count may be useful when
- * tracing or debugging, and is cheap to calculate.
- */
- vlapic->tmr_pending = MIN(UINT32_MAX - 1, vlapic->tmr_pending) + 1;
-}
-
#ifndef __FreeBSD__
void
vlapic_localize_resources(struct vlapic *vlapic)
@@ -1685,6 +1655,7 @@ vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
vlapic->isrvec_stk_top);
}
vlapic->isrvec_stk_top--;
+ vlapic_isrstk_verify(vlapic);
}
static void
@@ -1699,6 +1670,7 @@ vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
panic("isrvec_stk_top overflow %d", stk_top);
vlapic->isrvec_stk[stk_top] = vector;
+ vlapic_isrstk_verify(vlapic);
}
static void
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
index 746699393f..f34cf1ec4b 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
@@ -63,10 +63,8 @@ int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr);
*/
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
-/*
- * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise.
- */
-int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
+vcpu_notify_t vlapic_set_intr_ready(struct vlapic *vlapic, int vector,
+ bool level);
/*
* Post an interrupt to the vcpu running on 'hostcpu'. This will use a
@@ -91,9 +89,6 @@ void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
bool lowprio, bool x2apic_dest);
-void vlapic_tmr_update(struct vlapic *vlapic);
-void vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active);
-
void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
uint64_t vlapic_get_cr8(struct vlapic *vlapic);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 8a0d594de3..1329ab5b36 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -159,11 +159,11 @@ enum boot_state {
struct vlapic;
struct vlapic_ops {
- int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level);
- int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
+ vcpu_notify_t (*set_intr_ready)(struct vlapic *vlapic, int vector,
+ bool level);
+ void (*sync_state)(struct vlapic *vlapic);
void (*intr_accepted)(struct vlapic *vlapic, int vector);
void (*post_intr)(struct vlapic *vlapic, int hostcpu);
- void (*set_tmr)(struct vlapic *vlapic, const uint32_t *result);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
@@ -174,7 +174,6 @@ struct vlapic {
struct vlapic_ops ops;
uint32_t esr_pending;
- uint32_t tmr_pending;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
@@ -194,19 +193,6 @@ struct vlapic {
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
- /*
- * Store intended modifications to the trigger-mode register state.
- * Along with the tmr_pending counter above, these are protected by the
- * vIOAPIC lock and can only be modified under specific conditions:
- *
- * 1. When holding the vIOAPIC lock, and the vCPU to which the vLAPIC
- * belongs is prevented from entering the VCPU_RUNNING state.
- * 2. When the owning vCPU is in the VCPU_RUNNING state, and is
- * applying the TMR modifications prior to interrupt injection.
- */
- uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
- uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
-
#ifdef __ISRVEC_DEBUG
/*
* The 'isrvec_stk' is a stack of vectors injected by the local APIC.
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index acdabf556f..b566e503e0 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -280,8 +280,15 @@ vcpu_should_yield(struct vm *vm, int vcpu)
}
#endif /* _SYS_THREAD_H */
+typedef enum vcpu_notify {
+ VCPU_NOTIFY_NONE,
+ VCPU_NOTIFY_APIC, /* Posted intr notification (if possible) */
+ VCPU_NOTIFY_EXIT, /* IPI to cause VM exit */
+} vcpu_notify_t;
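A hedged usage sketch of the new type, assuming the vmm interfaces above and a hypothetical deliver_fixed_intr() wrapper: callers pass the value returned by vlapic_set_intr_ready() straight to vcpu_notify_event_type(), which treats VCPU_NOTIFY_NONE as a no-op (the vmm_lapic.c hunk later in this diff does exactly this).

static void
deliver_fixed_intr(struct vm *vm, int vcpuid, int vector, bool level)
{
	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
	vcpu_notify_t notify;

	notify = vlapic_set_intr_ready(vlapic, vector, level);
	vcpu_notify_event_type(vm, vcpuid, notify);	/* NONE is a no-op */
}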
+
void *vcpu_stats(struct vm *vm, int vcpu);
-void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+void vcpu_notify_event(struct vm *vm, int vcpuid);
+void vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t);
struct vmspace *vm_get_vmspace(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
@@ -374,6 +381,25 @@ void vm_inject_ac(struct vm *vm, int vcpuid, int errcode);
void vm_inject_ss(struct vm *vm, int vcpuid, int errcode);
void vm_inject_pf(struct vm *vm, int vcpuid, int errcode, uint64_t cr2);
+/*
+ * Both SVM and VMX have complex logic for injecting events such as exceptions
+ * or interrupts into the guest. Within those two backends, the progress of
+ * event injection is tracked by event_inject_state, hopefully making it easier
+ * to reason about.
+ */
+enum event_inject_state {
+ EIS_CAN_INJECT = 0, /* exception/interrupt can be injected */
+ EIS_EV_EXISTING = 1, /* blocked by existing event */
+ EIS_EV_INJECTED = 2, /* blocked by injected event */
+ EIS_GI_BLOCK = 3, /* blocked by guest interruptability */
+
+ /*
+ * Flag to request an immediate exit from VM context after event
+ * injection in order to perform more processing
+ */
+ EIS_REQ_EXIT = (1 << 15),
+};
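A short sketch of the intended flag-on-enum usage, with hypothetical helpers that are not part of this patch (the real consumers are the SVM and VMX backends); it relies only on the enum defined above:

#include <stdbool.h>

static int
eis_request_exit(enum event_inject_state state)
{
	return (state | EIS_REQ_EXIT);
}

static bool
eis_can_inject(int state)
{
	return ((state & ~EIS_REQ_EXIT) == EIS_CAN_INJECT);
}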
+
#ifndef __FreeBSD__
void vmm_sol_glue_init(void);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 1821a96fd7..3cd89f9fe6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -297,7 +297,7 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
-static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
#ifndef __FreeBSD__
static void vm_clear_memseg(struct vm *, int);
@@ -1338,7 +1338,7 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
if (from_idle) {
while (vcpu->state != VCPU_IDLE) {
vcpu->reqidle = 1;
- vcpu_notify_event_locked(vcpu, false);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
"idle requested", vcpu_state2str(vcpu->state));
#ifdef __FreeBSD__
@@ -1839,7 +1839,7 @@ vm_handle_suspend(struct vm *vm, int vcpuid)
*/
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->suspended_cpus)) {
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
}
@@ -1909,7 +1909,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how)
*/
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
return (0);
@@ -2620,6 +2620,14 @@ vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
return (EINVAL);
/*
+ * NMIs (which bear an exception vector of 2) are to be injected via
+ * their own specialized path using vm_inject_nmi().
+ */
+ if (vector == 2) {
+ return (EINVAL);
+ }
+
+ /*
* A double fault exception should never be injected directly into
* the guest. It is a derived exception that results from specific
* combinations of nested faults.
@@ -2728,7 +2736,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
vcpu = &vm->vcpu[vcpuid];
vcpu->nmi_pending = 1;
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
return (0);
}
@@ -2775,7 +2783,7 @@ vm_inject_extint(struct vm *vm, int vcpuid)
vcpu = &vm->vcpu[vcpuid];
vcpu->extint_pending = 1;
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
return (0);
}
@@ -2956,7 +2964,7 @@ vcpu_block_run(struct vm *vm, int vcpuid)
vcpu_lock(vcpu);
vcpu->runblock++;
if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
- vcpu_notify_event_locked(vcpu, false);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
}
while (vcpu->state == VCPU_RUNNING) {
#ifdef __FreeBSD__
@@ -3026,14 +3034,14 @@ vm_suspend_cpu(struct vm *vm, int vcpuid)
vm->debug_cpus = vm->active_cpus;
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
} else {
if (!CPU_ISSET(vcpuid, &vm->active_cpus))
return (EINVAL);
CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
}
return (0);
}
@@ -3126,15 +3134,17 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
* to the host_cpu to cause the vcpu to trap into the hypervisor.
*/
static void
-vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
+vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t ntype)
{
int hostcpu;
+	ASSERT(ntype == VCPU_NOTIFY_APIC || ntype == VCPU_NOTIFY_EXIT);
+
hostcpu = vcpu->hostcpu;
if (vcpu->state == VCPU_RUNNING) {
KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
if (hostcpu != curcpu) {
- if (lapic_intr) {
+ if (ntype == VCPU_NOTIFY_APIC) {
vlapic_post_intr(vcpu->vlapic, hostcpu,
vmm_ipinum);
} else {
@@ -3162,12 +3172,26 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
}
void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+vcpu_notify_event(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
- vcpu_notify_event_locked(vcpu, lapic_intr);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+}
+
+void
+vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t ntype)
+{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ if (ntype == VCPU_NOTIFY_NONE) {
+ return;
+ }
+
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu, ntype);
vcpu_unlock(vcpu);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
index f8d8970807..3de67f012d 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
@@ -67,6 +67,7 @@ int
lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
{
struct vlapic *vlapic;
+ vcpu_notify_t notify;
if (cpu < 0 || cpu >= vm_get_maxcpus(vm))
return (EINVAL);
@@ -79,8 +80,8 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
- if (vlapic_set_intr_ready(vlapic, vector, level))
- vcpu_notify_event(vm, cpu, true);
+ notify = vlapic_set_intr_ready(vlapic, vector, level);
+ vcpu_notify_event_type(vm, cpu, notify);
return (0);
}
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 2322919d29..c6859a3c00 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -353,7 +353,6 @@ struct vm_exit {
} spinup_ap;
struct {
uint64_t rflags;
- uint64_t intr_status;
} hlt;
struct {
int vector;