Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/boot/Makefile.version                 3
-rw-r--r--  usr/src/boot/sys/boot/common/gfx_fb.c         5
-rw-r--r--  usr/src/boot/sys/boot/common/gfx_fb.h         6
-rw-r--r--  usr/src/common/ficl/emu/gfx_fb.h              6
-rw-r--r--  usr/src/common/ficl/loader.c                  7
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm.c          504
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/vmcb.h          12
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c        720
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.h          2
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vioapic.c       151
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.c        200
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic.h          9
-rw-r--r--  usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h    20
-rw-r--r--  usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h    28
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm.c               50
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_lapic.c          5
-rw-r--r--  usr/src/uts/i86pc/sys/vmm.h                   1
17 files changed, 809 insertions(+), 920 deletions(-)
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version
index baade5b6c4..1d116dda12 100644
--- a/usr/src/boot/Makefile.version
+++ b/usr/src/boot/Makefile.version
@@ -11,6 +11,7 @@
#
# Copyright 2016 Toomas Soome <tsoome@me.com>
+# Copyright 2020 RackTop Systems, Inc.
#
#
@@ -33,4 +34,4 @@ LOADER_VERSION = 1.1
# Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
# The version is processed from left to right, the version number can only
# be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2020.11.14.1
+BOOT_VERSION = $(LOADER_VERSION)-2020.11.25.1
diff --git a/usr/src/boot/sys/boot/common/gfx_fb.c b/usr/src/boot/sys/boot/common/gfx_fb.c
index be50d384f7..56314566d6 100644
--- a/usr/src/boot/sys/boot/common/gfx_fb.c
+++ b/usr/src/boot/sys/boot/common/gfx_fb.c
@@ -12,6 +12,7 @@
/*
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 RackTop Systems, Inc.
*/
/*
@@ -1194,10 +1195,6 @@ gfx_term_drawrect(uint32_t ux1, uint32_t uy1, uint32_t ux2, uint32_t uy2)
gfx_fb_bezier(x1, y1 - i, x2 + i, y1 - i, x2 + i, y2, width-i);
}
-#define FL_PUTIMAGE_BORDER 0x1
-#define FL_PUTIMAGE_NOSCROLL 0x2
-#define FL_PUTIMAGE_DEBUG 0x80
-
int
gfx_fb_putimage(png_t *png, uint32_t ux1, uint32_t uy1, uint32_t ux2,
uint32_t uy2, uint32_t flags)
diff --git a/usr/src/boot/sys/boot/common/gfx_fb.h b/usr/src/boot/sys/boot/common/gfx_fb.h
index e242931a44..04bbd91121 100644
--- a/usr/src/boot/sys/boot/common/gfx_fb.h
+++ b/usr/src/boot/sys/boot/common/gfx_fb.h
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Toomas Soome <tsoome@me.com>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _GFX_FB_H
@@ -144,6 +145,11 @@ void gfx_fb_line(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_bezier(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t);
void plat_cons_update_mode(int);
+
+#define FL_PUTIMAGE_BORDER 0x1
+#define FL_PUTIMAGE_NOSCROLL 0x2
+#define FL_PUTIMAGE_DEBUG 0x80
+
int gfx_fb_putimage(png_t *, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
bool gfx_parse_mode_str(char *, int *, int *, int *);
diff --git a/usr/src/common/ficl/emu/gfx_fb.h b/usr/src/common/ficl/emu/gfx_fb.h
index 4dfb386f5b..8499bff455 100644
--- a/usr/src/common/ficl/emu/gfx_fb.h
+++ b/usr/src/common/ficl/emu/gfx_fb.h
@@ -11,6 +11,7 @@
/*
 * Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright 2020 RackTop Systems, Inc.
*/
#ifndef _GFX_FB_H
@@ -59,6 +60,11 @@ void gfx_term_drawrect(uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_line(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
void gfx_fb_bezier(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t);
+
+#define FL_PUTIMAGE_BORDER 0x1
+#define FL_PUTIMAGE_NOSCROLL 0x2
+#define FL_PUTIMAGE_DEBUG 0x80
+
int gfx_fb_putimage(png_t *, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
#ifdef __cplusplus
diff --git a/usr/src/common/ficl/loader.c b/usr/src/common/ficl/loader.c
index f1a1827eb0..c41c86c7c2 100644
--- a/usr/src/common/ficl/loader.c
+++ b/usr/src/common/ficl/loader.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2000 Daniel Capo Sobral
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 RackTop Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,6 +80,7 @@ ficl_fb_putimage(ficlVm *pVM)
ficlInteger ret = FICL_FALSE;
uint32_t x1, y1, x2, y2, f;
png_t png;
+ int error;
FICL_STACK_CHECK(ficlVmGetDataStack(pVM), 7, 1);
@@ -96,7 +98,10 @@ ficl_fb_putimage(ficlVm *pVM)
(void) strncpy(name, namep, names);
name[names] = '\0';
- if (png_open(&png, name) == PNG_NO_ERROR) {
+ if ((error = png_open(&png, name)) != PNG_NO_ERROR) {
+ if (f & FL_PUTIMAGE_DEBUG)
+ printf("%s\n", png_error_string(error));
+ } else {
if (gfx_fb_putimage(&png, x1, y1, x2, y2, f) == 0)
ret = FICL_TRUE; /* success */
(void) png_close(&png);
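
The loader.c change above only reports a failed png_open() when the caller passed FL_PUTIMAGE_DEBUG, which is why the flag definitions had to move into the shared gfx_fb.h headers. Below is a minimal standalone sketch of that debug-gated error-reporting pattern; the png_open_stub()/png_error_string_stub() helpers are placeholders standing in for the real PNG routines, not the actual loader interfaces.

/* Standalone model of the debug-gated error reporting added in loader.c. */
#include <stdio.h>

#define	FL_PUTIMAGE_DEBUG	0x80
#define	PNG_NO_ERROR		0

/* Stubs standing in for the real png_open()/png_error_string(). */
static int
png_open_stub(const char *name)
{
	(void) name;
	return (1);		/* pretend the open failed */
}

static const char *
png_error_string_stub(int error)
{
	return (error == PNG_NO_ERROR ? "no error" : "file not found");
}

static void
putimage_example(const char *name, unsigned int flags)
{
	int error;

	if ((error = png_open_stub(name)) != PNG_NO_ERROR) {
		/* Only chatty when the caller asked for debug output. */
		if (flags & FL_PUTIMAGE_DEBUG)
			printf("%s\n", png_error_string_stub(error));
		return;
	}
	/* ... draw the image and png_close() here ... */
}

int
main(void)
{
	putimage_example("/boot/splash.png", FL_PUTIMAGE_DEBUG);
	return (0);
}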
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index dd9d5a55a8..8c12f4ba04 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -903,67 +903,6 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
-#ifdef KTR
-static const char *
-intrtype_to_str(int intr_type)
-{
- switch (intr_type) {
- case VMCB_EVENTINJ_TYPE_INTR:
- return ("hwintr");
- case VMCB_EVENTINJ_TYPE_NMI:
- return ("nmi");
- case VMCB_EVENTINJ_TYPE_INTn:
- return ("swintr");
- case VMCB_EVENTINJ_TYPE_EXCEPTION:
- return ("exception");
- default:
- panic("%s: unknown intr_type %d", __func__, intr_type);
- }
-}
-#endif
-
-/*
- * Inject an event to vcpu as described in section 15.20, "Event injection".
- */
-static void
-svm_eventinject(struct svm_softc *sc, int vcpu, int intr_type, int vector,
- uint32_t error, bool ec_valid)
-{
- struct vmcb_ctrl *ctrl;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
-
- KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0,
- ("%s: event already pending %lx", __func__, ctrl->eventinj));
-
- KASSERT(vector >=0 && vector <= 255, ("%s: invalid vector %d",
- __func__, vector));
-
- switch (intr_type) {
- case VMCB_EVENTINJ_TYPE_INTR:
- case VMCB_EVENTINJ_TYPE_NMI:
- case VMCB_EVENTINJ_TYPE_INTn:
- break;
- case VMCB_EVENTINJ_TYPE_EXCEPTION:
- if (vector >= 0 && vector <= 31 && vector != 2)
- break;
- /* FALLTHROUGH */
- default:
- panic("%s: invalid intr_type/vector: %d/%d", __func__,
- intr_type, vector);
- }
- ctrl->eventinj = vector | (intr_type << 8) | VMCB_EVENTINJ_VALID;
- if (ec_valid) {
- ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
- ctrl->eventinj |= (uint64_t)error << 32;
- VCPU_CTR3(sc->vm, vcpu, "Injecting %s at vector %d errcode %x",
- intrtype_to_str(intr_type), vector, error);
- } else {
- VCPU_CTR2(sc->vm, vcpu, "Injecting %s at vector %d",
- intrtype_to_str(intr_type), vector);
- }
-}
-
static void
svm_update_virqinfo(struct svm_softc *sc, int vcpu)
{
@@ -984,7 +923,7 @@ svm_update_virqinfo(struct svm_softc *sc, int vcpu)
}
static void
-svm_save_intinfo(struct svm_softc *svm_sc, int vcpu)
+svm_save_exitintinfo(struct svm_softc *svm_sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
uint64_t intinfo;
@@ -1014,12 +953,14 @@ vintr_intercept_enabled(struct svm_softc *sc, int vcpu)
VMCB_INTCPT_VINTR));
}
-static __inline void
-enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
+static void
+svm_enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
+ struct vmcb_state *state;
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ state = svm_get_vmcb_state(sc, vcpu);
if ((ctrl->v_irq & V_IRQ) != 0 && ctrl->v_intr_vector == 0) {
KASSERT(ctrl->v_intr_prio & V_IGN_TPR,
@@ -1029,6 +970,17 @@ enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
return;
}
+ /*
+ * We use V_IRQ in conjunction with the VINTR intercept to trap into the
+ * hypervisor as soon as a virtual interrupt can be delivered.
+ *
+ * Since injected events are not subject to intercept checks we need to
+ * ensure that the V_IRQ is not actually going to be delivered on VM
+ * entry.
+ */
+ VERIFY((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
+ (state->rflags & PSL_I) == 0 || ctrl->intr_shadow);
+
VCPU_CTR0(sc->vm, vcpu, "Enable intr window exiting");
ctrl->v_irq |= V_IRQ;
ctrl->v_intr_prio |= V_IGN_TPR;
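
The VERIFY added above encodes the rule that V_IRQ may only be asserted as a window-exit trigger while the fake virtual interrupt cannot actually be consumed on VM entry: an event is already queued in EVENTINJ, the guest has interrupts masked, or an interrupt shadow is active. A small standalone model of that predicate follows; the struct and constants are simplified stand-ins for the VMCB state/control fields, with the EVENTINJ valid bit taken as bit 31 per the APM.

#include <stdbool.h>
#include <stdint.h>

#define	PSL_I			0x00000200ULL	/* rflags.IF */
#define	VMCB_EVENTINJ_VALID	(1ULL << 31)

/* Simplified stand-ins for the relevant VMCB state/control fields. */
struct vmcb_model {
	uint64_t eventinj;
	uint64_t rflags;
	uint64_t intr_shadow;
};

/*
 * True when it is safe to assert V_IRQ purely as a window-exit trigger:
 * the guest must not be able to consume the bogus vintr on VM entry.
 */
static bool
intr_window_arm_ok(const struct vmcb_model *v)
{
	return ((v->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
	    (v->rflags & PSL_I) == 0 ||
	    v->intr_shadow != 0);
}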
@@ -1037,8 +989,8 @@ enable_intr_window_exiting(struct svm_softc *sc, int vcpu)
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
-static __inline void
-disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
+static void
+svm_disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
@@ -1063,30 +1015,18 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
* to track when the vcpu is done handling the NMI.
*/
static int
-nmi_blocked(struct svm_softc *sc, int vcpu)
+svm_nmi_blocked(struct svm_softc *sc, int vcpu)
{
- int blocked;
-
- blocked = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
- VMCB_INTCPT_IRET);
- return (blocked);
-}
-
-static void
-enable_nmi_blocking(struct svm_softc *sc, int vcpu)
-{
-
- KASSERT(!nmi_blocked(sc, vcpu), ("vNMI already blocked"));
- VCPU_CTR0(sc->vm, vcpu, "vNMI blocking enabled");
- svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
+ return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
+ VMCB_INTCPT_IRET));
}
static void
-clear_nmi_blocking(struct svm_softc *sc, int vcpu)
+svm_clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
- KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
+ KASSERT(svm_nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared");
/*
* When the IRET intercept is cleared the vcpu will attempt to execute
@@ -1102,13 +1042,80 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu)
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
/*
- * Set 'intr_shadow' to prevent an NMI from being injected on the
- * immediate VMRUN.
+ * Set an interrupt shadow to prevent an NMI from being immediately
+ * injected on the next VMRUN.
*/
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
ctrl->intr_shadow = 1;
}
+static void
+svm_inject_event(struct svm_softc *sc, int vcpu, uint64_t intinfo)
+{
+ struct vmcb_ctrl *ctrl;
+ uint8_t vector;
+ uint32_t evtype;
+
+ ASSERT(VMCB_EXITINTINFO_VALID(intinfo));
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ vector = VMCB_EXITINTINFO_VECTOR(intinfo);
+ evtype = VMCB_EXITINTINFO_TYPE(intinfo);
+
+ switch (evtype) {
+ case VMCB_EVENTINJ_TYPE_INTR:
+ case VMCB_EVENTINJ_TYPE_NMI:
+ case VMCB_EVENTINJ_TYPE_INTn:
+ break;
+ case VMCB_EVENTINJ_TYPE_EXCEPTION:
+ VERIFY(vector <= 31);
+ /*
+ * NMIs are expected to be injected with VMCB_EVENTINJ_TYPE_NMI,
+ * rather than as an exception with the NMI vector.
+ */
+ VERIFY(vector != 2);
+ break;
+ default:
+ panic("unexpected event type %x", evtype);
+ }
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | evtype | vector;
+ if (VMCB_EXITINTINFO_EC_VALID(intinfo)) {
+ ctrl->eventinj |= VMCB_EVENTINJ_EC_VALID;
+ ctrl->eventinj |= (uint64_t)VMCB_EXITINTINFO_EC(intinfo) << 32;
+ }
+}
+
+static void
+svm_inject_nmi(struct svm_softc *sc, int vcpu)
+{
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ ASSERT(!svm_nmi_blocked(sc, vcpu));
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | VMCB_EVENTINJ_TYPE_NMI;
+ vm_nmi_clear(sc->vm, vcpu);
+
+ /*
+ * Virtual NMI blocking is now in effect.
+ *
+ * Not only does this block a subsequent NMI injection from taking
+ * place, it also configures an intercept on the IRET so we can track
+ * when the next injection can take place.
+ */
+ svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
+}
+
+static void
+svm_inject_irq(struct svm_softc *sc, int vcpu, int vector)
+{
+ struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ ASSERT(vector >= 0 && vector <= 255);
+
+ ctrl->eventinj = VMCB_EVENTINJ_VALID | vector;
+}
+
#define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL
static int
@@ -1335,7 +1342,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
vmexit->inst_length, code, info1, info2));
svm_update_virqinfo(svm_sc, vcpu);
- svm_save_intinfo(svm_sc, vcpu);
+ svm_save_exitintinfo(svm_sc, vcpu);
switch (code) {
case VMCB_EXIT_IRET:
@@ -1343,11 +1350,12 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
* Restart execution at "iret" but with the intercept cleared.
*/
vmexit->inst_length = 0;
- clear_nmi_blocking(svm_sc, vcpu);
+ svm_clear_nmi_blocking(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR: /* interrupt window exiting */
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1);
+ svm_disable_intr_window_exiting(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_INTR: /* external interrupt */
@@ -1571,51 +1579,40 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
return (handled);
}
-static void
-svm_inj_intinfo(struct svm_softc *svm_sc, int vcpu)
-{
- uint64_t intinfo;
-
- if (!vm_entry_intinfo(svm_sc->vm, vcpu, &intinfo))
- return;
-
- KASSERT(VMCB_EXITINTINFO_VALID(intinfo), ("%s: entry intinfo is not "
- "valid: %lx", __func__, intinfo));
-
- svm_eventinject(svm_sc, vcpu, VMCB_EXITINTINFO_TYPE(intinfo),
- VMCB_EXITINTINFO_VECTOR(intinfo),
- VMCB_EXITINTINFO_EC(intinfo),
- VMCB_EXITINTINFO_EC_VALID(intinfo));
- vmm_stat_incr(svm_sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
- VCPU_CTR1(svm_sc->vm, vcpu, "Injected entry intinfo: %lx", intinfo);
-}
-
/*
- * Inject event to virtual cpu.
+ * Inject exceptions, NMIs, and ExtINTs.
+ *
+ * The logic behind these is complicated and may involve mutex contention, so
+ * the injection is performed without the protection of host CPU interrupts
+ * being disabled. This means a racing notification could be "lost",
+ * necessitating a later call to svm_inject_recheck() to close that window
+ * of opportunity.
*/
-static void
-svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
+static enum event_inject_state
+svm_inject_events(struct svm_softc *sc, int vcpu)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
struct svm_vcpu *vcpustate;
- uint8_t v_tpr;
- int vector, need_intr_window;
- int extint_pending;
+ uint64_t intinfo;
+ enum event_inject_state ev_state;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
vcpustate = svm_get_vcpu(sc, vcpu);
+ ev_state = EIS_CAN_INJECT;
- need_intr_window = 0;
-
- vlapic_tmr_update(vlapic);
-
+ /* Clear any interrupt shadow if guest %rip has changed */
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
- VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %lx/%lx",
- vcpustate->nextrip, state->rip);
+ }
+
+ /*
+ * An event is already pending for injection. This can occur when the
+ * vCPU exits prior to VM entry (like for an AST).
+ */
+ if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
+ return (EIS_EV_EXISTING | EIS_REQ_EXIT);
}
/*
@@ -1627,118 +1624,79 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
* An event might also be pending because an exception was injected
* by the hypervisor (e.g. #PF during instruction emulation).
*/
- svm_inj_intinfo(sc, vcpu);
+ if (vm_entry_intinfo(sc->vm, vcpu, &intinfo)) {
+ ASSERT(VMCB_EXITINTINFO_VALID(intinfo));
+
+ svm_inject_event(sc, vcpu, intinfo);
+ vmm_stat_incr(sc->vm, vcpu, VCPU_INTINFO_INJECTED, 1);
+ ev_state = EIS_EV_INJECTED;
+ }
/* NMI event has priority over interrupts. */
- if (vm_nmi_pending(sc->vm, vcpu)) {
- if (nmi_blocked(sc, vcpu)) {
- /*
- * Can't inject another NMI if the guest has not
- * yet executed an "iret" after the last NMI.
- */
- VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due "
- "to NMI-blocking");
- } else if (ctrl->intr_shadow) {
- /*
- * Can't inject an NMI if the vcpu is in an intr_shadow.
- */
- VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due to "
- "interrupt shadow");
- need_intr_window = 1;
- goto done;
- } else if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
- /*
- * If there is already an exception/interrupt pending
- * then defer the NMI until after that.
- */
- VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to "
- "eventinj %lx", ctrl->eventinj);
+ if (vm_nmi_pending(sc->vm, vcpu) && !svm_nmi_blocked(sc, vcpu)) {
+ if (ev_state == EIS_CAN_INJECT) {
+ /* Can't inject NMI if vcpu is in an intr_shadow. */
+ if (ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
+ }
- /*
- * Use self-IPI to trigger a VM-exit as soon as
- * possible after the event injection is completed.
- *
- * This works only if the external interrupt exiting
- * is at a lower priority than the event injection.
- *
- * Although not explicitly specified in APMv2 the
- * relative priorities were verified empirically.
- */
- ipi_cpu(curcpu, IPI_AST); /* XXX vmm_ipinum? */
+ svm_inject_nmi(sc, vcpu);
+ ev_state = EIS_EV_INJECTED;
} else {
- vm_nmi_clear(sc->vm, vcpu);
+ return (ev_state | EIS_REQ_EXIT);
+ }
+ }
- /* Inject NMI, vector number is not used */
- svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_NMI,
- IDT_NMI, 0, false);
+ if (vm_extint_pending(sc->vm, vcpu)) {
+ int vector;
- /* virtual NMI blocking is now in effect */
- enable_nmi_blocking(sc, vcpu);
+ if (ev_state != EIS_CAN_INJECT) {
+ return (ev_state | EIS_REQ_EXIT);
+ }
- VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI");
+ /*
+ * If the guest has disabled interrupts or is in an interrupt
+ * shadow then we cannot inject the pending interrupt.
+ */
+ if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
}
- }
- extint_pending = vm_extint_pending(sc->vm, vcpu);
- if (!extint_pending) {
- if (!vlapic_pending_intr(vlapic, &vector))
- goto done;
- KASSERT(vector >= 16 && vector <= 255,
- ("invalid vector %d from local APIC", vector));
- } else {
/* Ask the legacy pic for a vector to inject */
vatpic_pending_intr(sc->vm, &vector);
KASSERT(vector >= 0 && vector <= 255,
("invalid vector %d from INTR", vector));
- }
- /*
- * If the guest has disabled interrupts or is in an interrupt shadow
- * then we cannot inject the pending interrupt.
- */
- if ((state->rflags & PSL_I) == 0) {
- VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %lx", vector, state->rflags);
- need_intr_window = 1;
- goto done;
- }
-
- if (ctrl->intr_shadow) {
- VCPU_CTR1(sc->vm, vcpu, "Cannot inject vector %d due to "
- "interrupt shadow", vector);
- need_intr_window = 1;
- goto done;
+ svm_inject_irq(sc, vcpu, vector);
+ vm_extint_clear(sc->vm, vcpu);
+ vatpic_intr_accepted(sc->vm, vector);
+ ev_state = EIS_EV_INJECTED;
}
- if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
- VCPU_CTR2(sc->vm, vcpu, "Cannot inject vector %d due to "
- "eventinj %lx", vector, ctrl->eventinj);
- need_intr_window = 1;
- goto done;
- }
+ return (ev_state);
+}
- svm_eventinject(sc, vcpu, VMCB_EVENTINJ_TYPE_INTR, vector, 0, false);
+/*
+ * Synchronize vLAPIC state and inject any interrupts pending on it.
+ *
+ * This is done with host CPU interrupts disabled so notification IPIs will be
+ * queued on the host APIC and recognized when entering SVM guest context.
+ */
+static enum event_inject_state
+svm_inject_vlapic(struct svm_softc *sc, int vcpu, struct vlapic *vlapic,
+ enum event_inject_state ev_state)
+{
+ struct vmcb_ctrl *ctrl;
+ struct vmcb_state *state;
+ int vector;
+ uint8_t v_tpr;
- if (!extint_pending) {
- vlapic_intr_accepted(vlapic, vector);
- } else {
- vm_extint_clear(sc->vm, vcpu);
- vatpic_intr_accepted(sc->vm, vector);
- }
+ state = svm_get_vmcb_state(sc, vcpu);
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
/*
- * Force a VM-exit as soon as the vcpu is ready to accept another
- * interrupt. This is done because the PIC might have another vector
- * that it wants to inject. Also, if the APIC has a pending interrupt
- * that was preempted by the ExtInt then it allows us to inject the
- * APIC vector as soon as possible.
- */
- need_intr_window = 1;
-done:
- /*
- * The guest can modify the TPR by writing to %CR8. In guest mode
- * the processor reflects this write to V_TPR without hypervisor
- * intervention.
+ * The guest can modify the TPR by writing to %cr8. In guest mode the
+ * CPU reflects this write to V_TPR without hypervisor intervention.
*
* The guest can also modify the TPR by writing to it via the memory
* mapped APIC page. In this case, the write will be emulated by the
@@ -1748,33 +1706,88 @@ done:
v_tpr = vlapic_get_cr8(vlapic);
KASSERT(v_tpr <= 15, ("invalid v_tpr %x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
- VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %x to %x",
- ctrl->v_tpr, v_tpr);
ctrl->v_tpr = v_tpr;
svm_set_dirty(sc, vcpu, VMCB_CACHE_TPR);
}
- if (need_intr_window) {
+ /* If an event cannot otherwise be injected, we are done for now */
+ if (ev_state != EIS_CAN_INJECT) {
+ return (ev_state);
+ }
+
+ if (!vlapic_pending_intr(vlapic, &vector)) {
+ return (EIS_CAN_INJECT);
+ }
+ KASSERT(vector >= 16 && vector <= 255,
+ ("invalid vector %d from local APIC", vector));
+
+ /*
+ * If the guest has disabled interrupts or is in an interrupt shadow
+ * then we cannot inject the pending interrupt.
+ */
+ if ((state->rflags & PSL_I) == 0 || ctrl->intr_shadow) {
+ return (EIS_GI_BLOCK);
+ }
+
+ svm_inject_irq(sc, vcpu, vector);
+ vlapic_intr_accepted(vlapic, vector);
+ return (EIS_EV_INJECTED);
+}
+
+/*
+ * Re-check for events to be injected.
+ *
+ * Once host CPU interrupts are disabled, check for the presence of any events
+ * which require injection processing. If an exit is required upon injection,
+ * or once the guest becomes interruptable, that will be configured too.
+ */
+static bool
+svm_inject_recheck(struct svm_softc *sc, int vcpu,
+ enum event_inject_state ev_state)
+{
+ struct vmcb_ctrl *ctrl;
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+ if (ev_state == EIS_CAN_INJECT) {
/*
- * We use V_IRQ in conjunction with the VINTR intercept to
- * trap into the hypervisor as soon as a virtual interrupt
- * can be delivered.
- *
- * Since injected events are not subject to intercept checks
- * we need to ensure that the V_IRQ is not actually going to
- * be delivered on VM entry. The KASSERT below enforces this.
+ * An active interrupt shadow would preclude us from injecting
+ * any events picked up during a re-check.
*/
- KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) != 0 ||
- (state->rflags & PSL_I) == 0 || ctrl->intr_shadow,
- ("Bogus intr_window_exiting: eventinj (%lx), "
- "intr_shadow (%lu), rflags (%lx)",
- ctrl->eventinj, ctrl->intr_shadow, state->rflags));
- enable_intr_window_exiting(sc, vcpu);
+ if (ctrl->intr_shadow != 0) {
+ return (false);
+ }
+
+ if (vm_nmi_pending(sc->vm, vcpu) &&
+ !svm_nmi_blocked(sc, vcpu)) {
+ /* queued NMI not blocked by NMI-window-exiting */
+ return (true);
+ }
+ if (vm_extint_pending(sc->vm, vcpu)) {
+ /* queued ExtINT not blocked by existing injection */
+ return (true);
+ }
} else {
- disable_intr_window_exiting(sc, vcpu);
+ if ((ev_state & EIS_REQ_EXIT) != 0) {
+ /*
+ * Use a self-IPI to force an immediate exit after
+ * event injection has occurred.
+ */
+ poke_cpu(CPU->cpu_id);
+ } else {
+ /*
+ * If any event is being injected, an exit immediately
+ * upon becoming interruptable again will allow pending
+ * or newly queued events to be injected in a timely
+ * manner.
+ */
+ svm_enable_intr_window_exiting(sc, vcpu);
+ }
}
+ return (false);
}
+
#ifdef __FreeBSD__
static void
check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
@@ -2039,15 +2052,15 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
state->rip = rip;
do {
-#ifndef __FreeBSD__
+ enum event_inject_state inject_state;
+
/*
- * Interrupt injection may involve mutex contention which, on
- * illumos bhyve, are blocking/non-spin. Doing so with global
- * interrupts disabled is a recipe for deadlock, so it is
- * performed here.
+ * Initial event injection is complex and may involve mutex
+ * contention, so it must be performed with global interrupts
+ * still enabled.
*/
- svm_inj_interrupts(svm_sc, vcpu, vlapic);
-#endif
+ inject_state = svm_inject_events(svm_sc, vcpu);
+ handled = 0;
/*
* Disable global interrupts to guarantee atomicity during
@@ -2058,6 +2071,13 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
*/
disable_gintr();
+ /*
+ * Synchronizing and injecting vlapic state is lock-free and is
+ * safe (and prudent) to perform with interrupts disabled.
+ */
+ inject_state = svm_inject_vlapic(svm_sc, vcpu, vlapic,
+ inject_state);
+
if (vcpu_suspended(evinfo)) {
enable_gintr();
vm_exit_suspended(vm, vcpu, state->rip);
@@ -2090,6 +2110,16 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
}
/*
+ * If subsequent activity queued events which require injection
+ * handling, take another lap to handle them.
+ */
+ if (svm_inject_recheck(svm_sc, vcpu, inject_state)) {
+ enable_gintr();
+ handled = 1;
+ continue;
+ }
+
+ /*
* #VMEXIT resumes the host with the guest LDTR, so
* save the current LDT selector so it can be restored
* after an exit. The userspace hypervisor probably
@@ -2098,10 +2128,6 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
*/
ldt_sel = sldt();
-#ifdef __FreeBSD__
- svm_inj_interrupts(svm_sc, vcpu, vlapic);
-#endif
-
/* Activate the nested pmap on 'curcpu' */
CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active);
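
The restructured svm_vmrun() loop threads an event_inject_state value from svm_inject_events() (run with host interrupts still enabled, since it may take mutexes) through svm_inject_vlapic() (run with them disabled) and finally into svm_inject_recheck(). The enum itself comes from the vmm_kernel.h change listed in the diffstat but not shown in this section; the sketch below is a plausible layout inferred only from how the values are combined above (EIS_REQ_EXIT is OR-ed onto the others, so it must be a separate flag bit), not the real header contents.

/*
 * Hypothetical layout for event_inject_state, inferred from its usage in
 * svm.c/vmx.c above; the actual definition lives in vmm_kernel.h.
 */
enum event_inject_state {
	EIS_CAN_INJECT	= 0,		/* nothing blocks further injection */
	EIS_EV_EXISTING	= 1,		/* an event was already pending */
	EIS_EV_INJECTED	= 2,		/* an event was injected this pass */
	EIS_GI_BLOCK	= 3,		/* guest interruptability blocks it */
	EIS_REQ_EXIT	= (1 << 15),	/* flag: force an immediate exit */
};

/*
 * Shape of the svm_vmrun() loop after this change (pseudocode summary of the
 * real functions above, not a runnable driver):
 *
 *	inject_state = svm_inject_events(sc, vcpu);	// mutexes OK here
 *	disable_gintr();
 *	inject_state = svm_inject_vlapic(sc, vcpu, vlapic, inject_state);
 *	if (svm_inject_recheck(sc, vcpu, inject_state)) {
 *		enable_gintr();
 *		continue;				// take another lap
 *	}
 *	// ... VMRUN ...
 */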
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 63b088253d..1c002aee7b 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -136,9 +136,9 @@ struct svm_softc;
/* Event types that can be injected */
#define VMCB_EVENTINJ_TYPE_INTR 0
-#define VMCB_EVENTINJ_TYPE_NMI 2
-#define VMCB_EVENTINJ_TYPE_EXCEPTION 3
-#define VMCB_EVENTINJ_TYPE_INTn 4
+#define VMCB_EVENTINJ_TYPE_NMI (2 << 8)
+#define VMCB_EVENTINJ_TYPE_EXCEPTION (3 << 8)
+#define VMCB_EVENTINJ_TYPE_INTn (4 << 8)
/* VMCB exit code, APM vol2 Appendix C */
#define VMCB_EXIT_MC 0x52
@@ -187,9 +187,9 @@ struct svm_softc;
* Section 15.7.2, Intercepts during IDT Interrupt Delivery.
*/
#define VMCB_EXITINTINFO_VECTOR(x) ((x) & 0xFF)
-#define VMCB_EXITINTINFO_TYPE(x) (((x) >> 8) & 0x7)
-#define VMCB_EXITINTINFO_EC_VALID(x) (((x) & BIT(11)) ? 1 : 0)
-#define VMCB_EXITINTINFO_VALID(x) (((x) & BIT(31)) ? 1 : 0)
+#define VMCB_EXITINTINFO_TYPE(x) ((x) & (0x7 << 8))
+#define VMCB_EXITINTINFO_EC_VALID(x) (((x) & BIT(11)) != 0)
+#define VMCB_EXITINTINFO_VALID(x) (((x) & BIT(31)) != 0)
#define VMCB_EXITINTINFO_EC(x) (((x) >> 32) & 0xFFFFFFFF)
/* Offset of various VMCB fields. */
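
With the event-type constants now pre-shifted into bits 10:8 and VMCB_EXITINTINFO_TYPE() returning the still-shifted field, svm_inject_event() can rebuild EVENTINJ by OR-ing the decoded pieces straight back together, with no re-shifting. A small standalone round-trip check of that encoding follows; BIT() and the example vector/error code are illustrative.

#include <assert.h>
#include <stdint.h>

#define	BIT(n)				(1UL << (n))

#define	VMCB_EVENTINJ_EC_VALID		BIT(11)
#define	VMCB_EVENTINJ_VALID		BIT(31)
#define	VMCB_EVENTINJ_TYPE_EXCEPTION	(3 << 8)

#define	VMCB_EXITINTINFO_VECTOR(x)	((x) & 0xFF)
#define	VMCB_EXITINTINFO_TYPE(x)	((x) & (0x7 << 8))
#define	VMCB_EXITINTINFO_EC_VALID(x)	(((x) & BIT(11)) != 0)
#define	VMCB_EXITINTINFO_VALID(x)	(((x) & BIT(31)) != 0)
#define	VMCB_EXITINTINFO_EC(x)		(((x) >> 32) & 0xFFFFFFFF)

int
main(void)
{
	/* #GP (vector 13) with error code 0x10, as EXITINTINFO reports it. */
	uint64_t intinfo = 13 | VMCB_EVENTINJ_TYPE_EXCEPTION |
	    VMCB_EVENTINJ_EC_VALID | VMCB_EVENTINJ_VALID |
	    ((uint64_t)0x10 << 32);
	uint64_t eventinj;

	assert(VMCB_EXITINTINFO_VALID(intinfo));

	/* Same construction svm_inject_event() performs. */
	eventinj = VMCB_EVENTINJ_VALID |
	    VMCB_EXITINTINFO_TYPE(intinfo) |
	    VMCB_EXITINTINFO_VECTOR(intinfo);
	if (VMCB_EXITINTINFO_EC_VALID(intinfo)) {
		eventinj |= VMCB_EVENTINJ_EC_VALID;
		eventinj |= VMCB_EXITINTINFO_EC(intinfo) << 32;
	}

	/* EXITINTINFO and EVENTINJ share this bit layout, so they match. */
	assert(eventinj == intinfo);
	return (0);
}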
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index a791197d17..1f670ef3b3 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -338,8 +338,10 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
-static void vmx_inject_pir(struct vlapic *vlapic);
static void vmx_apply_tsc_adjust(struct vmx *, int);
+static void vmx_apicv_sync_tmr(struct vlapic *vlapic);
+static void vmx_tpr_shadow_enter(struct vlapic *vlapic);
+static void vmx_tpr_shadow_exit(struct vlapic *vlapic);
#ifdef KTR
static const char *
@@ -1270,26 +1272,27 @@ vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting");
}
+static __inline bool
+vmx_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+ return ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0);
+}
+
static __inline void
vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu)
{
-
- if ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) == 0) {
+ if (!vmx_nmi_window_exiting(vmx, vcpu)) {
vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting");
}
}
static __inline void
vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
{
-
- KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0,
- ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls));
+ ASSERT(vmx_nmi_window_exiting(vmx, vcpu));
vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
}
/*
@@ -1319,60 +1322,46 @@ vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
#define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
-#ifndef __FreeBSD__
-static uint32_t
-vmx_inject_nmi(struct vmx *vmx, int vcpu)
-#else
static void
vmx_inject_nmi(struct vmx *vmx, int vcpu)
-#endif
{
- uint32_t gi, info;
-
- gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
- KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest "
- "interruptibility-state %x", gi));
-
- info = vmcs_read(VMCS_ENTRY_INTR_INFO);
- KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid "
- "VM-entry interruption information %x", info));
+ ASSERT0(vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & NMI_BLOCKING);
+ ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
/*
* Inject the virtual NMI. The vector must be the NMI IDT entry
* or the VMCS entry check will fail.
*/
- info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID;
- vmcs_write(VMCS_ENTRY_INTR_INFO, info);
-
- VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI");
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID);
/* Clear the request */
vm_nmi_clear(vmx->vm, vcpu);
-
-#ifndef __FreeBSD__
- return (info);
-#endif
}
-static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
- uint64_t guestrip)
+/*
+ * Inject exceptions, NMIs, and ExtINTs.
+ *
+ * The logic behind these is complicated and may involve mutex contention, so
+ * the injection is performed without the protection of host CPU interrupts
+ * being disabled. This means a racing notification could be "lost",
+ * necessitating a later call to vmx_inject_recheck() to close that window
+ * of opportunity.
+ */
+static enum event_inject_state
+vmx_inject_events(struct vmx *vmx, int vcpu, uint64_t rip)
{
- uint64_t entryinfo, rflags;
+ uint64_t entryinfo;
uint32_t gi, info;
int vector;
- boolean_t extint_pending = B_FALSE;
-
- vlapic_tmr_update(vlapic);
+ enum event_inject_state state;
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+ state = EIS_CAN_INJECT;
- if (vmx->state[vcpu].nextrip != guestrip &&
- (gi & HWINTR_BLOCKING) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
- "cleared due to rip change: %lx/%lx",
- vmx->state[vcpu].nextrip, guestrip);
+ /* Clear any interrupt blocking if the guest %rip has changed */
+ if (vmx->state[vcpu].nextrip != rip && (gi & HWINTR_BLOCKING) != 0) {
gi &= ~HWINTR_BLOCKING;
vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
}
@@ -1383,15 +1372,11 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
* such as an AST before a vm-entry delivered the injection.
*/
if ((info & VMCS_INTR_VALID) != 0) {
- goto cantinject;
+ return (EIS_EV_EXISTING | EIS_REQ_EXIT);
}
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
- KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
- "intinfo is not valid: %lx", __func__, entryinfo));
-
- KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
- "pending exception: %lx/%x", __func__, entryinfo, info));
+ ASSERT(entryinfo & VMCS_INTR_VALID);
info = entryinfo;
vector = info & 0xff;
@@ -1404,50 +1389,49 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
info |= VMCS_INTR_T_SWEXCEPTION;
}
- if (info & VMCS_INTR_DEL_ERRCODE)
+ if (info & VMCS_INTR_DEL_ERRCODE) {
vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
+ }
vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+ state = EIS_EV_INJECTED;
}
if (vm_nmi_pending(vmx->vm, vcpu)) {
- int need_nmi_exiting = 1;
-
/*
- * If there are no conditions blocking NMI injection then
- * inject it directly here otherwise enable "NMI window
- * exiting" to inject it as soon as we can.
+ * If there are no conditions blocking NMI injection then inject
+ * it directly here otherwise enable "NMI window exiting" to
+ * inject it as soon as we can.
*
- * We also check for STI_BLOCKING because some implementations
- * don't allow NMI injection in this case. If we are running
- * on a processor that doesn't have this restriction it will
- * immediately exit and the NMI will be injected in the
- * "NMI window exiting" handler.
+ * According to the Intel manual, some CPUs do not allow NMI
+ * injection when STI_BLOCKING is active. That check is
+ * enforced here, regardless of CPU capability. If running on a
+ * CPU without such a restriction it will immediately exit and
+ * the NMI will be injected in the "NMI window exiting" handler.
*/
if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
- if ((info & VMCS_INTR_VALID) == 0) {
- info = vmx_inject_nmi(vmx, vcpu);
- need_nmi_exiting = 0;
+ if (state == EIS_CAN_INJECT) {
+ vmx_inject_nmi(vmx, vcpu);
+ state = EIS_EV_INJECTED;
} else {
- VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI "
- "due to VM-entry intr info %x", info);
+ return (state | EIS_REQ_EXIT);
}
} else {
- VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to "
- "Guest Interruptibility-state %x", gi);
- }
-
- if (need_nmi_exiting) {
vmx_set_nmi_window_exiting(vmx, vcpu);
- return;
}
}
- /* Check the AT-PIC and APIC for interrupts. */
if (vm_extint_pending(vmx->vm, vcpu)) {
+ if (state != EIS_CAN_INJECT) {
+ return (state | EIS_REQ_EXIT);
+ }
+ if ((gi & HWINTR_BLOCKING) != 0 ||
+ (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
+ return (EIS_GI_BLOCK);
+ }
+
/* Ask the legacy pic for a vector to inject */
vatpic_pending_intr(vmx->vm, &vector);
- extint_pending = B_TRUE;
/*
* From the Intel SDM, Volume 3, Section "Maskable
@@ -1457,80 +1441,131 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
*/
KASSERT(vector >= 0 && vector <= 255,
("invalid vector %d from INTR", vector));
- } else if (!vmx_cap_en(vmx, VMX_CAP_APICV)) {
- /* Ask the local apic for a vector to inject */
- if (!vlapic_pending_intr(vlapic, &vector))
- return;
- /*
- * From the Intel SDM, Volume 3, Section "Maskable
- * Hardware Interrupts":
- * - maskable interrupt vectors [16,255] can be delivered
- * through the local APIC.
- */
- KASSERT(vector >= 16 && vector <= 255,
- ("invalid vector %d from local APIC", vector));
- } else {
- /* No futher injection needed */
- return;
- }
+ /* Inject the interrupt */
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
- /*
- * Verify that the guest is interruptable and the above logic has not
- * already queued an event for injection.
- */
- if ((gi & HWINTR_BLOCKING) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "Guest Interruptibility-state %x", vector, gi);
- goto cantinject;
- }
- if ((info & VMCS_INTR_VALID) != 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "VM-entry intr info %x", vector, info);
- goto cantinject;
+ vm_extint_clear(vmx->vm, vcpu);
+ vatpic_intr_accepted(vmx->vm, vector);
+ state = EIS_EV_INJECTED;
}
- rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- if ((rflags & PSL_I) == 0) {
- VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
- "rflags %lx", vector, rflags);
- goto cantinject;
+
+ return (state);
+}
+
+/*
+ * Inject any interrupts pending on the vLAPIC.
+ *
+ * This is done with host CPU interrupts disabled so notification IPIs, either
+ * from the standard vCPU notification or APICv posted interrupts, will be
+ * queued on the host APIC and recognized when entering VMX context.
+ */
+static enum event_inject_state
+vmx_inject_vlapic(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
+{
+ int vector;
+
+ if (!vlapic_pending_intr(vlapic, &vector)) {
+ return (EIS_CAN_INJECT);
}
- /* Inject the interrupt */
- info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID;
- info |= vector;
- vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+ /*
+ * From the Intel SDM, Volume 3, Section "Maskable
+ * Hardware Interrupts":
+ * - maskable interrupt vectors [16,255] can be delivered
+ * through the local APIC.
+ */
+ KASSERT(vector >= 16 && vector <= 255,
+ ("invalid vector %d from local APIC", vector));
- if (extint_pending) {
- vm_extint_clear(vmx->vm, vcpu);
- vatpic_intr_accepted(vmx->vm, vector);
+ if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
+ uint16_t status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
+ uint16_t status_new = (status_old & 0xff00) | vector;
/*
- * After we accepted the current ExtINT the PIC may
- * have posted another one. If that is the case, set
- * the Interrupt Window Exiting execution control so
- * we can inject that one too.
- *
- * Also, interrupt window exiting allows us to inject any
- * pending APIC vector that was preempted by the ExtINT
- * as soon as possible. This applies both for the software
- * emulated vlapic and the hardware assisted virtual APIC.
+ * The APICv state will have been synced into the vLAPIC
+ * as part of vlapic_pending_intr(). Prepare the VMCS
+ * for the to-be-injected pending interrupt.
*/
- vmx_set_int_window_exiting(vmx, vcpu);
- } else {
- /* Update the Local APIC ISR */
- vlapic_intr_accepted(vlapic, vector);
+ if (status_new > status_old) {
+ vmcs_write(VMCS_GUEST_INTR_STATUS, status_new);
+ VCPU_CTR2(vlapic->vm, vlapic->vcpuid,
+ "vmx_inject_interrupts: guest_intr_status "
+ "changed from 0x%04x to 0x%04x",
+ status_old, status_new);
+ }
+
+ /*
+ * Ensure VMCS state regarding EOI traps is kept in sync
+ * with the TMRs in the vlapic.
+ */
+ vmx_apicv_sync_tmr(vlapic);
+
+ /*
+ * The rest of the injection process for injecting the
+ * interrupt(s) is handled by APICv. It does not preclude other
+ * event injection from occurring.
+ */
+ return (EIS_CAN_INJECT);
}
- VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
- return;
+ ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
-cantinject:
- /*
- * Set the Interrupt Window Exiting execution control so we can inject
- * the interrupt as soon as blocking condition goes away.
- */
- vmx_set_int_window_exiting(vmx, vcpu);
+ /* Does guest interruptability block injection? */
+ if ((vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & HWINTR_BLOCKING) != 0 ||
+ (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
+ return (EIS_GI_BLOCK);
+ }
+
+ /* Inject the interrupt */
+ vmcs_write(VMCS_ENTRY_INTR_INFO,
+ VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
+
+ /* Update the Local APIC ISR */
+ vlapic_intr_accepted(vlapic, vector);
+
+ return (EIS_EV_INJECTED);
+}
+
+/*
+ * Re-check for events to be injected.
+ *
+ * Once host CPU interrupts are disabled, check for the presence of any events
+ * which require injection processing. If an exit is required upon injection,
+ * or once the guest becomes interruptable, that will be configured too.
+ */
+static bool
+vmx_inject_recheck(struct vmx *vmx, int vcpu, enum event_inject_state state)
+{
+ if (state == EIS_CAN_INJECT) {
+ if (vm_nmi_pending(vmx->vm, vcpu) &&
+ !vmx_nmi_window_exiting(vmx, vcpu)) {
+ /* queued NMI not blocked by NMI-window-exiting */
+ return (true);
+ }
+ if (vm_extint_pending(vmx->vm, vcpu)) {
+ /* queued ExtINT not blocked by existing injection */
+ return (true);
+ }
+ } else {
+ if ((state & EIS_REQ_EXIT) != 0) {
+ /*
+ * Use a self-IPI to force an immediate exit after
+ * event injection has occurred.
+ */
+ poke_cpu(CPU->cpu_id);
+ } else {
+ /*
+ * If any event is being injected, an exit immediately
+ * upon becoming interruptable again will allow pending
+ * or newly queued events to be injected in a timely
+ * manner.
+ */
+ vmx_set_int_window_exiting(vmx, vcpu);
+ }
+ }
+ return (false);
}
/*
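
On the VMX side, the "can this be injected right now?" question that SVM answers from intr_shadow and rflags is answered from the guest interruptibility-state field plus rflags.IF. The standalone model below mirrors the EIS_GI_BLOCK checks in vmx_inject_events() and vmx_inject_vlapic(); the struct is a stand-in for state normally read via vmcs_read(), and the STI/MOV-SS bit positions are taken from the Intel SDM rather than from this diff.

#include <stdbool.h>
#include <stdint.h>

#define	PSL_I					0x00000200ULL
#define	VMCS_INTERRUPTIBILITY_STI_BLOCKING	(1u << 0)
#define	VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING	(1u << 1)
#define	HWINTR_BLOCKING		(VMCS_INTERRUPTIBILITY_STI_BLOCKING | \
				VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)

/* Stand-in for the guest state normally read via vmcs_read(). */
struct guest_state_model {
	uint32_t interruptibility;
	uint64_t rflags;
};

/*
 * A maskable interrupt may only be queued when no interrupt shadow is
 * active and the guest has rflags.IF set; otherwise EIS_GI_BLOCK applies.
 */
static bool
hwintr_injectable(const struct guest_state_model *gs)
{
	return ((gs->interruptibility & HWINTR_BLOCKING) == 0 &&
	    (gs->rflags & PSL_I) != 0);
}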
@@ -2437,12 +2472,6 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vmexit->u.hlt.intr_status =
- vmcs_read(VMCS_GUEST_INTR_STATUS);
- } else {
- vmexit->u.hlt.intr_status = 0;
- }
break;
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -2871,6 +2900,7 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
struct region_descriptor gdtr, idtr;
uint16_t ldt_sel;
#endif
+ bool tpr_shadow_active;
vmx = arg;
vm = vmx->vm;
@@ -2879,6 +2909,9 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
vlapic = vm_lapic(vm, vcpu);
vmexit = vm_exitinfo(vm, vcpu);
launched = 0;
+ tpr_shadow_active = vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) &&
+ !vmx_cap_en(vmx, VMX_CAP_APICV) &&
+ (vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0;
KASSERT(vmxctx->pmap == pmap,
("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));
@@ -2905,10 +2938,19 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
+ enum event_inject_state inject_state;
+
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
"%lx/%lx", __func__, vmcs_guest_rip(), rip));
handled = UNHANDLED;
+
+ /*
+ * Perform initial event/exception/interrupt injection before
+ * host CPU interrupts are disabled.
+ */
+ inject_state = vmx_inject_events(vmx, vcpu, rip);
+
/*
* Interrupts are disabled from this point on until the
* guest starts executing. This is done for the following
@@ -2919,27 +2961,28 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
* will cause a VM exit due to external interrupt as soon as
* the guest state is loaded.
*
- * A posted interrupt after 'vmx_inject_interrupts()' will
- * not be "lost" because it will be held pending in the host
- * APIC because interrupts are disabled. The pending interrupt
- * will be recognized as soon as the guest state is loaded.
+ * A posted interrupt after vmx_inject_vlapic() will not be
+ * "lost" because it will be held pending in the host APIC
+ * because interrupts are disabled. The pending interrupt will
+ * be recognized as soon as the guest state is loaded.
*
* The same reasoning applies to the IPI generated by
* pmap_invalidate_ept().
- *
- * The bulk of guest interrupt injection is done without
- * interrupts disabled on the host CPU. This is necessary
- * since contended mutexes might force the thread to sleep.
*/
- vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
disable_intr();
- if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vmx_inject_pir(vlapic);
+
+ /*
+ * If not precluded by existing events, inject any interrupt
+ * pending on the vLAPIC. As a lock-less operation, it is safe
+ * (and prudent) to perform with host CPU interrupts disabled.
+ */
+ if (inject_state == EIS_CAN_INJECT) {
+ inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic);
}
/*
* Check for vcpu suspension after injecting events because
- * vmx_inject_interrupts() can suspend the vcpu due to a
+ * vmx_inject_events() can suspend the vcpu due to a
* triple fault.
*/
if (vcpu_suspended(evinfo)) {
@@ -2974,6 +3017,16 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
break;
}
+ /*
+ * If subsequent activity queued events which require injection
+ * handling, take another lap to handle them.
+ */
+ if (vmx_inject_recheck(vmx, vcpu, inject_state)) {
+ enable_intr();
+ handled = HANDLED;
+ continue;
+ }
+
#ifndef __FreeBSD__
if ((rc = smt_acquire()) != 1) {
enable_intr();
@@ -3032,17 +3085,8 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
ldt_sel = sldt();
#endif
- /*
- * If TPR Shadowing is enabled, the TPR Threshold must be
- * updated right before entering the guest.
- */
- if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) &&
- !vmx_cap_en(vmx, VMX_CAP_APICV)) {
- if ((vmx->cap[vcpu].proc_ctls &
- PROCBASED_USE_TPR_SHADOW) != 0) {
- vmcs_write(VMCS_TPR_THRESHOLD,
- vlapic_get_cr8(vlapic));
- }
+ if (tpr_shadow_active) {
+ vmx_tpr_shadow_enter(vlapic);
}
vmx_run_trace(vmx, vcpu);
@@ -3059,6 +3103,10 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
lldt(ldt_sel);
#endif
+ if (tpr_shadow_active) {
+ vmx_tpr_shadow_exit(vlapic);
+ }
+
/* Collect some information for VM exit processing */
vmexit->rip = rip = vmcs_guest_rip();
vmexit->inst_length = vmexit_instruction_length();
@@ -3524,47 +3572,73 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
struct vlapic_vtx {
struct vlapic vlapic;
+
+ /* Align to the nearest cacheline */
+ uint8_t _pad[64 - (sizeof (struct vlapic) % 64)];
+
+ /* TMR handling state for posted interrupts */
+ uint32_t tmr_active[8];
+ uint32_t pending_level[8];
+ uint32_t pending_edge[8];
+
struct pir_desc *pir_desc;
struct vmx *vmx;
u_int pending_prio;
+ boolean_t tmr_sync;
};
-#define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4))
+CTASSERT((offsetof (struct vlapic_vtx, tmr_active) & 63) == 0);
-#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \
-do { \
- VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \
- level ? "level" : "edge", vector); \
- VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \
- VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \
- VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \
- VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \
- VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
-} while (0)
+#define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4))
-/*
- * vlapic->ops handlers that utilize the APICv hardware assist described in
- * Chapter 29 of the Intel SDM.
- */
-static int
-vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
+static vcpu_notify_t
+vmx_apicv_set_ready(struct vlapic *vlapic, int vector, bool level)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
- uint64_t mask;
- int idx, notify = 0;
+ uint32_t mask, tmrval;
+ int idx;
+ vcpu_notify_t notify = VCPU_NOTIFY_NONE;
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = vlapic_vtx->pir_desc;
+ idx = vector / 32;
+ mask = 1UL << (vector % 32);
/*
- * Keep track of interrupt requests in the PIR descriptor. This is
- * because the virtual APIC page pointed to by the VMCS cannot be
- * modified if the vcpu is running.
+ * If the currently asserted TMRs do not match the state requested by
+ * the incoming interrupt, an exit will be required to reconcile those
+ * bits in the APIC page. This will keep the vLAPIC behavior in line
+ * with the architecturally defined expectations.
+ *
+ * If actors of mixed types (edge and level) are racing against the same
+ * vector (toggling its TMR bit back and forth), the results could be
+ * inconsistent. Such circumstances are considered a rare edge case and
+ * are never expected to be found in the wild.
*/
- idx = vector / 64;
- mask = 1UL << (vector % 64);
- atomic_set_long(&pir_desc->pir[idx], mask);
+ tmrval = atomic_load_acq_int(&vlapic_vtx->tmr_active[idx]);
+ if (!level) {
+ if ((tmrval & mask) != 0) {
+ /* Edge-triggered interrupt needs TMR de-asserted */
+ atomic_set_int(&vlapic_vtx->pending_edge[idx], mask);
+ atomic_store_rel_long(&pir_desc->pending, 1);
+ return (VCPU_NOTIFY_EXIT);
+ }
+ } else {
+ if ((tmrval & mask) == 0) {
+ /* Level-triggered interrupt needs TMR asserted */
+ atomic_set_int(&vlapic_vtx->pending_level[idx], mask);
+ atomic_store_rel_long(&pir_desc->pending, 1);
+ return (VCPU_NOTIFY_EXIT);
+ }
+ }
+
+ /*
+ * If the interrupt request does not require manipulation of the TMRs
+ * for delivery, set it in PIR descriptor. It cannot be inserted into
+ * the APIC page while the vCPU might be running.
+ */
+ atomic_set_int(&pir_desc->pir[idx], mask);
/*
* A notification is required whenever the 'pending' bit makes a
@@ -3585,7 +3659,7 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
* cleared whenever the 'pending' bit makes another 0->1 transition.
*/
if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) {
- notify = 1;
+ notify = VCPU_NOTIFY_APIC;
vlapic_vtx->pending_prio = 0;
} else {
const u_int old_prio = vlapic_vtx->pending_prio;
@@ -3593,113 +3667,44 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) {
atomic_set_int(&vlapic_vtx->pending_prio, prio_bit);
- notify = 1;
+ notify = VCPU_NOTIFY_APIC;
}
}
- VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
- level, "vmx_set_intr_ready");
return (notify);
}
-static int
-vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
+static void
+vmx_apicv_accepted(struct vlapic *vlapic, int vector)
{
- struct vlapic_vtx *vlapic_vtx;
- struct pir_desc *pir_desc;
- struct LAPIC *lapic;
- uint64_t pending, pirval;
- uint32_t ppr, vpr;
- int i;
-
- /*
- * This function is only expected to be called from the 'HLT' exit
- * handler which does not care about the vector that is pending.
- */
- KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));
-
- vlapic_vtx = (struct vlapic_vtx *)vlapic;
- pir_desc = vlapic_vtx->pir_desc;
-
- pending = atomic_load_acq_long(&pir_desc->pending);
- if (!pending) {
- /*
- * While a virtual interrupt may have already been
- * processed the actual delivery maybe pending the
- * interruptibility of the guest. Recognize a pending
- * interrupt by reevaluating virtual interrupts
- * following Section 29.2.1 in the Intel SDM Volume 3.
- */
- struct vm_exit *vmexit;
- uint8_t rvi, ppr;
-
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- rvi = vmexit->u.hlt.intr_status & APIC_TPR_INT;
- lapic = vlapic->apic_page;
- ppr = lapic->ppr & APIC_TPR_INT;
- if (rvi > ppr) {
- return (1);
- }
-
- return (0);
- }
-
/*
- * If there is an interrupt pending then it will be recognized only
- * if its priority is greater than the processor priority.
- *
- * Special case: if the processor priority is zero then any pending
- * interrupt will be recognized.
+ * When APICv is enabled for an instance, the traditional interrupt
+ * injection method (populating ENTRY_INTR_INFO in the VMCS) is not
+ * used and the CPU does the heavy lifting of virtual interrupt
+ * delivery. For that reason vmx_intr_accepted() should never be called
+ * when APICv is enabled.
*/
- lapic = vlapic->apic_page;
- ppr = lapic->ppr & APIC_TPR_INT;
- if (ppr == 0)
- return (1);
-
- VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
- lapic->ppr);
-
- vpr = 0;
- for (i = 3; i >= 0; i--) {
- pirval = pir_desc->pir[i];
- if (pirval != 0) {
- vpr = (i * 64 + flsl(pirval) - 1) & APIC_TPR_INT;
- break;
- }
- }
-
- /*
- * If the highest-priority pending interrupt falls short of the
- * processor priority of this vCPU, ensure that 'pending_prio' does not
- * have any stale bits which would preclude a higher-priority interrupt
- * from incurring a notification later.
- */
- if (vpr <= ppr) {
- const u_int prio_bit = VPR_PRIO_BIT(vpr);
- const u_int old = vlapic_vtx->pending_prio;
-
- if (old > prio_bit && (old & prio_bit) == 0) {
- vlapic_vtx->pending_prio = prio_bit;
- }
- return (0);
- }
- return (1);
+ panic("vmx_intr_accepted: not expected to be called");
}
static void
-vmx_intr_accepted(struct vlapic *vlapic, int vector)
+vmx_apicv_sync_tmr(struct vlapic *vlapic)
{
+ struct vlapic_vtx *vlapic_vtx;
+ const uint32_t *tmrs;
- panic("vmx_intr_accepted: not expected to be called");
-}
+ vlapic_vtx = (struct vlapic_vtx *)vlapic;
+ tmrs = &vlapic_vtx->tmr_active[0];
-static void
-vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks)
-{
- vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)masks[1] << 32) | masks[0]);
- vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)masks[3] << 32) | masks[2]);
- vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)masks[5] << 32) | masks[4]);
- vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)masks[7] << 32) | masks[6]);
+ if (!vlapic_vtx->tmr_sync) {
+ return;
+ }
+
+ vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)tmrs[1] << 32) | tmrs[0]);
+ vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)tmrs[3] << 32) | tmrs[2]);
+ vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)tmrs[5] << 32) | tmrs[4]);
+ vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)tmrs[7] << 32) | tmrs[6]);
+ vlapic_vtx->tmr_sync = B_FALSE;
}
static void
@@ -3765,107 +3770,99 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
}
static void
-vmx_post_intr(struct vlapic *vlapic, int hostcpu)
+vmx_apicv_notify(struct vlapic *vlapic, int hostcpu)
{
-#ifdef __FreeBSD__
- ipi_cpu(hostcpu, pirvec);
-#else
psm_send_pir_ipi(hostcpu);
-#endif
}
-/*
- * Transfer the pending interrupts in the PIR descriptor to the IRR
- * in the virtual APIC page.
- */
static void
-vmx_inject_pir(struct vlapic *vlapic)
+vmx_apicv_sync(struct vlapic *vlapic)
{
struct vlapic_vtx *vlapic_vtx;
struct pir_desc *pir_desc;
struct LAPIC *lapic;
- uint64_t val, pirval;
- int rvi, pirbase = -1;
- uint16_t intr_status_old, intr_status_new;
+ uint_t i;
vlapic_vtx = (struct vlapic_vtx *)vlapic;
pir_desc = vlapic_vtx->pir_desc;
+ lapic = vlapic->apic_page;
+
if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
- VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
- "no posted interrupt pending");
return;
}
- pirval = 0;
- pirbase = -1;
- lapic = vlapic->apic_page;
+ vlapic_vtx->pending_prio = 0;
- val = atomic_readandclear_long(&pir_desc->pir[0]);
- if (val != 0) {
- lapic->irr0 |= val;
- lapic->irr1 |= val >> 32;
- pirbase = 0;
- pirval = val;
- }
+ /* Make sure the invalid (0-15) vectors are not set */
+ ASSERT0(vlapic_vtx->pending_level[0] & 0xffff);
+ ASSERT0(vlapic_vtx->pending_edge[0] & 0xffff);
+ ASSERT0(pir_desc->pir[0] & 0xffff);
- val = atomic_readandclear_long(&pir_desc->pir[1]);
- if (val != 0) {
- lapic->irr2 |= val;
- lapic->irr3 |= val >> 32;
- pirbase = 64;
- pirval = val;
- }
+ for (i = 0; i <= 7; i++) {
+ uint32_t *tmrp = &lapic->tmr0 + (i * 4);
+ uint32_t *irrp = &lapic->irr0 + (i * 4);
- val = atomic_readandclear_long(&pir_desc->pir[2]);
- if (val != 0) {
- lapic->irr4 |= val;
- lapic->irr5 |= val >> 32;
- pirbase = 128;
- pirval = val;
- }
+ const uint32_t pending_level =
+ atomic_readandclear_int(&vlapic_vtx->pending_level[i]);
+ const uint32_t pending_edge =
+ atomic_readandclear_int(&vlapic_vtx->pending_edge[i]);
+ const uint32_t pending_inject =
+ atomic_readandclear_int(&pir_desc->pir[i]);
+
+ if (pending_level != 0) {
+ /*
+ * Level-triggered interrupts assert their corresponding
+ * bit in the TMR when queued in IRR.
+ */
+ *tmrp |= pending_level;
+ *irrp |= pending_level;
+ }
+ if (pending_edge != 0) {
+ /*
+ * When queuing an edge-triggered interrupt in IRR, the
+ * corresponding bit in the TMR is cleared.
+ */
+ *tmrp &= ~pending_edge;
+ *irrp |= pending_edge;
+ }
+ if (pending_inject != 0) {
+ /*
+ * Interrupts which do not require a change to the TMR
+ * (because it already matches the necessary state) can
+ * simply be queued in IRR.
+ */
+ *irrp |= pending_inject;
+ }
- val = atomic_readandclear_long(&pir_desc->pir[3]);
- if (val != 0) {
- lapic->irr6 |= val;
- lapic->irr7 |= val >> 32;
- pirbase = 192;
- pirval = val;
+ if (*tmrp != vlapic_vtx->tmr_active[i]) {
+ /* Check if VMX EOI triggers require updating. */
+ vlapic_vtx->tmr_active[i] = *tmrp;
+ vlapic_vtx->tmr_sync = B_TRUE;
+ }
}
+}
- VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");
+static void
+vmx_tpr_shadow_enter(struct vlapic *vlapic)
+{
+ /*
+ * When TPR shadowing is enabled, VMX will initiate a guest exit if its
+ * TPR falls below a threshold priority. That threshold is set to the
+ * current TPR priority, since guest interrupt status should be
+ * re-evaluated if its TPR is set lower.
+ */
+ vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic));
+}
+static void
+vmx_tpr_shadow_exit(struct vlapic *vlapic)
+{
/*
- * Update RVI so the processor can evaluate pending virtual
- * interrupts on VM-entry.
- *
- * It is possible for pirval to be 0 here, even though the
- * pending bit has been set. The scenario is:
- * CPU-Y is sending a posted interrupt to CPU-X, which
- * is running a guest and processing posted interrupts in h/w.
- * CPU-X will eventually exit and the state seen in s/w is
- * the pending bit set, but no PIR bits set.
- *
- * CPU-X CPU-Y
- * (vm running) (host running)
- * rx posted interrupt
- * CLEAR pending bit
- * SET PIR bit
- * READ/CLEAR PIR bits
- * SET pending bit
- * (vm exit)
- * pending bit set, PIR 0
+ * Unlike full APICv, where changes to the TPR are reflected in the PPR,
+ * with TPR shadowing, that duty is relegated to the VMM. Upon exit,
+ * the PPR is updated to reflect any change in the TPR here.
*/
- if (pirval != 0) {
- rvi = pirbase + flsl(pirval) - 1;
- intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
- intr_status_new = (intr_status_old & 0xFF00) | rvi;
- if (intr_status_new > intr_status_old) {
- vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new);
- VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
- "guest_intr_status changed from 0x%04x to 0x%04x",
- intr_status_old, intr_status_new);
- }
- }
+ vlapic_sync_tpr(vlapic);
}
static struct vlapic *
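
vmx_tpr_shadow_enter() programs VMCS_TPR_THRESHOLD with the guest's current TPR priority class (vlapic_get_cr8()), so any guest write that lowers the TPR below that class forces an exit and a fresh interrupt evaluation, and vmx_tpr_shadow_exit() then reconciles the PPR. A toy model of that comparison follows; the priority-class encoding matches the SDM description, but the helper names are mine, not part of this change.

#include <stdbool.h>
#include <stdint.h>

/* Priority class is the upper nibble of the 8-bit TPR (CR8 holds it directly). */
static inline uint8_t
tpr_prio_class(uint8_t tpr)
{
	return (tpr >> 4);
}

/*
 * With TPR shadowing, the CPU exits when the class of the virtualized TPR
 * drops below the programmed threshold.  Setting the threshold to the
 * current class (as vmx_tpr_shadow_enter() does) means any TPR decrease by
 * the guest is re-evaluated by the VMM.
 */
static inline bool
tpr_write_causes_exit(uint8_t new_tpr, uint8_t threshold_class)
{
	return (tpr_prio_class(new_tpr) < threshold_class);
}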
@@ -3890,14 +3887,13 @@ vmx_vlapic_init(void *arg, int vcpuid)
vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts;
}
if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vlapic->ops.set_intr_ready = vmx_set_intr_ready;
- vlapic->ops.pending_intr = vmx_pending_intr;
- vlapic->ops.intr_accepted = vmx_intr_accepted;
- vlapic->ops.set_tmr = vmx_set_tmr;
+ vlapic->ops.set_intr_ready = vmx_apicv_set_ready;
+ vlapic->ops.sync_state = vmx_apicv_sync;
+ vlapic->ops.intr_accepted = vmx_apicv_accepted;
vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid;
if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
- vlapic->ops.post_intr = vmx_post_intr;
+ vlapic->ops.post_intr = vmx_apicv_notify;
}
}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index 7943c1fd0e..b78f146755 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -110,7 +110,7 @@ CTASSERT(sizeof(struct apic_page) == PAGE_SIZE);
/* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */
struct pir_desc {
- uint64_t pir[4];
+ uint32_t pir[8];
uint64_t pending;
uint64_t unused[3];
} __aligned(64);
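Reworking pir[] from four 64-bit words into eight 32-bit words lets it line up with the 32-bit IRR/TMR banks handled in the sync loop. A sketch of the vector-to-bit mapping under the new layout; the helper below is illustrative only, and the kernel performs the update with atomic_set_int():

#include <stdint.h>

static void
pir_set(uint32_t pir[8], int vector)
{
	pir[vector / 32] |= 1u << (vector % 32);
}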
diff --git a/usr/src/uts/i86pc/io/vmm/io/vioapic.c b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
index 1e8ee1fa7a..89d3bf79df 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vioapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
@@ -237,141 +237,6 @@ vioapic_pulse_irq(struct vm *vm, int irq)
return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE));
}
-#define REDIR_IS_PHYS(reg) (((reg) & IOART_DESTMOD) == IOART_DESTPHY)
-#define REDIR_IS_LOWPRIO(reg) (((reg) & IOART_DELMOD) == IOART_DELLOPRI)
-/* Level-triggered interrupts only valid in fixed and low-priority modes */
-#define REDIR_IS_LVLTRIG(reg) \
- (((reg) & IOART_TRGRLVL) != 0 && \
- (((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg)))
-#define REDIR_DEST(reg) ((reg) >> (32 + APIC_ID_SHIFT))
-#define REDIR_VECTOR(reg) ((reg) & IOART_INTVEC)
-
-/*
- * Given a redirection entry, determine which vCPUs would be targeted.
- */
-static void
-vioapic_calcdest(struct vioapic *vioapic, uint64_t redir_ent, cpuset_t *dmask)
-{
-
- /*
- * When calculating interrupt destinations with vlapic_calcdest(), the
- * legacy xAPIC format is assumed, since the system lacks interrupt
- * redirection hardware.
- * See vlapic_deliver_intr() for more details.
- */
- vlapic_calcdest(vioapic->vm, dmask, REDIR_DEST(redir_ent),
- REDIR_IS_PHYS(redir_ent), REDIR_IS_LOWPRIO(redir_ent), false);
-}
-
-/*
- * Across all redirection entries utilizing a specified vector, determine the
- * set of vCPUs which would be targeted by a level-triggered interrupt.
- */
-static void
-vioapic_tmr_active(struct vioapic *vioapic, uint8_t vec, cpuset_t *result)
-{
- u_int i;
-
- CPU_ZERO(result);
- if (vec == 0) {
- return;
- }
-
- for (i = 0; i < REDIR_ENTRIES; i++) {
- cpuset_t dest;
- const uint64_t val = vioapic->rtbl[i].reg;
-
- if (!REDIR_IS_LVLTRIG(val) || REDIR_VECTOR(val) != vec) {
- continue;
- }
-
- CPU_ZERO(&dest);
- vioapic_calcdest(vioapic, val, &dest);
- CPU_OR(result, &dest);
- }
-}
-
-/*
- * Update TMR state in vLAPICs after changes to vIOAPIC pin configuration
- */
-static void
-vioapic_update_tmrs(struct vioapic *vioapic, int vcpuid, uint64_t oldval,
- uint64_t newval)
-{
- cpuset_t active, allset, newset, oldset;
- struct vm *vm;
- uint8_t newvec, oldvec;
-
- vm = vioapic->vm;
- CPU_ZERO(&allset);
- CPU_ZERO(&newset);
- CPU_ZERO(&oldset);
- newvec = oldvec = 0;
-
- if (REDIR_IS_LVLTRIG(oldval)) {
- vioapic_calcdest(vioapic, oldval, &oldset);
- CPU_OR(&allset, &oldset);
- oldvec = REDIR_VECTOR(oldval);
- }
-
- if (REDIR_IS_LVLTRIG(newval)) {
- vioapic_calcdest(vioapic, newval, &newset);
- CPU_OR(&allset, &newset);
- newvec = REDIR_VECTOR(newval);
- }
-
- if (CPU_EMPTY(&allset) ||
- (CPU_CMP(&oldset, &newset) == 0 && oldvec == newvec)) {
- return;
- }
-
- /*
- * Since the write to the redirection table has already occurred, a
- * scan of level-triggered entries referencing the old vector will find
- * only entries which are now currently valid.
- */
- vioapic_tmr_active(vioapic, oldvec, &active);
-
- while (!CPU_EMPTY(&allset)) {
- struct vlapic *vlapic;
- u_int i;
-
- i = CPU_FFS(&allset) - 1;
- CPU_CLR(i, &allset);
-
- if (oldvec == newvec &&
- CPU_ISSET(i, &oldset) && CPU_ISSET(i, &newset)) {
- continue;
- }
-
- if (i != vcpuid) {
- vcpu_block_run(vm, i);
- }
-
- vlapic = vm_lapic(vm, i);
- if (CPU_ISSET(i, &oldset)) {
- /*
- * Perform the deassertion if no other level-triggered
- * IOAPIC entries target this vCPU with the old vector
- *
- * Note: Sharing of vectors like that should be
- * extremely rare in modern operating systems and was
- * previously unsupported by the bhyve vIOAPIC.
- */
- if (!CPU_ISSET(i, &active)) {
- vlapic_tmr_set(vlapic, oldvec, false);
- }
- }
- if (CPU_ISSET(i, &newset)) {
- vlapic_tmr_set(vlapic, newvec, true);
- }
-
- if (i != vcpuid) {
- vcpu_unblock_run(vm, i);
- }
- }
-}
-
static uint32_t
vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr)
{
@@ -411,7 +276,6 @@ static void
vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
{
uint64_t data64, mask64;
- uint64_t last, changed;
int regnum, pin, lshift;
regnum = addr & 0xff;
@@ -436,8 +300,6 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
else
lshift = 0;
- last = vioapic->rtbl[pin].reg;
-
data64 = (uint64_t)data << lshift;
mask64 = (uint64_t)0xffffffff << lshift;
vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS;
@@ -447,19 +309,6 @@ vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
pin, vioapic->rtbl[pin].reg);
/*
- * If any fields in the redirection table entry (except mask
- * or polarity) have changed then update the trigger-mode
- * registers on all the vlapics.
- */
- changed = last ^ vioapic->rtbl[pin].reg;
- if (changed & ~(IOART_INTMASK | IOART_INTPOL)) {
- VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate "
- "vlapic trigger-mode register", pin);
- vioapic_update_tmrs(vioapic, vcpuid, last,
- vioapic->rtbl[pin].reg);
- }
-
- /*
* Generate an interrupt if the following conditions are met:
* - pin is not masked
* - previous interrupt has been EOIed
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index 038c17ca78..8af77a387b 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -70,7 +70,13 @@ __FBSDID("$FreeBSD$");
#include "vlapic_priv.h"
#include "vioapic.h"
-#define PRIO(x) ((x) >> 4)
+
+/*
+ * The 4 high bits of a given interrupt vector represent its priority. The same
+ * is true for the contents of the TPR when it is used to calculate the ultimate
+ * PPR of an APIC - the 4 high bits hold the priority.
+ */
+#define PRIO(x) ((x) & 0xf0)
#define VLAPIC_VERSION (16)
@@ -94,7 +100,6 @@ __FBSDID("$FreeBSD$");
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
-static void vlapic_tmr_reset(struct vlapic *);
#ifdef __ISRVEC_DEBUG
static void vlapic_isrstk_accept(struct vlapic *, int);
@@ -289,52 +294,60 @@ vlapic_esr_write_handler(struct vlapic *vlapic)
vlapic->esr_pending = 0;
}
-int
+vcpu_notify_t
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
struct LAPIC *lapic;
- uint32_t *irrptr, *tmrptr, mask;
+ uint32_t *irrptr, *tmrptr, mask, tmr;
int idx;
KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
lapic = vlapic->apic_page;
if (!(lapic->svr & APIC_SVR_ENABLE)) {
- VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
- "interrupt %d", vector);
- return (0);
+ /* ignore interrupt on software-disabled APIC */
+ return (VCPU_NOTIFY_NONE);
}
if (vector < 16) {
vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
false);
- VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
- vector);
- return (1);
+
+ /*
+ * If the error LVT is configured to interrupt the vCPU, it will
+ * have delivered a notification through that mechanism.
+ */
+ return (VCPU_NOTIFY_NONE);
}
- if (vlapic->ops.set_intr_ready)
+ if (vlapic->ops.set_intr_ready) {
return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
+ }
idx = (vector / 32) * 4;
mask = 1 << (vector % 32);
-
+ tmrptr = &lapic->tmr0;
irrptr = &lapic->irr0;
- atomic_set_int(&irrptr[idx], mask);
/*
- * Verify that the trigger-mode of the interrupt matches with
- * the vlapic TMR registers.
+ * Update TMR for requested vector, if necessary.
+ * This must be done prior to asserting the bit in IRR so that the
+ * proper TMR state is always visible before the to-be-queued interrupt
+ * can be injected.
*/
- tmrptr = &lapic->tmr0;
- if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
- VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
- "interrupt is %s-triggered", idx / 4, tmrptr[idx],
- level ? "level" : "edge");
+ tmr = atomic_load_acq_32(&tmrptr[idx]);
+ if ((tmr & mask) != (level ? mask : 0)) {
+ if (level) {
+ atomic_set_int(&tmrptr[idx], mask);
+ } else {
+ atomic_clear_int(&tmrptr[idx], mask);
+ }
}
- VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
- return (1);
+ /* Now set the bit in IRR */
+ atomic_set_int(&irrptr[idx], mask);
+
+ return (VCPU_NOTIFY_EXIT);
}
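A minimal sketch of the ordering rule described in the comment above, written with C11 atomics instead of the machine/atomic.h routines the kernel uses; the point is only that the TMR bit settles before the IRR bit becomes visible:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static void
queue_vector(_Atomic uint32_t *tmr, _Atomic uint32_t *irr, int vector,
    bool level)
{
	const uint32_t mask = 1u << (vector % 32);

	/* Publish the trigger mode first... */
	if (level)
		atomic_fetch_or(tmr, mask);
	else
		atomic_fetch_and(tmr, ~mask);
	/* ...and only then raise the bit in IRR. */
	atomic_fetch_or(irr, mask);
}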
static __inline uint32_t *
@@ -472,6 +485,7 @@ static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
uint32_t mode, reg, vec;
+ vcpu_notify_t notify;
reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);
@@ -487,8 +501,8 @@ vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
lvt == APIC_LVT_ERROR);
return (0);
}
- if (vlapic_set_intr_ready(vlapic, vec, false))
- vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
+ notify = vlapic_set_intr_ready(vlapic, vec, false);
+ vcpu_notify_event_type(vlapic->vm, vlapic->vcpuid, notify);
break;
case APIC_LVT_DM_NMI:
vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
@@ -532,8 +546,8 @@ vlapic_active_isr(struct vlapic *vlapic)
}
/*
- * Algorithm adopted from section "Interrupt, Task and Processor Priority"
- * in Intel Architecture Manual Vol 3a.
+ * After events which might arbitrarily change the value of PPR, such as a TPR
+ * write or an EOI, calculate that new PPR value and store it in the APIC page.
*/
static void
vlapic_update_ppr(struct vlapic *vlapic)
@@ -543,19 +557,44 @@ vlapic_update_ppr(struct vlapic *vlapic)
isrvec = vlapic_active_isr(vlapic);
tpr = vlapic->apic_page->tpr;
-#ifdef __ISRVEC_DEBUG
- vlapic_isrstk_verify(vlapic);
-#endif
-
- if (PRIO(tpr) >= PRIO(isrvec))
+ /*
+ * Algorithm adopted from section "Interrupt, Task and Processor
+ * Priority" in Intel Architecture Manual Vol 3a.
+ */
+ if (PRIO(tpr) >= PRIO(isrvec)) {
ppr = tpr;
- else
- ppr = isrvec & 0xf0;
+ } else {
+ ppr = PRIO(isrvec);
+ }
vlapic->apic_page->ppr = ppr;
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
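A worked sketch of that rule with the reworked PRIO() macro, which now keeps the priority class in the high nibble rather than shifting it down, so the result can be stored into the PPR as-is:

#include <stdint.h>

#define PRIO(x)	((x) & 0xf0)

static uint8_t
calc_ppr(uint8_t tpr, uint8_t isrvec)
{
	/* e.g. TPR 0x35 vs. in-service vector 0x41: 0x30 < 0x40, so PPR is 0x40 */
	return (PRIO(tpr) >= PRIO(isrvec) ? tpr : PRIO(isrvec));
}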
+/*
+ * When a vector is asserted in ISR as in-service, the PPR must be raised to the
+ * priority of that vector, as the vCPU would have been at a lower priority in
+ * order for the vector to be accepted.
+ */
+static void
+vlapic_raise_ppr(struct vlapic *vlapic, int vec)
+{
+ struct LAPIC *lapic = vlapic->apic_page;
+ int ppr;
+
+ ppr = PRIO(vec);
+
+#ifdef __ISRVEC_DEBUG
+ KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
+ KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
+ KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
+ KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
+#endif /* __ISRVEC_DEBUG */
+
+ lapic->ppr = ppr;
+	VLAPIC_CTR1(vlapic, "vlapic_raise_ppr 0x%02x", ppr);
+}
+
void
vlapic_sync_tpr(struct vlapic *vlapic)
{
@@ -1087,10 +1126,9 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
int idx, i, bitpos, vector;
uint32_t *irrptr, val;
- vlapic_update_ppr(vlapic);
-
- if (vlapic->ops.pending_intr)
- return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
+ if (vlapic->ops.sync_state) {
+ (*vlapic->ops.sync_state)(vlapic);
+ }
irrptr = &lapic->irr0;
@@ -1119,6 +1157,8 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
uint32_t *irrptr, *isrptr;
int idx;
+ KASSERT(vector >= 16 && vector < 256, ("invalid vector %d", vector));
+
if (vlapic->ops.intr_accepted)
return ((*vlapic->ops.intr_accepted)(vlapic, vector));
@@ -1136,6 +1176,13 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
isrptr[idx] |= 1 << (vector % 32);
VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
+ /*
+ * The only way a fresh vector could be accepted into ISR is if it was
+ * of a higher priority than the current PPR. With that vector now
+ * in-service, the PPR must be raised.
+ */
+ vlapic_raise_ppr(vlapic, vector);
+
#ifdef __ISRVEC_DEBUG
vlapic_isrstk_accept(vlapic, vector);
#endif
@@ -1425,7 +1472,6 @@ vlapic_reset(struct vlapic *vlapic)
lapic->dfr = 0xffffffff;
lapic->svr = APIC_SVR_VECTOR;
vlapic_mask_lvts(vlapic);
- vlapic_tmr_reset(vlapic);
lapic->dcr_timer = 0;
vlapic_dcr_write_handler(vlapic);
@@ -1592,82 +1638,6 @@ vlapic_enabled(struct vlapic *vlapic)
return (false);
}
-static void
-vlapic_tmr_reset(struct vlapic *vlapic)
-{
- struct LAPIC *lapic;
-
- lapic = vlapic->apic_page;
- lapic->tmr0 = lapic->tmr1 = lapic->tmr2 = lapic->tmr3 = 0;
- lapic->tmr4 = lapic->tmr5 = lapic->tmr6 = lapic->tmr7 = 0;
- vlapic->tmr_pending = 1;
-}
-
-/*
- * Synchronize TMR designations into the LAPIC state.
- * The vCPU must be in the VCPU_RUNNING state.
- */
-void
-vlapic_tmr_update(struct vlapic *vlapic)
-{
- struct LAPIC *lapic;
- uint32_t *tmrptr;
- uint32_t result[VLAPIC_TMR_CNT];
- u_int i, tmr_idx;
-
- if (vlapic->tmr_pending == 0) {
- return;
- }
-
- lapic = vlapic->apic_page;
- tmrptr = &lapic->tmr0;
-
- VLAPIC_CTR0(vlapic, "synchronizing TMR");
- for (i = 0; i < VLAPIC_TMR_CNT; i++) {
- tmr_idx = i * 4;
-
- tmrptr[tmr_idx] &= ~vlapic->tmr_vec_deassert[i];
- tmrptr[tmr_idx] |= vlapic->tmr_vec_assert[i];
- vlapic->tmr_vec_deassert[i] = 0;
- vlapic->tmr_vec_assert[i] = 0;
- result[i] = tmrptr[tmr_idx];
- }
- vlapic->tmr_pending = 0;
-
- if (vlapic->ops.set_tmr != NULL) {
- (*vlapic->ops.set_tmr)(vlapic, result);
- }
-}
-
-/*
- * Designate the TMR state for a given interrupt vector.
- * The caller must hold the vIOAPIC lock and prevent the vCPU corresponding to
- * this vLAPIC instance from being-in or entering the VCPU_RUNNING state.
- */
-void
-vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active)
-{
- const uint32_t idx = vector / 32;
- const uint32_t mask = 1 << (vector % 32);
-
- VLAPIC_CTR2(vlapic, "TMR for vector %u %sasserted", vector,
- active ? "" : "de");
- if (active) {
- vlapic->tmr_vec_assert[idx] |= mask;
- vlapic->tmr_vec_deassert[idx] &= ~mask;
- } else {
- vlapic->tmr_vec_deassert[idx] |= mask;
- vlapic->tmr_vec_assert[idx] &= ~mask;
- }
-
- /*
- * Track the number of TMR changes between calls to vlapic_tmr_update.
- * While a simple boolean would suffice, this count may be useful when
- * tracing or debugging, and is cheap to calculate.
- */
- vlapic->tmr_pending = MIN(UINT32_MAX - 1, vlapic->tmr_pending) + 1;
-}
-
#ifndef __FreeBSD__
void
vlapic_localize_resources(struct vlapic *vlapic)
@@ -1685,6 +1655,7 @@ vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
vlapic->isrvec_stk_top);
}
vlapic->isrvec_stk_top--;
+ vlapic_isrstk_verify(vlapic);
}
static void
@@ -1699,6 +1670,7 @@ vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
panic("isrvec_stk_top overflow %d", stk_top);
vlapic->isrvec_stk[stk_top] = vector;
+ vlapic_isrstk_verify(vlapic);
}
static void
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
index 746699393f..f34cf1ec4b 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
@@ -63,10 +63,8 @@ int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr);
*/
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
-/*
- * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise.
- */
-int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
+vcpu_notify_t vlapic_set_intr_ready(struct vlapic *vlapic, int vector,
+ bool level);
/*
* Post an interrupt to the vcpu running on 'hostcpu'. This will use a
@@ -91,9 +89,6 @@ void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
bool lowprio, bool x2apic_dest);
-void vlapic_tmr_update(struct vlapic *vlapic);
-void vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active);
-
void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
uint64_t vlapic_get_cr8(struct vlapic *vlapic);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 8a0d594de3..1329ab5b36 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -159,11 +159,11 @@ enum boot_state {
struct vlapic;
struct vlapic_ops {
- int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level);
- int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
+ vcpu_notify_t (*set_intr_ready)(struct vlapic *vlapic, int vector,
+ bool level);
+ void (*sync_state)(struct vlapic *vlapic);
void (*intr_accepted)(struct vlapic *vlapic, int vector);
void (*post_intr)(struct vlapic *vlapic, int hostcpu);
- void (*set_tmr)(struct vlapic *vlapic, const uint32_t *result);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
@@ -174,7 +174,6 @@ struct vlapic {
struct vlapic_ops ops;
uint32_t esr_pending;
- uint32_t tmr_pending;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
@@ -194,19 +193,6 @@ struct vlapic {
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
- /*
- * Store intended modifications to the trigger-mode register state.
- * Along with the tmr_pending counter above, these are protected by the
- * vIOAPIC lock and can only be modified under specific conditions:
- *
- * 1. When holding the vIOAPIC lock, and the vCPU to which the vLAPIC
- * belongs is prevented from entering the VCPU_RUNNING state.
- * 2. When the owning vCPU is in the VCPU_RUNNING state, and is
- * applying the TMR modifications prior to interrupt injection.
- */
- uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
- uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
-
#ifdef __ISRVEC_DEBUG
/*
* The 'isrvec_stk' is a stack of vectors injected by the local APIC.
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index acdabf556f..b566e503e0 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -280,8 +280,15 @@ vcpu_should_yield(struct vm *vm, int vcpu)
}
#endif /* _SYS_THREAD_H */
+typedef enum vcpu_notify {
+ VCPU_NOTIFY_NONE,
+ VCPU_NOTIFY_APIC, /* Posted intr notification (if possible) */
+ VCPU_NOTIFY_EXIT, /* IPI to cause VM exit */
+} vcpu_notify_t;
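A hedged usage sketch of the new type, assuming the vmm interfaces above and a hypothetical deliver_fixed_intr() wrapper: callers pass the value returned by vlapic_set_intr_ready() straight to vcpu_notify_event_type(), which treats VCPU_NOTIFY_NONE as a no-op (the vmm_lapic.c hunk later in this diff does exactly this).

static void
deliver_fixed_intr(struct vm *vm, int vcpuid, int vector, bool level)
{
	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
	vcpu_notify_t notify;

	notify = vlapic_set_intr_ready(vlapic, vector, level);
	vcpu_notify_event_type(vm, vcpuid, notify);	/* NONE is a no-op */
}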
+
void *vcpu_stats(struct vm *vm, int vcpu);
-void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+void vcpu_notify_event(struct vm *vm, int vcpuid);
+void vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t);
struct vmspace *vm_get_vmspace(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
@@ -374,6 +381,25 @@ void vm_inject_ac(struct vm *vm, int vcpuid, int errcode);
void vm_inject_ss(struct vm *vm, int vcpuid, int errcode);
void vm_inject_pf(struct vm *vm, int vcpuid, int errcode, uint64_t cr2);
+/*
+ * Both SVM and VMX have complex logic for injecting events such as exceptions
+ * or interrupts into the guest. Within those two backends, the progress of
+ * event injection is tracked by event_inject_state, hopefully making it easier
+ * to reason about.
+ */
+enum event_inject_state {
+ EIS_CAN_INJECT = 0, /* exception/interrupt can be injected */
+ EIS_EV_EXISTING = 1, /* blocked by existing event */
+ EIS_EV_INJECTED = 2, /* blocked by injected event */
+ EIS_GI_BLOCK = 3, /* blocked by guest interruptability */
+
+ /*
+ * Flag to request an immediate exit from VM context after event
+ * injection in order to perform more processing
+ */
+ EIS_REQ_EXIT = (1 << 15),
+};
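A short sketch of the intended flag-on-enum usage, with hypothetical helpers that are not part of this patch (the real consumers are the SVM and VMX backends); it relies only on the enum defined above:

#include <stdbool.h>

static int
eis_request_exit(enum event_inject_state state)
{
	return (state | EIS_REQ_EXIT);
}

static bool
eis_can_inject(int state)
{
	return ((state & ~EIS_REQ_EXIT) == EIS_CAN_INJECT);
}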
+
#ifndef __FreeBSD__
void vmm_sol_glue_init(void);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 1821a96fd7..3cd89f9fe6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -297,7 +297,7 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
-static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
#ifndef __FreeBSD__
static void vm_clear_memseg(struct vm *, int);
@@ -1338,7 +1338,7 @@ vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
if (from_idle) {
while (vcpu->state != VCPU_IDLE) {
vcpu->reqidle = 1;
- vcpu_notify_event_locked(vcpu, false);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
"idle requested", vcpu_state2str(vcpu->state));
#ifdef __FreeBSD__
@@ -1839,7 +1839,7 @@ vm_handle_suspend(struct vm *vm, int vcpuid)
*/
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->suspended_cpus)) {
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
}
@@ -1909,7 +1909,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how)
*/
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
return (0);
@@ -2620,6 +2620,14 @@ vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
return (EINVAL);
/*
+ * NMIs (which bear an exception vector of 2) are to be injected via
+ * their own specialized path using vm_inject_nmi().
+ */
+ if (vector == 2) {
+ return (EINVAL);
+ }
+
+ /*
* A double fault exception should never be injected directly into
* the guest. It is a derived exception that results from specific
* combinations of nested faults.
@@ -2728,7 +2736,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
vcpu = &vm->vcpu[vcpuid];
vcpu->nmi_pending = 1;
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
return (0);
}
@@ -2775,7 +2783,7 @@ vm_inject_extint(struct vm *vm, int vcpuid)
vcpu = &vm->vcpu[vcpuid];
vcpu->extint_pending = 1;
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
return (0);
}
@@ -2956,7 +2964,7 @@ vcpu_block_run(struct vm *vm, int vcpuid)
vcpu_lock(vcpu);
vcpu->runblock++;
if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
- vcpu_notify_event_locked(vcpu, false);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
}
while (vcpu->state == VCPU_RUNNING) {
#ifdef __FreeBSD__
@@ -3026,14 +3034,14 @@ vm_suspend_cpu(struct vm *vm, int vcpuid)
vm->debug_cpus = vm->active_cpus;
for (i = 0; i < vm->maxcpus; i++) {
if (CPU_ISSET(i, &vm->active_cpus))
- vcpu_notify_event(vm, i, false);
+ vcpu_notify_event(vm, i);
}
} else {
if (!CPU_ISSET(vcpuid, &vm->active_cpus))
return (EINVAL);
CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
- vcpu_notify_event(vm, vcpuid, false);
+ vcpu_notify_event(vm, vcpuid);
}
return (0);
}
@@ -3126,15 +3134,17 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
* to the host_cpu to cause the vcpu to trap into the hypervisor.
*/
static void
-vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
+vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t ntype)
{
int hostcpu;
+	ASSERT(ntype == VCPU_NOTIFY_APIC || ntype == VCPU_NOTIFY_EXIT);
+
hostcpu = vcpu->hostcpu;
if (vcpu->state == VCPU_RUNNING) {
KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
if (hostcpu != curcpu) {
- if (lapic_intr) {
+ if (ntype == VCPU_NOTIFY_APIC) {
vlapic_post_intr(vcpu->vlapic, hostcpu,
vmm_ipinum);
} else {
@@ -3162,12 +3172,26 @@ vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
}
void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+vcpu_notify_event(struct vm *vm, int vcpuid)
{
struct vcpu *vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
- vcpu_notify_event_locked(vcpu, lapic_intr);
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+}
+
+void
+vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t ntype)
+{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ if (ntype == VCPU_NOTIFY_NONE) {
+ return;
+ }
+
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu, ntype);
vcpu_unlock(vcpu);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
index f8d8970807..3de67f012d 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
@@ -67,6 +67,7 @@ int
lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
{
struct vlapic *vlapic;
+ vcpu_notify_t notify;
if (cpu < 0 || cpu >= vm_get_maxcpus(vm))
return (EINVAL);
@@ -79,8 +80,8 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
- if (vlapic_set_intr_ready(vlapic, vector, level))
- vcpu_notify_event(vm, cpu, true);
+ notify = vlapic_set_intr_ready(vlapic, vector, level);
+ vcpu_notify_event_type(vm, cpu, notify);
return (0);
}
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 2322919d29..c6859a3c00 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -353,7 +353,6 @@ struct vm_exit {
} spinup_ap;
struct {
uint64_t rflags;
- uint64_t intr_status;
} hlt;
struct {
int vector;