14261 bhyve should expose kernel device state

Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Luqman Aden <luqman@oxide.computer>
Reviewed by: Jordan Paige Hendricks <jordan@oxidecomputer.com>
Approved by: Dan McDonald <danmcd@mnx.io>
author: Patrick Mooney <pmooney@pfmooney.com> 2022-04-14 02:14:09 +0000
committer: Patrick Mooney <pmooney@oxide.computer> 2022-06-23 19:41:39 +0000
commit: d515dd7754a14758624ee9b1330197cdb6a47c49 (patch)
tree: c6cb2b8b5abc9ede600d077f6395262e49809bf9 /usr/src/uts/intel
parent: 3b5f2d22219c7c9f6926c804c8fa13b60d9e8a63 (diff)
download: illumos-joyent-d515dd7754a14758624ee9b1330197cdb6a47c49.tar.gz
14 files changed, 1429 insertions, 158 deletions
diff --git a/usr/src/uts/intel/io/vmm/io/vatpic.c b/usr/src/uts/intel/io/vmm/io/vatpic.c
index 2b4dc81b12..3113c0fa48 100644
--- a/usr/src/uts/intel/io/vmm/io/vatpic.c
+++ b/usr/src/uts/intel/io/vmm/io/vatpic.c
@@ -729,6 +729,20 @@ vatpic_slave_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
 	return (vatpic_write(vatpic, atpic, in, port, bytes, eax));
 }
 
+static const uint8_t vatpic_elc_mask[2] = {
+	/*
+	 * For the master PIC the cascade channel (IRQ2), the heart beat timer
+	 * (IRQ0), and the keyboard controller (IRQ1) cannot be programmed for
+	 * level mode.
+	 */
+	0xf8,
+	/*
+	 * For the slave PIC the real time clock (IRQ8) and the floating point
+	 * error interrupt (IRQ13) cannot be programmed for level mode.
+	 */
+	0xde
+};
+
 int
 vatpic_elc_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
     uint32_t *eax)
@@ -740,21 +754,11 @@ vatpic_elc_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
 	switch (port) {
 	case IO_ELCR1:
 		atpic = &vatpic->atpic[0];
-		/*
-		 * For the master PIC the cascade channel (IRQ2), the heart beat
-		 * timer (IRQ0), and the keyboard controller (IRQ1) cannot be
-		 * programmed for level mode.
-		 */
-		elc_mask = 0xf8;
+		elc_mask = vatpic_elc_mask[0];
 		break;
 	case IO_ELCR2:
 		atpic = &vatpic->atpic[1];
-		/*
-		 * For the slave PIC the real time clock (IRQ8) and the floating
-		 * point error interrupt (IRQ13) cannot be programmed for level
-		 * mode.
-		 */
-		elc_mask = 0xde;
+		elc_mask = vatpic_elc_mask[1];
 		break;
 	default:
 		return (-1);
@@ -793,3 +797,117 @@ vatpic_cleanup(struct vatpic *vatpic)
 	mutex_destroy(&vatpic->lock);
 	kmem_free(vatpic, sizeof (*vatpic));
 }
+
+static int
+vatpic_data_read(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpic_v1));
+
+	struct vatpic *vatpic = datap;
+	struct vdi_atpic_v1 *out = req->vdr_data;
+
+	VATPIC_LOCK(vatpic);
+	for (uint_t i = 0; i < 2; i++) {
+		const struct atpic *src = &vatpic->atpic[i];
+		struct vdi_atpic_chip_v1 *chip = &out->va_chip[i];
+
+		chip->vac_icw_state = src->icw_state;
+		chip->vac_status =
+		    (src->ready ? (1 << 0) : 0) |
+		    (src->auto_eoi ? (1 << 1) : 0) |
+		    (src->poll ? (1 << 2) : 0) |
+		    (src->rotate ? (1 << 3) : 0) |
+		    (src->special_full_nested ? (1 << 4) : 0) |
+		    (src->read_isr_next ? (1 << 5) : 0) |
+		    (src->intr_raised ? (1 << 6) : 0) |
+		    (src->special_mask_mode ? (1 << 7) : 0);
+		chip->vac_reg_irr = src->reg_irr;
+		chip->vac_reg_isr = src->reg_isr;
+		chip->vac_reg_imr = src->reg_imr;
+		chip->vac_irq_base = src->irq_base;
+		chip->vac_lowprio = src->lowprio;
+		chip->vac_elc = src->elc;
+		for (uint_t j = 0; j < 8; j++) {
+			chip->vac_level[j] = src->acnt[j];
+		}
+	}
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+static bool
+vatpic_data_validate(const struct vdi_atpic_v1 *src)
+{
+	for (uint_t i = 0; i < 2; i++) {
+		const struct vdi_atpic_chip_v1 *chip = &src->va_chip[i];
+
+		if (chip->vac_icw_state > IS_ICW4) {
+			return (false);
+		}
+		if ((chip->vac_elc & ~vatpic_elc_mask[i]) != 0) {
+			return (false);
+		}
+		/*
+		 * TODO: The state of `intr_raised` could be checked against
+		 * what resides in the ISR/IRR registers.
+		 */
+	}
+
+	return (true);
+}
+
+static int
+vatpic_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpic_v1));
+
+	struct vatpic *vatpic = datap;
+	const struct vdi_atpic_v1 *src = req->vdr_data;
+	if (!vatpic_data_validate(src)) {
+		return (EINVAL);
+	}
+
+	VATPIC_LOCK(vatpic);
+	for (uint_t i = 0; i < 2; i++) {
+		const struct vdi_atpic_chip_v1 *chip = &src->va_chip[i];
+		struct atpic *out = &vatpic->atpic[i];
+
+		out->icw_state = chip->vac_icw_state;
+
+		out->ready = (chip->vac_status & (1 << 0)) != 0;
+		out->auto_eoi = (chip->vac_status & (1 << 1)) != 0;
+		out->poll = (chip->vac_status & (1 << 2)) != 0;
+		out->rotate = (chip->vac_status & (1 << 3)) != 0;
+		out->special_full_nested = (chip->vac_status & (1 << 4)) != 0;
+		out->read_isr_next = (chip->vac_status & (1 << 5)) != 0;
+		out->intr_raised = (chip->vac_status & (1 << 6)) != 0;
+		out->special_mask_mode = (chip->vac_status & (1 << 7)) != 0;
+
+		out->reg_irr = chip->vac_reg_irr;
+		out->reg_isr = chip->vac_reg_isr;
+		out->reg_imr = chip->vac_reg_imr;
+		out->irq_base = chip->vac_irq_base;
+		out->lowprio = chip->vac_lowprio;
+		out->elc = chip->vac_elc;
+		for (uint_t j = 0; j < 8; j++) {
+			out->acnt[j] = chip->vac_level[j];
+		}
+	}
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+static const vmm_data_version_entry_t atpic_v1 = {
+	.vdve_class = VDC_ATPIC,
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_atpic_v1),
+	.vdve_readf = vatpic_data_read,
+	.vdve_writef = vatpic_data_write,
+};
+VMM_DATA_VERSION(atpic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vatpit.c b/usr/src/uts/intel/io/vmm/io/vatpit.c
index 9bf6c01ff4..99c4035e1c 100644
--- a/usr/src/uts/intel/io/vmm/io/vatpit.c
+++ b/usr/src/uts/intel/io/vmm/io/vatpit.c
@@ -26,6 +26,18 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2022 Oxide Computer Company
+ */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
@@ -55,6 +67,8 @@ __FBSDID("$FreeBSD$");
 #define	TIMER_STS_OUT		0x80
 #define	TIMER_STS_NULLCNT	0x40
 
+#define	VALID_STATUS_BITS	(TIMER_STS_OUT | TIMER_STS_NULLCNT)
+
 #define	TIMER_RB_LCTR		0x20
 #define	TIMER_RB_LSTATUS	0x10
 #define	TIMER_RB_CTR_2		0x08
@@ -185,7 +199,7 @@ pit_timer_start_cntr0(struct vatpit *vatpit)
 	hrtime_t now = gethrtime();
 	if (c->time_target < now) {
 		const uint64_t ticks_behind =
-		    hrt_freq_count(c->time_target - now, PIT_8254_FREQ);
+		    hrt_freq_count(now - c->time_target, PIT_8254_FREQ);
 
 		c->total_target += roundup(ticks_behind, c->initial);
 		c->time_target = c->time_loaded +
@@ -482,3 +496,128 @@ vatpit_localize_resources(struct vatpit *vatpit)
 		}
 	}
 }
+
+static int
+vatpit_data_read(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIT);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpit_v1));
+
+	struct vatpit *vatpit = datap;
+	struct vdi_atpit_v1 *out = req->vdr_data;
+
+	VATPIT_LOCK(vatpit);
+	for (uint_t i = 0; i < 3; i++) {
+		const struct channel *src = &vatpit->channel[i];
+		struct vdi_atpit_channel_v1 *chan = &out->va_channel[i];
+
+		chan->vac_initial = src->initial;
+		chan->vac_reg_cr =
+		    (src->reg_cr[0] | (uint16_t)src->reg_cr[1] << 8);
+		chan->vac_reg_ol =
+		    (src->reg_ol[0] | (uint16_t)src->reg_ol[1] << 8);
+		chan->vac_reg_status = src->reg_status;
+		chan->vac_mode = src->mode;
+		chan->vac_status =
+		    (src->slatched ? (1 << 0) : 0) |
+		    (src->olatched ? (1 << 1) : 0) |
+		    (src->cr_sel ? (1 << 2) : 0) |
+		    (src->ol_sel ? (1 << 3) : 0) |
+		    (src->fr_sel ? (1 << 4) : 0);
+		/* Only channel 0 has the timer configured */
+		if (i == 0) {
+			chan->vac_time_target =
+			    vm_normalize_hrtime(vatpit->vm, src->time_target);
+		} else {
+			chan->vac_time_target = 0;
+		}
+	}
+	VATPIT_UNLOCK(vatpit);
+
+	return (0);
+}
+
+static bool
+vatpit_data_validate(const struct vdi_atpit_v1 *src)
+{
+	for (uint_t i = 0; i < 3; i++) {
+		const struct vdi_atpit_channel_v1 *chan = &src->va_channel[i];
+		/* TIMER_STS_* bits belong to vac_reg_status, not vac_status */
+		if ((chan->vac_reg_status & ~VALID_STATUS_BITS) != 0) {
+			return (false);
+		}
+	}
+	return (true);
+}
+
+static int
+vatpit_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIT);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpit_v1));
+
+	struct vatpit *vatpit = datap;
+	const struct vdi_atpit_v1 *src = req->vdr_data;
+	if (!vatpit_data_validate(src)) {
+		return (EINVAL);
+	}
+
+	VATPIT_LOCK(vatpit);
+	for (uint_t i = 0; i < 3; i++) {
+		const struct vdi_atpit_channel_v1 *chan = &src->va_channel[i];
+		struct channel *out = &vatpit->channel[i];
+
+		out->initial = chan->vac_initial;
+		out->reg_cr[0] = chan->vac_reg_cr;
+		out->reg_cr[1] = chan->vac_reg_cr >> 8;
+		out->reg_ol[0] = chan->vac_reg_ol;
+		out->reg_ol[1] = chan->vac_reg_ol >> 8;
+		out->reg_status = chan->vac_reg_status;
+		out->mode = chan->vac_mode;
+		out->slatched = (chan->vac_status & (1 << 0)) != 0;
+		out->olatched = (chan->vac_status & (1 << 1)) != 0;
+		out->cr_sel = (chan->vac_status & (1 << 2)) != 0;
+		out->ol_sel = (chan->vac_status & (1 << 3)) != 0;
+		out->fr_sel = (chan->vac_status & (1 << 4)) != 0;
+
+		/* Only channel 0 has the timer configured */
+		if (i != 0) {
+			continue;
+		}
+
+		struct callout *callout = &out->callout;
+		if (callout_active(callout)) {
+			callout_deactivate(callout);
+		}
+
+		if (chan->vac_time_target == 0) {
+			out->time_loaded = 0;
+			out->time_target = 0;
+			continue;
+		}
+
+		/* back-calculate time_loaded for the appropriate interval */
+		const uint64_t time_target =
+		    vm_denormalize_hrtime(vatpit->vm, chan->vac_time_target);
+		out->total_target = out->initial;
+		out->time_target = time_target;
+		out->time_loaded = time_target -
+		    hrt_freq_interval(PIT_8254_FREQ, out->initial);
+		callout_reset_hrtime(callout, out->time_target,
+		    vatpit_callout_handler, &out->callout_arg, C_ABSOLUTE);
+	}
+	VATPIT_UNLOCK(vatpit);
+
+	return (0);
+}
+
+static const vmm_data_version_entry_t atpit_v1 = {
+	.vdve_class = VDC_ATPIT,
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_atpit_v1),
+	.vdve_readf = vatpit_data_read,
+	.vdve_writef = vatpit_data_write,
+};
+VMM_DATA_VERSION(atpit_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vhpet.c b/usr/src/uts/intel/io/vmm/io/vhpet.c
index cab4f54458..979b3aa8fe 100644
--- a/usr/src/uts/intel/io/vmm/io/vhpet.c
+++ b/usr/src/uts/intel/io/vmm/io/vhpet.c
@@ -738,3 +738,180 @@ vhpet_localize_resources(struct vhpet *vhpet)
 		vmm_glue_callout_localize(&vhpet->timer[i].callout);
 	}
 }
+
+static int
+vhpet_data_read(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_HPET);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
+
+	struct vhpet *vhpet = datap;
+	struct vdi_hpet_v1 *out = req->vdr_data;
+
+	VHPET_LOCK(vhpet);
+	out->vh_config = vhpet->config;
+	out->vh_isr = vhpet->isr;
+	out->vh_count_base = vhpet->base_count;
+	out->vh_time_base = vm_normalize_hrtime(vhpet->vm, vhpet->base_time);
+	for (uint_t i = 0; i < 8; i++) {
+		const struct vhpet_timer *timer = &vhpet->timer[i];
+		struct vdi_hpet_timer_v1 *timer_out = &out->vh_timers[i];
+
+		timer_out->vht_config = timer->cap_config;
+		timer_out->vht_msi = timer->msireg;
+		timer_out->vht_comp_val = timer->compval;
+		timer_out->vht_comp_rate = timer->comprate;
+		if (callout_pending(&timer->callout)) {
+			timer_out->vht_time_target =
+			    vm_normalize_hrtime(vhpet->vm,
+			    timer->callout_expire);
+		} else {
+			timer_out->vht_time_target = 0;
+		}
+	}
+	VHPET_UNLOCK(vhpet);
+
+	return (0);
+}
+
+enum vhpet_validation_error {
+	VVE_OK,
+	VVE_BAD_CONFIG,
+	VVE_BAD_BASE_TIME,
+	VVE_BAD_ISR,
+	VVE_BAD_TIMER_CONFIG,
+	VVE_BAD_TIMER_ISR,
+	VVE_BAD_TIMER_TIME,
+};
+
+static enum vhpet_validation_error
+vhpet_data_validate(const vmm_data_req_t *req, struct vm *vm)
+{
+	ASSERT(req->vdr_version == 1 &&
+	    req->vdr_len == sizeof (struct vdi_hpet_v1));
+	const struct vdi_hpet_v1 *src = req->vdr_data;
+
+	/* LegacyReplacement Routing is not supported */
+	if ((src->vh_config & HPET_CNF_LEG_RT) != 0) {
+		return (VVE_BAD_CONFIG);
+	}
+
+	/* A base time in the future makes no sense */
+	const hrtime_t base_time = vm_denormalize_hrtime(vm, src->vh_time_base);
+	if (base_time > gethrtime()) {
+		return (VVE_BAD_BASE_TIME);
+	}
+
+	/* All asserted ISRs must be associated with an existing timer */
+	if ((src->vh_isr & ~(uint64_t)((1 << VHPET_NUM_TIMERS) - 1)) != 0) {
+		return (VVE_BAD_ISR);
+	}
+
+	for (uint_t i = 0; i < 8; i++) {
+		const struct vdi_hpet_timer_v1 *timer = &src->vh_timers[i];
+
+		const bool msi_enabled =
+		    (timer->vht_config & HPET_TCNF_FSB_EN) != 0;
+		const bool level_triggered =
+		    (timer->vht_config & HPET_TCNF_INT_TYPE) != 0;
+		const bool irq_asserted = (src->vh_isr & (1 << i)) != 0;
+		const uint32_t allowed_irqs = (timer->vht_config >> 32);
+		const uint32_t irq_pin =
+		    (timer->vht_config & HPET_TCNF_INT_ROUTE) >> 9;
+
+		if (msi_enabled) {
+			if (level_triggered) {
+				return (VVE_BAD_TIMER_CONFIG);
+			}
+		} else {
+			/*
+			 * Ensure interrupt route is valid as ensured by the
+			 * logic in vhpet_timer_update_config.
+			 */
+			if (irq_pin != 0 &&
+			    (allowed_irqs & (1U << irq_pin)) == 0) {
+				return (VVE_BAD_TIMER_CONFIG);
+			}
+		}
+		if (irq_asserted && !level_triggered) {
+			return (VVE_BAD_TIMER_ISR);
+		}
+
+		if (timer->vht_time_target != 0) {
+			/*
+			 * A timer scheduled earlier than the base time of the
+			 * entire HPET makes no sense.
+			 */
+			const uint64_t timer_target =
+			    vm_denormalize_hrtime(vm, timer->vht_time_target);
+			if (timer_target < base_time) {
+				return (VVE_BAD_TIMER_TIME);
+			}
+		}
+	}
+
+	return (VVE_OK);
+}
+
+static int
+vhpet_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_HPET);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
+
+	struct vhpet *vhpet = datap;
+
+	if (vhpet_data_validate(req, vhpet->vm) != VVE_OK) {
+		return (EINVAL);
+	}
+	const struct vdi_hpet_v1 *src = req->vdr_data;
+
+	VHPET_LOCK(vhpet);
+	vhpet->config = src->vh_config;
+	vhpet->isr = src->vh_isr;
+	vhpet->base_count = src->vh_count_base;
+	vhpet->base_time = vm_denormalize_hrtime(vhpet->vm, src->vh_time_base);
+
+	for (uint_t i = 0; i < 8; i++) {
+		struct vhpet_timer *timer = &vhpet->timer[i];
+		const struct vdi_hpet_timer_v1 *timer_src = &src->vh_timers[i];
+
+		timer->cap_config = timer_src->vht_config;
+		timer->msireg = timer_src->vht_msi;
+		timer->compval = timer_src->vht_comp_val;
+		timer->comprate = timer_src->vht_comp_rate;
+
+		/*
+		 * For now, any state associating an IOAPIC pin with a given
+		 * timer is not kept in sync. (We will not increment or
+		 * decrement a pin level based on the timer state.)  It is left
+		 * to the consumer to keep those pin levels maintained if
+		 * modifying either the HPET or the IOAPIC.
+		 *
+		 * If both the HPET and IOAPIC are exported and then imported,
+		 * this will occur naturally, as any asserted IOAPIC pin level
+		 * from the HPET would come along for the ride.
+		 */
+
+		/* TODO: properly configure timer */
+		if (timer_src->vht_time_target != 0) {
+			timer->callout_expire = vm_denormalize_hrtime(vhpet->vm,
+			    timer_src->vht_time_target);
+		} else {
+			timer->callout_expire = 0;
+		}
+	}
+	VHPET_UNLOCK(vhpet);
+	return (0);
+}
+
+static const vmm_data_version_entry_t hpet_v1 = {
+	.vdve_class = VDC_HPET,
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_hpet_v1),
+	.vdve_readf = vhpet_data_read,
+	.vdve_writef = vhpet_data_write,
+};
+VMM_DATA_VERSION(hpet_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vioapic.c b/usr/src/uts/intel/io/vmm/io/vioapic.c
index 8222fd33e3..b4cde71a81 100644
--- a/usr/src/uts/intel/io/vmm/io/vioapic.c
+++ b/usr/src/uts/intel/io/vmm/io/vioapic.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 
 #include <x86/apicreg.h>
 #include <machine/vmm.h>
+#include <sys/vmm_data.h>
 
 #include "vmm_lapic.h"
 #include "vlapic.h"
@@ -451,3 +452,56 @@ vioapic_pincount(struct vm *vm)
 
 	return (REDIR_ENTRIES);
 }
+
+static int
+vioapic_data_read(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_IOAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_ioapic_v1));
+
+	struct vioapic *vioapic = datap;
+	struct vdi_ioapic_v1 *out = req->vdr_data;
+
+	VIOAPIC_LOCK(vioapic);
+	out->vi_id = vioapic->id;
+	out->vi_reg_sel = vioapic->ioregsel;
+	for (uint_t i = 0; i < REDIR_ENTRIES; i++) {
+		out->vi_pin_reg[i] = vioapic->rtbl[i].reg;
+		out->vi_pin_level[i] = vioapic->rtbl[i].acnt;
+	}
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+static int
+vioapic_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_IOAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_ioapic_v1));
+
+	struct vioapic *vioapic = datap;
+	const struct vdi_ioapic_v1 *src = req->vdr_data;
+
+	VIOAPIC_LOCK(vioapic);
+	vioapic->id = src->vi_id;
+	vioapic->ioregsel = src->vi_reg_sel;
+	for (uint_t i = 0; i < REDIR_ENTRIES; i++) {
+		vioapic->rtbl[i].reg = src->vi_pin_reg[i] & ~RTBL_RO_BITS;
+		vioapic->rtbl[i].acnt = src->vi_pin_level[i];
+	}
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+static const vmm_data_version_entry_t ioapic_v1 = {
+	.vdve_class = VDC_IOAPIC,
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_ioapic_v1),
+	.vdve_readf = vioapic_data_read,
+	.vdve_writef = vioapic_data_write,
+};
+VMM_DATA_VERSION(ioapic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic.c b/usr/src/uts/intel/io/vmm/io/vlapic.c
index 50964a9c4c..3127bede2f 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic.c
+++ b/usr/src/uts/intel/io/vmm/io/vlapic.c
@@ -40,7 +40,7 @@
  *
  * Copyright 2014 Pluribus Networks Inc.
  * Copyright 2018 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
  */
 
 #include <sys/cdefs.h>
@@ -97,16 +97,14 @@ __FBSDID("$FreeBSD$");
 
 #define	APICBASE_ADDR_MASK	0xfffffffffffff000UL
 
+#define	APIC_VALID_MASK_ESR	(APIC_ESR_SEND_CS_ERROR | \
+		APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
+		APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
+		APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)
+
 static void vlapic_set_error(struct vlapic *, uint32_t, bool);
 static void vlapic_callout_handler(void *arg);
 
-#ifdef __ISRVEC_DEBUG
-static void vlapic_isrstk_accept(struct vlapic *, int);
-static void vlapic_isrstk_eoi(struct vlapic *, int);
-static void vlapic_isrstk_verify(const struct vlapic *);
-#endif /* __ISRVEC_DEBUG */
-
-
 static __inline bool
 vlapic_x2mode(const struct vlapic *vlapic)
 {
@@ -134,7 +132,7 @@ vlapic_enabled(const struct vlapic *vlapic)
 }
 
 static __inline uint32_t
-vlapic_get_id(struct vlapic *vlapic)
+vlapic_get_id(const struct vlapic *vlapic)
 {
 
 	if (vlapic_x2mode(vlapic))
@@ -144,7 +142,7 @@ vlapic_get_id(struct vlapic *vlapic)
 }
 
 static uint32_t
-x2apic_ldr(struct vlapic *vlapic)
+x2apic_ldr(const struct vlapic *vlapic)
 {
 	int apicid;
 	uint32_t ldr;
@@ -263,27 +261,30 @@ vlapic_get_ccr(struct vlapic *vlapic)
 	return (ccr);
 }
 
-void
-vlapic_dcr_write_handler(struct vlapic *vlapic)
+static void
+vlapic_update_divider(struct vlapic *vlapic)
 {
-	struct LAPIC *lapic;
-	int divisor;
+	struct LAPIC *lapic = vlapic->apic_page;
 
-	lapic = vlapic->apic_page;
-	VLAPIC_TIMER_LOCK(vlapic);
+	ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
 
-	divisor = vlapic_timer_divisor(lapic->dcr_timer);
+	vlapic->timer_cur_freq =
+	    VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
+	vlapic->timer_period =
+	    hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
+}
 
+void
+vlapic_dcr_write_handler(struct vlapic *vlapic)
+{
 	/*
 	 * Update the timer frequency and the timer period.
 	 *
 	 * XXX changes to the frequency divider will not take effect until
 	 * the timer is reloaded.
 	 */
-	vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
-	vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
-	    lapic->icr_timer);
-
+	VLAPIC_TIMER_LOCK(vlapic);
+	vlapic_update_divider(vlapic);
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
@@ -453,30 +454,30 @@ vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
 }
 
 static void
+vlapic_refresh_lvts(struct vlapic *vlapic)
+{
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
+}
+
+static void
 vlapic_mask_lvts(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 
 	lapic->lvt_cmci |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
-
 	lapic->lvt_timer |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
-
 	lapic->lvt_thermal |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
-
 	lapic->lvt_pcint |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
-
 	lapic->lvt_lint0 |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
-
 	lapic->lvt_lint1 |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
-
 	lapic->lvt_error |= APIC_LVT_M;
-	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
+	vlapic_refresh_lvts(vlapic);
 }
 
 static int
@@ -581,13 +582,6 @@ vlapic_raise_ppr(struct vlapic *vlapic, int vec)
 
 	ppr = PRIO(vec);
 
-#ifdef __ISRVEC_DEBUG
-	KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
-	KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
-	KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
-	KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
-#endif /* __ISRVEC_DEBUG */
-
 	lapic->ppr = ppr;
 }
 
@@ -617,9 +611,6 @@ vlapic_process_eoi(struct vlapic *vlapic)
 			vector = i * 32 + bitpos;
 
 			isrptr[idx] &= ~(1 << bitpos);
-#ifdef __ISRVEC_DEBUG
-			vlapic_isrstk_eoi(vlapic, vector);
-#endif
 			vlapic_update_ppr(vlapic);
 			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
 				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
@@ -934,7 +925,7 @@ vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
 }
 
 uint64_t
-vlapic_get_cr8(struct vlapic *vlapic)
+vlapic_get_cr8(const struct vlapic *vlapic)
 {
 	const struct LAPIC *lapic = vlapic->apic_page;
 
@@ -1110,10 +1101,6 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
 	 * in-service, the PPR must be raised.
 	 */
 	vlapic_raise_ppr(vlapic, vector);
-
-#ifdef __ISRVEC_DEBUG
-	vlapic_isrstk_accept(vlapic, vector);
-#endif
 }
 
 void
@@ -1342,9 +1329,9 @@ vlapic_reset(struct vlapic *vlapic)
 	callout_stop(&vlapic->callout);
 	lapic->icr_timer = 0;
 	lapic->ccr_timer = 0;
-	VLAPIC_TIMER_UNLOCK(vlapic);
 	lapic->dcr_timer = 0;
-	vlapic_dcr_write_handler(vlapic);
+	vlapic_update_divider(vlapic);
+	VLAPIC_TIMER_UNLOCK(vlapic);
 
 	/*
 	 * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
@@ -1368,11 +1355,6 @@ vlapic_reset(struct vlapic *vlapic)
 	lapic->apr = 0;
 	lapic->ppr = 0;
 
-#ifdef __ISRVEC_DEBUG
-	/* With the PPR cleared, the isrvec tracking should be reset too */
-	vlapic->isrvec_stk_top = 0;
-#endif
-
 	lapic->eoi = 0;
 	lapic->ldr = 0;
 	lapic->dfr = 0xffffffff;
@@ -1726,93 +1708,237 @@ vlapic_localize_resources(struct vlapic *vlapic)
 	vmm_glue_callout_localize(&vlapic->callout);
 }
 
-#ifdef __ISRVEC_DEBUG
-static void
-vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
+static int
+vlapic_data_read(void *datap, const vmm_data_req_t *req)
 {
-	if (vlapic->isrvec_stk_top <= 0) {
-		panic("invalid vlapic isrvec_stk_top %d",
-		    vlapic->isrvec_stk_top);
+	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));
+
+	struct vlapic *vlapic = datap;
+	struct vdi_lapic_v1 *out = req->vdr_data;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	if (vlapic->ops.sync_state) {
+		(*vlapic->ops.sync_state)(vlapic);
 	}
-	vlapic->isrvec_stk_top--;
-	vlapic_isrstk_verify(vlapic);
-}
 
-static void
-vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
-{
-	int stk_top;
+	out->vl_msr_apicbase = vlapic->msr_apicbase;
+	out->vl_esr_pending = vlapic->esr_pending;
+	if (callout_pending(&vlapic->callout)) {
+		out->vl_timer_target =
+		    vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
+	} else {
+		out->vl_timer_target = 0;
+	}
 
-	vlapic->isrvec_stk_top++;
+	const struct LAPIC *lapic = vlapic->apic_page;
+	struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;
 
-	stk_top = vlapic->isrvec_stk_top;
-	if (stk_top >= ISRVEC_STK_SIZE)
-		panic("isrvec_stk_top overflow %d", stk_top);
+	/*
+	 * While this might appear, at first glance, to be missing some fields,
+	 * they are intentionally omitted:
+	 * - PPR: its contents are always generated at runtime
+	 * - EOI: write-only, and contents are ignored after handling
+	 * - RRD: (aka RRR) read-only and always 0
+	 * - CCR: calculated from underlying timer data
+	 */
+	out_page->vlp_id = lapic->id;
+	out_page->vlp_version = lapic->version;
+	out_page->vlp_tpr = lapic->tpr;
+	out_page->vlp_apr = lapic->apr;
+	out_page->vlp_ldr = lapic->ldr;
+	out_page->vlp_dfr = lapic->dfr;
+	out_page->vlp_svr = lapic->svr;
+	out_page->vlp_esr = lapic->esr;
+	out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
+	out_page->vlp_icr_timer = lapic->icr_timer;
+	out_page->vlp_dcr_timer = lapic->dcr_timer;
+
+	out_page->vlp_lvt_cmci = lapic->lvt_cmci;
+	out_page->vlp_lvt_timer = lapic->lvt_timer;
+	out_page->vlp_lvt_thermal = lapic->lvt_thermal;
+	out_page->vlp_lvt_pcint = lapic->lvt_pcint;
+	out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
+	out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
+	out_page->vlp_lvt_error = lapic->lvt_error;
+
+	const uint32_t *isrptr = &lapic->isr0;
+	const uint32_t *tmrptr = &lapic->tmr0;
+	const uint32_t *irrptr = &lapic->irr0;
+	for (uint_t i = 0; i < 8; i++) {
+		out_page->vlp_isr[i] = isrptr[i * 4];
+		out_page->vlp_tmr[i] = tmrptr[i * 4];
+		out_page->vlp_irr[i] = irrptr[i * 4];
+	}
+	VLAPIC_TIMER_UNLOCK(vlapic);
 
-	vlapic->isrvec_stk[stk_top] = vector;
-	vlapic_isrstk_verify(vlapic);
+	return (0);
 }
 
-static void
-vlapic_isrstk_dump(const struct vlapic *vlapic)
+static uint8_t
+popc8(uint8_t val)
 {
-	int i;
-	uint32_t *isrptr;
+	uint8_t cnt;
 
-	isrptr = &vlapic->apic_page->isr0;
-	for (i = 0; i < 8; i++)
-		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
-
-	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
-		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+	for (cnt = 0; val != 0; val &= (val - 1)) {
+		cnt++;
+	}
+	return (cnt);
 }
 
-static void
-vlapic_isrstk_verify(const struct vlapic *vlapic)
-{
-	int i, lastprio, curprio, vector, idx;
-	uint32_t *isrptr;
+/*
+ * Descriptions for the various failures which can occur when validating
+ * to-be-written vlapic state.
+ */
+enum vlapic_validation_error {
+	VVE_OK,
+	VVE_BAD_ID,
+	VVE_BAD_VERSION,
+	VVE_BAD_MSR_BASE,
+	VVE_BAD_ESR,
+	VVE_BAD_TPR,
+	VVE_LOW_VECTOR,
+	VVE_ISR_PRIORITY,
+};
+
+static enum vlapic_validation_error
+vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
+{
+	ASSERT(req->vdr_version == 1 &&
+	    req->vdr_len == sizeof (struct vdi_lapic_v1));
+	const struct vdi_lapic_v1 *src = req->vdr_data;
+
+	if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
+	    (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
+		return (VVE_BAD_ESR);
+	}
 
-	/*
-	 * Note: The value at index 0 in isrvec_stk is always 0.
-	 *
-	 * It is a placeholder for the value of ISR vector when no bits are set
-	 * in the ISRx registers.
-	 */
-	if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) {
-		panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]);
+	/* Use the same restrictions as the wrmsr accessor for now */
+	const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
+	    APICBASE_BSP;
+	const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
+	if ((diff & apicbase_reserved) != 0) {
+		return (VVE_BAD_MSR_BASE);
 	}
 
+	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
 	/*
-	 * Make sure that the priority of the nested interrupts is
-	 * always increasing.
+	 * Demand that ID match for now.  This can be further updated when some
+	 * of the x2apic handling is improved.
 	 */
-	lastprio = -1;
-	for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
-		curprio = PRIO(vlapic->isrvec_stk[i]);
-		if (curprio <= lastprio) {
-			vlapic_isrstk_dump(vlapic);
-			panic("isrvec_stk does not satisfy invariant");
-		}
-		lastprio = curprio;
+	if (page->vlp_id != vlapic_get_id(vlapic)) {
+		return (VVE_BAD_ID);
 	}
 
-	/*
-	 * Make sure that each bit set in the ISRx registers has a
-	 * corresponding entry on the isrvec stack.
-	 */
-	i = 1;
-	isrptr = &vlapic->apic_page->isr0;
-	for (vector = 0; vector < 256; vector++) {
-		idx = (vector / 32) * 4;
-		if (isrptr[idx] & (1 << (vector % 32))) {
-			if (i > vlapic->isrvec_stk_top ||
-			    vlapic->isrvec_stk[i] != vector) {
-				vlapic_isrstk_dump(vlapic);
-				panic("ISR and isrvec_stk out of sync");
-			}
-			i++;
+	if (page->vlp_version != vlapic->apic_page->version) {
+		return (VVE_BAD_VERSION);
+	}
+
+	if (page->vlp_tpr > 0xff) {
+		return (VVE_BAD_TPR);
+	}
+
+	/* Vectors 0-15 are not expected to be handled by the lapic */
+	if ((page->vlp_isr[0] & 0xffff) != 0 ||
+	    (page->vlp_irr[0] & 0xffff) != 0 ||
+	    (page->vlp_tmr[0] & 0xffff) != 0) {
+		return (VVE_LOW_VECTOR);
+	}
+
+	/* At most one vector may be in-service per 16-vector priority class */
+	for (uint_t i = 0; i < 8; i++) {
+		if (popc8((uint8_t)page->vlp_isr[i]) +
+		    popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
+		    popc8((uint8_t)(page->vlp_isr[i] >> 16)) +
+		    popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
+			return (VVE_ISR_PRIORITY);
 		}
 	}
+
+	return (VVE_OK);
 }
-#endif
+
+static int
+vlapic_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));
+
+	struct vlapic *vlapic = datap;
+	if (vlapic_data_validate(vlapic, req) != VVE_OK) {
+		return (EINVAL);
+	}
+	const struct vdi_lapic_v1 *src = req->vdr_data;
+	const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	/* Already ensured by vlapic_data_validate() */
+	VERIFY3U(page->vlp_id, ==, lapic->id);
+	VERIFY3U(page->vlp_version, ==, lapic->version);
+
+	vlapic->msr_apicbase = src->vl_msr_apicbase;
+	vlapic->esr_pending = src->vl_esr_pending;
+
+	lapic->tpr = page->vlp_tpr;
+	lapic->apr = page->vlp_apr;
+	lapic->ldr = page->vlp_ldr;
+	lapic->dfr = page->vlp_dfr;
+	lapic->svr = page->vlp_svr;
+	lapic->esr = page->vlp_esr;
+	lapic->icr_lo = (uint32_t)page->vlp_icr;
+	lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);
+
+	lapic->icr_timer = page->vlp_icr_timer;
+	lapic->dcr_timer = page->vlp_dcr_timer;
+	vlapic_update_divider(vlapic);
+
+	/* cleanse LDR/DFR */
+	vlapic_ldr_write_handler(vlapic);
+	vlapic_dfr_write_handler(vlapic);
+
+	lapic->lvt_cmci = page->vlp_lvt_cmci;
+	lapic->lvt_timer = page->vlp_lvt_timer;
+	lapic->lvt_thermal = page->vlp_lvt_thermal;
+	lapic->lvt_pcint = page->vlp_lvt_pcint;
+	lapic->lvt_lint0 = page->vlp_lvt_lint0;
+	lapic->lvt_lint1 = page->vlp_lvt_lint1;
+	lapic->lvt_error = page->vlp_lvt_error;
+	/* cleanse LVTs */
+	vlapic_refresh_lvts(vlapic);
+
+	uint32_t *isrptr = &lapic->isr0;
+	uint32_t *tmrptr = &lapic->tmr0;
+	uint32_t *irrptr = &lapic->irr0;
+	for (uint_t i = 0; i < 8; i++) {
+		isrptr[i * 4] = page->vlp_isr[i];
+		tmrptr[i * 4] = page->vlp_tmr[i];
+		irrptr[i * 4] = page->vlp_irr[i];
+	}
+
+	if (src->vl_timer_target != 0) {
+		vlapic->timer_fire_when =
+		    vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);
+		vlapic_callout_reset(vlapic);
+	}
+
+	if (vlapic->ops.sync_state) {
+		(*vlapic->ops.sync_state)(vlapic);
+	}
+	VLAPIC_TIMER_UNLOCK(vlapic);
+
+	return (0);
+}
+
+static const vmm_data_version_entry_t lapic_v1 = {
+	.vdve_class = VDC_LAPIC,
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_lapic_v1),
+	.vdve_readf = vlapic_data_read,
+	.vdve_writef = vlapic_data_write,
+};
+VMM_DATA_VERSION(lapic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic.h b/usr/src/uts/intel/io/vmm/io/vlapic.h
index dd1970cb6a..4fe2d79c69 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic.h
+++ b/usr/src/uts/intel/io/vmm/io/vlapic.h
@@ -89,7 +89,7 @@ void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
     bool lowprio, bool x2apic_dest);
 
 void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
-uint64_t vlapic_get_cr8(struct vlapic *vlapic);
+uint64_t vlapic_get_cr8(const struct vlapic *vlapic);
 
 /* APIC write handlers */
 void vlapic_id_write_handler(struct vlapic *vlapic);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic_priv.h b/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
index abe1271fcc..95e1650b5c 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
@@ -106,10 +106,6 @@
 
 #define	VLAPIC_TMR_CNT		8
 
-#ifdef DEBUG
-#define	__ISRVEC_DEBUG
-#endif
-
 struct vlapic;
 
 struct vlapic_ops {
@@ -146,19 +142,6 @@ struct vlapic {
 	 */
 	uint32_t	svr_last;
 	uint32_t	lvt_last[VLAPIC_MAXLVT_INDEX + 1];
-
-#ifdef __ISRVEC_DEBUG
-	/*
-	 * The 'isrvec_stk' is a stack of vectors injected by the local APIC.
-	 * It is used as a debugging method to double-check the behavior of the
-	 * emulation.  Vectors are pushed to the stack when they are accepted
-	 * for injection and popped from the stack when the processor performs
-	 * an EOI.  The vector on the top of the stack is used to verify the
-	 * computed Processor Priority.
-	 */
-	uint8_t		isrvec_stk[ISRVEC_STK_SIZE];
-	int		isrvec_stk_top;
-#endif
 };
 
 void vlapic_init(struct vlapic *vlapic);
diff --git a/usr/src/uts/intel/io/vmm/io/vpmtmr.c b/usr/src/uts/intel/io/vmm/io/vpmtmr.c
index 930aa2d237..cb8713c9d0 100644
--- a/usr/src/uts/intel/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/intel/io/vmm/io/vpmtmr.c
@@ -155,3 +155,44 @@ vpmtmr_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val)
 
 	return (0);
 }
+
+/* vmm-data v1 reader: export PM timer base time (normalized) and IO port */
+static int
+vpmtmr_data_read(void *datap, const vmm_data_req_t *req)
+{
+	const struct vpmtmr *tmr = datap;
+	struct vdi_pm_timer_v1 *dst = req->vdr_data;
+
+	VERIFY3U(req->vdr_class, ==, VDC_PM_TIMER);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (*dst));
+
+	dst->vpt_ioport = tmr->io_port;
+	dst->vpt_time_base = vm_normalize_hrtime(tmr->vm, tmr->base_time);
+	return (0);
+}
+
+/* vmm-data v1 writer: restore only base_time; vpt_ioport is not applied */
+static int
+vpmtmr_data_write(void *datap, const vmm_data_req_t *req)
+{
+	struct vpmtmr *tmr = datap;
+	const struct vdi_pm_timer_v1 *in = req->vdr_data;
+
+	VERIFY3U(req->vdr_class, ==, VDC_PM_TIMER);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (*in));
+
+	tmr->base_time = vm_denormalize_hrtime(tmr->vm, in->vpt_time_base);
+
+	return (0);
+}
+
+static const vmm_data_version_entry_t pm_timer_v1 = {
+	.vdve_class = VDC_PM_TIMER,	/* system-wide class */
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_pm_timer_v1),
+	.vdve_readf = vpmtmr_data_read,
+	.vdve_writef = vpmtmr_data_write,
+};
+VMM_DATA_VERSION(pm_timer_v1);	/* add to the vmm_data_version_entries set */
diff --git a/usr/src/uts/intel/io/vmm/io/vrtc.c b/usr/src/uts/intel/io/vmm/io/vrtc.c
index 4c74cd7922..906b449ddc 100644
--- a/usr/src/uts/intel/io/vmm/io/vrtc.c
+++ b/usr/src/uts/intel/io/vmm/io/vrtc.c
@@ -97,6 +97,7 @@ struct vrtc {
 #define	RTC_IRQ			8
 #define	RTCSB_BIN		0x04
 #define	RTCSB_ALL_INTRS		(RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
+#define	RTCSC_MASK	(RTCIR_UPDATE | RTCIR_ALARM | RTCIR_PERIOD | RTCIR_INT)
 #define	rtc_halted(vrtc)	((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0)
 #define	aintr_enabled(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
 #define	pintr_enabled(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
@@ -968,3 +969,69 @@ vrtc_localize_resources(struct vrtc *vrtc)
 {
 	vmm_glue_callout_localize(&vrtc->callout);
 }
+
+/* vmm-data v1 reader: snapshot RTC registers and time bases under lock */
+static int
+vrtc_data_read(void *datap, const vmm_data_req_t *req)
+{
+	struct vrtc *vrtc = datap;
+	struct vdi_rtc_v1 *dst = req->vdr_data;
+
+	VERIFY3U(req->vdr_class, ==, VDC_RTC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (*dst));
+
+	VRTC_LOCK(vrtc);
+	bcopy(&vrtc->rtcdev, dst->vr_content, sizeof (dst->vr_content));
+	dst->vr_addr = vrtc->addr;
+	dst->vr_rtc_sec = vrtc->base_rtctime;
+	/* XXX: vrtc does not have sub-1s precision yet */
+	dst->vr_rtc_nsec = 0;
+	dst->vr_time_base = vm_normalize_hrtime(vrtc->vm, vrtc->base_uptime);
+
+	VRTC_UNLOCK(vrtc);
+
+	return (0);
+}
+
+static int
+vrtc_data_write(void *datap, const vmm_data_req_t *req)
+{
+	VERIFY3U(req->vdr_class, ==, VDC_RTC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_rtc_v1));
+
+	struct vrtc *vrtc = datap;
+	const struct vdi_rtc_v1 *src = req->vdr_data;
+
+	VRTC_LOCK(vrtc);
+	/* Take the payload as-is (vr_rtc_nsec is unused); scrubbed below */
+	vrtc->addr = src->vr_addr;
+	vrtc->base_uptime = vm_denormalize_hrtime(vrtc->vm, src->vr_time_base);
+	vrtc->base_rtctime = src->vr_rtc_sec;
+	bcopy(src->vr_content, &vrtc->rtcdev, sizeof (vrtc->rtcdev));
+
+	/* TODO: handle status update for register B */
+	vrtc->rtcdev.reg_a &= ~RTCSA_TUP;	/* drop the TUP bit */
+	vrtc->rtcdev.reg_c &= RTCSC_MASK;	/* keep only RTCSC_MASK bits */
+	vrtc->rtcdev.reg_d = RTCSD_PWR;		/* supplied value ignored */
+
+	/* Sync the actual RTC time into the appropriate fields */
+	time_t curtime = vrtc_curtime(vrtc, NULL);
+	secs_to_rtc(curtime, vrtc, 1);
+
+	/* Make sure the callout is appropriately scheduled */
+	vrtc_callout_reset(vrtc, vrtc_freq(vrtc));
+
+	VRTC_UNLOCK(vrtc);
+	return (0);
+}
+
+static const vmm_data_version_entry_t rtc_v1 = {
+	.vdve_class = VDC_RTC,		/* system-wide class */
+	.vdve_version = 1,
+	.vdve_len_expect = sizeof (struct vdi_rtc_v1),
+	.vdve_readf = vrtc_data_read,
+	.vdve_writef = vrtc_data_write,
+};
+VMM_DATA_VERSION(rtc_v1);	/* add to the vmm_data_version_entries set */
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
index 87f8e18b47..bc7f1bb0f2 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
@@ -49,6 +49,8 @@
 #include <sys/sdt.h>
 #include <x86/segments.h>
 #include <sys/vmm.h>
+#include <sys/vmm_data.h>
+#include <sys/linker_set.h>
 
 SDT_PROVIDER_DECLARE(vmm);
 
@@ -65,6 +67,7 @@ struct vmspace;
 struct vm_client;
 struct vm_object;
 struct vm_guest_paging;
+struct vmm_data_req;
 
 typedef int	(*vmm_init_func_t)(void);
 typedef int	(*vmm_cleanup_func_t)(void);
@@ -229,8 +232,10 @@ void vcpu_block_run(struct vm *, int);
 void vcpu_unblock_run(struct vm *, int);
 
 uint64_t vcpu_tsc_offset(struct vm *vm, int vcpuid, bool phys_adj);
+hrtime_t vm_normalize_hrtime(struct vm *, hrtime_t);
+hrtime_t vm_denormalize_hrtime(struct vm *, hrtime_t);
 
-static __inline int
+static __inline bool
 vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 {
 	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
@@ -434,4 +439,29 @@ typedef struct vmm_vcpu_kstats {
 
 int vmm_kstat_update_vcpu(struct kstat *, int);
 
+struct vmm_data_req {
+	uint16_t	vdr_class;	/* VDC_* data class */
+	uint16_t	vdr_version;	/* version of that class's payload */
+	uint32_t	vdr_flags;	/* VDX_FLAG_* bits from the ioctl */
+	uint32_t	vdr_len;	/* size of vdr_data in bytes */
+	void		*vdr_data;	/* kernel buffer for the payload */
+};
+typedef struct vmm_data_req vmm_data_req_t;
+
+typedef int (*vmm_data_writef_t)(void *, const vmm_data_req_t *);
+typedef int (*vmm_data_readf_t)(void *, const vmm_data_req_t *);
+
+typedef struct vmm_data_version_entry {
+	uint16_t		vdve_class;	/* VDC_* class served */
+	uint16_t		vdve_version;	/* payload version served */
+	uint16_t		vdve_len_expect; /* 0: length not enforced */
+	vmm_data_readf_t	vdve_readf;
+	vmm_data_writef_t	vdve_writef;
+} vmm_data_version_entry_t;
+
+#define	VMM_DATA_VERSION(sym)	SET_ENTRY(vmm_data_version_entries, sym)
+
+int vmm_data_read(struct vm *, int, const vmm_data_req_t *);
+int vmm_data_write(struct vm *, int, const vmm_data_req_t *);
+
 #endif /* _VMM_KERNEL_H_ */
diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c
index 0ff23e88b2..565dcbbe0a 100644
--- a/usr/src/uts/intel/io/vmm/vmm.c
+++ b/usr/src/uts/intel/io/vmm/vmm.c
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/vmm_instruction_emul.h>
 #include <sys/vmm_vm.h>
 #include <sys/vmm_gpt.h>
+#include <sys/vmm_data.h>
 
 #include "vmm_ioport.h"
 #include "vmm_host.h"
@@ -211,7 +212,9 @@ struct vm {
 	uint16_t	cores;			/* (o) num of cores/socket */
 	uint16_t	threads;		/* (o) num of threads/core */
 	uint16_t	maxcpus;		/* (o) max pluggable cpus */
+
 	uint64_t	boot_tsc_offset;	/* (i) TSC offset at VM boot */
+	hrtime_t	boot_hrtime;		/* (i) hrtime at VM boot */
 
 	struct ioport_config ioports;		/* (o) ioport handling */
 
@@ -519,7 +522,12 @@ vm_init(struct vm *vm, bool create)
 	 * The TSC offsetting math is all unsigned, using overflow for negative
 	 * offets.  A reading of the TSC is negated to form the boot offset.
 	 */
-	vm->boot_tsc_offset = (uint64_t)(-(int64_t)rdtsc_offset());
+	const uint64_t boot_tsc = rdtsc_offset();
+	vm->boot_tsc_offset = (uint64_t)(-(int64_t)boot_tsc);
+
+	/* Convert the boot TSC reading to hrtime */
+	vm->boot_hrtime = (hrtime_t)boot_tsc;
+	scalehrtime(&vm->boot_hrtime);
 }
 
 /*
@@ -3007,6 +3015,9 @@ vm_set_capability(struct vm *vm, int vcpu, int type, int val)
 struct vlapic *
 vm_lapic(struct vm *vm, int cpu)
 {
+	ASSERT3S(cpu, >=, 0);
+	ASSERT3S(cpu, <, VM_MAXCPU);
+
 	return (vm->vcpu[cpu].vlapic);
 }
 
@@ -3086,6 +3097,22 @@ vcpu_tsc_offset(struct vm *vm, int vcpuid, bool phys_adj)
 	return (vcpu_off);
 }
 
+/* Rebase host hrtime onto the VM boot time, using wrapping unsigned math */
+hrtime_t
+vm_normalize_hrtime(struct vm *vm, hrtime_t hrt)
+{
+	const uint64_t boot = (uint64_t)vm->boot_hrtime;
+	return ((hrtime_t)((uint64_t)hrt - boot));
+}
+
+/* Inverse of vm_normalize_hrtime(): add the VM boot hrtime back (unsigned) */
+hrtime_t
+vm_denormalize_hrtime(struct vm *vm, hrtime_t hrt)
+{
+	const uint64_t boot = (uint64_t)vm->boot_hrtime;
+	return ((hrtime_t)((uint64_t)hrt + boot));
+}
+
 int
 vm_activate_cpu(struct vm *vm, int vcpuid)
 {
@@ -3664,3 +3691,166 @@ vmm_kstat_update_vcpu(struct kstat *ksp, int rw)
 
 	return (0);
 }
+
+SET_DECLARE(vmm_data_version_entries, const vmm_data_version_entry_t);
+
+/*
+ * Report whether a VDC_* data class carries per-vCPU state.
+ */
+static inline bool
+vmm_data_is_cpu_specific(uint16_t data_class)
+{
+	const bool per_cpu =
+	    data_class == VDC_REGISTER ||
+	    data_class == VDC_MSR ||
+	    data_class == VDC_FPU ||
+	    data_class == VDC_LAPIC ||
+	    data_class == VDC_VMM_ARCH;
+	return (per_cpu);
+}
+
+/* Look up the registered entry for req's class+version; NULL sets *err. */
+static const vmm_data_version_entry_t *
+vmm_data_find(const vmm_data_req_t *req, int *err)
+{
+	const vmm_data_version_entry_t **iter;
+
+	SET_FOREACH(iter, vmm_data_version_entries) {
+		const vmm_data_version_entry_t *ent = *iter;
+		if (ent->vdve_class != req->vdr_class ||
+		    ent->vdve_version != req->vdr_version) {
+			continue;
+		}
+		/* A non-zero expected length must match the request exactly */
+		const uint16_t want = ent->vdve_len_expect;
+		if (want != 0 && want != req->vdr_len) {
+			*err = ENOSPC;
+			return (NULL);
+		}
+		return (ent);
+	}
+	*err = EINVAL;
+	return (NULL);
+}
+
+static void *
+vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid)
+{
+	switch (req->vdr_class) {
+		/* per-cpu data/devices: vcpuid selects the instance */
+	case VDC_LAPIC:
+		return (vm_lapic(vm, vcpuid));
+
+	case VDC_FPU:
+	case VDC_REGISTER:
+	case VDC_VMM_ARCH:
+	case VDC_MSR:
+		/*
+		 * These have per-CPU handling which is dispatched outside
+		 * vmm_data_version_entries listing.
+		 */
+		return (NULL);
+
+		/* system-wide data/devices: vcpuid is not consulted */
+	case VDC_IOAPIC:
+		return (vm->vioapic);
+	case VDC_ATPIT:
+		return (vm->vatpit);
+	case VDC_ATPIC:
+		return (vm->vatpic);
+	case VDC_HPET:
+		return (vm->vhpet);
+	case VDC_PM_TIMER:
+		return (vm->vpmtmr);
+	case VDC_RTC:
+		return (vm->vrtc);
+
+	default:
+		/* The data class will have been validated by now */
+		panic("Unexpected class %u", req->vdr_class);
+	}
+}
+
+/*
+ * Read the state identified by `req` (class + version) into req->vdr_data.
+ * Classes bearing per-CPU data require a vcpuid within [0, VM_MAXCPU); the
+ * -1 used for VM-wide access must never reach the per-CPU lookup.
+ * Returns 0 on success, otherwise an errno value.
+ */
+int
+vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+	int err = 0;
+
+	if (vmm_data_is_cpu_specific(req->vdr_class) &&
+	    (vcpuid < 0 || vcpuid >= VM_MAXCPU)) {
+		return (EINVAL);
+	}
+
+	const vmm_data_version_entry_t *entry = vmm_data_find(req, &err);
+	if (entry == NULL) {
+		ASSERT(err != 0);
+		return (err);
+	}
+
+	void *datap = vmm_data_from_class(req, vm, vcpuid);
+	if (datap != NULL) {
+		return (entry->vdve_readf(datap, req));
+	}
+
+	switch (req->vdr_class) {
+	case VDC_FPU:
+		/* TODO: wire up to xsave export via hma_fpu iface */
+	case VDC_REGISTER:
+	case VDC_VMM_ARCH:
+	case VDC_MSR:
+		/* TODO: implement */
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	return (err);
+}
+
+/*
+ * Apply the payload in req->vdr_data to the state identified by `req`
+ * (class + version).  Classes bearing per-CPU data require a vcpuid within
+ * [0, VM_MAXCPU); -1 denotes VM-wide access and must not be used as an index.
+ * Returns 0 on success, otherwise an errno value.
+ */
+int
+vmm_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+	int err = 0;
+
+	if (vmm_data_is_cpu_specific(req->vdr_class) &&
+	    (vcpuid < 0 || vcpuid >= VM_MAXCPU)) {
+		return (EINVAL);
+	}
+
+	const vmm_data_version_entry_t *entry = vmm_data_find(req, &err);
+	if (entry == NULL) {
+		ASSERT(err != 0);
+		return (err);
+	}
+
+	void *datap = vmm_data_from_class(req, vm, vcpuid);
+	if (datap != NULL) {
+		return (entry->vdve_writef(datap, req));
+	}
+
+	switch (req->vdr_class) {
+	case VDC_FPU:
+		/* TODO: wire up to xsave import via hma_fpu iface */
+	case VDC_REGISTER:
+	case VDC_VMM_ARCH:
+	case VDC_MSR:
+		/* TODO: implement */
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	return (err);
+}
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
index a58e9d63f9..9a4693fc78 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
@@ -81,6 +81,9 @@ static list_t		vmm_destroy_list;
 static id_space_t	*vmm_minors;
 static void		*vmm_statep;
 
+/* temporary safety switch */
+int		vmm_allow_state_writes;
+
 static const char *vmmdev_hvm_name = "bhyve";
 
 /* For sdev plugin (/dev) */
@@ -477,6 +480,24 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
 		lock_type = LOCK_READ_HOLD;
 		break;
 
+	case VM_DATA_READ:
+	case VM_DATA_WRITE:
+		if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
+			return (EFAULT);
+		}
+		if (vcpu == -1) {
+			/* Access data for VM-wide devices */
+			vmm_write_lock(sc);
+			lock_type = LOCK_WRITE_HOLD;
+		} else if (vcpu >= 0 && vcpu < vm_get_maxcpus(sc->vmm_vm)) {
+			/* Access data associated with a specific vCPU */
+			vcpu_lock_one(sc, vcpu);
+			lock_type = LOCK_VCPU;
+		} else {
+			return (EINVAL);
+		}
+		break;
+
 	case VM_GET_GPA_PMAP:
 	case VM_IOAPIC_PINCOUNT:
 	case VM_SUSPEND:
@@ -1512,6 +1533,99 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
 		 */
 		break;
 	}
+	case VM_DATA_READ: {
+		struct vm_data_xfer vdx;
+
+		if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
+			error = EFAULT;
+			break;
+		}
+		if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
+			error = EINVAL;
+			break;
+		}
+		if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
+			error = EFBIG;
+			break;
+		}
+
+		const size_t len = vdx.vdx_len;
+		void *buf = kmem_alloc(len, KM_SLEEP);
+		if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0) {
+			if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+				kmem_free(buf, len);
+				error = EFAULT;
+				break;
+			}
+		} else {
+			bzero(buf, len);
+		}
+
+		vmm_data_req_t req = {
+			.vdr_class = vdx.vdx_class,
+			.vdr_version = vdx.vdx_version,
+			.vdr_flags = vdx.vdx_flags,
+			.vdr_len = vdx.vdx_len,
+			.vdr_data = buf,
+		};
+
+		/* Use the vcpu that was locked above; vdx is a second fetch */
+		error = vmm_data_read(sc->vmm_vm, vcpu, &req);
+		if (error == 0 &&
+		    ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
+			error = EFAULT;
+		}
+		kmem_free(buf, len);
+		break;
+	}
+	case VM_DATA_WRITE: {
+		struct vm_data_xfer vdx;
+
+		if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
+			error = EFAULT;
+			break;
+		}
+		if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
+			error = EINVAL;
+			break;
+		}
+		if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
+			error = EFBIG;
+			break;
+		}
+
+		const size_t len = vdx.vdx_len;
+		void *buf = kmem_alloc(len, KM_SLEEP);
+		if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+			kmem_free(buf, len);
+			error = EFAULT;
+			break;
+		}
+
+		vmm_data_req_t req = {
+			.vdr_class = vdx.vdx_class,
+			.vdr_version = vdx.vdx_version,
+			.vdr_flags = vdx.vdx_flags,
+			.vdr_len = vdx.vdx_len,
+			.vdr_data = buf,
+		};
+		if (vmm_allow_state_writes == 0) {
+			/* XXX: Play it safe for now */
+			error = EPERM;
+		} else {
+			/* Use the vcpu locked above, not the refetch */
+			error = vmm_data_write(sc->vmm_vm, vcpu, &req);
+		}
+
+		if (error == 0 &&
+		    (vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) {
+			if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
+				error = EFAULT;
+			}
+		}
+		kmem_free(buf, len);
+		break;
+	}
 
 	default:
 		error = ENOTTY;
diff --git a/usr/src/uts/intel/sys/vmm_data.h b/usr/src/uts/intel/sys/vmm_data.h
new file mode 100644
index 0000000000..1b8614543c
--- /dev/null
+++ b/usr/src/uts/intel/sys/vmm_data.h
@@ -0,0 +1,211 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _VMM_DATA_H_
+#define	_VMM_DATA_H_
+
+/* VMM Data Classes */
+#define	VDC_META	0	/* Meta information about data system */
+#define	VDC_VERSION	1	/* Version information for each data class */
+
+/* Classes bearing per-CPU data */
+#define	VDC_REGISTER	2	/* Registers (GPR, segment, etc) */
+#define	VDC_MSR		3	/* Model-specific registers */
+#define	VDC_FPU		4	/* FPU (and associated SIMD) */
+#define	VDC_LAPIC	5	/* Local APIC */
+#define	VDC_VMM_ARCH	6	/* Arch-specific VMM state (VMX/SVM) */
+
+/* Classes for system-wide devices */
+#define	VDC_IOAPIC	7	/* bhyve IO-APIC */
+#define	VDC_ATPIT	8	/* i8254 PIT */
+#define	VDC_ATPIC	9	/* i8259 PIC */
+#define	VDC_HPET	10	/* HPET */
+#define	VDC_PM_TIMER	11	/* ACPI Power Management Timer */
+#define	VDC_RTC		12	/* IBM PC Real Time Clock */
+
+/* Indicates top of VMM Data Class range, updated as classes are added */
+#define	VDC_MAX		(VDC_RTC + 1)
+
+
+/* VMM Data Identifiers */
+
+
+/*
+ * VDC_REGISTER:
+ */
+
+/*
+ * VDC_MSR:
+ *
+ * Use MSR identifiers directly
+ */
+
+struct vdi_msr_entry_v1 {
+	uint32_t	vme_msr;
+	uint32_t	_pad;
+	uint64_t	vme_value;
+};
+
+/*
+ * VDC_FPU:
+ *
+ * Unimplemented for now.  Use VM_GET_FPU/VM_SET_FPU ioctls.
+ */
+
+/* VDC_LAPIC: */
+
+struct vdi_lapic_page_v1 {
+	uint32_t	vlp_id;
+	uint32_t	vlp_version;
+	uint32_t	vlp_tpr;
+	uint32_t	vlp_apr;
+	uint32_t	vlp_ldr;
+	uint32_t	vlp_dfr;
+	uint32_t	vlp_svr;
+	uint32_t	vlp_isr[8];
+	uint32_t	vlp_tmr[8];
+	uint32_t	vlp_irr[8];
+	uint32_t	vlp_esr;
+	uint32_t	vlp_lvt_cmci;
+	uint64_t	vlp_icr;
+	uint32_t	vlp_lvt_timer;
+	uint32_t	vlp_lvt_thermal;
+	uint32_t	vlp_lvt_pcint;
+	uint32_t	vlp_lvt_lint0;
+	uint32_t	vlp_lvt_lint1;
+	uint32_t	vlp_lvt_error;
+	uint32_t	vlp_icr_timer;
+	uint32_t	vlp_dcr_timer;
+};
+
+struct vdi_lapic_v1 {
+	struct vdi_lapic_page_v1 vl_lapic;
+	uint64_t		vl_msr_apicbase;
+	int64_t			vl_timer_target;
+	uint32_t		vl_esr_pending;
+};
+
+
+/*
+ * VDC_VMM_ARCH:
+ */
+
+/* VDC_IOAPIC: */
+
+struct vdi_ioapic_v1 {
+	uint64_t	vi_pin_reg[32];
+	uint32_t	vi_pin_level[32];
+	uint32_t	vi_id;
+	uint32_t	vi_reg_sel;
+};
+
+/* VDC_ATPIT: */
+
+struct vdi_atpit_channel_v1 {
+	uint16_t	vac_initial;
+	uint16_t	vac_reg_cr;
+	uint16_t	vac_reg_ol;
+	uint8_t		vac_reg_status;
+	uint8_t		vac_mode;
+	/*
+	 * vac_status bits:
+	 * - 0b00001 status latched
+	 * - 0b00010 output latched
+	 * - 0b00100 control register sel
+	 * - 0b01000 output latch sel
+	 * - 0b10000 free-running timer
+	 */
+	uint8_t		vac_status;
+
+	int64_t		vac_time_target;
+};
+
+struct vdi_atpit_v1 {
+	struct vdi_atpit_channel_v1 va_channel[3];
+};
+
+/* VDC_ATPIC: */
+
+struct vdi_atpic_chip_v1 {
+	uint8_t		vac_icw_state;
+	/*
+	 * vac_status bits:
+	 * - 0b00000001 ready
+	 * - 0b00000010 auto EOI
+	 * - 0b00000100 poll
+	 * - 0b00001000 rotate
+	 * - 0b00010000 special full nested
+	 * - 0b00100000 read isr next
+	 * - 0b01000000 intr raised
+	 * - 0b10000000 special mask mode
+	 */
+	uint8_t		vac_status;
+	uint8_t		vac_reg_irr;
+	uint8_t		vac_reg_isr;
+	uint8_t		vac_reg_imr;
+	uint8_t		vac_irq_base;
+	uint8_t		vac_lowprio;
+	uint8_t		vac_elc;
+	uint32_t	vac_level[8];
+};
+
+struct vdi_atpic_v1 {
+	struct vdi_atpic_chip_v1 va_chip[2];
+};
+
+/* VDC_HPET: */
+
+struct vdi_hpet_timer_v1 {
+	uint64_t	vht_config;
+	uint64_t	vht_msi;
+	uint32_t	vht_comp_val;
+	uint32_t	vht_comp_rate;
+	int64_t		vht_time_target;
+};
+
+struct vdi_hpet_v1 {
+	uint64_t	vh_config;
+	uint64_t	vh_isr;
+	uint32_t	vh_count_base;
+	int64_t		vh_time_base;
+
+	struct vdi_hpet_timer_v1	vh_timers[8];
+};
+
+/* VDC_PM_TIMER: */
+
+struct vdi_pm_timer_v1 {
+	int64_t		vpt_time_base;
+	/*
+	 * Since the PM-timer IO port registration can be set by a dedicated
+	 * ioctl today, it is considered a read-only field in the vmm data
+	 * interface and its contents will be ignored when writing state data to
+	 * the timer.
+	 */
+	uint16_t	vpt_ioport;
+};
+
+/* VDC_RTC: */
+
+struct vdi_rtc_v1 {
+	uint8_t		vr_content[128]; /* raw copy of vrtc's rtcdev */
+	uint8_t		vr_addr;
+	int64_t		vr_time_base;	/* hrtime normalized to VM boot */
+	uint64_t	vr_rtc_sec;
+	uint64_t	vr_rtc_nsec;	/* read reports 0; write ignores it */
+};
+
+#endif /* _VMM_DATA_H_ */
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
index 5333facacf..fc8ccf406e 100644
--- a/usr/src/uts/intel/sys/vmm_dev.h
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -49,6 +49,7 @@
 
 #include <sys/param.h>
 #include <sys/cpuset.h>
+#include <sys/vmm_data.h>
 
 struct vm_create_req {
 	char		name[VM_MAX_NAMELEN];
@@ -351,6 +352,23 @@ struct vmm_dirty_tracker {
 	void		*vdt_pfns;	/* bit vector of dirty bits */
 };
 
+/* Current (arbitrary) max length for vm_data_xfer */
+#define	VM_DATA_XFER_LIMIT	8192
+
+#define	VDX_FLAG_READ_COPYIN	(1 << 0)	/* copyin before read */
+#define	VDX_FLAG_WRITE_COPYOUT	(1 << 1)	/* copyout after write */
+
+#define	VDX_FLAGS_VALID		(VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT)
+
+struct vm_data_xfer {
+	int		vdx_vcpuid;	/* -1 for VM-wide data */
+	uint16_t	vdx_class;	/* VDC_* data class */
+	uint16_t	vdx_version;
+	uint32_t	vdx_flags;	/* VDX_FLAG_* bits */
+	uint32_t	vdx_len;	/* <= VM_DATA_XFER_LIMIT */
+	void		*vdx_data;	/* user buffer, vdx_len bytes */
+};
+
 /*
  * VMM Interface Version
  *
@@ -366,7 +384,7 @@ struct vmm_dirty_tracker {
  * best-effort activity.  Nothing is to be inferred about the magnitude of a
  * change when the version is modified.  It follows no rules like semver.
  */
-#define	VMM_CURRENT_INTERFACE_VERSION	1
+#define	VMM_CURRENT_INTERFACE_VERSION	2
 
 
 #define	VMMCTL_IOC_BASE		(('V' << 16) | ('M' << 8))
@@ -472,6 +490,9 @@ struct vmm_dirty_tracker {
 #define	VM_TRACK_DIRTY_PAGES		(VMM_IOC_BASE | 0x20)
 #define	VM_DESC_FPU_AREA		(VMM_IOC_BASE | 0x21)
 
+#define	VM_DATA_READ			(VMM_IOC_BASE | 0x22)
+#define	VM_DATA_WRITE			(VMM_IOC_BASE | 0x23)
+
 #define	VM_DEVMEM_GETOFFSET		(VMM_IOC_BASE | 0xff)
 
 #define	VMM_CTL_DEV		"/dev/vmmctl"
author	Patrick Mooney <pmooney@pfmooney.com>	2022-04-14 02:14:09 +0000
committer	Patrick Mooney <pmooney@oxide.computer>	2022-06-23 19:41:39 +0000
commit	d515dd7754a14758624ee9b1330197cdb6a47c49 (patch)
tree	c6cb2b8b5abc9ede600d077f6395262e49809bf9 /usr/src/uts/intel
parent	3b5f2d22219c7c9f6926c804c8fa13b60d9e8a63 (diff)
download	illumos-joyent-d515dd7754a14758624ee9b1330197cdb6a47c49.tar.gz