summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Patrick Mooney <pmooney@pfmooney.com> 2022-04-14 02:14:09 +0000
committer: Patrick Mooney <pmooney@oxide.computer> 2022-06-23 19:41:39 +0000
commit: d515dd7754a14758624ee9b1330197cdb6a47c49 (patch)
tree: c6cb2b8b5abc9ede600d077f6395262e49809bf9
parent: 3b5f2d22219c7c9f6926c804c8fa13b60d9e8a63 (diff)
download: illumos-joyent-d515dd7754a14758624ee9b1330197cdb6a47c49.tar.gz
14261 bhyve should expose kernel device state
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Luqman Aden <luqman@oxide.computer>
Reviewed by: Jordan Paige Hendricks <jordan@oxidecomputer.com>
Approved by: Dan McDonald <danmcd@mnx.io>
-rw-r--r--usr/src/uts/intel/io/vmm/io/vatpic.c142
-rw-r--r--usr/src/uts/intel/io/vmm/io/vatpit.c141
-rw-r--r--usr/src/uts/intel/io/vmm/io/vhpet.c177
-rw-r--r--usr/src/uts/intel/io/vmm/io/vioapic.c54
-rw-r--r--usr/src/uts/intel/io/vmm/io/vlapic.c374
-rw-r--r--usr/src/uts/intel/io/vmm/io/vlapic.h2
-rw-r--r--usr/src/uts/intel/io/vmm/io/vlapic_priv.h17
-rw-r--r--usr/src/uts/intel/io/vmm/io/vpmtmr.c41
-rw-r--r--usr/src/uts/intel/io/vmm/io/vrtc.c67
-rw-r--r--usr/src/uts/intel/io/vmm/sys/vmm_kernel.h32
-rw-r--r--usr/src/uts/intel/io/vmm/vmm.c192
-rw-r--r--usr/src/uts/intel/io/vmm/vmm_sol_dev.c114
-rw-r--r--usr/src/uts/intel/sys/vmm_data.h211
-rw-r--r--usr/src/uts/intel/sys/vmm_dev.h23
14 files changed, 1429 insertions, 158 deletions
diff --git a/usr/src/uts/intel/io/vmm/io/vatpic.c b/usr/src/uts/intel/io/vmm/io/vatpic.c
index 2b4dc81b12..3113c0fa48 100644
--- a/usr/src/uts/intel/io/vmm/io/vatpic.c
+++ b/usr/src/uts/intel/io/vmm/io/vatpic.c
@@ -729,6 +729,20 @@ vatpic_slave_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
return (vatpic_write(vatpic, atpic, in, port, bytes, eax));
}
+/*
+ * Per-chip masks of the ELCR bits which may be set to level-triggered mode.
+ * Index 0 is the master PIC and index 1 is the slave PIC.
+ */
+static const uint8_t vatpic_elc_mask[2] = {
+	/*
+	 * Master PIC: the heart beat timer (IRQ0), the keyboard controller
+	 * (IRQ1), and the cascade channel (IRQ2) cannot be programmed for
+	 * level mode.
+	 */
+	[0] = 0xf8,
+	/*
+	 * Slave PIC: the real time clock (IRQ8) and the floating point error
+	 * interrupt (IRQ13) cannot be programmed for level mode.
+	 */
+	[1] = 0xde,
+};
+
int
vatpic_elc_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
uint32_t *eax)
@@ -740,21 +754,11 @@ vatpic_elc_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
switch (port) {
case IO_ELCR1:
atpic = &vatpic->atpic[0];
- /*
- * For the master PIC the cascade channel (IRQ2), the heart beat
- * timer (IRQ0), and the keyboard controller (IRQ1) cannot be
- * programmed for level mode.
- */
- elc_mask = 0xf8;
+ elc_mask = vatpic_elc_mask[0];
break;
case IO_ELCR2:
atpic = &vatpic->atpic[1];
- /*
- * For the slave PIC the real time clock (IRQ8) and the floating
- * point error interrupt (IRQ13) cannot be programmed for level
- * mode.
- */
- elc_mask = 0xde;
+ elc_mask = vatpic_elc_mask[1];
break;
default:
return (-1);
@@ -793,3 +797,117 @@ vatpic_cleanup(struct vatpic *vatpic)
mutex_destroy(&vatpic->lock);
kmem_free(vatpic, sizeof (*vatpic));
}
+
+/*
+ * Export handler for version 1 of the VDC_ATPIC data class.
+ *
+ * Snapshots both PIC chips of the vatpic passed as `datap` into the
+ * struct vdi_atpic_v1 which req->vdr_data points at, holding the vatpic lock
+ * for the duration of the copy.  Always returns 0.
+ */
+static int
+vatpic_data_read(void *datap, const vmm_data_req_t *req)
+{
+	struct vatpic *vatpic = datap;
+	struct vdi_atpic_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpic_v1));
+
+	VATPIC_LOCK(vatpic);
+	for (uint_t chipno = 0; chipno < 2; chipno++) {
+		const struct atpic *pic = &vatpic->atpic[chipno];
+		struct vdi_atpic_chip_v1 *dst = &payload->va_chip[chipno];
+		uint_t flags = 0;
+
+		/* Pack the boolean chip state, one flag per status bit */
+		flags |= pic->ready ? (1 << 0) : 0;
+		flags |= pic->auto_eoi ? (1 << 1) : 0;
+		flags |= pic->poll ? (1 << 2) : 0;
+		flags |= pic->rotate ? (1 << 3) : 0;
+		flags |= pic->special_full_nested ? (1 << 4) : 0;
+		flags |= pic->read_isr_next ? (1 << 5) : 0;
+		flags |= pic->intr_raised ? (1 << 6) : 0;
+		flags |= pic->special_mask_mode ? (1 << 7) : 0;
+
+		dst->vac_icw_state = pic->icw_state;
+		dst->vac_status = flags;
+		dst->vac_reg_irr = pic->reg_irr;
+		dst->vac_reg_isr = pic->reg_isr;
+		dst->vac_reg_imr = pic->reg_imr;
+		dst->vac_irq_base = pic->irq_base;
+		dst->vac_lowprio = pic->lowprio;
+		dst->vac_elc = pic->elc;
+
+		/* Per-pin assertion counts are exported as pin levels */
+		for (uint_t pin = 0; pin < 8; pin++) {
+			dst->vac_level[pin] = pic->acnt[pin];
+		}
+	}
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Check a to-be-imported ATPIC payload for values which the emulation would
+ * not accept.  Returns true only if both chips pass every check.
+ */
+static bool
+vatpic_data_validate(const struct vdi_atpic_v1 *src)
+{
+	for (uint_t chipno = 0; chipno < 2; chipno++) {
+		const struct vdi_atpic_chip_v1 *chip = &src->va_chip[chipno];
+
+		/* The ICW state machine has no state beyond IS_ICW4 */
+		const bool bad_icw = chip->vac_icw_state > IS_ICW4;
+		/* No ELCR bit outside this chip's writable mask may be set */
+		const bool bad_elc =
+		    (chip->vac_elc & ~vatpic_elc_mask[chipno]) != 0;
+
+		/*
+		 * TODO: The state of `intr_raised` could be checked against
+		 * what resides in the ISR/IRR registers.
+		 */
+		if (bad_icw || bad_elc) {
+			return (false);
+		}
+	}
+
+	return (true);
+}
+
+/*
+ * Import handler for version 1 of the VDC_ATPIC data class.
+ *
+ * Validates the struct vdi_atpic_v1 which req->vdr_data points at and, if it
+ * is acceptable, loads it into both PIC chips of the vatpic passed as `datap`
+ * while holding the vatpic lock.
+ *
+ * Returns 0 on success, or EINVAL if the payload fails validation (in which
+ * case no chip state is modified).
+ */
+static int
+vatpic_data_write(void *datap, const vmm_data_req_t *req)
+{
+	struct vatpic *vatpic = datap;
+	const struct vdi_atpic_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpic_v1));
+
+	if (!vatpic_data_validate(payload)) {
+		return (EINVAL);
+	}
+
+	VATPIC_LOCK(vatpic);
+	for (uint_t chipno = 0; chipno < 2; chipno++) {
+		const struct vdi_atpic_chip_v1 *in = &payload->va_chip[chipno];
+		struct atpic *pic = &vatpic->atpic[chipno];
+		const uint_t flags = in->vac_status;
+
+		pic->icw_state = in->vac_icw_state;
+
+		/* Unpack the boolean chip state, one flag per status bit */
+		pic->ready = (flags & (1 << 0)) != 0;
+		pic->auto_eoi = (flags & (1 << 1)) != 0;
+		pic->poll = (flags & (1 << 2)) != 0;
+		pic->rotate = (flags & (1 << 3)) != 0;
+		pic->special_full_nested = (flags & (1 << 4)) != 0;
+		pic->read_isr_next = (flags & (1 << 5)) != 0;
+		pic->intr_raised = (flags & (1 << 6)) != 0;
+		pic->special_mask_mode = (flags & (1 << 7)) != 0;
+
+		pic->reg_irr = in->vac_reg_irr;
+		pic->reg_isr = in->vac_reg_isr;
+		pic->reg_imr = in->vac_reg_imr;
+		pic->irq_base = in->vac_irq_base;
+		pic->lowprio = in->vac_lowprio;
+		pic->elc = in->vac_elc;
+
+		/* Pin levels are restored as per-pin assertion counts */
+		for (uint_t pin = 0; pin < 8; pin++) {
+			pic->acnt[pin] = in->vac_level[pin];
+		}
+	}
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Version 1 descriptor for the VDC_ATPIC data class: payloads are expected to
+ * be exactly one struct vdi_atpic_v1, exported by vatpic_data_read() and
+ * imported by vatpic_data_write().  VMM_DATA_VERSION() presumably makes this
+ * entry known to the vmm data framework -- its definition is not in this file.
+ */
+static const vmm_data_version_entry_t atpic_v1 = {
+ .vdve_class = VDC_ATPIC,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_atpic_v1),
+ .vdve_readf = vatpic_data_read,
+ .vdve_writef = vatpic_data_write,
+};
+VMM_DATA_VERSION(atpic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vatpit.c b/usr/src/uts/intel/io/vmm/io/vatpit.c
index 9bf6c01ff4..99c4035e1c 100644
--- a/usr/src/uts/intel/io/vmm/io/vatpit.c
+++ b/usr/src/uts/intel/io/vmm/io/vatpit.c
@@ -26,6 +26,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2022 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -55,6 +67,8 @@ __FBSDID("$FreeBSD$");
#define TIMER_STS_OUT 0x80
#define TIMER_STS_NULLCNT 0x40
+#define VALID_STATUS_BITS (TIMER_STS_OUT | TIMER_STS_NULLCNT)
+
#define TIMER_RB_LCTR 0x20
#define TIMER_RB_LSTATUS 0x10
#define TIMER_RB_CTR_2 0x08
@@ -185,7 +199,7 @@ pit_timer_start_cntr0(struct vatpit *vatpit)
hrtime_t now = gethrtime();
if (c->time_target < now) {
const uint64_t ticks_behind =
- hrt_freq_count(c->time_target - now, PIT_8254_FREQ);
+ hrt_freq_count(now - c->time_target, PIT_8254_FREQ);
c->total_target += roundup(ticks_behind, c->initial);
c->time_target = c->time_loaded +
@@ -482,3 +496,128 @@ vatpit_localize_resources(struct vatpit *vatpit)
}
}
}
+
+/*
+ * Export handler for version 1 of the VDC_ATPIT data class.
+ *
+ * Snapshots all three PIT channels of the vatpit passed as `datap` into the
+ * struct vdi_atpit_v1 which req->vdr_data points at, holding the vatpit lock
+ * for the duration of the copy.  Always returns 0.
+ */
+static int
+vatpit_data_read(void *datap, const vmm_data_req_t *req)
+{
+	struct vatpit *vatpit = datap;
+	struct vdi_atpit_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_ATPIT);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpit_v1));
+
+	VATPIT_LOCK(vatpit);
+	for (uint_t chno = 0; chno < 3; chno++) {
+		const struct channel *ch = &vatpit->channel[chno];
+		struct vdi_atpit_channel_v1 *dst = &payload->va_channel[chno];
+		uint_t flags = 0;
+
+		/* Pack the latch/select booleans, one flag per status bit */
+		flags |= ch->slatched ? (1 << 0) : 0;
+		flags |= ch->olatched ? (1 << 1) : 0;
+		flags |= ch->cr_sel ? (1 << 2) : 0;
+		flags |= ch->ol_sel ? (1 << 3) : 0;
+		flags |= ch->fr_sel ? (1 << 4) : 0;
+
+		dst->vac_initial = ch->initial;
+		/* Byte-pair registers are exported as [1]:[0] (high:low) */
+		dst->vac_reg_cr =
+		    ch->reg_cr[0] | ((uint16_t)ch->reg_cr[1] << 8);
+		dst->vac_reg_ol =
+		    ch->reg_ol[0] | ((uint16_t)ch->reg_ol[1] << 8);
+		dst->vac_reg_status = ch->reg_status;
+		dst->vac_mode = ch->mode;
+		dst->vac_status = flags;
+
+		/* Only channel 0 has the timer configured */
+		dst->vac_time_target = (chno == 0) ?
+		    vm_normalize_hrtime(vatpit->vm, ch->time_target) : 0;
+	}
+	VATPIT_UNLOCK(vatpit);
+
+	return (0);
+}
+
+/*
+ * Check a to-be-imported ATPIT payload for values which the emulation would
+ * not accept.
+ *
+ * Returns true if all three channels pass, false otherwise.
+ */
+static bool
+vatpit_data_validate(const struct vdi_atpit_v1 *src)
+{
+	for (uint_t i = 0; i < 3; i++) {
+		const struct vdi_atpit_channel_v1 *chan = &src->va_channel[i];
+
+		/*
+		 * VALID_STATUS_BITS describes the TIMER_STS_* bits of the
+		 * status register, which travels in vac_reg_status.  The
+		 * vac_status field is the unrelated packed latch/select flag
+		 * set (bits 0-4); testing it against this mask would reject
+		 * any payload with one of those flags set.
+		 */
+		if ((chan->vac_reg_status & ~VALID_STATUS_BITS) != 0) {
+			return (false);
+		}
+	}
+	return (true);
+}
+
+/*
+ * Import handler for version 1 of the VDC_ATPIT data class.
+ *
+ * Validates the struct vdi_atpit_v1 which req->vdr_data points at and, if it
+ * is acceptable, loads it into all three channels of the vatpit passed as
+ * `datap` while holding the vatpit lock.  For channel 0 the callout is also
+ * re-armed when the payload carries a non-zero time target.
+ *
+ * Returns 0 on success, or EINVAL if vatpit_data_validate() rejects the
+ * payload (in which case no channel state is modified).
+ */
+static int
+vatpit_data_write(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_ATPIT);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_atpit_v1));
+
+ struct vatpit *vatpit = datap;
+ const struct vdi_atpit_v1 *src = req->vdr_data;
+ if (!vatpit_data_validate(src)) {
+ return (EINVAL);
+ }
+
+ VATPIT_LOCK(vatpit);
+ for (uint_t i = 0; i < 3; i++) {
+ const struct vdi_atpit_channel_v1 *chan = &src->va_channel[i];
+ struct channel *out = &vatpit->channel[i];
+
+ out->initial = chan->vac_initial;
+ /* Split the 16-bit payload registers into [0]=low, [1]=high bytes */
+ out->reg_cr[0] = chan->vac_reg_cr;
+ out->reg_cr[1] = chan->vac_reg_cr >> 8;
+ out->reg_ol[0] = chan->vac_reg_ol;
+ out->reg_ol[1] = chan->vac_reg_ol >> 8;
+ out->reg_status = chan->vac_reg_status;
+ out->mode = chan->vac_mode;
+ /* Unpack the flag bits in the same order vatpit_data_read() packs them */
+ out->slatched = (chan->vac_status & (1 << 0)) != 0;
+ out->olatched = (chan->vac_status & (1 << 1)) != 0;
+ out->cr_sel = (chan->vac_status & (1 << 2)) != 0;
+ out->ol_sel = (chan->vac_status & (1 << 3)) != 0;
+ out->fr_sel = (chan->vac_status & (1 << 4)) != 0;
+
+ /* Only channel 0 has the timer configured */
+ if (i != 0) {
+ continue;
+ }
+
+ /* Drop the active mark on any existing callout before re-arming */
+ struct callout *callout = &out->callout;
+ if (callout_active(callout)) {
+ callout_deactivate(callout);
+ }
+
+ /* A zero target means no timer: clear the timing state and move on */
+ if (chan->vac_time_target == 0) {
+ out->time_loaded = 0;
+ out->time_target = 0;
+ continue;
+ }
+
+ /* back-calculate time_loaded for the appropriate interval */
+ const uint64_t time_target =
+ vm_denormalize_hrtime(vatpit->vm, chan->vac_time_target);
+ out->total_target = out->initial;
+ out->time_target = time_target;
+ out->time_loaded = time_target -
+ hrt_freq_interval(PIT_8254_FREQ, out->initial);
+ /* Arm the callout for the absolute (C_ABSOLUTE) target time */
+ callout_reset_hrtime(callout, out->time_target,
+ vatpit_callout_handler, &out->callout_arg, C_ABSOLUTE);
+ }
+ VATPIT_UNLOCK(vatpit);
+
+ return (0);
+}
+
+/*
+ * Version 1 descriptor for the VDC_ATPIT data class: payloads are expected to
+ * be exactly one struct vdi_atpit_v1, exported by vatpit_data_read() and
+ * imported by vatpit_data_write().  VMM_DATA_VERSION() presumably makes this
+ * entry known to the vmm data framework -- its definition is not in this file.
+ */
+static const vmm_data_version_entry_t atpit_v1 = {
+ .vdve_class = VDC_ATPIT,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_atpit_v1),
+ .vdve_readf = vatpit_data_read,
+ .vdve_writef = vatpit_data_write,
+};
+VMM_DATA_VERSION(atpit_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vhpet.c b/usr/src/uts/intel/io/vmm/io/vhpet.c
index cab4f54458..979b3aa8fe 100644
--- a/usr/src/uts/intel/io/vmm/io/vhpet.c
+++ b/usr/src/uts/intel/io/vmm/io/vhpet.c
@@ -738,3 +738,180 @@ vhpet_localize_resources(struct vhpet *vhpet)
vmm_glue_callout_localize(&vhpet->timer[i].callout);
}
}
+
+/*
+ * Export handler for version 1 of the VDC_HPET data class.
+ *
+ * Snapshots the global HPET registers and all eight timers of the vhpet
+ * passed as `datap` into the struct vdi_hpet_v1 which req->vdr_data points
+ * at, holding the vhpet lock for the duration of the copy.  Always returns 0.
+ */
+static int
+vhpet_data_read(void *datap, const vmm_data_req_t *req)
+{
+	struct vhpet *vhpet = datap;
+	struct vdi_hpet_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_HPET);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
+
+	VHPET_LOCK(vhpet);
+
+	payload->vh_config = vhpet->config;
+	payload->vh_isr = vhpet->isr;
+	payload->vh_count_base = vhpet->base_count;
+	payload->vh_time_base =
+	    vm_normalize_hrtime(vhpet->vm, vhpet->base_time);
+
+	for (uint_t tmr = 0; tmr < 8; tmr++) {
+		const struct vhpet_timer *src = &vhpet->timer[tmr];
+		struct vdi_hpet_timer_v1 *dst = &payload->vh_timers[tmr];
+
+		dst->vht_config = src->cap_config;
+		dst->vht_msi = src->msireg;
+		dst->vht_comp_val = src->compval;
+		dst->vht_comp_rate = src->comprate;
+
+		/* A target time is exported only while the callout is pending */
+		dst->vht_time_target = callout_pending(&src->callout) ?
+		    vm_normalize_hrtime(vhpet->vm, src->callout_expire) : 0;
+	}
+
+	VHPET_UNLOCK(vhpet);
+
+	return (0);
+}
+
+/*
+ * Descriptions for the various failures which can occur when validating
+ * to-be-written vhpet state in vhpet_data_validate().
+ */
+enum vhpet_validation_error {
+ /* Payload passed every check */
+ VVE_OK,
+ /* Global config requests (unsupported) LegacyReplacement routing */
+ VVE_BAD_CONFIG,
+ /* Base time lies in the future */
+ VVE_BAD_BASE_TIME,
+ /* ISR has bits asserted beyond the existing timers */
+ VVE_BAD_ISR,
+ /* Timer is MSI-enabled yet level-triggered, or its IRQ route is not allowed */
+ VVE_BAD_TIMER_CONFIG,
+ /* Timer has its ISR bit asserted without being level-triggered */
+ VVE_BAD_TIMER_ISR,
+ /* Timer target is earlier than the HPET base time */
+ VVE_BAD_TIMER_TIME,
+};
+
+/*
+ * Check a to-be-imported version 1 HPET payload for internal consistency.
+ *
+ * req	request whose vdr_data points at a struct vdi_hpet_v1
+ * vm	VM used to convert the normalized times carried in the payload
+ *
+ * Returns VVE_OK if the payload is acceptable, otherwise the VVE_* code of
+ * the first check which failed.
+ */
+static enum vhpet_validation_error
+vhpet_data_validate(const vmm_data_req_t *req, struct vm *vm)
+{
+	ASSERT(req->vdr_version == 1 &&
+	    req->vdr_len == sizeof (struct vdi_hpet_v1));
+	const struct vdi_hpet_v1 *src = req->vdr_data;
+
+	/* LegacyReplacement Routing is not supported */
+	if ((src->vh_config & HPET_CNF_LEG_RT) != 0) {
+		return (VVE_BAD_CONFIG);
+	}
+
+	/* A base time in the future makes no sense */
+	const hrtime_t base_time = vm_denormalize_hrtime(vm, src->vh_time_base);
+	if (base_time > gethrtime()) {
+		return (VVE_BAD_BASE_TIME);
+	}
+
+	/* All asserted ISRs must be associated with an existing timer */
+	if ((src->vh_isr & ~(uint64_t)((1 << VHPET_NUM_TIMERS) - 1)) != 0) {
+		return (VVE_BAD_ISR);
+	}
+
+	for (uint_t i = 0; i < 8; i++) {
+		const struct vdi_hpet_timer_v1 *timer = &src->vh_timers[i];
+
+		const bool msi_enabled =
+		    (timer->vht_config & HPET_TCNF_FSB_EN) != 0;
+		const bool level_triggered =
+		    (timer->vht_config & HPET_TCNF_INT_TYPE) != 0;
+		const bool irq_asserted = (src->vh_isr & (1ULL << i)) != 0;
+		/* Upper half of the config holds the mask of routable IRQs */
+		const uint32_t allowed_irqs = (timer->vht_config >> 32);
+		const uint32_t irq_pin =
+		    (timer->vht_config & HPET_TCNF_INT_ROUTE) >> 9;
+
+		if (msi_enabled) {
+			if (level_triggered) {
+				return (VVE_BAD_TIMER_CONFIG);
+			}
+		} else {
+			/*
+			 * Ensure interrupt route is valid as ensured by the
+			 * logic in vhpet_timer_update_config.
+			 *
+			 * The shift is done on an unsigned constant: with a
+			 * plain (signed) 1, selecting pin 31 would shift into
+			 * the sign bit, which is undefined behavior.
+			 */
+			if (irq_pin != 0 &&
+			    (allowed_irqs & (1U << irq_pin)) == 0) {
+				return (VVE_BAD_TIMER_CONFIG);
+			}
+		}
+		if (irq_asserted && !level_triggered) {
+			return (VVE_BAD_TIMER_ISR);
+		}
+
+		if (timer->vht_time_target != 0) {
+			/*
+			 * A timer scheduled earlier than the base time of the
+			 * entire HPET makes no sense.
+			 */
+			const uint64_t timer_target =
+			    vm_denormalize_hrtime(vm, timer->vht_time_target);
+			if (timer_target < base_time) {
+				return (VVE_BAD_TIMER_TIME);
+			}
+		}
+	}
+
+	return (VVE_OK);
+}
+
+/*
+ * Import handler for version 1 of the VDC_HPET data class.
+ *
+ * Validates the struct vdi_hpet_v1 which req->vdr_data points at (before the
+ * vhpet lock is taken) and, if it is acceptable, loads the global registers
+ * and all eight timers into the vhpet passed as `datap` under the vhpet lock.
+ * Only the recorded callout expiration is updated per timer; no callout is
+ * scheduled here (see the TODO below).
+ *
+ * Returns 0 on success, or EINVAL if vhpet_data_validate() reports anything
+ * other than VVE_OK.
+ */
+static int
+vhpet_data_write(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_HPET);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
+
+ struct vhpet *vhpet = datap;
+
+ if (vhpet_data_validate(req, vhpet->vm) != VVE_OK) {
+ return (EINVAL);
+ }
+ const struct vdi_hpet_v1 *src = req->vdr_data;
+
+ VHPET_LOCK(vhpet);
+ vhpet->config = src->vh_config;
+ vhpet->isr = src->vh_isr;
+ vhpet->base_count = src->vh_count_base;
+ vhpet->base_time = vm_denormalize_hrtime(vhpet->vm, src->vh_time_base);
+
+ for (uint_t i = 0; i < 8; i++) {
+ struct vhpet_timer *timer = &vhpet->timer[i];
+ const struct vdi_hpet_timer_v1 *timer_src = &src->vh_timers[i];
+
+ timer->cap_config = timer_src->vht_config;
+ timer->msireg = timer_src->vht_msi;
+ timer->compval = timer_src->vht_comp_val;
+ timer->comprate = timer_src->vht_comp_rate;
+
+ /*
+ * For now, any state associating an IOAPIC pin with a given
+ * timer is not kept in sync. (We will not increment or
+ * decrement a pin level based on the timer state.) It is left
+ * to the consumer to keep those pin levels maintained if
+ * modifying either the HPET or the IOAPIC.
+ *
+ * If both the HPET and IOAPIC are exported and then imported,
+ * this will occur naturally, as any asserted IOAPIC pin level
+ * from the HPET would come along for the ride.
+ */
+
+ /* TODO: properly configure timer */
+ /* A zero target in the payload means no expiration is recorded */
+ if (timer_src->vht_time_target != 0) {
+ timer->callout_expire = vm_denormalize_hrtime(vhpet->vm,
+ timer_src->vht_time_target);
+ } else {
+ timer->callout_expire = 0;
+ }
+ }
+ VHPET_UNLOCK(vhpet);
+ return (0);
+}
+
+/*
+ * Version 1 descriptor for the VDC_HPET data class: payloads are expected to
+ * be exactly one struct vdi_hpet_v1, exported by vhpet_data_read() and
+ * imported by vhpet_data_write().  VMM_DATA_VERSION() presumably makes this
+ * entry known to the vmm data framework -- its definition is not in this file.
+ */
+static const vmm_data_version_entry_t hpet_v1 = {
+ .vdve_class = VDC_HPET,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_hpet_v1),
+ .vdve_readf = vhpet_data_read,
+ .vdve_writef = vhpet_data_write,
+};
+VMM_DATA_VERSION(hpet_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vioapic.c b/usr/src/uts/intel/io/vmm/io/vioapic.c
index 8222fd33e3..b4cde71a81 100644
--- a/usr/src/uts/intel/io/vmm/io/vioapic.c
+++ b/usr/src/uts/intel/io/vmm/io/vioapic.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <x86/apicreg.h>
#include <machine/vmm.h>
+#include <sys/vmm_data.h>
#include "vmm_lapic.h"
#include "vlapic.h"
@@ -451,3 +452,56 @@ vioapic_pincount(struct vm *vm)
return (REDIR_ENTRIES);
}
+
+/*
+ * Export handler for version 1 of the VDC_IOAPIC data class.
+ *
+ * Snapshots the ID, register-select value, and every redirection table entry
+ * (register contents plus pin assertion count) of the vioapic passed as
+ * `datap` into the struct vdi_ioapic_v1 which req->vdr_data points at, holding
+ * the vioapic lock for the duration of the copy.  Always returns 0.
+ */
+static int
+vioapic_data_read(void *datap, const vmm_data_req_t *req)
+{
+	struct vioapic *vioapic = datap;
+	struct vdi_ioapic_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_IOAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_ioapic_v1));
+
+	VIOAPIC_LOCK(vioapic);
+
+	payload->vi_id = vioapic->id;
+	payload->vi_reg_sel = vioapic->ioregsel;
+
+	uint_t pin = 0;
+	while (pin < REDIR_ENTRIES) {
+		payload->vi_pin_reg[pin] = vioapic->rtbl[pin].reg;
+		payload->vi_pin_level[pin] = vioapic->rtbl[pin].acnt;
+		pin++;
+	}
+
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+/*
+ * Import handler for version 1 of the VDC_IOAPIC data class.
+ *
+ * Loads the ID, register-select value, and every redirection table entry from
+ * the struct vdi_ioapic_v1 which req->vdr_data points at into the vioapic
+ * passed as `datap`, under the vioapic lock.  Bits covered by RTBL_RO_BITS
+ * are stripped from each imported redirection register.  Always returns 0.
+ */
+static int
+vioapic_data_write(void *datap, const vmm_data_req_t *req)
+{
+	struct vioapic *vioapic = datap;
+	const struct vdi_ioapic_v1 *payload = req->vdr_data;
+
+	/* The request must be exactly what this handler serves */
+	VERIFY3U(req->vdr_class, ==, VDC_IOAPIC);
+	VERIFY3U(req->vdr_version, ==, 1);
+	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_ioapic_v1));
+
+	VIOAPIC_LOCK(vioapic);
+
+	vioapic->id = payload->vi_id;
+	vioapic->ioregsel = payload->vi_reg_sel;
+
+	uint_t pin = 0;
+	while (pin < REDIR_ENTRIES) {
+		vioapic->rtbl[pin].reg =
+		    payload->vi_pin_reg[pin] & ~RTBL_RO_BITS;
+		vioapic->rtbl[pin].acnt = payload->vi_pin_level[pin];
+		pin++;
+	}
+
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+/*
+ * Version 1 descriptor for the VDC_IOAPIC data class: payloads are expected
+ * to be exactly one struct vdi_ioapic_v1, exported by vioapic_data_read() and
+ * imported by vioapic_data_write().  VMM_DATA_VERSION() presumably makes this
+ * entry known to the vmm data framework -- its definition is not in this file.
+ */
+static const vmm_data_version_entry_t ioapic_v1 = {
+ .vdve_class = VDC_IOAPIC,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_ioapic_v1),
+ .vdve_readf = vioapic_data_read,
+ .vdve_writef = vioapic_data_write,
+};
+VMM_DATA_VERSION(ioapic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic.c b/usr/src/uts/intel/io/vmm/io/vlapic.c
index 50964a9c4c..3127bede2f 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic.c
+++ b/usr/src/uts/intel/io/vmm/io/vlapic.c
@@ -40,7 +40,7 @@
*
* Copyright 2014 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -97,16 +97,14 @@ __FBSDID("$FreeBSD$");
#define APICBASE_ADDR_MASK 0xfffffffffffff000UL
+#define APIC_VALID_MASK_ESR (APIC_ESR_SEND_CS_ERROR | \
+ APIC_ESR_RECEIVE_CS_ERROR | APIC_ESR_SEND_ACCEPT | \
+ APIC_ESR_RECEIVE_ACCEPT | APIC_ESR_SEND_ILLEGAL_VECTOR | \
+ APIC_ESR_RECEIVE_ILLEGAL_VECTOR | APIC_ESR_ILLEGAL_REGISTER)
+
static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
-#ifdef __ISRVEC_DEBUG
-static void vlapic_isrstk_accept(struct vlapic *, int);
-static void vlapic_isrstk_eoi(struct vlapic *, int);
-static void vlapic_isrstk_verify(const struct vlapic *);
-#endif /* __ISRVEC_DEBUG */
-
-
static __inline bool
vlapic_x2mode(const struct vlapic *vlapic)
{
@@ -134,7 +132,7 @@ vlapic_enabled(const struct vlapic *vlapic)
}
static __inline uint32_t
-vlapic_get_id(struct vlapic *vlapic)
+vlapic_get_id(const struct vlapic *vlapic)
{
if (vlapic_x2mode(vlapic))
@@ -144,7 +142,7 @@ vlapic_get_id(struct vlapic *vlapic)
}
static uint32_t
-x2apic_ldr(struct vlapic *vlapic)
+x2apic_ldr(const struct vlapic *vlapic)
{
int apicid;
uint32_t ldr;
@@ -263,27 +261,30 @@ vlapic_get_ccr(struct vlapic *vlapic)
return (ccr);
}
-void
-vlapic_dcr_write_handler(struct vlapic *vlapic)
+static void
+vlapic_update_divider(struct vlapic *vlapic)
{
- struct LAPIC *lapic;
- int divisor;
+ struct LAPIC *lapic = vlapic->apic_page;
- lapic = vlapic->apic_page;
- VLAPIC_TIMER_LOCK(vlapic);
+ ASSERT(VLAPIC_TIMER_LOCKED(vlapic));
- divisor = vlapic_timer_divisor(lapic->dcr_timer);
+ vlapic->timer_cur_freq =
+ VLAPIC_BUS_FREQ / vlapic_timer_divisor(lapic->dcr_timer);
+ vlapic->timer_period =
+ hrt_freq_interval(vlapic->timer_cur_freq, lapic->icr_timer);
+}
+void
+vlapic_dcr_write_handler(struct vlapic *vlapic)
+{
/*
* Update the timer frequency and the timer period.
*
* XXX changes to the frequency divider will not take effect until
* the timer is reloaded.
*/
- vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
- vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
- lapic->icr_timer);
-
+ VLAPIC_TIMER_LOCK(vlapic);
+ vlapic_update_divider(vlapic);
VLAPIC_TIMER_UNLOCK(vlapic);
}
@@ -453,30 +454,30 @@ vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
}
static void
+vlapic_refresh_lvts(struct vlapic *vlapic)
+{
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
+ vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
+}
+
+static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
struct LAPIC *lapic = vlapic->apic_page;
lapic->lvt_cmci |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
-
lapic->lvt_timer |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
-
lapic->lvt_thermal |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
-
lapic->lvt_pcint |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
-
lapic->lvt_lint0 |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
-
lapic->lvt_lint1 |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
-
lapic->lvt_error |= APIC_LVT_M;
- vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
+ vlapic_refresh_lvts(vlapic);
}
static int
@@ -581,13 +582,6 @@ vlapic_raise_ppr(struct vlapic *vlapic, int vec)
ppr = PRIO(vec);
-#ifdef __ISRVEC_DEBUG
- KASSERT(vec >= 16 && vec < 256, ("invalid vector %d", vec));
- KASSERT(ppr > lapic->tpr, ("ppr %x <= tpr %x", ppr, lapic->tpr));
- KASSERT(ppr > lapic->ppr, ("ppr %x <= old ppr %x", ppr, lapic->ppr));
- KASSERT(vec == (int)vlapic_active_isr(vlapic), ("ISR missing for ppr"));
-#endif /* __ISRVEC_DEBUG */
-
lapic->ppr = ppr;
}
@@ -617,9 +611,6 @@ vlapic_process_eoi(struct vlapic *vlapic)
vector = i * 32 + bitpos;
isrptr[idx] &= ~(1 << bitpos);
-#ifdef __ISRVEC_DEBUG
- vlapic_isrstk_eoi(vlapic, vector);
-#endif
vlapic_update_ppr(vlapic);
if ((tmrptr[idx] & (1 << bitpos)) != 0) {
vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
@@ -934,7 +925,7 @@ vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
}
uint64_t
-vlapic_get_cr8(struct vlapic *vlapic)
+vlapic_get_cr8(const struct vlapic *vlapic)
{
const struct LAPIC *lapic = vlapic->apic_page;
@@ -1110,10 +1101,6 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
* in-service, the PPR must be raised.
*/
vlapic_raise_ppr(vlapic, vector);
-
-#ifdef __ISRVEC_DEBUG
- vlapic_isrstk_accept(vlapic, vector);
-#endif
}
void
@@ -1342,9 +1329,9 @@ vlapic_reset(struct vlapic *vlapic)
callout_stop(&vlapic->callout);
lapic->icr_timer = 0;
lapic->ccr_timer = 0;
- VLAPIC_TIMER_UNLOCK(vlapic);
lapic->dcr_timer = 0;
- vlapic_dcr_write_handler(vlapic);
+ vlapic_update_divider(vlapic);
+ VLAPIC_TIMER_UNLOCK(vlapic);
/*
* Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
@@ -1368,11 +1355,6 @@ vlapic_reset(struct vlapic *vlapic)
lapic->apr = 0;
lapic->ppr = 0;
-#ifdef __ISRVEC_DEBUG
- /* With the PPR cleared, the isrvec tracking should be reset too */
- vlapic->isrvec_stk_top = 0;
-#endif
-
lapic->eoi = 0;
lapic->ldr = 0;
lapic->dfr = 0xffffffff;
@@ -1726,93 +1708,237 @@ vlapic_localize_resources(struct vlapic *vlapic)
vmm_glue_callout_localize(&vlapic->callout);
}
-#ifdef __ISRVEC_DEBUG
-static void
-vlapic_isrstk_eoi(struct vlapic *vlapic, int vector)
+static int
+vlapic_data_read(void *datap, const vmm_data_req_t *req)
{
- if (vlapic->isrvec_stk_top <= 0) {
- panic("invalid vlapic isrvec_stk_top %d",
- vlapic->isrvec_stk_top);
+ VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));
+
+ struct vlapic *vlapic = datap;
+ struct vdi_lapic_v1 *out = req->vdr_data;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ if (vlapic->ops.sync_state) {
+ (*vlapic->ops.sync_state)(vlapic);
}
- vlapic->isrvec_stk_top--;
- vlapic_isrstk_verify(vlapic);
-}
-static void
-vlapic_isrstk_accept(struct vlapic *vlapic, int vector)
-{
- int stk_top;
+ out->vl_msr_apicbase = vlapic->msr_apicbase;
+ out->vl_esr_pending = vlapic->esr_pending;
+ if (callout_pending(&vlapic->callout)) {
+ out->vl_timer_target =
+ vm_normalize_hrtime(vlapic->vm, vlapic->timer_fire_when);
+ } else {
+ out->vl_timer_target = 0;
+ }
- vlapic->isrvec_stk_top++;
+ const struct LAPIC *lapic = vlapic->apic_page;
+ struct vdi_lapic_page_v1 *out_page = &out->vl_lapic;
- stk_top = vlapic->isrvec_stk_top;
- if (stk_top >= ISRVEC_STK_SIZE)
- panic("isrvec_stk_top overflow %d", stk_top);
+ /*
+ * While this might appear, at first glance, to be missing some fields,
+ * they are intentionally omitted:
+ * - PPR: its contents are always generated at runtime
+ * - EOI: write-only, and contents are ignored after handling
+ * - RRD: (aka RRR) read-only and always 0
+ * - CCR: calculated from underlying timer data
+ */
+ out_page->vlp_id = lapic->id;
+ out_page->vlp_version = lapic->version;
+ out_page->vlp_tpr = lapic->tpr;
+ out_page->vlp_apr = lapic->apr;
+ out_page->vlp_ldr = lapic->ldr;
+ out_page->vlp_dfr = lapic->dfr;
+ out_page->vlp_svr = lapic->svr;
+ out_page->vlp_esr = lapic->esr;
+ out_page->vlp_icr = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
+ out_page->vlp_icr_timer = lapic->icr_timer;
+ out_page->vlp_dcr_timer = lapic->dcr_timer;
+
+ out_page->vlp_lvt_cmci = lapic->lvt_cmci;
+ out_page->vlp_lvt_timer = lapic->lvt_timer;
+ out_page->vlp_lvt_thermal = lapic->lvt_thermal;
+ out_page->vlp_lvt_pcint = lapic->lvt_pcint;
+ out_page->vlp_lvt_lint0 = lapic->lvt_lint0;
+ out_page->vlp_lvt_lint1 = lapic->lvt_lint1;
+ out_page->vlp_lvt_error = lapic->lvt_error;
+
+ const uint32_t *isrptr = &lapic->isr0;
+ const uint32_t *tmrptr = &lapic->tmr0;
+ const uint32_t *irrptr = &lapic->irr0;
+ for (uint_t i = 0; i < 8; i++) {
+ out_page->vlp_isr[i] = isrptr[i * 4];
+ out_page->vlp_tmr[i] = tmrptr[i * 4];
+ out_page->vlp_irr[i] = irrptr[i * 4];
+ }
+ VLAPIC_TIMER_UNLOCK(vlapic);
- vlapic->isrvec_stk[stk_top] = vector;
- vlapic_isrstk_verify(vlapic);
+ return (0);
}
-static void
-vlapic_isrstk_dump(const struct vlapic *vlapic)
+static uint8_t
+popc8(uint8_t val)
{
- int i;
- uint32_t *isrptr;
+ uint8_t cnt;
- isrptr = &vlapic->apic_page->isr0;
- for (i = 0; i < 8; i++)
- printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
-
- for (i = 0; i <= vlapic->isrvec_stk_top; i++)
- printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+ for (cnt = 0; val != 0; val &= (val - 1)) {
+ cnt++;
+ }
+ return (cnt);
}
-static void
-vlapic_isrstk_verify(const struct vlapic *vlapic)
-{
- int i, lastprio, curprio, vector, idx;
- uint32_t *isrptr;
+/*
+ * Descriptions for the various failures which can occur when validating
+ * to-be-written vlapic state.
+ */
+enum vlapic_validation_error {
+ VVE_OK,
+ VVE_BAD_ID,
+ VVE_BAD_VERSION,
+ VVE_BAD_MSR_BASE,
+ VVE_BAD_ESR,
+ VVE_BAD_TPR,
+ VVE_LOW_VECTOR,
+ VVE_ISR_PRIORITY,
+};
+
+static enum vlapic_validation_error
+vlapic_data_validate(const struct vlapic *vlapic, const vmm_data_req_t *req)
+{
+ ASSERT(req->vdr_version == 1 &&
+ req->vdr_len == sizeof (struct vdi_lapic_v1));
+ const struct vdi_lapic_v1 *src = req->vdr_data;
+
+ if ((src->vl_esr_pending & ~APIC_VALID_MASK_ESR) != 0 ||
+ (src->vl_lapic.vlp_esr & ~APIC_VALID_MASK_ESR) != 0) {
+ return (VVE_BAD_ESR);
+ }
- /*
- * Note: The value at index 0 in isrvec_stk is always 0.
- *
- * It is a placeholder for the value of ISR vector when no bits are set
- * in the ISRx registers.
- */
- if (vlapic->isrvec_stk_top == 0 && vlapic->isrvec_stk[0] != 0) {
- panic("isrvec_stk is corrupted: %d", vlapic->isrvec_stk[0]);
+ /* Use the same restrictions as the wrmsr accessor for now */
+ const uint64_t apicbase_reserved = APICBASE_RESERVED | APICBASE_X2APIC |
+ APICBASE_BSP;
+ const uint64_t diff = src->vl_msr_apicbase ^ vlapic->msr_apicbase;
+ if ((diff & apicbase_reserved) != 0) {
+ return (VVE_BAD_MSR_BASE);
}
+ const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
/*
- * Make sure that the priority of the nested interrupts is
- * always increasing.
+ * Demand that ID match for now. This can be further updated when some
+ * of the x2apic handling is improved.
*/
- lastprio = -1;
- for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
- curprio = PRIO(vlapic->isrvec_stk[i]);
- if (curprio <= lastprio) {
- vlapic_isrstk_dump(vlapic);
- panic("isrvec_stk does not satisfy invariant");
- }
- lastprio = curprio;
+ if (page->vlp_id != vlapic_get_id(vlapic)) {
+ return (VVE_BAD_ID);
}
- /*
- * Make sure that each bit set in the ISRx registers has a
- * corresponding entry on the isrvec stack.
- */
- i = 1;
- isrptr = &vlapic->apic_page->isr0;
- for (vector = 0; vector < 256; vector++) {
- idx = (vector / 32) * 4;
- if (isrptr[idx] & (1 << (vector % 32))) {
- if (i > vlapic->isrvec_stk_top ||
- vlapic->isrvec_stk[i] != vector) {
- vlapic_isrstk_dump(vlapic);
- panic("ISR and isrvec_stk out of sync");
- }
- i++;
+ if (page->vlp_version != vlapic->apic_page->version) {
+ return (VVE_BAD_VERSION);
+ }
+
+ if (page->vlp_tpr > 0xff) {
+ return (VVE_BAD_TPR);
+ }
+
+ /* Vectors 0-15 are not expected to be handled by the lapic */
+ if ((page->vlp_isr[0] & 0xffff) != 0 ||
+ (page->vlp_irr[0] & 0xffff) != 0 ||
+ (page->vlp_tmr[0] & 0xffff) != 0) {
+ return (VVE_LOW_VECTOR);
+ }
+
+ /* Only one interrupt should be in-service for each priority level */
+ for (uint_t i = 0; i < 8; i++) {
+ if (popc8((uint8_t)page->vlp_isr[i]) > 1 ||
+ popc8((uint8_t)(page->vlp_isr[i] >> 8)) > 1 ||
+ popc8((uint8_t)(page->vlp_isr[i] >> 16)) > 1 ||
+ popc8((uint8_t)(page->vlp_isr[i] >> 24)) > 1) {
+ return (VVE_ISR_PRIORITY);
}
}
+
+ return (VVE_OK);
}
-#endif
+
+/*
+ * Import handler for version 1 of the VDC_LAPIC data class.
+ *
+ * Validates the struct vdi_lapic_v1 which req->vdr_data points at (before the
+ * timer lock is taken) and, if it is acceptable, loads it into the vlapic
+ * passed as `datap` and its APIC page while holding the vlapic timer lock.
+ *
+ * Returns 0 on success, or EINVAL if vlapic_data_validate() reports anything
+ * other than VVE_OK.
+ */
+static int
+vlapic_data_write(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_LAPIC);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_lapic_v1));
+
+ struct vlapic *vlapic = datap;
+ if (vlapic_data_validate(vlapic, req) != VVE_OK) {
+ return (EINVAL);
+ }
+ const struct vdi_lapic_v1 *src = req->vdr_data;
+ const struct vdi_lapic_page_v1 *page = &src->vl_lapic;
+ struct LAPIC *lapic = vlapic->apic_page;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ /* Already ensured by vlapic_data_validate() */
+ /*
+ * NOTE(review): validation compares vlp_id against vlapic_get_id(),
+ * whereas this asserts against lapic->id.  Confirm the two can never
+ * differ, since a mismatch here panics on caller-supplied data.
+ */
+ VERIFY3U(page->vlp_id, ==, lapic->id);
+ VERIFY3U(page->vlp_version, ==, lapic->version);
+
+ vlapic->msr_apicbase = src->vl_msr_apicbase;
+ vlapic->esr_pending = src->vl_esr_pending;
+
+ lapic->tpr = page->vlp_tpr;
+ lapic->apr = page->vlp_apr;
+ lapic->ldr = page->vlp_ldr;
+ lapic->dfr = page->vlp_dfr;
+ lapic->svr = page->vlp_svr;
+ lapic->esr = page->vlp_esr;
+ /* The 64-bit payload ICR is split into the low and high page registers */
+ lapic->icr_lo = (uint32_t)page->vlp_icr;
+ lapic->icr_hi = (uint32_t)(page->vlp_icr >> 32);
+
+ lapic->icr_timer = page->vlp_icr_timer;
+ lapic->dcr_timer = page->vlp_dcr_timer;
+ /* Recompute timer frequency/period from the just-loaded DCR and ICR */
+ vlapic_update_divider(vlapic);
+
+ /* cleanse LDR/DFR */
+ vlapic_ldr_write_handler(vlapic);
+ vlapic_dfr_write_handler(vlapic);
+
+ lapic->lvt_cmci = page->vlp_lvt_cmci;
+ lapic->lvt_timer = page->vlp_lvt_timer;
+ lapic->lvt_thermal = page->vlp_lvt_thermal;
+ lapic->lvt_pcint = page->vlp_lvt_pcint;
+ lapic->lvt_lint0 = page->vlp_lvt_lint0;
+ lapic->lvt_lint1 = page->vlp_lvt_lint1;
+ lapic->lvt_error = page->vlp_lvt_error;
+ /* cleanse LVTs */
+ vlapic_refresh_lvts(vlapic);
+
+ /* ISR/TMR/IRR are walked from isr0/tmr0/irr0 with a stride of four words */
+ uint32_t *isrptr = &lapic->isr0;
+ uint32_t *tmrptr = &lapic->tmr0;
+ uint32_t *irrptr = &lapic->irr0;
+ for (uint_t i = 0; i < 8; i++) {
+ isrptr[i * 4] = page->vlp_isr[i];
+ tmrptr[i * 4] = page->vlp_tmr[i];
+ irrptr[i * 4] = page->vlp_irr[i];
+ }
+
+ /*
+ * A zero target leaves timer_fire_when and any existing callout
+ * untouched; only a non-zero target (re)arms the timer callout.
+ */
+ if (src->vl_timer_target != 0) {
+ vlapic->timer_fire_when =
+ vm_denormalize_hrtime(vlapic->vm, src->vl_timer_target);
+ vlapic_callout_reset(vlapic);
+ }
+
+ /* Invoke the backend's sync_state op, if it provides one */
+ if (vlapic->ops.sync_state) {
+ (*vlapic->ops.sync_state)(vlapic);
+ }
+ VLAPIC_TIMER_UNLOCK(vlapic);
+
+ return (0);
+}
+
+/*
+ * Version 1 descriptor for the VDC_LAPIC data class: payloads are expected to
+ * be exactly one struct vdi_lapic_v1, exported by vlapic_data_read() and
+ * imported by vlapic_data_write().  VMM_DATA_VERSION() presumably makes this
+ * entry known to the vmm data framework -- its definition is not in this file.
+ */
+static const vmm_data_version_entry_t lapic_v1 = {
+ .vdve_class = VDC_LAPIC,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_lapic_v1),
+ .vdve_readf = vlapic_data_read,
+ .vdve_writef = vlapic_data_write,
+};
+VMM_DATA_VERSION(lapic_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic.h b/usr/src/uts/intel/io/vmm/io/vlapic.h
index dd1970cb6a..4fe2d79c69 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic.h
+++ b/usr/src/uts/intel/io/vmm/io/vlapic.h
@@ -89,7 +89,7 @@ void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
bool lowprio, bool x2apic_dest);
void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
-uint64_t vlapic_get_cr8(struct vlapic *vlapic);
+uint64_t vlapic_get_cr8(const struct vlapic *vlapic);
/* APIC write handlers */
void vlapic_id_write_handler(struct vlapic *vlapic);
diff --git a/usr/src/uts/intel/io/vmm/io/vlapic_priv.h b/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
index abe1271fcc..95e1650b5c 100644
--- a/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/intel/io/vmm/io/vlapic_priv.h
@@ -106,10 +106,6 @@
#define VLAPIC_TMR_CNT 8
-#ifdef DEBUG
-#define __ISRVEC_DEBUG
-#endif
-
struct vlapic;
struct vlapic_ops {
@@ -146,19 +142,6 @@ struct vlapic {
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
-
-#ifdef __ISRVEC_DEBUG
- /*
- * The 'isrvec_stk' is a stack of vectors injected by the local APIC.
- * It is used as a debugging method to double-check the behavior of the
- * emulation. Vectors are pushed to the stack when they are accepted
- * for injection and popped from the stack when the processor performs
- * an EOI. The vector on the top of the stack is used to verify the
- * computed Processor Priority.
- */
- uint8_t isrvec_stk[ISRVEC_STK_SIZE];
- int isrvec_stk_top;
-#endif
};
void vlapic_init(struct vlapic *vlapic);
diff --git a/usr/src/uts/intel/io/vmm/io/vpmtmr.c b/usr/src/uts/intel/io/vmm/io/vpmtmr.c
index 930aa2d237..cb8713c9d0 100644
--- a/usr/src/uts/intel/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/intel/io/vmm/io/vpmtmr.c
@@ -155,3 +155,44 @@ vpmtmr_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val)
return (0);
}
+/*
+ * vmm-data read handler for the ACPI PM timer (VDC_PM_TIMER, version 1).
+ *
+ * datap is the struct vpmtmr being exported and req->vdr_data is the
+ * struct vdi_pm_timer_v1 to fill in.  The request class, version, and length
+ * are asserted with VERIFY3U rather than reported through the return value.
+ * The timer base is emitted after passing through vm_normalize_hrtime() and
+ * the IO port is copied out as-is.  Always returns 0.
+ */
+static int
+vpmtmr_data_read(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_PM_TIMER);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_pm_timer_v1));
+
+ struct vpmtmr *vpmtmr = datap;
+ struct vdi_pm_timer_v1 *out = req->vdr_data;
+
+ out->vpt_time_base = vm_normalize_hrtime(vpmtmr->vm, vpmtmr->base_time);
+ out->vpt_ioport = vpmtmr->io_port;
+
+ return (0);
+}
+/*
+ * vmm-data write handler for the ACPI PM timer (VDC_PM_TIMER, version 1).
+ *
+ * datap is the struct vpmtmr to update and req->vdr_data is the incoming
+ * struct vdi_pm_timer_v1.  Only vpt_time_base is consumed: it is passed
+ * through vm_denormalize_hrtime() and stored as the timer's base_time.  The
+ * vpt_ioport field is not read here.  Always returns 0.
+ */
+static int
+vpmtmr_data_write(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_PM_TIMER);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_pm_timer_v1));
+
+ struct vpmtmr *vpmtmr = datap;
+ const struct vdi_pm_timer_v1 *src = req->vdr_data;
+
+ vpmtmr->base_time =
+ vm_denormalize_hrtime(vpmtmr->vm, src->vpt_time_base);
+
+ return (0);
+}
+/*
+ * Version 1 vmm-data provider entry for the ACPI PM timer (VDC_PM_TIMER).
+ * VMM_DATA_VERSION() places the entry in the vmm_data_version_entries linker
+ * set.  Requests are expected to carry sizeof (struct vdi_pm_timer_v1) bytes.
+ */
+static const vmm_data_version_entry_t pm_timer_v1 = {
+ .vdve_class = VDC_PM_TIMER,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_pm_timer_v1),
+ .vdve_readf = vpmtmr_data_read,
+ .vdve_writef = vpmtmr_data_write,
+};
+VMM_DATA_VERSION(pm_timer_v1);
diff --git a/usr/src/uts/intel/io/vmm/io/vrtc.c b/usr/src/uts/intel/io/vmm/io/vrtc.c
index 4c74cd7922..906b449ddc 100644
--- a/usr/src/uts/intel/io/vmm/io/vrtc.c
+++ b/usr/src/uts/intel/io/vmm/io/vrtc.c
@@ -97,6 +97,7 @@ struct vrtc {
#define RTC_IRQ 8
#define RTCSB_BIN 0x04
#define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
+#define RTCSC_MASK (RTCIR_UPDATE | RTCIR_ALARM | RTCIR_PERIOD | RTCIR_INT)
#define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0)
#define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
#define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
@@ -968,3 +969,69 @@ vrtc_localize_resources(struct vrtc *vrtc)
{
vmm_glue_callout_localize(&vrtc->callout);
}
+/*
+ * vmm-data read handler for the RTC (VDC_RTC, version 1).
+ *
+ * datap is the struct vrtc being exported and req->vdr_data is the
+ * struct vdi_rtc_v1 to fill in.  The request class, version, and length are
+ * asserted with VERIFY3U rather than reported through the return value.
+ * While holding the vrtc lock, the selected address, the base uptime (passed
+ * through vm_normalize_hrtime()), the base RTC time, and
+ * sizeof (out->vr_content) bytes of the rtcdev contents are copied out.
+ * vr_rtc_nsec is always reported as 0.  Always returns 0.
+ */
+static int
+vrtc_data_read(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_RTC);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_rtc_v1));
+
+ struct vrtc *vrtc = datap;
+ struct vdi_rtc_v1 *out = req->vdr_data;
+
+ VRTC_LOCK(vrtc);
+
+ out->vr_addr = vrtc->addr;
+ out->vr_time_base = vm_normalize_hrtime(vrtc->vm, vrtc->base_uptime);
+ out->vr_rtc_sec = vrtc->base_rtctime;
+ /* XXX: vrtc does not have sub-1s precision yet */
+ out->vr_rtc_nsec = 0;
+ bcopy(&vrtc->rtcdev, out->vr_content, sizeof (out->vr_content));
+
+ VRTC_UNLOCK(vrtc);
+
+ return (0);
+}
+/*
+ * vmm-data write handler for the RTC (VDC_RTC, version 1).
+ *
+ * datap is the struct vrtc to update and req->vdr_data is the incoming
+ * struct vdi_rtc_v1.  While holding the vrtc lock, the address, base uptime
+ * (passed through vm_denormalize_hrtime()), base RTC time, and
+ * sizeof (vrtc->rtcdev) bytes of register contents are loaded from the
+ * payload.  Afterwards the status registers are sanitized (RTCSA_TUP cleared
+ * in A, C limited to RTCSC_MASK, D forced to RTCSD_PWR), the time fields are
+ * regenerated from the current RTC time, and the callout is rescheduled.
+ * vr_rtc_nsec is not consumed.  Always returns 0.
+ */
+static int
+vrtc_data_write(void *datap, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_RTC);
+ VERIFY3U(req->vdr_version, ==, 1);
+ VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_rtc_v1));
+
+ struct vrtc *vrtc = datap;
+ const struct vdi_rtc_v1 *src = req->vdr_data;
+
+ VRTC_LOCK(vrtc);
+
+ vrtc->addr = src->vr_addr;
+ vrtc->base_uptime = vm_denormalize_hrtime(vrtc->vm, src->vr_time_base);
+ vrtc->base_rtctime = src->vr_rtc_sec;
+ bcopy(src->vr_content, &vrtc->rtcdev, sizeof (vrtc->rtcdev));
+
+ /* TODO: handle status update for register B */
+ vrtc->rtcdev.reg_a &= ~RTCSA_TUP;
+ vrtc->rtcdev.reg_c &= RTCSC_MASK;
+ vrtc->rtcdev.reg_d = RTCSD_PWR;
+
+ /* Sync the actual RTC time into the appropriate fields */
+ time_t curtime = vrtc_curtime(vrtc, NULL);
+ secs_to_rtc(curtime, vrtc, 1);
+
+ /* Make sure the callout is appropriately scheduled */
+ vrtc_callout_reset(vrtc, vrtc_freq(vrtc));
+
+ VRTC_UNLOCK(vrtc);
+ return (0);
+}
+/*
+ * Version 1 vmm-data provider entry for the RTC (VDC_RTC).
+ * VMM_DATA_VERSION() places the entry in the vmm_data_version_entries linker
+ * set.  Requests are expected to carry sizeof (struct vdi_rtc_v1) bytes.
+ */
+static const vmm_data_version_entry_t rtc_v1 = {
+ .vdve_class = VDC_RTC,
+ .vdve_version = 1,
+ .vdve_len_expect = sizeof (struct vdi_rtc_v1),
+ .vdve_readf = vrtc_data_read,
+ .vdve_writef = vrtc_data_write,
+};
+VMM_DATA_VERSION(rtc_v1);
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
index 87f8e18b47..bc7f1bb0f2 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
@@ -49,6 +49,8 @@
#include <sys/sdt.h>
#include <x86/segments.h>
#include <sys/vmm.h>
+#include <sys/vmm_data.h>
+#include <sys/linker_set.h>
SDT_PROVIDER_DECLARE(vmm);
@@ -65,6 +67,7 @@ struct vmspace;
struct vm_client;
struct vm_object;
struct vm_guest_paging;
+struct vmm_data_req;
typedef int (*vmm_init_func_t)(void);
typedef int (*vmm_cleanup_func_t)(void);
@@ -229,8 +232,10 @@ void vcpu_block_run(struct vm *, int);
void vcpu_unblock_run(struct vm *, int);
uint64_t vcpu_tsc_offset(struct vm *vm, int vcpuid, bool phys_adj);
+hrtime_t vm_normalize_hrtime(struct vm *, hrtime_t);
+hrtime_t vm_denormalize_hrtime(struct vm *, hrtime_t);
-static __inline int
+static __inline bool
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{
return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
@@ -434,4 +439,29 @@ typedef struct vmm_vcpu_kstats {
int vmm_kstat_update_vcpu(struct kstat *, int);
+typedef struct vmm_data_req {
+ uint16_t vdr_class; /* data class (VDC_*) being accessed */
+ uint16_t vdr_version; /* version of that class's data format */
+ uint32_t vdr_flags; /* VDX_FLAG_* bits passed along from the ioctl */
+ uint32_t vdr_len; /* length of the vdr_data buffer, in bytes */
+ void *vdr_data; /* kernel buffer holding the payload */
+} vmm_data_req_t;
+typedef struct vmm_data_req vmm_data_req_t; /* NOTE(review): repeats typedef above */
+
+typedef int (*vmm_data_writef_t)(void *, const vmm_data_req_t *);
+typedef int (*vmm_data_readf_t)(void *, const vmm_data_req_t *);
+
+typedef struct vmm_data_version_entry {
+ uint16_t vdve_class; /* data class (VDC_*) served by this entry */
+ uint16_t vdve_version; /* data format version served by this entry */
+ uint16_t vdve_len_expect; /* required vdr_len; 0 means not enforced */
+ vmm_data_readf_t vdve_readf; /* handler called from vmm_data_read() */
+ vmm_data_writef_t vdve_writef; /* handler called from vmm_data_write() */
+} vmm_data_version_entry_t;
+
+#define VMM_DATA_VERSION(sym) SET_ENTRY(vmm_data_version_entries, sym)
+
+int vmm_data_read(struct vm *, int, const vmm_data_req_t *);
+int vmm_data_write(struct vm *, int, const vmm_data_req_t *);
+
#endif /* _VMM_KERNEL_H_ */
diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c
index 0ff23e88b2..565dcbbe0a 100644
--- a/usr/src/uts/intel/io/vmm/vmm.c
+++ b/usr/src/uts/intel/io/vmm/vmm.c
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <sys/vmm_instruction_emul.h>
#include <sys/vmm_vm.h>
#include <sys/vmm_gpt.h>
+#include <sys/vmm_data.h>
#include "vmm_ioport.h"
#include "vmm_host.h"
@@ -211,7 +212,9 @@ struct vm {
uint16_t cores; /* (o) num of cores/socket */
uint16_t threads; /* (o) num of threads/core */
uint16_t maxcpus; /* (o) max pluggable cpus */
+
uint64_t boot_tsc_offset; /* (i) TSC offset at VM boot */
+ hrtime_t boot_hrtime; /* (i) hrtime at VM boot */
struct ioport_config ioports; /* (o) ioport handling */
@@ -519,7 +522,12 @@ vm_init(struct vm *vm, bool create)
* The TSC offsetting math is all unsigned, using overflow for negative
 * offsets. A reading of the TSC is negated to form the boot offset.
*/
- vm->boot_tsc_offset = (uint64_t)(-(int64_t)rdtsc_offset());
+ const uint64_t boot_tsc = rdtsc_offset();
+ vm->boot_tsc_offset = (uint64_t)(-(int64_t)boot_tsc);
+
+ /* Convert the boot TSC reading to hrtime */
+ vm->boot_hrtime = (hrtime_t)boot_tsc;
+ scalehrtime(&vm->boot_hrtime);
}
/*
@@ -3007,6 +3015,9 @@ vm_set_capability(struct vm *vm, int vcpu, int type, int val)
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
+ ASSERT3S(cpu, >=, 0);
+ ASSERT3S(cpu, <, VM_MAXCPU);
+
return (vm->vcpu[cpu].vlapic);
}
@@ -3086,6 +3097,22 @@ vcpu_tsc_offset(struct vm *vm, int vcpuid, bool phys_adj)
return (vcpu_off);
}
+/*
+ * Normalize hrtime against the boot time for a VM.
+ *
+ * Returns hrt minus vm->boot_hrtime.  This is the inverse of
+ * vm_denormalize_hrtime().
+ */
+hrtime_t
+vm_normalize_hrtime(struct vm *vm, hrtime_t hrt)
+{
+ /* To avoid underflow/overflow UB, perform math as unsigned */
+ return ((hrtime_t)((uint64_t)hrt - (uint64_t)vm->boot_hrtime));
+}
+
+/*
+ * Denormalize hrtime against the boot time for a VM.
+ *
+ * Returns hrt plus vm->boot_hrtime.  This is the inverse of
+ * vm_normalize_hrtime().
+ */
+hrtime_t
+vm_denormalize_hrtime(struct vm *vm, hrtime_t hrt)
+{
+ /* To avoid underflow/overflow UB, perform math as unsigned */
+ return ((hrtime_t)((uint64_t)hrt + (uint64_t)vm->boot_hrtime));
+}
+
int
vm_activate_cpu(struct vm *vm, int vcpuid)
{
@@ -3664,3 +3691,166 @@ vmm_kstat_update_vcpu(struct kstat *ksp, int rw)
return (0);
}
+
+SET_DECLARE(vmm_data_version_entries, const vmm_data_version_entry_t);
+/*
+ * Report whether a vmm data class holds per-vCPU data.  This is true for
+ * VDC_REGISTER, VDC_MSR, VDC_FPU, VDC_LAPIC, and VDC_VMM_ARCH; every other
+ * value (including unrecognized ones) yields false.
+ */
+static inline bool
+vmm_data_is_cpu_specific(uint16_t data_class)
+{
+ switch (data_class) {
+ case VDC_REGISTER:
+ case VDC_MSR:
+ case VDC_FPU:
+ case VDC_LAPIC:
+ case VDC_VMM_ARCH:
+ return (true);
+ default:
+ return (false);
+ }
+}
+/*
+ * Look up the vmm_data_version_entries member whose class and version match
+ * the request.
+ *
+ * Returns the matching entry, leaving *err untouched.  On failure NULL is
+ * returned and *err is set to:
+ * - ENOSPC if the matching entry has a non-zero vdve_len_expect which differs
+ * from req->vdr_len
+ * - EINVAL if no entry matches the class and version
+ */
+static const vmm_data_version_entry_t *
+vmm_data_find(const vmm_data_req_t *req, int *err)
+{
+ const vmm_data_version_entry_t **vdpp, *vdp;
+ SET_FOREACH(vdpp, vmm_data_version_entries) {
+ vdp = *vdpp;
+ if (vdp->vdve_class == req->vdr_class &&
+ vdp->vdve_version == req->vdr_version) {
+ /*
+ * Enforce any data length expectation expressed by the
+ * provider for this data.
+ */
+ if (vdp->vdve_len_expect != 0 &&
+ vdp->vdve_len_expect != req->vdr_len) {
+ *err = ENOSPC;
+ return (NULL);
+ }
+ return (vdp);
+ }
+ }
+ *err = EINVAL;
+ return (NULL);
+}
+/*
+ * Resolve the object which is handed to a provider's read/write handler as
+ * its datap argument, based on the class of the request.
+ *
+ * For VDC_LAPIC the result comes from vm_lapic(vm, vcpuid); vcpuid is not
+ * range-checked here, so callers must have validated it.  The VM-wide device
+ * classes return the corresponding device pointer from the vm, ignoring
+ * vcpuid.  NULL is returned for VDC_FPU, VDC_REGISTER, VDC_VMM_ARCH, and
+ * VDC_MSR.  Any other class causes a panic.
+ */
+static void *
+vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid)
+{
+ switch (req->vdr_class) {
+ /* per-cpu data/devices */
+ case VDC_LAPIC:
+ return (vm_lapic(vm, vcpuid));
+
+ case VDC_FPU:
+ case VDC_REGISTER:
+ case VDC_VMM_ARCH:
+ case VDC_MSR:
+ /*
+ * These have per-CPU handling which is dispatched outside
+ * vmm_data_version_entries listing.
+ */
+ return (NULL);
+
+ /* system-wide data/devices */
+ case VDC_IOAPIC:
+ return (vm->vioapic);
+ case VDC_ATPIT:
+ return (vm->vatpit);
+ case VDC_ATPIC:
+ return (vm->vatpic);
+ case VDC_HPET:
+ return (vm->vhpet);
+ case VDC_PM_TIMER:
+ return (vm->vpmtmr);
+ case VDC_RTC:
+ return (vm->vrtc);
+
+ default:
+ /* The data class will have been validated by now */
+ panic("Unexpected class %u", req->vdr_class);
+ }
+}
+
+/*
+ * Read vmm data of the class/version described by req into req->vdr_data.
+ *
+ * For per-vCPU data classes, vcpuid must fall within [0, VM_MAXCPU).  It is
+ * not consulted for VM-wide classes, for which the ioctl path passes -1.
+ *
+ * Returns 0 on success, otherwise an errno:
+ * - EINVAL: vcpuid is out of range for a per-vCPU class, no provider matches
+ * the class/version, or the class is not implemented yet
+ * - ENOSPC: req->vdr_len differs from the length the provider expects
+ * - whatever error the provider's read handler returns
+ */
+int
+vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+ int err = 0;
+
+ if (vmm_data_is_cpu_specific(req->vdr_class)) {
+ /*
+ * Negative IDs (such as the -1 used for VM-wide requests) must
+ * be rejected too: vcpuid is later used to index per-vCPU state
+ * and the bounds assertions in vm_lapic() are DEBUG-only.
+ */
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU) {
+ return (EINVAL);
+ }
+ }
+
+ const vmm_data_version_entry_t *entry;
+ entry = vmm_data_find(req, &err);
+ if (entry == NULL) {
+ ASSERT(err != 0);
+ return (err);
+ }
+
+ void *datap = vmm_data_from_class(req, vm, vcpuid);
+ if (datap != NULL) {
+ err = entry->vdve_readf(datap, req);
+ } else {
+ switch (req->vdr_class) {
+ case VDC_FPU:
+ /* TODO: wire up to xsave export via hma_fpu iface */
+ err = EINVAL;
+ break;
+ case VDC_REGISTER:
+ case VDC_VMM_ARCH:
+ case VDC_MSR:
+ /* TODO: implement */
+ err = EINVAL;
+ break;
+ default:
+ err = EINVAL;
+ break;
+ }
+ }
+
+ return (err);
+}
+
+/*
+ * Write vmm data of the class/version described by req from req->vdr_data.
+ *
+ * For per-vCPU data classes, vcpuid must fall within [0, VM_MAXCPU).  It is
+ * not consulted for VM-wide classes, for which the ioctl path passes -1.
+ *
+ * Returns 0 on success, otherwise an errno:
+ * - EINVAL: vcpuid is out of range for a per-vCPU class, no provider matches
+ * the class/version, or the class is not implemented yet
+ * - ENOSPC: req->vdr_len differs from the length the provider expects
+ * - whatever error the provider's write handler returns
+ */
+int
+vmm_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+ int err = 0;
+
+ if (vmm_data_is_cpu_specific(req->vdr_class)) {
+ /*
+ * Negative IDs (such as the -1 used for VM-wide requests) must
+ * be rejected too: vcpuid is later used to index per-vCPU state
+ * and the bounds assertions in vm_lapic() are DEBUG-only.
+ */
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU) {
+ return (EINVAL);
+ }
+ }
+
+ const vmm_data_version_entry_t *entry;
+ entry = vmm_data_find(req, &err);
+ if (entry == NULL) {
+ ASSERT(err != 0);
+ return (err);
+ }
+
+ void *datap = vmm_data_from_class(req, vm, vcpuid);
+ if (datap != NULL) {
+ err = entry->vdve_writef(datap, req);
+ } else {
+ switch (req->vdr_class) {
+ case VDC_FPU:
+ /* TODO: wire up to xsave import via hma_fpu iface */
+ err = EINVAL;
+ break;
+ case VDC_REGISTER:
+ case VDC_VMM_ARCH:
+ case VDC_MSR:
+ /* TODO: implement */
+ err = EINVAL;
+ break;
+ default:
+ err = EINVAL;
+ break;
+ }
+ }
+
+ return (err);
+}
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
index a58e9d63f9..9a4693fc78 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
@@ -81,6 +81,9 @@ static list_t vmm_destroy_list;
static id_space_t *vmm_minors;
static void *vmm_statep;
+/* temporary safety switch */
+int vmm_allow_state_writes;
+
static const char *vmmdev_hvm_name = "bhyve";
/* For sdev plugin (/dev) */
@@ -477,6 +480,24 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
lock_type = LOCK_READ_HOLD;
break;
+ case VM_DATA_READ:
+ case VM_DATA_WRITE:
+ if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
+ return (EFAULT);
+ }
+ if (vcpu == -1) {
+ /* Access data for VM-wide devices */
+ vmm_write_lock(sc);
+ lock_type = LOCK_WRITE_HOLD;
+ } else if (vcpu >= 0 && vcpu < vm_get_maxcpus(sc->vmm_vm)) {
+ /* Access data associated with a specific vCPU */
+ vcpu_lock_one(sc, vcpu);
+ lock_type = LOCK_VCPU;
+ } else {
+ return (EINVAL);
+ }
+ break;
+
case VM_GET_GPA_PMAP:
case VM_IOAPIC_PINCOUNT:
case VM_SUSPEND:
@@ -1512,6 +1533,99 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
*/
break;
}
+ case VM_DATA_READ: {
+ struct vm_data_xfer vdx;
+
+ if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
+ error = EFAULT;
+ break;
+ }
+ if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
+ error = EINVAL;
+ break;
+ }
+ if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
+ error = EFBIG;
+ break;
+ }
+
+ const size_t len = vdx.vdx_len;
+ void *buf = kmem_alloc(len, KM_SLEEP);
+ if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0) {
+ if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+ kmem_free(buf, len);
+ error = EFAULT;
+ break;
+ }
+ } else {
+ bzero(buf, len);
+ }
+
+ vmm_data_req_t req = {
+ .vdr_class = vdx.vdx_class,
+ .vdr_version = vdx.vdx_version,
+ .vdr_flags = vdx.vdx_flags,
+ .vdr_len = vdx.vdx_len,
+ .vdr_data = buf,
+ };
+ error = vmm_data_read(sc->vmm_vm, vdx.vdx_vcpuid, &req);
+
+ if (error == 0) {
+ if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
+ error = EFAULT;
+ }
+ }
+ kmem_free(buf, len);
+ break;
+ }
+ case VM_DATA_WRITE: {
+ struct vm_data_xfer vdx;
+
+ if (ddi_copyin(datap, &vdx, sizeof (vdx), md) != 0) {
+ error = EFAULT;
+ break;
+ }
+ if ((vdx.vdx_flags & ~VDX_FLAGS_VALID) != 0) {
+ error = EINVAL;
+ break;
+ }
+ if (vdx.vdx_len > VM_DATA_XFER_LIMIT) {
+ error = EFBIG;
+ break;
+ }
+
+ const size_t len = vdx.vdx_len;
+ void *buf = kmem_alloc(len, KM_SLEEP);
+ if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+ kmem_free(buf, len);
+ error = EFAULT;
+ break;
+ }
+
+ vmm_data_req_t req = {
+ .vdr_class = vdx.vdx_class,
+ .vdr_version = vdx.vdx_version,
+ .vdr_flags = vdx.vdx_flags,
+ .vdr_len = vdx.vdx_len,
+ .vdr_data = buf,
+ };
+ if (vmm_allow_state_writes == 0) {
+ /* XXX: Play it safe for now */
+ error = EPERM;
+ } else {
+ error = vmm_data_write(sc->vmm_vm, vdx.vdx_vcpuid,
+ &req);
+ }
+
+ if (error == 0 &&
+ (vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) {
+ if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
+ error = EFAULT;
+ }
+ }
+ kmem_free(buf, len);
+ break;
+ }
default:
error = ENOTTY;
diff --git a/usr/src/uts/intel/sys/vmm_data.h b/usr/src/uts/intel/sys/vmm_data.h
new file mode 100644
index 0000000000..1b8614543c
--- /dev/null
+++ b/usr/src/uts/intel/sys/vmm_data.h
@@ -0,0 +1,211 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _VMM_DATA_H_
+#define _VMM_DATA_H_
+
+/* VMM Data Classes */
+#define VDC_META 0 /* Meta information about data system */
+#define VDC_VERSION 1 /* Version information for each data class */
+
+/* Classes bearing per-CPU data */
+#define VDC_REGISTER 2 /* Registers (GPR, segment, etc) */
+#define VDC_MSR 3 /* Model-specific registers */
+#define VDC_FPU 4 /* FPU (and associated SIMD) */
+#define VDC_LAPIC 5 /* Local APIC */
+#define VDC_VMM_ARCH 6 /* Arch-specific VMM state (VMX/SVM) */
+
+/* Classes for system-wide devices */
+#define VDC_IOAPIC 7 /* bhyve IO-APIC */
+#define VDC_ATPIT 8 /* i8254 PIT */
+#define VDC_ATPIC 9 /* i8259 PIC */
+#define VDC_HPET 10 /* HPET */
+#define VDC_PM_TIMER 11 /* ACPI Power Management Timer */
+#define VDC_RTC 12 /* IBM PC Real Time Clock */
+
+/* Indicates top of VMM Data Class range, updated as classes are added */
+#define VDC_MAX (VDC_RTC + 1)
+
+
+/* VMM Data Identifiers */
+
+
+/*
+ * VDC_REGISTER:
+ */
+
+/*
+ * VDC_MSR:
+ *
+ * Use MSR identifiers directly
+ */
+
+struct vdi_msr_entry_v1 {
+ uint32_t vme_msr;
+ uint32_t _pad;
+ uint64_t vme_value;
+};
+
+/*
+ * VDC_FPU:
+ *
+ * Unimplemented for now. Use VM_GET_FPU/VM_SET_FPU ioctls.
+ */
+
+/* VDC_LAPIC: */
+
+struct vdi_lapic_page_v1 {
+ uint32_t vlp_id;
+ uint32_t vlp_version;
+ uint32_t vlp_tpr;
+ uint32_t vlp_apr;
+ uint32_t vlp_ldr;
+ uint32_t vlp_dfr;
+ uint32_t vlp_svr;
+ uint32_t vlp_isr[8];
+ uint32_t vlp_tmr[8];
+ uint32_t vlp_irr[8];
+ uint32_t vlp_esr;
+ uint32_t vlp_lvt_cmci;
+ uint64_t vlp_icr;
+ uint32_t vlp_lvt_timer;
+ uint32_t vlp_lvt_thermal;
+ uint32_t vlp_lvt_pcint;
+ uint32_t vlp_lvt_lint0;
+ uint32_t vlp_lvt_lint1;
+ uint32_t vlp_lvt_error;
+ uint32_t vlp_icr_timer;
+ uint32_t vlp_dcr_timer;
+};
+
+struct vdi_lapic_v1 {
+ struct vdi_lapic_page_v1 vl_lapic; /* LAPIC register contents */
+ uint64_t vl_msr_apicbase; /* vlapic msr_apicbase value */
+ int64_t vl_timer_target; /* normalized hrtime; 0 skips timer setup on write */
+ uint32_t vl_esr_pending; /* vlapic esr_pending value */
+};
+
+
+/*
+ * VDC_VMM_ARCH:
+ */
+
+/* VDC_IOAPIC: */
+
+struct vdi_ioapic_v1 {
+ uint64_t vi_pin_reg[32];
+ uint32_t vi_pin_level[32];
+ uint32_t vi_id;
+ uint32_t vi_reg_sel;
+};
+
+/* VDC_ATPIT: */
+
+struct vdi_atpit_channel_v1 {
+ uint16_t vac_initial;
+ uint16_t vac_reg_cr;
+ uint16_t vac_reg_ol;
+ uint8_t vac_reg_status;
+ uint8_t vac_mode;
+ /*
+ * vac_status bits:
+ * - 0b00001 status latched
+ * - 0b00010 output latched
+ * - 0b00100 control register sel
+ * - 0b01000 output latch sel
+ * - 0b10000 free-running timer
+ */
+ uint8_t vac_status;
+
+ int64_t vac_time_target;
+};
+
+struct vdi_atpit_v1 {
+ struct vdi_atpit_channel_v1 va_channel[3];
+};
+
+/* VDC_ATPIC: */
+
+struct vdi_atpic_chip_v1 {
+ uint8_t vac_icw_state;
+ /*
+ * vac_status bits:
+ * - 0b00000001 ready
+ * - 0b00000010 auto EOI
+ * - 0b00000100 poll
+ * - 0b00001000 rotate
+ * - 0b00010000 special full nested
+ * - 0b00100000 read isr next
+ * - 0b01000000 intr raised
+ * - 0b10000000 special mask mode
+ */
+ uint8_t vac_status;
+ uint8_t vac_reg_irr;
+ uint8_t vac_reg_isr;
+ uint8_t vac_reg_imr;
+ uint8_t vac_irq_base;
+ uint8_t vac_lowprio;
+ uint8_t vac_elc;
+ uint32_t vac_level[8];
+};
+
+struct vdi_atpic_v1 {
+ struct vdi_atpic_chip_v1 va_chip[2];
+};
+
+/* VDC_HPET: */
+
+struct vdi_hpet_timer_v1 {
+ uint64_t vht_config;
+ uint64_t vht_msi;
+ uint32_t vht_comp_val;
+ uint32_t vht_comp_rate;
+ int64_t vht_time_target;
+};
+
+struct vdi_hpet_v1 {
+ uint64_t vh_config;
+ uint64_t vh_isr;
+ uint32_t vh_count_base;
+ int64_t vh_time_base;
+
+ struct vdi_hpet_timer_v1 vh_timers[8];
+};
+
+/* VDC_PM_TIMER: */
+
+struct vdi_pm_timer_v1 {
+ int64_t vpt_time_base; /* timer base_time as a normalized hrtime */
+ /*
+ * Since the PM-timer IO port registration can be set by a dedicated
+ * ioctl today, it is considered a read-only field in the vmm data
+ * interface and its contents will be ignored when writing state data to
+ * the timer.
+ */
+ uint16_t vpt_ioport;
+};
+
+/* VDC_RTC: */
+
+struct vdi_rtc_v1 {
+ uint8_t vr_content[128]; /* raw copy of the vrtc rtcdev contents */
+ uint8_t vr_addr; /* vrtc addr value */
+ int64_t vr_time_base; /* vrtc base_uptime as a normalized hrtime */
+ uint64_t vr_rtc_sec; /* vrtc base_rtctime */
+ uint64_t vr_rtc_nsec; /* reported as 0 on read; not consumed on write */
+};
+
+#endif /* _VMM_DATA_H_ */
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
index 5333facacf..fc8ccf406e 100644
--- a/usr/src/uts/intel/sys/vmm_dev.h
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -49,6 +49,7 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+#include <sys/vmm_data.h>
struct vm_create_req {
char name[VM_MAX_NAMELEN];
@@ -351,6 +352,23 @@ struct vmm_dirty_tracker {
void *vdt_pfns; /* bit vector of dirty bits */
};
+/* Current (arbitrary) max length for vm_data_xfer */
+#define VM_DATA_XFER_LIMIT 8192
+
+#define VDX_FLAG_READ_COPYIN (1 << 0)
+#define VDX_FLAG_WRITE_COPYOUT (1 << 1)
+
+#define VDX_FLAGS_VALID (VDX_FLAG_READ_COPYIN | VDX_FLAG_WRITE_COPYOUT)
+
+struct vm_data_xfer {
+ int vdx_vcpuid; /* target vCPU, or -1 for VM-wide devices */
+ uint16_t vdx_class; /* data class (VDC_*) */
+ uint16_t vdx_version; /* data format version for the class */
+ uint32_t vdx_flags; /* VDX_FLAG_* bits; others yield EINVAL */
+ uint32_t vdx_len; /* length of vdx_data; at most VM_DATA_XFER_LIMIT */
+ void *vdx_data; /* caller buffer for the payload */
+};
+
/*
* VMM Interface Version
*
@@ -366,7 +384,7 @@ struct vmm_dirty_tracker {
* best-effort activity. Nothing is to be inferred about the magnitude of a
* change when the version is modified. It follows no rules like semver.
*/
-#define VMM_CURRENT_INTERFACE_VERSION 1
+#define VMM_CURRENT_INTERFACE_VERSION 2
#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))
@@ -472,6 +490,9 @@ struct vmm_dirty_tracker {
#define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20)
#define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21)
+#define VM_DATA_READ (VMM_IOC_BASE | 0x22)
+#define VM_DATA_WRITE (VMM_IOC_BASE | 0x23)
+
#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)
#define VMM_CTL_DEV "/dev/vmmctl"