summaryrefslogtreecommitdiff
path: root/usr/src/uts/intel/sys
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/intel/sys')
-rw-r--r--usr/src/uts/intel/sys/Makefile11
-rw-r--r--usr/src/uts/intel/sys/Makefile.psm27
-rw-r--r--usr/src/uts/intel/sys/hma.h178
-rw-r--r--usr/src/uts/intel/sys/ppt_dev.h57
-rw-r--r--usr/src/uts/intel/sys/smt.h54
-rw-r--r--usr/src/uts/intel/sys/smt_machcpu.h44
-rw-r--r--usr/src/uts/intel/sys/viona_io.h63
-rw-r--r--usr/src/uts/intel/sys/vmm.h392
-rw-r--r--usr/src/uts/intel/sys/vmm_dev.h458
-rw-r--r--usr/src/uts/intel/sys/vmm_drv.h70
10 files changed, 1352 insertions, 2 deletions
diff --git a/usr/src/uts/intel/sys/Makefile b/usr/src/uts/intel/sys/Makefile
index 5cfbdec4fc..0a6af2de2d 100644
--- a/usr/src/uts/intel/sys/Makefile
+++ b/usr/src/uts/intel/sys/Makefile
@@ -91,6 +91,12 @@ HDRS = \
x86_archext.h \
xen_errno.h
+
+# Headers shared with the various machine architectures are installed via
+# different means, but are checked here, since it is a common point.
+include Makefile.psm
+CHECK_ONLY_HDRS = $(PSM_SHARED_HDRS)
+
ROOTDIR= $(ROOT)/usr/include/sys
SCSIDIR= $(ROOTDIR)/scsi
SCSIDIRS= $(SCSIDIR) $(SCSIDIR)/conf $(SCSIDIR)/generic \
@@ -100,8 +106,9 @@ ROOTDIRS= $(ROOTDIR) $(ROOTFSDIR)
ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%)
-CHECKHDRS= \
- $(HDRS:%.h=%.check)
+CHECKHDRS = \
+ $(HDRS:%.h=%.check) \
+ $(CHECK_ONLY_HDRS:%.h=%.check) \
# install rules
$(ROOTDIR)/%: %
diff --git a/usr/src/uts/intel/sys/Makefile.psm b/usr/src/uts/intel/sys/Makefile.psm
new file mode 100644
index 0000000000..8fecc14f49
--- /dev/null
+++ b/usr/src/uts/intel/sys/Makefile.psm
@@ -0,0 +1,27 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+# HMA and SMT-exclusion headers are used by and exposed (via system-header) by
+# the various machine architectures.
+
+PSM_SHARED_HDRS = \
+ hma.h \
+ smt.h \
+ smt_machcpu.h
+
+PSM_SHARED_HDR_DIR = $(UTSBASE)/intel/sys
+
+$(USR_PSM_ISYS_DIR)/%: $(PSM_SHARED_HDR_DIR)/% $(USR_PSM_ISYS_DIR)
+ $(INS.file)
diff --git a/usr/src/uts/intel/sys/hma.h b/usr/src/uts/intel/sys/hma.h
new file mode 100644
index 0000000000..e15cd60d5e
--- /dev/null
+++ b/usr/src/uts/intel/sys/hma.h
@@ -0,0 +1,178 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _SYS_HMA_H
+#define _SYS_HMA_H
+
+/*
+ * Hypervisor Multiplexor API
+ *
+ * This provides a set of APIs that are usable by hypervisor implementations
+ * that allows them to coexist and to make sure that they are all in a
+ * consistent state.
+ */
+
+#include <sys/fp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Register a hypervisor with HMA. On success, a pointer to the opaque
+ * registration token will be returned, indicating that proper host setup has
+ * occurred for further hypervisor actions.
+ */
+typedef struct hma_reg hma_reg_t;
+extern hma_reg_t *hma_register(const char *);
+extern hma_reg_t *hma_register_exclusive(const char *);
+extern void hma_unregister(hma_reg_t *);
+
+/*
+ * Allocate or free a VPID for use with VMX.
+ *
+ * This must not be performed by a hypervisor until it has successfully
+ * registered via hma_register().
+ */
+extern uint16_t hma_vmx_vpid_alloc(void);
+extern void hma_vmx_vpid_free(uint16_t);
+
+/*
+ * On all active CPUs, perform a single-context INVEPT on the given EPTP.
+ */
+extern void hma_vmx_invept_allcpus(uintptr_t);
+
+struct hma_svm_asid {
+ uint64_t hsa_gen;
+ uint32_t hsa_asid;
+};
+typedef struct hma_svm_asid hma_svm_asid_t;
+
+extern void hma_svm_asid_init(hma_svm_asid_t *);
+extern uint8_t hma_svm_asid_update(hma_svm_asid_t *, boolean_t, boolean_t);
+
+/*
+ * FPU related management. These functions provide a set of APIs to manage the
+ * FPU state and switch between host and guest management of this state.
+ */
+
+typedef struct hma_fpu hma_fpu_t;
+
+/*
+ * Allocate and free FPU state management structures.
+ */
+extern hma_fpu_t *hma_fpu_alloc(int);
+extern void hma_fpu_free(hma_fpu_t *);
+
+/*
+ * Resets the FPU to the standard x86 default state. This should be called after
+ * allocation and whenever the guest needs to logically reset the state (when
+ * the CPU is reset, etc.). If the system supports xsave, then the xbv state
+ * will be set to have the x87 and SSE portions as valid and the rest will be
+ * set to their initial states (regardless of whether or not they will be
+ * advertised in the host).
+ */
+extern int hma_fpu_init(hma_fpu_t *);
+
+/*
+ * Save the current host's FPU state and restore the guest's state in the FPU.
+ * At this point, CR0.TS will not be set. The caller must not use the FPU in any
+ * way before entering the guest.
+ *
+ * This should be used in normal operation before entering the guest. It should
+ * also be used in a thread context operation when the thread is being scheduled
+ * again. This interface has an implicit assumption that a given guest state
+ * will be mapped to only one specific OS thread at any given time.
+ *
+ * This must be called with preemption disabled.
+ */
+extern void hma_fpu_start_guest(hma_fpu_t *);
+
+/*
+ * Save the current guest's FPU state and restore the host's state in the FPU.
+ * By the time the thread returns to userland, the FPU will be in a usable
+ * state; however, the FPU will not be usable while inside the kernel (CR0.TS
+ * will be set).
+ *
+ * This should be used in normal operation after leaving the guest and returning
+ * to user land. It should also be used in a thread context operation when the
+ * thread is being descheduled. Like the hma_fpu_start_guest() interface, this
+ * interface has an implicit assumption that a given guest state will be mapped
+ * to only a single OS thread at any given time.
+ *
+ * This must be called with preemption disabled.
+ */
+extern void hma_fpu_stop_guest(hma_fpu_t *);
+
+typedef enum {
+ HFXR_OK = 0,
+ HFXR_NO_SPACE, /* buffer is not large enough */
+ HFXR_BAD_ALIGN, /* buffer is not properly (64-byte) aligned */
+ HFXR_UNSUP_FMT, /* data using unsupported (compressed) format */
+ HFXR_UNSUP_FEAT, /* data has unsupported features set */
+ HFXR_INVALID_DATA, /* CPU determined xsave data is invalid */
+} hma_fpu_xsave_result_t;
+
+/*
+ * Get and set the contents of the FPU save area, formatted as XSAVE-style
+ * information. If XSAVE is not supported by the host, the input and output
+ * values will be translated to and from the FXSAVE format. Attempts to set
+ * XSAVE values not supported by the host will result in an error.
+ *
+ * These functions cannot be called while the FPU is in use by the guest. It is
+ * up to callers to guarantee this invariant.
+ */
+extern hma_fpu_xsave_result_t hma_fpu_get_xsave_state(const hma_fpu_t *, void *,
+ size_t);
+extern hma_fpu_xsave_result_t hma_fpu_set_xsave_state(hma_fpu_t *, void *,
+ size_t);
+
+typedef struct hma_xsave_state_desc {
+ uint64_t hxsd_bit;
+ uint32_t hxsd_size;
+ uint32_t hxsd_off;
+} hma_xsave_state_desc_t;
+
+/*
+ * Get a description of the data fields supported by the host via the XSAVE APIs
+ * for getting/setting guest FPU data. See the function definition for more
+ * detailed parameter usage.
+ */
+extern uint_t hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *, uint_t,
+ size_t *);
+
+/*
+ * Get and set the contents of the FPU save area. This sets the fxsave style
+ * information. In all cases when this is in use, if an XSAVE state is actually
+ * used by the host, then this will end up zeroing all of the non-fxsave state
+ * and it will reset the xbv to indicate that the legacy x87 and SSE portions
+ * are valid.
+ *
+ * These functions cannot be called while the FPU is in use by the guest. It is
+ * up to callers to guarantee this fact.
+ */
+extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *);
+extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *);
+
+/* Perform HMA initialization steps during boot-up. */
+extern void hma_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_HMA_H */
diff --git a/usr/src/uts/intel/sys/ppt_dev.h b/usr/src/uts/intel/sys/ppt_dev.h
new file mode 100644
index 0000000000..a7b65ad0dd
--- /dev/null
+++ b/usr/src/uts/intel/sys/ppt_dev.h
@@ -0,0 +1,57 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
+
+/*
+ * Copyright 2018 Joyent, Inc
+ */
+
+#ifndef _PPT_DEV_H
+#define _PPT_DEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PPT_IOC (('P' << 16)|('T' << 8))
+
+#define PPT_CFG_READ (PPT_IOC | 0x01)
+#define PPT_CFG_WRITE (PPT_IOC | 0x02)
+#define PPT_BAR_QUERY (PPT_IOC | 0x03)
+#define PPT_BAR_READ (PPT_IOC | 0x04)
+#define PPT_BAR_WRITE (PPT_IOC | 0x05)
+
+#define PPT_MAXNAMELEN 32
+
+struct ppt_cfg_io {
+ uint64_t pci_off;
+ uint32_t pci_width;
+ uint32_t pci_data;
+};
+struct ppt_bar_io {
+ uint32_t pbi_bar;
+ uint32_t pbi_off;
+ uint32_t pbi_width;
+ uint32_t pbi_data;
+};
+
+struct ppt_bar_query {
+ uint32_t pbq_baridx;
+ uint32_t pbq_type;
+ uint64_t pbq_base;
+ uint64_t pbq_size;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PPT_DEV_H */
diff --git a/usr/src/uts/intel/sys/smt.h b/usr/src/uts/intel/sys/smt.h
new file mode 100644
index 0000000000..f539d13799
--- /dev/null
+++ b/usr/src/uts/intel/sys/smt.h
@@ -0,0 +1,54 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _SYS_SMT_H
+#define _SYS_SMT_H
+
+#include <sys/types.h>
+#include <sys/thread.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cpu;
+
+extern int smt_boot_disable;
+
+extern void smt_init(void);
+extern void smt_late_init(void);
+extern int smt_disable(void);
+extern boolean_t smt_can_enable(struct cpu *, int);
+extern void smt_force_enabled(void);
+
+extern void smt_intr_alloc_pil(uint_t);
+
+extern int smt_acquire(void);
+extern void smt_release(void);
+extern void smt_mark(void);
+extern void smt_begin_unsafe(void);
+extern void smt_end_unsafe(void);
+extern void smt_begin_intr(uint_t);
+extern void smt_end_intr(void);
+extern void smt_mark_as_vcpu(void);
+
+extern boolean_t smt_should_run(kthread_t *, struct cpu *);
+extern pri_t smt_adjust_cpu_score(kthread_t *, struct cpu *, pri_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SMT_H */
diff --git a/usr/src/uts/intel/sys/smt_machcpu.h b/usr/src/uts/intel/sys/smt_machcpu.h
new file mode 100644
index 0000000000..a8fcd8621b
--- /dev/null
+++ b/usr/src/uts/intel/sys/smt_machcpu.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _SYS_SMT_MACHCPU_H
+#define _SYS_SMT_MACHCPU_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The SMT exclusion logic requires `struct cpu_smt` be present in
+ * `struct machcpu` as the field `mcpu_smt`. It is defined here, on its own, so
+ * it may be easily included by the relevant machine architecture(s).
+ */
+typedef struct cpu_smt {
+ lock_t cs_lock;
+ char cs_pad[56];
+ struct cpu *cs_sib;
+ volatile uint64_t cs_intr_depth;
+ volatile uint64_t cs_state;
+ volatile uint64_t cs_sibstate;
+} cpu_smt_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SMT_MACHCPU_H */
diff --git a/usr/src/uts/intel/sys/viona_io.h b/usr/src/uts/intel/sys/viona_io.h
new file mode 100644
index 0000000000..46cc72eb06
--- /dev/null
+++ b/usr/src/uts/intel/sys/viona_io.h
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _VIONA_IO_H_
+#define _VIONA_IO_H_
+
+#define VNA_IOC (('V' << 16)|('C' << 8))
+#define VNA_IOC_CREATE (VNA_IOC | 0x01)
+#define VNA_IOC_DELETE (VNA_IOC | 0x02)
+
+#define VNA_IOC_RING_INIT (VNA_IOC | 0x10)
+#define VNA_IOC_RING_RESET (VNA_IOC | 0x11)
+#define VNA_IOC_RING_KICK (VNA_IOC | 0x12)
+#define VNA_IOC_RING_SET_MSI (VNA_IOC | 0x13)
+#define VNA_IOC_RING_INTR_CLR (VNA_IOC | 0x14)
+
+#define VNA_IOC_INTR_POLL (VNA_IOC | 0x20)
+#define VNA_IOC_SET_FEATURES (VNA_IOC | 0x21)
+#define VNA_IOC_GET_FEATURES (VNA_IOC | 0x22)
+#define VNA_IOC_SET_NOTIFY_IOP (VNA_IOC | 0x23)
+
+typedef struct vioc_create {
+ datalink_id_t c_linkid;
+ int c_vmfd;
+} vioc_create_t;
+
+typedef struct vioc_ring_init {
+ uint16_t ri_index;
+ uint16_t ri_qsize;
+ uint64_t ri_qaddr;
+} vioc_ring_init_t;
+
+typedef struct vioc_ring_msi {
+ uint16_t rm_index;
+ uint64_t rm_addr;
+ uint64_t rm_msg;
+} vioc_ring_msi_t;
+
+enum viona_vq_id {
+ VIONA_VQ_RX = 0,
+ VIONA_VQ_TX = 1,
+ VIONA_VQ_MAX = 2
+};
+
+typedef struct vioc_intr_poll {
+ uint32_t vip_status[VIONA_VQ_MAX];
+} vioc_intr_poll_t;
+
+
+#endif /* _VIONA_IO_H_ */
diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h
new file mode 100644
index 0000000000..e58d63761e
--- /dev/null
+++ b/usr/src/uts/intel/sys/vmm.h
@@ -0,0 +1,392 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RDX,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15,
+ VM_REG_GUEST_CR0,
+ VM_REG_GUEST_CR3,
+ VM_REG_GUEST_CR4,
+ VM_REG_GUEST_DR7,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_RIP,
+ VM_REG_GUEST_RFLAGS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ VM_REG_GUEST_LDTR,
+ VM_REG_GUEST_TR,
+ VM_REG_GUEST_IDTR,
+ VM_REG_GUEST_GDTR,
+ VM_REG_GUEST_EFER,
+ VM_REG_GUEST_CR2,
+ VM_REG_GUEST_PDPTE0,
+ VM_REG_GUEST_PDPTE1,
+ VM_REG_GUEST_PDPTE2,
+ VM_REG_GUEST_PDPTE3,
+ VM_REG_GUEST_INTR_SHADOW,
+ VM_REG_GUEST_DR0,
+ VM_REG_GUEST_DR1,
+ VM_REG_GUEST_DR2,
+ VM_REG_GUEST_DR3,
+ VM_REG_GUEST_DR6,
+ VM_REG_GUEST_ENTRY_INST_LENGTH,
+ VM_REG_LAST
+};
+
+enum x2apic_state {
+ X2APIC_DISABLED,
+ X2APIC_ENABLED,
+ X2APIC_STATE_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+/*
+ * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
+ * To simplify structure definitions, an arbitrary limit has been chosen.
+ * This same limit is used for memory segment names
+ */
+
+#define VM_MAX_NAMELEN 128
+#define VM_MAX_SEG_NAMELEN 128
+
+#define VM_MAXCPU 32 /* maximum virtual cpus */
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_ENABLE_INVPCID,
+ VM_CAP_BPT_EXIT,
+ VM_CAP_MAX
+};
+
+enum vmx_caps {
+ VMX_CAP_NONE = 0,
+ VMX_CAP_TPR_SHADOW = (1UL << 0),
+ VMX_CAP_APICV = (1UL << 1),
+ VMX_CAP_APICV_X2APIC = (1UL << 2),
+ VMX_CAP_APICV_PIR = (1UL << 3),
+};
+
+enum vm_intr_trigger {
+ EDGE_TRIGGER,
+ LEVEL_TRIGGER
+};
+
+/*
+ * The 'access' field has the format specified in Table 21-2 of the Intel
+ * Architecture Manual vol 3b.
+ *
+ * XXX The contents of the 'access' field are architecturally defined except
+ * bit 16 - Segment Unusable.
+ */
+struct seg_desc {
+ uint64_t base;
+ uint32_t limit;
+ uint32_t access;
+};
+#define SEG_DESC_TYPE(access) ((access) & 0x001f)
+#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
+#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
+#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
+#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
+#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0)
+
+enum vm_cpu_mode {
+ CPU_MODE_REAL,
+ CPU_MODE_PROTECTED,
+ CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
+ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+ PAGING_MODE_FLAT,
+ PAGING_MODE_32,
+ PAGING_MODE_PAE,
+ PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+ uint64_t cr3;
+ int cpl;
+ enum vm_cpu_mode cpu_mode;
+ enum vm_paging_mode paging_mode;
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_INOUT,
+ VM_EXITCODE_VMX,
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_RDMSR,
+ VM_EXITCODE_WRMSR,
+ VM_EXITCODE_HLT,
+ VM_EXITCODE_MTRAP,
+ VM_EXITCODE_PAUSE,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_RUN_STATE,
+ VM_EXITCODE_MMIO_EMUL,
+ VM_EXITCODE_DEPRECATED, /* formerly RUNBLOCK */
+ VM_EXITCODE_IOAPIC_EOI,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_MMIO,
+ VM_EXITCODE_TASK_SWITCH,
+ VM_EXITCODE_MONITOR,
+ VM_EXITCODE_MWAIT,
+ VM_EXITCODE_SVM,
+ VM_EXITCODE_REQIDLE,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_VMINSN,
+ VM_EXITCODE_BPT,
+ VM_EXITCODE_HT,
+ VM_EXITCODE_MAX
+};
+
+enum inout_flags {
+ INOUT_IN = (1U << 0), /* direction: 'in' when set, else 'out' */
+
+ /*
+ * The following flags are used only for in-kernel emulation logic and
+ * are not exposed to userspace.
+ */
+ INOUT_STR = (1U << 1), /* ins/outs operation */
+ INOUT_REP = (1U << 2), /* 'rep' prefix present on instruction */
+};
+
+struct vm_inout {
+ uint32_t eax;
+ uint16_t port;
+ uint8_t bytes; /* 1 or 2 or 4 */
+ uint8_t flags; /* see: inout_flags */
+
+ /*
+ * The address size and segment are relevant to INS/OUTS operations.
+ * Userspace is not concerned with them since the in-kernel emulation
+ * handles those specific aspects.
+ */
+ uint8_t addrsize;
+ uint8_t segment;
+};
+
+struct vm_mmio {
+ uint8_t bytes; /* 1/2/4/8 bytes */
+ uint8_t read; /* read: 1, write: 0 */
+ uint16_t _pad[3];
+ uint64_t gpa;
+ uint64_t data;
+};
+
+enum task_switch_reason {
+ TSR_CALL,
+ TSR_IRET,
+ TSR_JMP,
+ TSR_IDT_GATE, /* task gate in IDT */
+};
+
+struct vm_task_switch {
+ uint16_t tsssel; /* new TSS selector */
+ int ext; /* task switch due to external event */
+ uint32_t errcode;
+ int errcode_valid; /* push 'errcode' on the new stack */
+ enum task_switch_reason reason;
+ struct vm_guest_paging paging;
+};
+
+enum vcpu_run_state {
+ VRS_HALT = 0,
+ VRS_INIT = (1 << 0),
+ VRS_RUN = (1 << 1),
+
+ VRS_PEND_INIT = (1 << 14),
+ VRS_PEND_SIPI = (1 << 15),
+};
+#define VRS_MASK_VALID(v) \
+	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
+#define VRS_IS_VALID(v) ((v) == VRS_MASK_VALID(v))
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length; /* 0 means unknown */
+ uint64_t rip;
+ union {
+ struct vm_inout inout;
+ struct vm_mmio mmio;
+ struct {
+ uint64_t gpa;
+ int fault_type;
+ } paging;
+ /*
+ * Kernel-internal MMIO decoding and emulation.
+ * Userspace should not expect to see this, but rather a
+ * VM_EXITCODE_MMIO with the above 'mmio' context.
+ */
+ struct {
+ uint64_t gpa;
+ uint64_t gla;
+ uint64_t cs_base;
+ int cs_d; /* CS.D */
+ } mmio_emul;
+ struct {
+ uint8_t inst[15];
+ uint8_t num_valid;
+ } inst_emul;
+ /*
+ * VMX specific payload. Used when there is no "better"
+ * exitcode to represent the VM-exit.
+ */
+ struct {
+ int status; /* vmx inst status */
+ /*
+ * 'exit_reason' and 'exit_qualification' are valid
+ * only if 'status' is zero.
+ */
+ uint32_t exit_reason;
+ uint64_t exit_qualification;
+ /*
+ * 'inst_error' and 'inst_type' are valid
+ * only if 'status' is non-zero.
+ */
+ int inst_type;
+ int inst_error;
+ } vmx;
+ /*
+ * SVM specific payload.
+ */
+ struct {
+ uint64_t exitcode;
+ uint64_t exitinfo1;
+ uint64_t exitinfo2;
+ } svm;
+ struct {
+ int inst_length;
+ } bpt;
+ struct {
+ uint32_t code; /* ecx value */
+ uint64_t wval;
+ } msr;
+ struct {
+ uint64_t rflags;
+ } hlt;
+ struct {
+ int vector;
+ } ioapic_eoi;
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ struct vm_task_switch task_switch;
+ } u;
+};
+
+enum vm_entry_cmds {
+ VEC_DEFAULT = 0,
+ VEC_DISCARD_INSTR, /* discard inst emul state */
+ VEC_FULFILL_MMIO, /* entry includes result for mmio emul */
+ VEC_FULFILL_INOUT, /* entry includes result for inout emul */
+};
+
+struct vm_entry {
+ int cpuid;
+ uint_t cmd; /* see: vm_entry_cmds */
+ void *exit_data;
+ union {
+ struct vm_inout inout;
+ struct vm_mmio mmio;
+ } u;
+};
+
+int vm_restart_instruction(void *vm, int vcpuid);
+
+enum vm_create_flags {
+ /*
+ * Allocate guest memory segments from existing reservoir capacity,
+ * rather than attempting to create transient allocations.
+ */
+ VCF_RESERVOIR_MEM = (1 << 0),
+};
+
+#endif /* _VMM_H_ */
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
new file mode 100644
index 0000000000..027a7da214
--- /dev/null
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -0,0 +1,458 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#include <machine/vmm.h>
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+
+struct vm_create_req {
+ char name[VM_MAX_NAMELEN];
+ uint64_t flags;
+};
+
+
+struct vm_destroy_req {
+ char name[VM_MAX_NAMELEN];
+};
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+#define VM_MEMMAP_F_IOMMU 0x02
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SEG_NAMELEN];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_seg_desc { /* data or code segment */
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ struct seg_desc desc;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_exception {
+ int cpuid;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+ int restart_instruction;
+};
+
+struct vm_lapic_msi {
+ uint64_t msg;
+ uint64_t addr;
+};
+
+struct vm_lapic_irq {
+ int cpuid;
+ int vector;
+};
+
+struct vm_ioapic_irq {
+ int irq;
+};
+
+struct vm_isa_irq {
+ int atpic_irq;
+ int ioapic_irq;
+};
+
+struct vm_isa_irq_trigger {
+ int atpic_irq;
+ enum vm_intr_trigger trigger;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+struct vm_pptdev {
+ int pptfd;
+};
+
+struct vm_pptdev_mmio {
+ int pptfd;
+ vm_paddr_t gpa;
+ vm_paddr_t hpa;
+ size_t len;
+};
+
+struct vm_pptdev_msi {
+ int vcpu;
+ int pptfd;
+ int numvec; /* 0 means disabled */
+ uint64_t msg;
+ uint64_t addr;
+};
+
+struct vm_pptdev_msix {
+ int vcpu;
+ int pptfd;
+ int idx;
+ uint64_t msg;
+ uint32_t vector_control;
+ uint64_t addr;
+};
+
+struct vm_pptdev_limits {
+ int pptfd;
+ int msi_limit;
+ int msix_limit;
+};
+
+struct vm_nmi {
+ int cpuid;
+};
+
+#define MAX_VM_STATS (64 + VM_MAXCPU)
+
+struct vm_stats {
+ int cpuid; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_x2apic {
+ int cpuid;
+ enum x2apic_state state;
+};
+
+struct vm_gpa_pte {
+ uint64_t gpa; /* in */
+ uint64_t pte[4]; /* out */
+ int ptenum;
+};
+
+struct vm_hpet_cap {
+ uint32_t capabilities; /* lower 32 bits of HPET capabilities */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+#define VM_REINIT_F_FORCE_SUSPEND (1 << 0)
+
+struct vm_reinit {
+ uint64_t flags;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+#ifndef _KERNEL
+ cpuset_t *cpus;
+#else
+ void *cpus;
+#endif
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_intinfo {
+ int vcpuid;
+ uint64_t info1;
+ uint64_t info2;
+};
+
+struct vm_rtc_time {
+ time_t secs;
+};
+
+struct vm_rtc_data {
+ int offset;
+ uint8_t value;
+};
+
+struct vm_devmem_offset {
+ int segid;
+ off_t offset;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+struct vm_readwrite_kernemu_device {
+ int vcpuid;
+ unsigned access_width : 3;
+ unsigned _unused : 29;
+ uint64_t gpa;
+ uint64_t value;
+};
+_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
+
+enum vcpu_reset_kind {
+ VRK_RESET = 0,
+ /*
+ * The reset performed by an INIT IPI clears much of the CPU state, but
+ * some portions are left untouched, unlike VRK_RESET, which represents
+ * a "full" reset as if the system was freshly powered on.
+ */
+ VRK_INIT = 1,
+};
+
+struct vm_vcpu_reset {
+ int vcpuid;
+ uint32_t kind; /* contains: enum vcpu_reset_kind */
+};
+
+struct vm_run_state {
+ int vcpuid;
+ uint32_t state; /* of enum cpu_init_status type */
+ uint8_t sipi_vector; /* vector of SIPI, if any */
+ uint8_t _pad[3];
+};
+
+/* Transfer data for VM_GET_FPU and VM_SET_FPU */
+struct vm_fpu_state {
+ int vcpuid;
+ void *buf;
+ size_t len;
+};
+
+struct vm_fpu_desc_entry {
+ uint64_t vfde_feature;
+ uint32_t vfde_size;
+ uint32_t vfde_off;
+};
+
+struct vm_fpu_desc {
+ struct vm_fpu_desc_entry *vfd_entry_data;
+ size_t vfd_req_size;
+ uint32_t vfd_num_entries;
+};
+
+struct vmm_resv_query {
+ size_t vrq_free_sz;
+ size_t vrq_alloc_sz;
+ size_t vrq_alloc_transient_sz;
+ size_t vrq_limit;
+};
+
+/*
+ * struct vmm_dirty_tracker is used for tracking dirty guest pages during
+ * e.g. live migration.
+ *
+ * - The `vdt_start_gpa` field specifies the offset from the beginning of
+ * guest physical memory to track;
+ * - `vdt_pfns` points to a bit vector indexed by guest PFN relative to the
+ * given start address. Each bit indicates whether the given guest page
+ * is dirty or not.
+ * - `vdt_len` specifies the length of the guest physical memory
+ * region in bytes. It also de facto bounds the range of guest addresses
+ * we will examine on any one `VM_TRACK_DIRTY_PAGES` ioctl(). If the
+ * range of the bit vector spans an unallocated region (or extends beyond
+ * the end of the guest physical address space) the corresponding bits in
+ * `vdt_pfns` will be zeroed.
+ */
+struct vmm_dirty_tracker {
+ uint64_t vdt_start_gpa;
+ size_t vdt_len; /* length of region */
+ void *vdt_pfns; /* bit vector of dirty bits */
+};
+
+#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))
+#define VMM_IOC_BASE (('v' << 16) | ('m' << 8))
+#define VMM_LOCK_IOC_BASE (('v' << 16) | ('l' << 8))
+#define VMM_CPU_IOC_BASE (('v' << 16) | ('p' << 8))
+
+/* Operations performed on the vmmctl device */
+#define VMM_CREATE_VM (VMMCTL_IOC_BASE | 0x01)
+#define VMM_DESTROY_VM (VMMCTL_IOC_BASE | 0x02)
+#define VMM_VM_SUPPORTED (VMMCTL_IOC_BASE | 0x03)
+
+#define VMM_RESV_QUERY (VMMCTL_IOC_BASE | 0x10)
+#define VMM_RESV_ADD (VMMCTL_IOC_BASE | 0x11)
+#define VMM_RESV_REMOVE (VMMCTL_IOC_BASE | 0x12)
+
+/* Operations performed in the context of a given vCPU */
+#define VM_RUN (VMM_CPU_IOC_BASE | 0x01)
+#define VM_SET_REGISTER (VMM_CPU_IOC_BASE | 0x02)
+#define VM_GET_REGISTER (VMM_CPU_IOC_BASE | 0x03)
+#define VM_SET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x04)
+#define VM_GET_SEGMENT_DESCRIPTOR (VMM_CPU_IOC_BASE | 0x05)
+#define VM_SET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x06)
+#define VM_GET_REGISTER_SET (VMM_CPU_IOC_BASE | 0x07)
+#define VM_INJECT_EXCEPTION (VMM_CPU_IOC_BASE | 0x08)
+#define VM_SET_CAPABILITY (VMM_CPU_IOC_BASE | 0x09)
+#define VM_GET_CAPABILITY (VMM_CPU_IOC_BASE | 0x0a)
+#define VM_PPTDEV_MSI (VMM_CPU_IOC_BASE | 0x0b)
+#define VM_PPTDEV_MSIX (VMM_CPU_IOC_BASE | 0x0c)
+#define VM_SET_X2APIC_STATE (VMM_CPU_IOC_BASE | 0x0d)
+#define VM_GLA2GPA (VMM_CPU_IOC_BASE | 0x0e)
+#define VM_GLA2GPA_NOFAULT (VMM_CPU_IOC_BASE | 0x0f)
+#define VM_ACTIVATE_CPU (VMM_CPU_IOC_BASE | 0x10)
+#define VM_SET_INTINFO (VMM_CPU_IOC_BASE | 0x11)
+#define VM_GET_INTINFO (VMM_CPU_IOC_BASE | 0x12)
+#define VM_RESTART_INSTRUCTION (VMM_CPU_IOC_BASE | 0x13)
+#define VM_SET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x14)
+#define VM_GET_KERNEMU_DEV (VMM_CPU_IOC_BASE | 0x15)
+#define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16)
+#define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17)
+#define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18)
+#define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19)
+#define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a)
+
+/* Operations requiring write-locking the VM */
+#define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01)
+#define VM_BIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x02)
+#define VM_UNBIND_PPTDEV (VMM_LOCK_IOC_BASE | 0x03)
+#define VM_MAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x04)
+#define VM_ALLOC_MEMSEG (VMM_LOCK_IOC_BASE | 0x05)
+#define VM_MMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x06)
+#define VM_PMTMR_LOCATE (VMM_LOCK_IOC_BASE | 0x07)
+#define VM_MUNMAP_MEMSEG (VMM_LOCK_IOC_BASE | 0x08)
+#define VM_UNMAP_PPTDEV_MMIO (VMM_LOCK_IOC_BASE | 0x09)
+
+#define VM_WRLOCK_CYCLE (VMM_LOCK_IOC_BASE | 0xff)
+
+/* All other ioctls */
+#define VM_GET_GPA_PMAP (VMM_IOC_BASE | 0x01)
+#define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02)
+#define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03)
+
+#define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04)
+#define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05)
+#define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06)
+
+#define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07)
+#define VM_IOAPIC_DEASSERT_IRQ (VMM_IOC_BASE | 0x08)
+#define VM_IOAPIC_PULSE_IRQ (VMM_IOC_BASE | 0x09)
+
+#define VM_ISA_ASSERT_IRQ (VMM_IOC_BASE | 0x0a)
+#define VM_ISA_DEASSERT_IRQ (VMM_IOC_BASE | 0x0b)
+#define VM_ISA_PULSE_IRQ (VMM_IOC_BASE | 0x0c)
+#define VM_ISA_SET_IRQ_TRIGGER (VMM_IOC_BASE | 0x0d)
+
+#define VM_RTC_WRITE (VMM_IOC_BASE | 0x0e)
+#define VM_RTC_READ (VMM_IOC_BASE | 0x0f)
+#define VM_RTC_SETTIME (VMM_IOC_BASE | 0x10)
+#define VM_RTC_GETTIME (VMM_IOC_BASE | 0x11)
+
+#define VM_SUSPEND (VMM_IOC_BASE | 0x12)
+
+#define VM_IOAPIC_PINCOUNT (VMM_IOC_BASE | 0x13)
+#define VM_GET_PPTDEV_LIMITS (VMM_IOC_BASE | 0x14)
+#define VM_GET_HPET_CAPABILITIES (VMM_IOC_BASE | 0x15)
+
+#define VM_STATS_IOC (VMM_IOC_BASE | 0x16)
+#define VM_STAT_DESC (VMM_IOC_BASE | 0x17)
+
+#define VM_INJECT_NMI (VMM_IOC_BASE | 0x18)
+#define VM_GET_X2APIC_STATE (VMM_IOC_BASE | 0x19)
+#define VM_SET_TOPOLOGY (VMM_IOC_BASE | 0x1a)
+#define VM_GET_TOPOLOGY (VMM_IOC_BASE | 0x1b)
+#define VM_GET_CPUS (VMM_IOC_BASE | 0x1c)
+#define VM_SUSPEND_CPU (VMM_IOC_BASE | 0x1d)
+#define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e)
+
+#define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f)
+
+/* Note: forces a barrier on a flush operation before returning. */
+#define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20)
+#define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21)
+
+#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)
+
+#define VMM_CTL_DEV "/dev/vmmctl"
+
+#endif
diff --git a/usr/src/uts/intel/sys/vmm_drv.h b/usr/src/uts/intel/sys/vmm_drv.h
new file mode 100644
index 0000000000..0b7f622e53
--- /dev/null
+++ b/usr/src/uts/intel/sys/vmm_drv.h
@@ -0,0 +1,70 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
+ */
+
+#ifndef _VMM_DRV_H_
+#define _VMM_DRV_H_
+
+#ifdef _KERNEL
+
+#include <sys/file.h>
+#include <sys/stdbool.h>
+
+struct vmm_hold;
+typedef struct vmm_hold vmm_hold_t;
+
+struct vmm_lease;
+typedef struct vmm_lease vmm_lease_t;
+
+/*
+ * This is effectively a synonym for the bhyve-internal 'struct vm_page' type.
+ * Use of `vmm_page_t *` instead allows us to keep those implementation details
+ * hidden from vmm_drv consumers.
+ */
+struct vmm_page;
+typedef struct vmm_page vmm_page_t;
+
+/*
+ * Because of tangled headers, this definition mirrors its ioport_handler_t
+ * counterpart in vmm_kernel.h.
+ */
+typedef int (*vmm_drv_iop_cb_t)(void *, bool, uint16_t, uint8_t, uint32_t *);
+
+extern int vmm_drv_hold(file_t *, cred_t *, vmm_hold_t **);
+extern void vmm_drv_rele(vmm_hold_t *);
+extern boolean_t vmm_drv_release_reqd(vmm_hold_t *);
+
+extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *),
+ void *);
+extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
+extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
+
+extern vmm_page_t *vmm_drv_page_hold(vmm_lease_t *, uintptr_t, int);
+extern void vmm_drv_page_release(vmm_page_t *);
+extern void vmm_drv_page_release_chain(vmm_page_t *);
+extern const void *vmm_drv_page_readable(const vmm_page_t *);
+extern void *vmm_drv_page_writable(const vmm_page_t *);
+extern void vmm_drv_page_chain(vmm_page_t *, vmm_page_t *);
+extern vmm_page_t *vmm_drv_page_next(const vmm_page_t *);
+
+extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
+
+extern int vmm_drv_ioport_hook(vmm_hold_t *, uint16_t, vmm_drv_iop_cb_t, void *,
+ void **);
+extern void vmm_drv_ioport_unhook(vmm_hold_t *, void **);
+#endif /* _KERNEL */
+
+#endif /* _VMM_DRV_H_ */