16 files changed, 1877 insertions, 142 deletions
diff --git a/usr/src/uts/i86pc/sys/Makefile b/usr/src/uts/i86pc/sys/Makefile
index 292cd04c2b..3d8332a930 100644
--- a/usr/src/uts/i86pc/sys/Makefile
+++ b/usr/src/uts/i86pc/sys/Makefile
@@ -37,7 +37,7 @@ include ../Makefile.i86pc
 #
 FILEMODE = 644
 
-HDRS=  \
+CHKHDRS=  \
 	acpidev.h	\
 	amd_iommu.h	\
 	asm_misc.h	\
@@ -46,6 +46,7 @@ HDRS=  \
 	ddi_subrdefs.h	\
 	debug_info.h	\
 	fastboot.h	\
+	hma.h		\
 	mach_mmu.h	\
 	machclock.h	\
 	machcpuvar.h	\
@@ -68,6 +69,16 @@ HDRS=  \
 	xc_levels.h	\
 	xsvc.h
 
+NOCHKHDRS= \
+	vmm.h		\
+	vmm_dev.h	\
+	vmm_impl.h	\
+	vmm_instruction_emul.h
+
+HDRS= \
+	$(CHKHDRS)	\
+	$(NOCHKHDRS)
+
 ROOTHDRS=	$(HDRS:%=$(USR_PSM_ISYS_DIR)/%)
 
 ROOTDIR=	$(ROOT)/usr/share/src
@@ -76,7 +87,7 @@ ROOTDIRS=	$(ROOTDIR)/uts $(ROOTDIR)/uts/$(PLATFORM)
 ROOTLINK=	$(ROOTDIR)/uts/$(PLATFORM)/sys
 LINKDEST=	../../../../platform/$(PLATFORM)/include/sys
 
-CHECKHDRS=	$(HDRS:%.h=%.check)
+CHECKHDRS=	$(CHKHDRS:%.h=%.check)
 
 .KEEP_STATE:
 
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index 26626ec5a4..f2528a632f 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -386,7 +386,7 @@ struct apic_io_intr {
 /* special or reserve vectors */
 #define	APIC_CHECK_RESERVE_VECTORS(v) \
 	(((v) == T_FASTTRAP) || ((v) == APIC_SPUR_INTR) || \
-	((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET))
+	((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET) || ((v) == 0x80))
 
 /* cmos shutdown code for BIOS						*/
 #define	BIOS_SHUTDOWN		0x0a
diff --git a/usr/src/uts/i86pc/sys/comm_page.h b/usr/src/uts/i86pc/sys/comm_page.h
index 520ad9001d..ea19c856a8 100644
--- a/usr/src/uts/i86pc/sys/comm_page.h
+++ b/usr/src/uts/i86pc/sys/comm_page.h
@@ -27,6 +27,7 @@ extern "C" {
 #endif
 
 #define	COMM_PAGE_SIZE	PAGESIZE
+#define	COMM_PAGE_ALIGN	0x4000
 
 #ifndef _ASM
 
diff --git a/usr/src/uts/i86pc/sys/gipt.h b/usr/src/uts/i86pc/sys/gipt.h
new file mode 100644
index 0000000000..4d7d523726
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/gipt.h
@@ -0,0 +1,92 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _GIPT_H_
+#define	_GIPT_H_
+
+#include <sys/types.h>
+#include <sys/mutex.h>
+#include <sys/param.h>
+#include <sys/list.h>
+
+struct gipt {
+	list_node_t	gipt_node;
+	uint64_t	gipt_vaddr;
+	uint64_t	gipt_pfn;
+	uint16_t	gipt_level;
+	uint16_t	gipt_valid_cnt;
+	uint32_t	_gipt_pad;
+	struct gipt	*gipt_parent;
+	uint64_t	*gipt_kva;
+	uint64_t	_gipt_pad2;
+};
+typedef struct gipt gipt_t;
+
+typedef enum {
+	PTET_EMPTY	= 0,
+	PTET_PAGE	= 1,
+	PTET_LINK	= 2,
+} gipt_pte_type_t;
+
+/* Given a PTE and its level, determine the type of that PTE */
+typedef gipt_pte_type_t (*gipt_pte_type_cb_t)(uint64_t, uint_t);
+/* Given the PFN of a child table, emit a PTE that references it */
+typedef uint64_t (*gipt_pte_map_cb_t)(uint64_t);
+
+struct gipt_cbs {
+	gipt_pte_type_cb_t	giptc_pte_type;
+	gipt_pte_map_cb_t	giptc_pte_map;
+};
+
+struct gipt_map {
+	kmutex_t	giptm_lock;
+	gipt_t		*giptm_root;
+	list_t		*giptm_hash;
+	struct gipt_cbs	giptm_cbs;
+	size_t		giptm_table_cnt;
+	uint_t		giptm_levels;
+};
+typedef struct gipt_map gipt_map_t;
+
+#define	GIPT_HASH_SIZE_DEFAULT	0x2000
+#define	GIPT_MAX_LEVELS	4
+
+#define	GIPT_VA2IDX(pt, va)			\
+	(((va) - (pt)->gipt_vaddr) >>		\
+	gipt_level_shift[(pt)->gipt_level])
+
+#define	GIPT_VA2PTE(pt, va)	((pt)->gipt_kva[GIPT_VA2IDX(pt, va)])
+#define	GIPT_VA2PTEP(pt, va)	(&(pt)->gipt_kva[GIPT_VA2IDX(pt, va)])
+
+extern const uint_t gipt_level_shift[GIPT_MAX_LEVELS+1];
+extern const uint64_t gipt_level_mask[GIPT_MAX_LEVELS+1];
+extern const uint64_t gipt_level_size[GIPT_MAX_LEVELS+1];
+extern const uint64_t gipt_level_count[GIPT_MAX_LEVELS+1];
+
+extern gipt_t *gipt_alloc(void);
+extern void gipt_free(gipt_t *);
+extern void gipt_map_init(gipt_map_t *, uint_t, uint_t,
+    const struct gipt_cbs *, gipt_t *);
+extern void gipt_map_fini(gipt_map_t *);
+extern gipt_t *gipt_map_lookup(gipt_map_t *, uint64_t, uint_t);
+extern gipt_t *gipt_map_lookup_deepest(gipt_map_t *, uint64_t);
+extern uint64_t gipt_map_next_page(gipt_map_t *, uint64_t, uint64_t,
+    gipt_t **);
+extern void gipt_map_insert(gipt_map_t *, gipt_t *);
+extern void gipt_map_remove(gipt_map_t *, gipt_t *);
+extern gipt_t *gipt_map_create_parents(gipt_map_t *, uint64_t, uint_t);
+extern void gipt_map_clean_parents(gipt_map_t *, gipt_t *);
+
+#endif /* _GIPT_H_ */
diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h
index 00009cf439..16ab708896 100644
--- a/usr/src/uts/i86pc/sys/hma.h
+++ b/usr/src/uts/i86pc/sys/hma.h
@@ -10,7 +10,7 @@
  */
 
 /*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
  */
 
 #ifndef _SYS_HMA_H
@@ -30,6 +30,40 @@
 extern "C" {
 #endif
 
+
+/*
+ * Register a hypervisor with HMA.  On success, a pointer to the opaque
+ * registration token will be returned, indicating that proper host setup has
+ * occurred for further hypervisor actions.
+ */
+typedef struct hma_reg hma_reg_t;
+extern hma_reg_t *hma_register(const char *);
+extern hma_reg_t *hma_register_exclusive(const char *);
+extern void hma_unregister(hma_reg_t *);
+
+/*
+ * Allocate or free a VPID for use with VMX.
+ *
+ * This must not be performed by a hypervisor until it has successfully
+ * registered via hma_register().
+ */
+extern uint16_t hma_vmx_vpid_alloc(void);
+extern void hma_vmx_vpid_free(uint16_t);
+
+/*
+ * On all active CPUs, perform a single-context INVEPT on the given EPTP.
+ */
+extern void hma_vmx_invept_allcpus(uintptr_t);
+
+struct hma_svm_asid {
+	uint64_t hsa_gen;
+	uint32_t hsa_asid;
+};
+typedef struct hma_svm_asid hma_svm_asid_t;
+
+extern void hma_svm_asid_init(hma_svm_asid_t *);
+extern uint8_t hma_svm_asid_update(hma_svm_asid_t *, boolean_t, boolean_t);
+
 /*
  * FPU related management. These functions provide a set of APIs to manage the
  * FPU state and switch between host and guest management of this state.
@@ -96,6 +130,9 @@ extern void hma_fpu_stop_guest(hma_fpu_t *);
 extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *);
 extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *);
 
+/* Perform HMA initialization steps during boot-up. */
+extern void hma_init(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h
index f4e38dec98..772f3112cb 100644
--- a/usr/src/uts/i86pc/sys/machcpuvar.h
+++ b/usr/src/uts/i86pc/sys/machcpuvar.h
@@ -81,6 +81,12 @@ struct xen_evt_data {
 	ulong_t		evt_affinity[sizeof (ulong_t) * 8]; /* service on cpu */
 };
 
+enum fast_syscall_state {
+	FSS_DISABLED		= 0,
+	FSS_ASYSC_ENABLED	= (1 << 0),
+	FSS_SEP_ENABLED		= (1 << 1)
+};
+
 struct kpti_frame {
 	uint64_t	kf_lower_redzone;
 
@@ -214,6 +220,8 @@ struct	machcpu {
 	uint16_t mcpu_idle_type;	/* CPU next idle type */
 	uint16_t max_cstates;		/* supported max cstates */
 
+	enum fast_syscall_state	mcpu_fast_syscall_state;
+
 	struct cpu_ucode_info	*mcpu_ucode_info;
 
 	void			*mcpu_pm_mach_state;
diff --git a/usr/src/uts/i86pc/sys/machparam.h b/usr/src/uts/i86pc/sys/machparam.h
index 51d7559483..f79b582df4 100644
--- a/usr/src/uts/i86pc/sys/machparam.h
+++ b/usr/src/uts/i86pc/sys/machparam.h
@@ -31,14 +31,15 @@
 #ifndef _SYS_MACHPARAM_H
 #define	_SYS_MACHPARAM_H
 
-#if !defined(_ASM)
+#ifndef _ASM
+
 #include <sys/types.h>
 
 #if defined(__xpv)
 #include <sys/xpv_impl.h>
 #endif
 
-#endif
+#endif /* !_ASM */
 
 #ifdef	__cplusplus
 extern "C" {
@@ -54,17 +55,12 @@ extern "C" {
  * Machine dependent parameters and limits.
  */
 
-#if defined(__amd64)
 /*
  * If NCPU grows beyond 256, sizing for the x86 comm page will require
  * adjustment.
  */
 #define	NCPU	256
 #define	NCPU_LOG2	8
-#elif defined(__i386)
-#define	NCPU	32
-#define	NCPU_LOG2	5
-#endif
 
 /* NCPU_P2 is NCPU rounded to a power of 2 */
 #define	NCPU_P2	(1 << NCPU_LOG2)
@@ -116,11 +112,7 @@ extern "C" {
 /*
  * DEFAULT KERNEL THREAD stack size (in pages).
  */
-#if defined(__amd64)
 #define	DEFAULTSTKSZ_NPGS	5
-#elif defined(__i386)
-#define	DEFAULTSTKSZ_NPGS	3
-#endif
 
 #if !defined(_ASM)
 #define	DEFAULTSTKSZ	(DEFAULTSTKSZ_NPGS * PAGESIZE)
@@ -129,43 +121,42 @@ extern "C" {
 #endif	/* !_ASM */
 
 /*
- * KERNELBASE is the virtual address at which the kernel segments start in
- * all contexts.
- *
- * KERNELBASE is not fixed.  The value of KERNELBASE can change with
- * installed memory or on 32 bit systems the eprom variable 'eprom_kernelbase'.
- *
- * common/conf/param.c requires a compile time defined value for KERNELBASE.
- * This value is save in the variable _kernelbase.  _kernelbase may then be
- * modified with to a different value in i86pc/os/startup.c.
- *
- * Most code should be using kernelbase, which resolves to a reference to
- * _kernelbase.
+ * During intial boot we limit heap to the top 4Gig.
  */
-#define	KERNEL_TEXT_amd64	UINT64_C(0xfffffffffb800000)
-
-#ifdef __i386
-
-#define	KERNEL_TEXT_i386	ADDRESS_C(0xfe800000)
+#define	BOOT_KERNELHEAP_BASE	ADDRESS_C(0xffffffff00000000)
 
 /*
- * We don't use HYPERVISOR_VIRT_START, as we need both the PAE and non-PAE
- * versions in our code. We always compile based on the lower PAE address.
+ * VMWare works best if we don't use the top 64Meg of memory for amd64.
+ * Set KERNEL_TEXT to top_o_memory - 64Meg - 8 Meg for 8Meg of nucleus pages.
  */
-#define	KERNEL_TEXT_i386_xpv	\
-	(HYPERVISOR_VIRT_START_PAE - 3 * ADDRESS_C(0x400000))
-
-#endif /* __i386 */
+#define	PROMSTART	ADDRESS_C(0xffc00000)
 
-#if defined(__amd64)
+/*
+ * Virtual address range available to the debugger
+ */
+#define	SEGDEBUGBASE	ADDRESS_C(0xffffffffff800000)
+#define	SEGDEBUGSIZE	ADDRESS_C(0x400000)
 
-#define	KERNELBASE	ADDRESS_C(0xfffffd8000000000)
+#define	KERNEL_TEXT	UINT64_C(0xfffffffffb800000)
 
 /*
- * Size of the unmapped "red zone" at the very bottom of the kernel's
- * address space.  Corresponds to 1 slot in the toplevel pagetable.
+ * Reserve pages just below KERNEL_TEXT for the GDT, IDT, LDT, TSS and debug
+ * info.
+ *
+ * For now, DEBUG_INFO_VA must be first in this list for "xm" initiated dumps
+ * of solaris domUs to be usable with mdb. Relying on a fixed VA is not viable
+ * long term, but it's the best we've got for now.
  */
-#define	KERNEL_REDZONE_SIZE   ((uintptr_t)1 << 39)
+#if !defined(_ASM)
+#define	DEBUG_INFO_VA	(KERNEL_TEXT - MMU_PAGESIZE)
+#define	GDT_VA		(DEBUG_INFO_VA - MMU_PAGESIZE)
+#define	IDT_VA		(GDT_VA - MMU_PAGESIZE)
+#define	LDT_VA		(IDT_VA - (16 * MMU_PAGESIZE))
+#define	KTSS_VA		(LDT_VA - MMU_PAGESIZE)
+#define	DFTSS_VA	(KTSS_VA - MMU_PAGESIZE)
+#define	MISC_VA_BASE	(DFTSS_VA)
+#define	MISC_VA_SIZE	(KERNEL_TEXT - MISC_VA_BASE)
+#endif /* !_ASM */
 
 /*
  * Base of 'core' heap area, which is used for kernel and module text/data
@@ -174,52 +165,47 @@ extern "C" {
 #define	COREHEAP_BASE	ADDRESS_C(0xffffffffc0000000)
 
 /*
- * Beginning of the segkpm window. A lower value than this is used if
- * physical addresses exceed 1TB. See i86pc/os/startup.c
- */
-#define	SEGKPM_BASE	ADDRESS_C(0xfffffe0000000000)
-
-/*
  * This is valloc_base, above seg_kpm, but below everything else.
  * A lower value than this may be used if SEGKPM_BASE is adjusted.
  * See i86pc/os/startup.c
  */
-#define	VALLOC_BASE	ADDRESS_C(0xffffff0000000000)
+#define	VALLOC_BASE	ADDRESS_C(0xfffffe0000000000)
+
+#define	SEGZIOMINSIZE	(400L * 1024 * 1024L)			/* 400M */
+#define	SEGVMMMINSIZE	(4096L * 1024 * 1024L)			/* 4G */
 
-/*
- * default and boundary sizes for segkp
- */
 #define	SEGKPDEFSIZE	(2L * 1024L * 1024L * 1024L)		/*   2G */
 #define	SEGKPMAXSIZE	(8L * 1024L * 1024L * 1024L)		/*   8G */
 #define	SEGKPMINSIZE	(200L * 1024 * 1024L)			/* 200M */
 
-/*
- * minimum size for segzio
- */
-#define	SEGZIOMINSIZE	(400L * 1024 * 1024L)			/* 400M */
-
-/*
- * During intial boot we limit heap to the top 4Gig.
- */
-#define	BOOT_KERNELHEAP_BASE	ADDRESS_C(0xffffffff00000000)
+#define	SEGKPM_BASE	ADDRESS_C(0xfffffd0000000000)
 
 /*
- * VMWare works best if we don't use the top 64Meg of memory for amd64.
- * Set KERNEL_TEXT to top_o_memory - 64Meg - 8 Meg for 8Meg of nucleus pages.
+ * KERNELBASE is the virtual address at which the kernel segments start in
+ * all contexts.
+ *
+ * KERNELBASE is not fixed.  The value of KERNELBASE can change with
+ * installed memory size.
+ *
+ * common/conf/param.c requires a compile time defined value for KERNELBASE.
+ * This value is save in the variable _kernelbase.  _kernelbase may then be
+ * modified with to a different value in i86pc/os/startup.c.
+ *
+ * Most code should be using kernelbase, which resolves to a reference to
+ * _kernelbase.
  */
-#define	PROMSTART	ADDRESS_C(0xffc00000)
-#define	KERNEL_TEXT	KERNEL_TEXT_amd64
+#define	KERNELBASE	ADDRESS_C(0xfffffc8000000000)
 
 /*
- * Virtual address range available to the debugger
+ * Size of the unmapped "red zone" at the very bottom of the kernel's
+ * address space.  Corresponds to 1 slot in the toplevel pagetable.
  */
-#define	SEGDEBUGBASE	ADDRESS_C(0xffffffffff800000)
-#define	SEGDEBUGSIZE	ADDRESS_C(0x400000)
+#define	KERNEL_REDZONE_SIZE   ((uintptr_t)1 << 39)
 
 /*
  * Define upper limit on user address space
  *
- * In amd64, the upper limit on a 64-bit user address space is 1 large page
+ * The upper limit on a 64-bit user address space is 1 large page
  * (2MB) below kernelbase.  The upper limit for a 32-bit user address space
  * is 1 small page (4KB) below the top of the 32-bit range.  The 64-bit
  * limit give dtrace the red zone it needs below kernelbase.  The 32-bit
@@ -232,7 +218,7 @@ extern "C" {
 #if defined(__xpv)
 #define	USERLIMIT	ADDRESS_C(0x00007fffffe00000)
 #else
-#define	USERLIMIT	ADDRESS_C(0xfffffd7fffe00000)
+#define	USERLIMIT	ADDRESS_C(0xfffffc7fffe00000)
 #endif
 
 #ifdef bug_5074717_is_fixed
@@ -241,76 +227,6 @@ extern "C" {
 #define	USERLIMIT32	ADDRESS_C(0xfefff000)
 #endif
 
-#elif defined(__i386)
-
-#ifdef DEBUG
-#define	KERNELBASE	ADDRESS_C(0xc8000000)
-#else
-#define	KERNELBASE	ADDRESS_C(0xd4000000)
-#endif
-
-#define	KERNELBASE_MAX	ADDRESS_C(0xe0000000)
-
-/*
- * The i386 ABI requires that the user address space be at least 3Gb
- * in size.  KERNELBASE_ABI_MIN is used as the default KERNELBASE for
- * physical memory configurations > 4gb.
- */
-#define	KERNELBASE_ABI_MIN	ADDRESS_C(0xc0000000)
-
-/*
- * Size of the unmapped "red zone" at the very bottom of the kernel's
- * address space.  Since segmap start immediately above the red zone, this
- * needs to be MAXBSIZE aligned.
- */
-#define	KERNEL_REDZONE_SIZE   MAXBSIZE
-
-/*
- * This is the last 4MB of the 4G address space. Some psm modules
- * need this region of virtual address space mapped 1-1
- * The top 64MB of the address space is reserved for the hypervisor.
- */
-#define	PROMSTART	ADDRESS_C(0xffc00000)
-#ifdef __xpv
-#define	KERNEL_TEXT	KERNEL_TEXT_i386_xpv
-#else
-#define	KERNEL_TEXT	KERNEL_TEXT_i386
-#endif
-
-/*
- * Virtual address range available to the debugger
- * We place it just above the kernel text (4M) and kernel data (4M).
- */
-#define	SEGDEBUGBASE	(KERNEL_TEXT + ADDRESS_C(0x800000))
-#define	SEGDEBUGSIZE	ADDRESS_C(0x400000)
-
-/*
- * Define upper limit on user address space
- */
-#define	USERLIMIT	KERNELBASE
-#define	USERLIMIT32	USERLIMIT
-
-#endif	/* __i386 */
-
-/*
- * Reserve pages just below KERNEL_TEXT for the GDT, IDT, LDT, TSS and debug
- * info.
- *
- * For now, DEBUG_INFO_VA must be first in this list for "xm" initiated dumps
- * of solaris domUs to be usable with mdb. Relying on a fixed VA is not viable
- * long term, but it's the best we've got for now.
- */
-#if !defined(_ASM)
-#define	DEBUG_INFO_VA	(KERNEL_TEXT - MMU_PAGESIZE)
-#define	GDT_VA		(DEBUG_INFO_VA - MMU_PAGESIZE)
-#define	IDT_VA		(GDT_VA - MMU_PAGESIZE)
-#define	LDT_VA		(IDT_VA - (16 * MMU_PAGESIZE))
-#define	KTSS_VA		(LDT_VA - MMU_PAGESIZE)
-#define	DFTSS_VA	(KTSS_VA - MMU_PAGESIZE)
-#define	MISC_VA_BASE	(DFTSS_VA)
-#define	MISC_VA_SIZE	(KERNEL_TEXT - MISC_VA_BASE)
-#endif /* !_ASM */
-
 #if !defined(_ASM) && !defined(_KMDB)
 extern uintptr_t kernelbase, segmap_start, segmapsize;
 #endif
diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h
index 7409c5af4a..5f286ca4c6 100644
--- a/usr/src/uts/i86pc/sys/machsystm.h
+++ b/usr/src/uts/i86pc/sys/machsystm.h
@@ -25,6 +25,7 @@
 /*
  * Copyright (c) 2010, Intel Corporation.
  * All rights reserved.
+ * Copyright 2018 Joyent, Inc.
  */
 
 #ifndef _SYS_MACHSYSTM_H
@@ -231,6 +232,7 @@ extern page_t *page_get_high_mfn(mfn_t);
 #endif
 
 extern hrtime_t tsc_gethrtime_tick_delta(void);
+extern hrtime_t tsc_gethrtime_params(uint64_t *, uint32_t *, uint8_t *);
 
 #endif /* _KERNEL */
 
diff --git a/usr/src/uts/i86pc/sys/ppt_dev.h b/usr/src/uts/i86pc/sys/ppt_dev.h
new file mode 100644
index 0000000000..e25f941f14
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/ppt_dev.h
@@ -0,0 +1,56 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc
+ */
+
+#ifndef _PPT_DEV_H
+#define	_PPT_DEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	PPT_IOC			(('P' << 16)|('T' << 8))
+
+#define	PPT_CFG_READ		(PPT_IOC | 0x01)
+#define	PPT_CFG_WRITE		(PPT_IOC | 0x02)
+#define	PPT_BAR_QUERY		(PPT_IOC | 0x03)
+#define	PPT_BAR_READ		(PPT_IOC | 0x04)
+#define	PPT_BAR_WRITE		(PPT_IOC | 0x05)
+
+#define	PPT_MAXNAMELEN	32
+
+struct ppt_cfg_io {
+	uint64_t pci_off;
+	uint32_t pci_width;
+	uint32_t pci_data;
+};
+struct ppt_bar_io {
+	uint32_t pbi_bar;
+	uint32_t pbi_off;
+	uint32_t pbi_width;
+	uint32_t pbi_data;
+};
+
+struct ppt_bar_query {
+	uint32_t pbq_baridx;
+	uint32_t pbq_type;
+	uint64_t pbq_base;
+	uint64_t pbq_size;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PPT_DEV_H */
diff --git a/usr/src/uts/i86pc/sys/viona_io.h b/usr/src/uts/i86pc/sys/viona_io.h
new file mode 100644
index 0000000000..46cc72eb06
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/viona_io.h
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef	_VIONA_IO_H_
+#define	_VIONA_IO_H_
+
+#define	VNA_IOC			(('V' << 16)|('C' << 8))
+#define	VNA_IOC_CREATE		(VNA_IOC | 0x01)
+#define	VNA_IOC_DELETE		(VNA_IOC | 0x02)
+
+#define	VNA_IOC_RING_INIT	(VNA_IOC | 0x10)
+#define	VNA_IOC_RING_RESET	(VNA_IOC | 0x11)
+#define	VNA_IOC_RING_KICK	(VNA_IOC | 0x12)
+#define	VNA_IOC_RING_SET_MSI	(VNA_IOC | 0x13)
+#define	VNA_IOC_RING_INTR_CLR	(VNA_IOC | 0x14)
+
+#define	VNA_IOC_INTR_POLL	(VNA_IOC | 0x20)
+#define	VNA_IOC_SET_FEATURES	(VNA_IOC | 0x21)
+#define	VNA_IOC_GET_FEATURES	(VNA_IOC | 0x22)
+#define	VNA_IOC_SET_NOTIFY_IOP	(VNA_IOC | 0x23)
+
+typedef struct vioc_create {
+	datalink_id_t	c_linkid;
+	int		c_vmfd;
+} vioc_create_t;
+
+typedef struct vioc_ring_init {
+	uint16_t	ri_index;
+	uint16_t	ri_qsize;
+	uint64_t	ri_qaddr;
+} vioc_ring_init_t;
+
+typedef struct vioc_ring_msi {
+	uint16_t	rm_index;
+	uint64_t	rm_addr;
+	uint64_t	rm_msg;
+} vioc_ring_msi_t;
+
+enum viona_vq_id {
+	VIONA_VQ_RX = 0,
+	VIONA_VQ_TX = 1,
+	VIONA_VQ_MAX = 2
+};
+
+typedef struct vioc_intr_poll {
+	uint32_t	vip_status[VIONA_VQ_MAX];
+} vioc_intr_poll_t;
+
+
+#endif	/* _VIONA_IO_H_ */
diff --git a/usr/src/uts/i86pc/sys/vm_machparam.h b/usr/src/uts/i86pc/sys/vm_machparam.h
index 90a5245217..fde81e59ed 100644
--- a/usr/src/uts/i86pc/sys/vm_machparam.h
+++ b/usr/src/uts/i86pc/sys/vm_machparam.h
@@ -23,6 +23,7 @@
 /*
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2017 Joyent, Inc.
  */
 
 #ifndef _SYS_VM_MACHPARAM_H
@@ -133,7 +134,8 @@ extern "C" {
 
 /*
  * The maximum value for handspreadpages which is the the distance
- * between the two clock hands in pages.
+ * between the two clock hands in pages. This is only used when the page
+ * scanner is first started.
  */
 #define	MAXHANDSPREADPAGES	((64 * 1024 * 1024) / PAGESIZE)
 
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
new file mode 100644
index 0000000000..ac8f14b042
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -0,0 +1,748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _VMM_H_
+#define	_VMM_H_
+
+#include <sys/sdt.h>
+#include <x86/segments.h>
+
+#ifdef _KERNEL
+SDT_PROVIDER_DECLARE(vmm);
+#endif
+
+enum vm_suspend_how {
+	VM_SUSPEND_NONE,
+	VM_SUSPEND_RESET,
+	VM_SUSPEND_POWEROFF,
+	VM_SUSPEND_HALT,
+	VM_SUSPEND_TRIPLEFAULT,
+	VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+	VM_REG_GUEST_RAX,
+	VM_REG_GUEST_RBX,
+	VM_REG_GUEST_RCX,
+	VM_REG_GUEST_RDX,
+	VM_REG_GUEST_RSI,
+	VM_REG_GUEST_RDI,
+	VM_REG_GUEST_RBP,
+	VM_REG_GUEST_R8,
+	VM_REG_GUEST_R9,
+	VM_REG_GUEST_R10,
+	VM_REG_GUEST_R11,
+	VM_REG_GUEST_R12,
+	VM_REG_GUEST_R13,
+	VM_REG_GUEST_R14,
+	VM_REG_GUEST_R15,
+	VM_REG_GUEST_CR0,
+	VM_REG_GUEST_CR3,
+	VM_REG_GUEST_CR4,
+	VM_REG_GUEST_DR7,
+	VM_REG_GUEST_RSP,
+	VM_REG_GUEST_RIP,
+	VM_REG_GUEST_RFLAGS,
+	VM_REG_GUEST_ES,
+	VM_REG_GUEST_CS,
+	VM_REG_GUEST_SS,
+	VM_REG_GUEST_DS,
+	VM_REG_GUEST_FS,
+	VM_REG_GUEST_GS,
+	VM_REG_GUEST_LDTR,
+	VM_REG_GUEST_TR,
+	VM_REG_GUEST_IDTR,
+	VM_REG_GUEST_GDTR,
+	VM_REG_GUEST_EFER,
+	VM_REG_GUEST_CR2,
+	VM_REG_GUEST_PDPTE0,
+	VM_REG_GUEST_PDPTE1,
+	VM_REG_GUEST_PDPTE2,
+	VM_REG_GUEST_PDPTE3,
+	VM_REG_GUEST_INTR_SHADOW,
+	VM_REG_GUEST_DR0,
+	VM_REG_GUEST_DR1,
+	VM_REG_GUEST_DR2,
+	VM_REG_GUEST_DR3,
+	VM_REG_GUEST_DR6,
+	VM_REG_LAST
+};
+
+enum x2apic_state {
+	X2APIC_DISABLED,
+	X2APIC_ENABLED,
+	X2APIC_STATE_LAST
+};
+
+#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
+#define	VM_INTINFO_DEL_ERRCODE	0x800
+#define	VM_INTINFO_RSVD		0x7ffff000
+#define	VM_INTINFO_VALID	0x80000000
+#define	VM_INTINFO_TYPE		0x700
+#define	VM_INTINFO_HWINTR	(0 << 8)
+#define	VM_INTINFO_NMI		(2 << 8)
+#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
+#define	VM_INTINFO_SWINTR	(4 << 8)
+
+
+#define	VM_MAX_NAMELEN	32
+
+#ifdef _KERNEL
+
+struct vm;
+struct vm_exception;
+struct seg_desc;
+struct vm_exit;
+struct vm_run;
+struct vhpet;
+struct vioapic;
+struct vlapic;
+struct vmspace;
+struct vm_object;
+struct vm_guest_paging;
+struct pmap;
+
+struct vm_eventinfo {
+	u_int	*rptr;		/* runblock cookie */
+	int	*sptr;		/* suspend cookie */
+	int	*iptr;		/* reqidle cookie */
+};
+
+typedef int	(*vmm_init_func_t)(int ipinum);
+typedef int	(*vmm_cleanup_func_t)(void);
+typedef void	(*vmm_resume_func_t)(void);
+typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
+typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+		    struct pmap *pmap, struct vm_eventinfo *info);
+typedef void	(*vmi_cleanup_func_t)(void *vmi);
+typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
+				      uint64_t *retval);
+typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
+				      uint64_t val);
+typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
+				  struct seg_desc *desc);
+typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
+				  struct seg_desc *desc);
+typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
+typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+#ifndef __FreeBSD__
+typedef void	(*vmi_savectx)(void *vmi, int vcpu);
+typedef void	(*vmi_restorectx)(void *vmi, int vcpu);
+#endif
+
+struct vmm_ops {
+	vmm_init_func_t		init;		/* module wide initialization */
+	vmm_cleanup_func_t	cleanup;
+	vmm_resume_func_t	resume;
+
+	vmi_init_func_t		vminit;		/* vm-specific initialization */
+	vmi_run_func_t		vmrun;
+	vmi_cleanup_func_t	vmcleanup;
+	vmi_get_register_t	vmgetreg;
+	vmi_set_register_t	vmsetreg;
+	vmi_get_desc_t		vmgetdesc;
+	vmi_set_desc_t		vmsetdesc;
+	vmi_get_cap_t		vmgetcap;
+	vmi_set_cap_t		vmsetcap;
+	vmi_vmspace_alloc	vmspace_alloc;
+	vmi_vmspace_free	vmspace_free;
+	vmi_vlapic_init		vlapic_init;
+	vmi_vlapic_cleanup	vlapic_cleanup;
+
+#ifndef __FreeBSD__
+	vmi_savectx		vmsavectx;
+	vmi_restorectx		vmrestorectx;
+#endif
+};
+
+extern struct vmm_ops vmm_ops_intel;
+extern struct vmm_ops vmm_ops_amd;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+    size_t len, int prot, int flags);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+#ifdef __FreeBSD__
+int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
+int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
+#else
+int vm_assign_pptdev(struct vm *vm, int pptfd);
+int vm_unassign_pptdev(struct vm *vm, int pptfd);
+#endif /* __FreeBSD__ */
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+    struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
+    int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
+
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
+		    struct seg_desc *ret_desc);
+int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
+		    struct seg_desc *desc);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+int vm_inject_nmi(struct vm *vm, int vcpu);
+int vm_nmi_pending(struct vm *vm, int vcpuid);
+void vm_nmi_clear(struct vm *vm, int vcpuid);
+int vm_inject_extint(struct vm *vm, int vcpu);
+int vm_extint_pending(struct vm *vm, int vcpuid);
+void vm_extint_clear(struct vm *vm, int vcpuid);
+struct vlapic *vm_lapic(struct vm *vm, int cpu);
+struct vioapic *vm_ioapic(struct vm *vm);
+struct vhpet *vm_hpet(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
+int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
+int vm_apicid2vcpuid(struct vm *vm, int apicid);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_suspend_cpu(struct vm *vm, int vcpu);
+int vm_resume_cpu(struct vm *vm, int vcpu);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+
+#ifdef _SYS__CPUSET_H_
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif	/* _SYS__CPUSET_H_ */
+
+static __inline int
+vcpu_runblocked(struct vm_eventinfo *info)
+{
+
+	return (*info->rptr != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+	return (*info->sptr);
+}
+
+static __inline int
+vcpu_reqidle(struct vm_eventinfo *info)
+{
+
+	return (*info->iptr);
+}
+
+int vcpu_debugged(struct vm *vm, int vcpuid);
+
+/*
+ * Return 1 if device indicated by bus/slot/func is supposed to be a
+ * pci passthrough device.
+ *
+ * Return 0 otherwise.
+ */
+int vmm_is_pptdev(int bus, int slot, int func);
+
+void *vm_iommu_domain(struct vm *vm);
+
+enum vcpu_state {
+	VCPU_IDLE,
+	VCPU_FROZEN,
+	VCPU_RUNNING,
+	VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+    bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+void vcpu_block_run(struct vm *, int);
+void vcpu_unblock_run(struct vm *, int);
+
+#ifndef __FreeBSD__
+uint64_t vcpu_tsc_offset(struct vm *vm, int vcpuid);
+#endif
+
+static __inline int
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_THREAD_H
+static __inline int
+vcpu_should_yield(struct vm *vm, int vcpu)
+{
+
+	if (curthread->t_astflag)
+		return (1);
+	else if (CPU->cpu_runrun)
+		return (1);
+	else
+		return (0);
+}
+#endif /* _SYS_THREAD_H */
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+struct vmspace *vm_get_vmspace(struct vm *vm);
+struct vatpic *vm_atpic(struct vm *vm);
+struct vatpit *vm_atpit(struct vm *vm);
+struct vpmtmr *vm_pmtmr(struct vm *vm);
+struct vrtc *vm_rtc(struct vm *vm);
+
+/*
+ * Inject exception 'vector' into the guest vcpu. This function returns 0 on
+ * success and non-zero on failure.
+ *
+ * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
+ * this function directly because they enforce the trap-like or fault-like
+ * behavior of an exception.
+ *
+ * This function should only be called in the context of the thread that is
+ * executing this vcpu.
+ */
+int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
+    uint32_t errcode, int restart_instruction);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+	uint64_t	gpa;
+	size_t		len;
+	void		*hva;
+	void		*cookie;
+};
+
+/*
+ * Set up 'copyinfo[]' to copy to/from guest linear address space starting
+ * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
+ * a copyin or PROT_WRITE for a copyout. 
+ *
+ * retval	is_fault	Interpretation
+ *   0		   0		Success
+ *   0		   1		An exception was injected into the guest
+ * EFAULT	  N/A		Unrecoverable error
+ *
+ * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
+ * the return value is 0. The 'copyinfo[]' resources should be freed by calling
+ * 'vm_copy_teardown()' after the copy is done.
+ */
+int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+    int num_copyinfo, int *is_fault);
+void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    int num_copyinfo);
+void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    void *kaddr, size_t len);
+void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+    struct vm_copyinfo *copyinfo, size_t len);
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+#endif	/* KERNEL */
+
+#define	VM_MAXCPU	32			/* maximum virtual cpus */
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+	VM_CAP_HALT_EXIT,
+	VM_CAP_MTRAP_EXIT,
+	VM_CAP_PAUSE_EXIT,
+	VM_CAP_UNRESTRICTED_GUEST,
+	VM_CAP_ENABLE_INVPCID,
+	VM_CAP_MAX
+};
+
+enum vm_intr_trigger {
+	EDGE_TRIGGER,
+	LEVEL_TRIGGER
+};
+	
+/*
+ * The 'access' field has the format specified in Table 21-2 of the Intel
+ * Architecture Manual vol 3b.
+ *
+ * XXX The contents of the 'access' field are architecturally defined except
+ * bit 16 - Segment Unusable.
+ */
+struct seg_desc {
+	uint64_t	base;
+	uint32_t	limit;
+	uint32_t	access;
+};
+#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
+#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
+#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
+#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
+#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
+#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
+
+enum vm_cpu_mode {
+	CPU_MODE_REAL,
+	CPU_MODE_PROTECTED,
+	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
+	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+	PAGING_MODE_FLAT,
+	PAGING_MODE_32,
+	PAGING_MODE_PAE,
+	PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+	uint64_t	cr3;
+	int		cpl;
+	enum vm_cpu_mode cpu_mode;
+	enum vm_paging_mode paging_mode;
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+	uint8_t		op_byte;	/* actual opcode byte */
+	uint8_t		op_type;	/* type of operation (e.g. MOV) */
+	uint16_t	op_flags;
+};
+
+#define	VIE_INST_SIZE	15
+struct vie {
+	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
+	uint8_t		num_valid;		/* size of the instruction */
+	uint8_t		num_processed;
+
+	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
+	uint8_t		rex_w:1,		/* REX prefix */
+			rex_r:1,
+			rex_x:1,
+			rex_b:1,
+			rex_present:1,
+			repz_present:1,		/* REP/REPE/REPZ prefix */
+			repnz_present:1,	/* REPNE/REPNZ prefix */
+			opsize_override:1,	/* Operand size override */
+			addrsize_override:1,	/* Address size override */
+			segment_override:1;	/* Segment override */
+
+	uint8_t		mod:2,			/* ModRM byte */
+			reg:4,
+			rm:4;
+
+	uint8_t		ss:2,			/* SIB byte */
+			index:4,
+			base:4;
+
+	uint8_t		disp_bytes;
+	uint8_t		imm_bytes;
+
+	uint8_t		scale;
+	int		base_register;		/* VM_REG_GUEST_xyz */
+	int		index_register;		/* VM_REG_GUEST_xyz */
+	int		segment_register;	/* VM_REG_GUEST_xyz */
+
+	int64_t		displacement;		/* optional addr displacement */
+	int64_t		immediate;		/* optional immediate operand */
+
+	uint8_t		decoded;	/* set to 1 if successfully decoded */
+
+	struct vie_op	op;			/* opcode description */
+};
+
+enum vm_exitcode {
+	VM_EXITCODE_INOUT,
+	VM_EXITCODE_VMX,
+	VM_EXITCODE_BOGUS,
+	VM_EXITCODE_RDMSR,
+	VM_EXITCODE_WRMSR,
+	VM_EXITCODE_HLT,
+	VM_EXITCODE_MTRAP,
+	VM_EXITCODE_PAUSE,
+	VM_EXITCODE_PAGING,
+	VM_EXITCODE_INST_EMUL,
+	VM_EXITCODE_SPINUP_AP,
+	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
+	VM_EXITCODE_RUNBLOCK,
+	VM_EXITCODE_IOAPIC_EOI,
+	VM_EXITCODE_SUSPENDED,
+	VM_EXITCODE_INOUT_STR,
+	VM_EXITCODE_TASK_SWITCH,
+	VM_EXITCODE_MONITOR,
+	VM_EXITCODE_MWAIT,
+	VM_EXITCODE_SVM,
+	VM_EXITCODE_REQIDLE,
+	VM_EXITCODE_DEBUG,
+	VM_EXITCODE_VMINSN,
+#ifndef	__FreeBSD__
+	VM_EXITCODE_HT,
+#endif
+	VM_EXITCODE_MAX
+};
+
+struct vm_inout {
+	uint16_t	bytes:3;	/* 1 or 2 or 4 */
+	uint16_t	in:1;
+	uint16_t	string:1;
+	uint16_t	rep:1;
+	uint16_t	port;
+	uint32_t	eax;		/* valid for out */
+};
+
+struct vm_inout_str {
+	struct vm_inout	inout;		/* must be the first element */
+	struct vm_guest_paging paging;
+	uint64_t	rflags;
+	uint64_t	cr0;
+	uint64_t	index;
+	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
+	int		addrsize;
+	enum vm_reg_name seg_name;
+	struct seg_desc seg_desc;
+};
+
+enum task_switch_reason {
+	TSR_CALL,
+	TSR_IRET,
+	TSR_JMP,
+	TSR_IDT_GATE,	/* task gate in IDT */
+};
+
+struct vm_task_switch {
+	uint16_t	tsssel;		/* new TSS selector */
+	int		ext;		/* task switch due to external event */
+	uint32_t	errcode;
+	int		errcode_valid;	/* push 'errcode' on the new stack */
+	enum task_switch_reason reason;
+	struct vm_guest_paging paging;
+};
+
+struct vm_exit {
+	enum vm_exitcode	exitcode;
+	int			inst_length;	/* 0 means unknown */
+	uint64_t		rip;
+	union {
+		struct vm_inout	inout;
+		struct vm_inout_str inout_str;
+		struct {
+			uint64_t	gpa;
+			int		fault_type;
+		} paging;
+		struct {
+			uint64_t	gpa;
+			uint64_t	gla;
+			uint64_t	cs_base;
+			int		cs_d;		/* CS.D */
+			struct vm_guest_paging paging;
+			struct vie	vie;
+		} inst_emul;
+		/*
+		 * VMX specific payload. Used when there is no "better"
+		 * exitcode to represent the VM-exit.
+		 */
+		struct {
+			int		status;		/* vmx inst status */
+			/*
+			 * 'exit_reason' and 'exit_qualification' are valid
+			 * only if 'status' is zero.
+			 */
+			uint32_t	exit_reason;
+			uint64_t	exit_qualification;
+			/*
+			 * 'inst_error' and 'inst_type' are valid
+			 * only if 'status' is non-zero.
+			 */
+			int		inst_type;
+			int		inst_error;
+		} vmx;
+		/*
+		 * SVM specific payload.
+		 */
+		struct {
+			uint64_t	exitcode;
+			uint64_t	exitinfo1;
+			uint64_t	exitinfo2;
+		} svm;
+		struct {
+			uint32_t	code;		/* ecx value */
+			uint64_t	wval;
+		} msr;
+		struct {
+			int		vcpu;
+			uint64_t	rip;
+		} spinup_ap;
+		struct {
+			uint64_t	rflags;
+			uint64_t	intr_status;
+		} hlt;
+		struct {
+			int		vector;
+		} ioapic_eoi;
+		struct {
+			enum vm_suspend_how how;
+		} suspended;
+		struct vm_task_switch task_switch;
+	} u;
+};
+
+/* APIs to inject faults into the guest */
+void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
+    int errcode);
+
+static __inline void
+vm_inject_ud(void *vm, int vcpuid)
+{
+	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
+}
+
+static __inline void
+vm_inject_gp(void *vm, int vcpuid)
+{
+	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
+}
+
+static __inline void
+vm_inject_ac(void *vm, int vcpuid, int errcode)
+{
+	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
+}
+
+static __inline void
+vm_inject_ss(void *vm, int vcpuid, int errcode)
+{
+	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
+}
+
+void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
+
+int vm_restart_instruction(void *vm, int vcpuid);
+
+#ifndef	__FreeBSD__
+#ifdef	_KERNEL
+
+void vmm_sol_glue_init(void);
+void vmm_sol_glue_cleanup(void);
+
+int vmm_mod_load(void);
+int vmm_mod_unload(void);
+
+void vmm_call_trap(uint64_t);
+
+/*
+ * Because of tangled headers, these are mirrored by vmm_drv.h to present the
+ * interface to driver consumers.
+ */
+typedef int (*vmm_rmem_cb_t)(void *, uintptr_t, uint_t, uint64_t *);
+typedef int (*vmm_wmem_cb_t)(void *, uintptr_t, uint_t, uint64_t);
+
+int vm_ioport_hook(struct vm *, uint_t, vmm_rmem_cb_t, vmm_wmem_cb_t, void *,
+    void **);
+void vm_ioport_unhook(struct vm *, void **);
+int vm_ioport_handle_hook(struct vm *, int, bool, int, int, uint32_t *);
+
+#endif /* _KERNEL */
+#endif /* __FreeBSD */
+
+#endif	/* _VMM_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
new file mode 100644
index 0000000000..dd87dcb0a6
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -0,0 +1,520 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef	_VMM_DEV_H_
+#define	_VMM_DEV_H_
+
+#include <machine/vmm.h>
+
+struct vm_memmap {
+	vm_paddr_t	gpa;
+	int		segid;		/* memory segment */
+	vm_ooffset_t	segoff;		/* offset into memory segment */
+	size_t		len;		/* mmap length */
+	int		prot;		/* RWX */
+	int		flags;
+};
+#define	VM_MEMMAP_F_WIRED	0x01
+#define	VM_MEMMAP_F_IOMMU	0x02
+
+#define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+	int		segid;
+	size_t		len;
+	char		name[SPECNAMELEN + 1];
+};
+
+struct vm_register {
+	int		cpuid;
+	int		regnum;		/* enum vm_reg_name */
+	uint64_t	regval;
+};
+
+struct vm_seg_desc {			/* data or code segment */
+	int		cpuid;
+	int		regnum;		/* enum vm_reg_name */
+	struct seg_desc desc;
+};
+
+struct vm_register_set {
+	int		cpuid;
+	unsigned int	count;
+	const int	*regnums;	/* enum vm_reg_name */
+	uint64_t	*regvals;
+};
+
+struct vm_run {
+	int		cpuid;
+	struct vm_exit	vm_exit;
+};
+
+struct vm_exception {
+	int		cpuid;
+	int		vector;
+	uint32_t	error_code;
+	int		error_code_valid;
+	int		restart_instruction;
+};
+
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
+struct vm_lapic_irq {
+	int		cpuid;
+	int		vector;
+};
+
+struct vm_ioapic_irq {
+	int		irq;
+};
+
+struct vm_isa_irq {
+	int		atpic_irq;
+	int		ioapic_irq;
+};
+
+struct vm_isa_irq_trigger {
+	int		atpic_irq;
+	enum vm_intr_trigger trigger;
+};
+
+struct vm_capability {
+	int		cpuid;
+	enum vm_cap_type captype;
+	int		capval;
+	int		allcpus;
+};
+
+#ifdef __FreeBSD__
+struct vm_pptdev {
+	int		bus;
+	int		slot;
+	int		func;
+};
+
+struct vm_pptdev_mmio {
+	int		bus;
+	int		slot;
+	int		func;
+	vm_paddr_t	gpa;
+	vm_paddr_t	hpa;
+	size_t		len;
+};
+
+struct vm_pptdev_msi {
+	int		vcpu;
+	int		bus;
+	int		slot;
+	int		func;
+	int		numvec;		/* 0 means disabled */
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
+struct vm_pptdev_msix {
+	int		vcpu;
+	int		bus;
+	int		slot;
+	int		func;
+	int		idx;
+	uint64_t	msg;
+	uint32_t	vector_control;
+	uint64_t	addr;
+};
+
+struct vm_pptdev_limits {
+	int		bus;
+	int		slot;
+	int		func;
+	int		msi_limit;
+	int		msix_limit;
+};
+#else /* __FreeBSD__ */
+struct vm_pptdev {
+	int		pptfd;
+};
+
+struct vm_pptdev_mmio {
+	int		pptfd;
+	vm_paddr_t	gpa;
+	vm_paddr_t	hpa;
+	size_t		len;
+};
+
+struct vm_pptdev_msi {
+	int		vcpu;
+	int		pptfd;
+	int		numvec;		/* 0 means disabled */
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
+struct vm_pptdev_msix {
+	int		vcpu;
+	int		pptfd;
+	int		idx;
+	uint64_t	msg;
+	uint32_t	vector_control;
+	uint64_t	addr;
+};
+
+struct vm_pptdev_limits {
+	int		pptfd;
+	int		msi_limit;
+	int		msix_limit;
+};
+#endif /* __FreeBSD__ */
+
+struct vm_nmi {
+	int		cpuid;
+};
+
+#ifdef __FreeBSD__
+#define	MAX_VM_STATS	64
+#else
+#define	MAX_VM_STATS	(64 + VM_MAXCPU)
+#endif
+
+struct vm_stats {
+	int		cpuid;				/* in */
+	int		num_entries;			/* out */
+	struct timeval	tv;
+	uint64_t	statbuf[MAX_VM_STATS];
+};
+
+struct vm_stat_desc {
+	int		index;				/* in */
+	char		desc[128];			/* out */
+};
+
+struct vm_x2apic {
+	int			cpuid;
+	enum x2apic_state	state;
+};
+
+struct vm_gpa_pte {
+	uint64_t	gpa;				/* in */
+	uint64_t	pte[4];				/* out */
+	int		ptenum;
+};
+
+struct vm_hpet_cap {
+	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
+};
+
+struct vm_suspend {
+	enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+	int		vcpuid;		/* inputs */
+	int 		prot;		/* PROT_READ or PROT_WRITE */
+	uint64_t	gla;
+	struct vm_guest_paging paging;
+	int		fault;		/* outputs */
+	uint64_t	gpa;
+};
+
+struct vm_activate_cpu {
+	int		vcpuid;
+};
+
+struct vm_cpuset {
+	int		which;
+	int		cpusetsize;
+#ifndef _KERNEL
+	cpuset_t	*cpus;
+#else
+	void		*cpus;
+#endif
+};
+#define	VM_ACTIVE_CPUS		0
+#define	VM_SUSPENDED_CPUS	1
+#define	VM_DEBUG_CPUS		2
+
+struct vm_intinfo {
+	int		vcpuid;
+	uint64_t	info1;
+	uint64_t	info2;
+};
+
+struct vm_rtc_time {
+	time_t		secs;
+};
+
+struct vm_rtc_data {
+	int		offset;
+	uint8_t		value;
+};
+
+#ifndef __FreeBSD__
+struct vm_devmem_offset {
+	int		segid;
+	off_t		offset;
+};
+#endif
+
+struct vm_cpu_topology {
+	uint16_t	sockets;
+	uint16_t	cores;
+	uint16_t	threads;
+	uint16_t	maxcpus;
+};
+
+enum {
+	/* general routines */
+	IOCNUM_ABIVERS = 0,
+	IOCNUM_RUN = 1,
+	IOCNUM_SET_CAPABILITY = 2,
+	IOCNUM_GET_CAPABILITY = 3,
+	IOCNUM_SUSPEND = 4,
+	IOCNUM_REINIT = 5,
+
+	/* memory apis */
+	IOCNUM_MAP_MEMORY = 10,			/* deprecated */
+	IOCNUM_GET_MEMORY_SEG = 11,		/* deprecated */
+	IOCNUM_GET_GPA_PMAP = 12,
+	IOCNUM_GLA2GPA = 13,
+	IOCNUM_ALLOC_MEMSEG = 14,
+	IOCNUM_GET_MEMSEG = 15,
+	IOCNUM_MMAP_MEMSEG = 16,
+	IOCNUM_MMAP_GETNEXT = 17,
+	IOCNUM_GLA2GPA_NOFAULT = 18,
+
+	/* register/state accessors */
+	IOCNUM_SET_REGISTER = 20,
+	IOCNUM_GET_REGISTER = 21,
+	IOCNUM_SET_SEGMENT_DESCRIPTOR = 22,
+	IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
+	IOCNUM_SET_REGISTER_SET = 24,
+	IOCNUM_GET_REGISTER_SET = 25,
+
+	/* interrupt injection */
+	IOCNUM_GET_INTINFO = 28,
+	IOCNUM_SET_INTINFO = 29,
+	IOCNUM_INJECT_EXCEPTION = 30,
+	IOCNUM_LAPIC_IRQ = 31,
+	IOCNUM_INJECT_NMI = 32,
+	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
+	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
+	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
+	IOCNUM_LAPIC_LOCAL_IRQ = 37,
+	IOCNUM_IOAPIC_PINCOUNT = 38,
+	IOCNUM_RESTART_INSTRUCTION = 39,
+
+	/* PCI pass-thru */
+	IOCNUM_BIND_PPTDEV = 40,
+	IOCNUM_UNBIND_PPTDEV = 41,
+	IOCNUM_MAP_PPTDEV_MMIO = 42,
+	IOCNUM_PPTDEV_MSI = 43,
+	IOCNUM_PPTDEV_MSIX = 44,
+	IOCNUM_GET_PPTDEV_LIMITS = 45,
+
+	/* statistics */
+	IOCNUM_VM_STATS = 50, 
+	IOCNUM_VM_STAT_DESC = 51,
+
+	/* kernel device state */
+	IOCNUM_SET_X2APIC_STATE = 60,
+	IOCNUM_GET_X2APIC_STATE = 61,
+	IOCNUM_GET_HPET_CAPABILITIES = 62,
+
+	/* CPU Topology */
+	IOCNUM_SET_TOPOLOGY = 63,
+	IOCNUM_GET_TOPOLOGY = 64,
+
+	/* legacy interrupt injection */
+	IOCNUM_ISA_ASSERT_IRQ = 80,
+	IOCNUM_ISA_DEASSERT_IRQ = 81,
+	IOCNUM_ISA_PULSE_IRQ = 82,
+	IOCNUM_ISA_SET_IRQ_TRIGGER = 83,
+
+	/* vm_cpuset */
+	IOCNUM_ACTIVATE_CPU = 90,
+	IOCNUM_GET_CPUSET = 91,
+	IOCNUM_SUSPEND_CPU = 92,
+	IOCNUM_RESUME_CPU = 93,
+
+	/* RTC */
+	IOCNUM_RTC_READ = 100,
+	IOCNUM_RTC_WRITE = 101,
+	IOCNUM_RTC_SETTIME = 102,
+	IOCNUM_RTC_GETTIME = 103,
+
+#ifndef __FreeBSD__
+	/* illumos-custom ioctls */
+	IOCNUM_DEVMEM_GETOFFSET = 256,
+	IOCNUM_WRLOCK_CYCLE = 257,
+#endif
+};
+
+#define	VM_RUN		\
+	_IOWR('v', IOCNUM_RUN, struct vm_run)
+#define	VM_SUSPEND	\
+	_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define	VM_REINIT	\
+	_IO('v', IOCNUM_REINIT)
+#define	VM_ALLOC_MEMSEG	\
+	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define	VM_GET_MEMSEG	\
+	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define	VM_MMAP_MEMSEG	\
+	_IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define	VM_MMAP_GETNEXT	\
+	_IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define	VM_SET_REGISTER \
+	_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define	VM_GET_REGISTER \
+	_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define	VM_SET_SEGMENT_DESCRIPTOR \
+	_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define	VM_GET_SEGMENT_DESCRIPTOR \
+	_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define	VM_SET_REGISTER_SET \
+	_IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define	VM_GET_REGISTER_SET \
+	_IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define	VM_INJECT_EXCEPTION	\
+	_IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define	VM_LAPIC_IRQ 		\
+	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_LOCAL_IRQ 	\
+	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI		\
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
+#define	VM_IOAPIC_ASSERT_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_DEASSERT_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_PULSE_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_PINCOUNT	\
+	_IOR('v', IOCNUM_IOAPIC_PINCOUNT, int)
+#define	VM_ISA_ASSERT_IRQ	\
+	_IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq)
+#define	VM_ISA_DEASSERT_IRQ	\
+	_IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq)
+#define	VM_ISA_PULSE_IRQ	\
+	_IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq)
+#define	VM_ISA_SET_IRQ_TRIGGER	\
+	_IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger)
+#define	VM_SET_CAPABILITY \
+	_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define	VM_GET_CAPABILITY \
+	_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define	VM_BIND_PPTDEV \
+	_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
+#define	VM_UNBIND_PPTDEV \
+	_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
+#define	VM_MAP_PPTDEV_MMIO \
+	_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define	VM_PPTDEV_MSI \
+	_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
+#define	VM_PPTDEV_MSIX \
+	_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
+#define	VM_GET_PPTDEV_LIMITS \
+	_IOR('v', IOCNUM_GET_PPTDEV_LIMITS, struct vm_pptdev_limits)
+#define VM_INJECT_NMI \
+	_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
+#define	VM_STATS_IOC \
+	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define	VM_STAT_DESC \
+	_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define	VM_SET_X2APIC_STATE \
+	_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
+#define	VM_GET_X2APIC_STATE \
+	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
+#define	VM_GET_HPET_CAPABILITIES \
+	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
+#define VM_SET_TOPOLOGY \
+	_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+	_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define	VM_GET_GPA_PMAP \
+	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
+#define	VM_GLA2GPA	\
+	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define	VM_GLA2GPA_NOFAULT \
+	_IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define	VM_ACTIVATE_CPU	\
+	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define	VM_GET_CPUS	\
+	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define	VM_SUSPEND_CPU \
+	_IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define	VM_RESUME_CPU \
+	_IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define	VM_SET_INTINFO	\
+	_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
+#define	VM_GET_INTINFO	\
+	_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
+#define VM_RTC_WRITE \
+	_IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
+#define VM_RTC_READ \
+	_IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
+#define VM_RTC_SETTIME	\
+	_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
+#define VM_RTC_GETTIME	\
+	_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
+#define	VM_RESTART_INSTRUCTION \
+	_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
+
+#ifndef __FreeBSD__
+#define	VM_DEVMEM_GETOFFSET \
+	_IOW('v', IOCNUM_DEVMEM_GETOFFSET, struct vm_devmem_offset)
+#define	VM_WRLOCK_CYCLE _IO('v', IOCNUM_WRLOCK_CYCLE)
+
+/* ioctls used against ctl device for vm create/destroy */
+#define	VMM_IOC_BASE		(('V' << 16) | ('M' << 8))
+#define	VMM_CREATE_VM		(VMM_IOC_BASE | 0x01)
+#define	VMM_DESTROY_VM		(VMM_IOC_BASE | 0x02)
+#define	VMM_VM_SUPPORTED	(VMM_IOC_BASE | 0x03)
+
+#define	VMM_CTL_DEV		"/dev/vmmctl"
+
+#endif
+
+#endif
diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h
new file mode 100644
index 0000000000..856b75e5cc
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_drv.h
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _VMM_DRV_H_
+#define	_VMM_DRV_H_
+
+#ifdef	_KERNEL
+
+#include <sys/file.h>
+
+struct vmm_hold;
+typedef struct vmm_hold vmm_hold_t;
+
+struct vmm_lease;
+typedef struct vmm_lease vmm_lease_t;
+
+/*
+ * Because of tangled headers, these definitions mirror their vmm_[rw]mem_cb_t
+ * counterparts in vmm.h.
+ */
+typedef int (*vmm_drv_rmem_cb_t)(void *, uintptr_t, uint_t, uint64_t *);
+typedef int (*vmm_drv_wmem_cb_t)(void *, uintptr_t, uint_t, uint64_t);
+
+extern int vmm_drv_hold(file_t *, cred_t *, vmm_hold_t **);
+extern void vmm_drv_rele(vmm_hold_t *);
+extern boolean_t vmm_drv_release_reqd(vmm_hold_t *);
+
+extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *),
+    void *);
+extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
+extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
+
+extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t);
+extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
+
+extern int vmm_drv_ioport_hook(vmm_hold_t *, uint_t, vmm_drv_rmem_cb_t,
+    vmm_drv_wmem_cb_t, void *, void **);
+extern void vmm_drv_ioport_unhook(vmm_hold_t *, void **);
+#endif /* _KERNEL */
+
+#endif /* _VMM_DRV_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_impl.h b/usr/src/uts/i86pc/sys/vmm_impl.h
new file mode 100644
index 0000000000..cdc56cc464
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_impl.h
@@ -0,0 +1,89 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _VMM_IMPL_H_
+#define	_VMM_IMPL_H_
+
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/varargs.h>
+#include <sys/zone.h>
+
+#ifdef	_KERNEL
+
+#define	VMM_CTL_MINOR	0
+
+/*
+ * Rather than creating whole character devices for devmem mappings, they are
+ * available by mmap(2)ing the vmm handle at a specific offset.  These offsets
+ * begin just above the maximum allow guest physical address.
+ */
+#include <vm/vm_param.h>
+#define	VM_DEVMEM_START	(VM_MAXUSER_ADDRESS + 1)
+
+struct vmm_devmem_entry {
+	list_node_t	vde_node;
+	int		vde_segid;
+	char		vde_name[SPECNAMELEN + 1];
+	size_t		vde_len;
+	off_t		vde_off;
+};
+typedef struct vmm_devmem_entry vmm_devmem_entry_t;
+
+typedef struct vmm_zsd vmm_zsd_t;
+
+enum vmm_softc_state {
+	VMM_HELD	= 1,	/* external driver(s) possess hold on the VM */
+	VMM_CLEANUP	= 2,	/* request that holds are released */
+	VMM_PURGED	= 4,	/* all hold have been released */
+	VMM_BLOCK_HOOK	= 8,	/* mem hook install temporarily blocked */
+	VMM_DESTROY	= 16	/* VM is destroyed, softc still around */
+};
+
+struct vmm_softc {
+	list_node_t	vmm_node;
+	struct vm	*vmm_vm;
+	minor_t		vmm_minor;
+	char		vmm_name[VM_MAX_NAMELEN];
+	list_t		vmm_devmem_list;
+
+	kcondvar_t	vmm_cv;
+	list_t		vmm_holds;
+	uint_t		vmm_flags;
+	boolean_t	vmm_is_open;
+
+	kmutex_t	vmm_lease_lock;
+	list_t		vmm_lease_list;
+	uint_t		vmm_lease_blocker;
+	kcondvar_t	vmm_lease_cv;
+	krwlock_t	vmm_rwlock;
+
+	/* For zone specific data */
+	list_node_t	vmm_zsd_linkage;
+	zone_t		*vmm_zone;
+	vmm_zsd_t	*vmm_zsd;
+};
+typedef struct vmm_softc vmm_softc_t;
+
+void vmm_zsd_init(void);
+void vmm_zsd_fini(void);
+int vmm_zsd_add_vm(vmm_softc_t *sc);
+void vmm_zsd_rem_vm(vmm_softc_t *sc);
+int vmm_do_vm_destroy(vmm_softc_t *, boolean_t);
+
+#endif /* _KERNEL */
+
+#endif	/* _VMM_IMPL_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
new file mode 100644
index 0000000000..f10f407164
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
@@ -0,0 +1,137 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMM_INSTRUCTION_EMUL_H_
+#define	_VMM_INSTRUCTION_EMUL_H_
+
+#include <sys/mman.h>
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+				 uint64_t *rval, int rsize, void *arg);
+
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+				  uint64_t wval, int wsize, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ * s
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t mrr,
+    mem_region_write_t mrw, void *mrarg);
+
+int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+    uint64_t val, int size);
+
+/*
+ * Returns 1 if an alignment check exception should be injected and 0 otherwise.
+ */
+int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
+    uint64_t rflags, uint64_t gla);
+
+/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
+int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+
+uint64_t vie_size2mask(int size);
+
+int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
+    uint64_t *gla);
+
+#ifdef _KERNEL
+/*
+ * APIs to fetch and decode the instruction from nested page fault handler.
+ *
+ * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
+ */
+int vmm_fetch_instruction(struct vm *vm, int cpuid,
+			  struct vm_guest_paging *guest_paging,
+			  uint64_t rip, int inst_length, struct vie *vie,
+			  int *is_fault);
+
+/*
+ * Translate the guest linear address 'gla' to a guest physical address.
+ *
+ * retval	is_fault	Interpretation
+ *   0		   0		'gpa' contains result of the translation
+ *   0		   1		An exception was injected into the guest
+ * EFAULT	  N/A		An unrecoverable hypervisor error occurred
+ */
+int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+/*
+ * Like vm_gla2gpa, but no exceptions are injected into the guest and
+ * PTEs are not changed.
+ */
+int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
+
+/*
+ * Decode the instruction fetched into 'vie' so it can be emulated.
+ *
+ * 'gla' is the guest linear address provided by the hardware assist
+ * that caused the nested page table fault. It is used to verify that
+ * the software instruction decoding is in agreement with the hardware.
+ * 
+ * Some hardware assists do not provide the 'gla' to the hypervisor.
+ * To skip the 'gla' verification for this or any other reason pass
+ * in VIE_INVALID_GLA instead.
+ */
+#define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
+int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
+			   enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
+#endif	/* _KERNEL */
+
+#endif	/* _VMM_INSTRUCTION_EMUL_H_ */