author    | Patrick Mooney <pmooney@pfmooney.com>   | 2021-09-05 01:38:39 +0000
committer | Patrick Mooney <pmooney@oxide.computer> | 2021-11-19 23:00:59 +0000
commit    | 0153d828c132fdb1a17c11b99386a3d1b87994cf (patch)
tree      | c670df2f1d9cfceb92709c3cb2862fdd1f97f90a
parent    | d8f839f91e21bea2f5200f95df55608cbecdeeb9 (diff)
download  | illumos-joyent-0153d828c132fdb1a17c11b99386a3d1b87994cf.tar.gz
13896 bhyve VM interfaces should be better fit
13981 bhyve emulation should set dirty bits
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Approved by: Dan McDonald <danmcd@joyent.com>
40 files changed, 2152 insertions, 2960 deletions
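The heart of this change, visible throughout the diff below, is that the VMM backends (SVM and VMX) no longer receive a pmap_t from the generic layer. Each backend instead obtains the nested-table root via vmspace_table_root(vm_get_vmspace(vm)) at vminit time, and each vCPU brackets guest entry with vmc_table_enter()/vmc_table_exit() on its vm_client_t, comparing the returned table generation against a cached copy (nptgen on SVM, eptgen[] on VMX) to decide whether a TLB or EPT flush is needed. The following is a minimal, self-contained sketch of that generation-check pattern; the stub types and functions are stand-ins modeled on the interfaces named in the diff, not the actual kernel implementation.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the opaque per-vCPU client handle (vm_client_t). */
typedef struct vm_client {
	uint64_t tbl_gen;		/* current page-table generation */
} vm_client_t;

/* Modeled on vmc_table_enter(): activate the tables, report their gen. */
static uint64_t
vmc_table_enter(vm_client_t *vmc)
{
	return (vmc->tbl_gen);
}

/* Modeled on vmc_table_exit(): nothing to release in this sketch. */
static void
vmc_table_exit(vm_client_t *vmc)
{
	(void) vmc;
}

/* Per-vCPU cached generation, as in svm_vcpu.nptgen / vmx.eptgen[]. */
static uint64_t cached_gen;

static void
run_vcpu_once(vm_client_t *vmc)
{
	const uint64_t gen = vmc_table_enter(vmc);

	if (gen != cached_gen) {
		/* Mappings may be stale: an invept/ASID flush would go here. */
		printf("gen %llu -> %llu: flush required\n",
		    (unsigned long long)cached_gen, (unsigned long long)gen);
		cached_gen = gen;
	}

	/* ... enter the guest here ... */

	vmc_table_exit(vmc);
}

int
main(void)
{
	vm_client_t vmc = { .tbl_gen = 1 };

	run_vcpu_once(&vmc);	/* first entry: flush */
	run_vcpu_once(&vmc);	/* same generation: no flush */
	vmc.tbl_gen = 2;	/* vmspace was modified elsewhere */
	run_vcpu_once(&vmc);	/* flush again */
	return (0);
}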
diff --git a/usr/src/compat/bhyve/amd64/machine/md_var.h b/usr/src/compat/bhyve/amd64/machine/md_var.h index ed57a8bebc..ca3d68ef95 100644 --- a/usr/src/compat/bhyve/amd64/machine/md_var.h +++ b/usr/src/compat/bhyve/amd64/machine/md_var.h @@ -23,6 +23,4 @@ extern char cpu_vendor[]; /* CPU Origin code */ #include <sys/systm.h> -#define Maxmem (physmax + 1) - #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_MD_VAR_H_ */ diff --git a/usr/src/compat/bhyve/amd64/machine/pmap.h b/usr/src/compat/bhyve/amd64/machine/pmap.h deleted file mode 100644 index 3b94d1b1a9..0000000000 --- a/usr/src/compat/bhyve/amd64/machine/pmap.h +++ /dev/null @@ -1,489 +0,0 @@ -/* - * All rights reserved. This copyright notice is Copyright Management - * Information under 17 USC 1202 and is included to protect this work and - * deter copyright infringement. Removal or alteration of this Copyright - * Management Information without the express written permission from - * Pluribus Networks Inc is prohibited, and any such unauthorized removal - * or alteration will be a violation of federal law. - * - * Copyright (c) 2003 Peter Wemm. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Derived from hp300 version by Mike Hibler, this version by William - * Jolitz uses a recursive map [a pde points to the page directory] to - * map the page tables using the pagetables themselves. This is done to - * reduce the impact on kernel virtual memory for lots of sparse address - * space, and to reduce the cost of memory to each process. - * - * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 - * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $FreeBSD$ - */ - -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. 
- * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2014 Pluribus Networks Inc. - */ - - -#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_ -#define _COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_ - -/* - * Page-directory and page-table entries follow this format, with a few - * of the fields not present here and there, depending on a lot of things. - */ - /* ---- Intel Nomenclature ---- */ -#define X86_PG_V 0x001 /* P Valid */ -#define X86_PG_RW 0x002 /* R/W Read/Write */ -#define X86_PG_U 0x004 /* U/S User/Supervisor */ -#define X86_PG_NC_PWT 0x008 /* PWT Write through */ -#define X86_PG_NC_PCD 0x010 /* PCD Cache disable */ -#define X86_PG_A 0x020 /* A Accessed */ -#define X86_PG_M 0x040 /* D Dirty */ -#define X86_PG_PS 0x080 /* PS Page size (0=4k,1=2M) */ -#define X86_PG_PTE_PAT 0x080 /* PAT PAT index */ -#define X86_PG_G 0x100 /* G Global */ -#define X86_PG_AVAIL1 0x200 /* / Available for system */ -#define X86_PG_AVAIL2 0x400 /* < programmers use */ -#define X86_PG_AVAIL3 0x800 /* \ */ -#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */ -#define X86_PG_NX (1ul<<63) /* No-execute */ -#define X86_PG_AVAIL(x) (1ul << (x)) - -/* Page level cache control fields used to determine the PAT type */ -#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD) -#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD) - -/* - * Intel extended page table (EPT) bit definitions. - */ -#define EPT_PG_READ 0x001 /* R Read */ -#define EPT_PG_WRITE 0x002 /* W Write */ -#define EPT_PG_EXECUTE 0x004 /* X Execute */ -#define EPT_PG_IGNORE_PAT 0x040 /* IPAT Ignore PAT */ -#define EPT_PG_PS 0x080 /* PS Page size */ -#define EPT_PG_A 0x100 /* A Accessed */ -#define EPT_PG_M 0x200 /* D Dirty */ -#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) /* MT Memory Type */ - -/* - * Define the PG_xx macros in terms of the bits on x86 PTEs. - */ -#define PG_V X86_PG_V -#define PG_RW X86_PG_RW -#define PG_U X86_PG_U -#define PG_NC_PWT X86_PG_NC_PWT -#define PG_NC_PCD X86_PG_NC_PCD -#define PG_A X86_PG_A -#define PG_M X86_PG_M -#define PG_PS X86_PG_PS -#define PG_PTE_PAT X86_PG_PTE_PAT -#define PG_G X86_PG_G -#define PG_AVAIL1 X86_PG_AVAIL1 -#define PG_AVAIL2 X86_PG_AVAIL2 -#define PG_AVAIL3 X86_PG_AVAIL3 -#define PG_PDE_PAT X86_PG_PDE_PAT -#define PG_NX X86_PG_NX -#define PG_PDE_CACHE X86_PG_PDE_CACHE -#define PG_PTE_CACHE X86_PG_PTE_CACHE - -/* Our various interpretations of the above */ -#define PG_W X86_PG_AVAIL3 /* "Wired" pseudoflag */ -#define PG_MANAGED X86_PG_AVAIL2 -#define EPT_PG_EMUL_V X86_PG_AVAIL(52) -#define EPT_PG_EMUL_RW X86_PG_AVAIL(53) -#define PG_PROMOTED X86_PG_AVAIL(54) /* PDE only */ -#define PG_FRAME (0x000ffffffffff000ul) -#define PG_PS_FRAME (0x000fffffffe00000ul) - -/* - * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB - * (PTE) page mappings have identical settings for the following fields: - */ -#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \ - PG_M | PG_A | PG_U | PG_RW | PG_V) - -/* - * Page Protection Exception bits - */ - -#define PGEX_P 0x01 /* Protection violation vs. 
not present */ -#define PGEX_W 0x02 /* during a Write cycle */ -#define PGEX_U 0x04 /* access from User mode (UPL) */ -#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ -#define PGEX_I 0x10 /* during an instruction fetch */ - -/* - * undef the PG_xx macros that define bits in the regular x86 PTEs that - * have a different position in nested PTEs. This is done when compiling - * code that needs to be aware of the differences between regular x86 and - * nested PTEs. - * - * The appropriate bitmask will be calculated at runtime based on the pmap - * type. - */ -#ifdef AMD64_NPT_AWARE -#undef PG_AVAIL1 /* X86_PG_AVAIL1 aliases with EPT_PG_M */ -#undef PG_G -#undef PG_A -#undef PG_M -#undef PG_PDE_PAT -#undef PG_PDE_CACHE -#undef PG_PTE_PAT -#undef PG_PTE_CACHE -#undef PG_RW -#undef PG_V -#endif - -/* - * Pte related macros. This is complicated by having to deal with - * the sign extension of the 48th bit. - */ -#define KVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)-1 << 47) | \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - -#define UVADDR(l4, l3, l2, l1) ( \ - ((unsigned long)(l4) << PML4SHIFT) | \ - ((unsigned long)(l3) << PDPSHIFT) | \ - ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) - -/* - * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so, - * but setting it larger than NDMPML4E makes no sense. - * - * Each slot provides .5 TB of kernel virtual space. - */ -#define NKPML4E 4 - -#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */ -#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */ -#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */ - -/* - * NDMPML4E is the maximum number of PML4 entries that will be - * used to implement the direct map. It must be a power of two, - * and should generally exceed NKPML4E. The maximum possible - * value is 64; using 128 will make the direct map intrude into - * the recursive page table map. - */ -#define NDMPML4E 8 - -/* - * These values control the layout of virtual memory. The starting address - * of the direct map, which is controlled by DMPML4I, must be a multiple of - * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) - * - * Note: KPML4I is the index of the (single) level 4 page that maps - * the KVA that holds KERNBASE, while KPML4BASE is the index of the - * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E - * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra - * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to - * KERNBASE. - * - * (KPML4I combines with KPDPI to choose where KERNBASE starts. - * Or, in other words, KPML4I provides bits 39..47 of KERNBASE, - * and KPDPI provides bits 30..38.) - */ -#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */ - -#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ -#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ - -#define KPML4I (NPML4EPG-1) -#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ - -/* - * XXX doesn't really belong here I guess... 
- */ -#define ISA_HOLE_START 0xa0000 -#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) - -#define PMAP_PCID_NONE 0xffffffff -#define PMAP_PCID_KERN 0 -#define PMAP_PCID_OVERMAX 0x1000 - -#ifndef LOCORE - -#ifdef __FreeBSD__ -#include <sys/queue.h> -#include <sys/_cpuset.h> -#include <sys/_lock.h> -#include <sys/_mutex.h> - -#include <vm/_vm_radix.h> -#endif /* __FreeBSD__ */ - -typedef u_int64_t pd_entry_t; -typedef u_int64_t pt_entry_t; -typedef u_int64_t pdp_entry_t; -typedef u_int64_t pml4_entry_t; - -/* - * Address of current address space page table maps and directories. - */ -#ifdef _KERNEL -#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0)) -#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0)) -#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0)) -#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)) -#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t))) -#define PTmap ((pt_entry_t *)(addr_PTmap)) -#define PDmap ((pd_entry_t *)(addr_PDmap)) -#define PDPmap ((pd_entry_t *)(addr_PDPmap)) -#define PML4map ((pd_entry_t *)(addr_PML4map)) -#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e)) - -extern int nkpt; /* Initial number of kernel page tables */ -extern u_int64_t KPDPphys; /* physical address of kernel level 3 */ -extern u_int64_t KPML4phys; /* physical address of kernel level 4 */ - -/* - * virtual address to page table entry and - * to physical address. - * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ -pt_entry_t *vtopte(vm_offset_t); -#define vtophys(va) pmap_kextract(((vm_offset_t) (va))) -#ifndef __FreeBSD__ -extern vm_paddr_t pmap_kextract(vm_offset_t); -#endif - -#define pte_load_store(ptep, pte) atomic_swap_long(ptep, pte) -#define pte_load_clear(ptep) atomic_swap_long(ptep, 0) -#define pte_store(ptep, pte) do { \ - *(u_long *)(ptep) = (u_long)(pte); \ -} while (0) -#define pte_clear(ptep) pte_store(ptep, 0) - -#define pde_store(pdep, pde) pte_store(pdep, pde) - -extern pt_entry_t pg_nx; - -#endif /* _KERNEL */ - -#ifdef __FreeBSD__ -/* - * Pmap stuff - */ -struct pv_entry; -struct pv_chunk; - -/* - * Locks - * (p) PV list lock - */ -struct md_page { - TAILQ_HEAD(, pv_entry) pv_list; /* (p) */ - int pv_gen; /* (p) */ - int pat_mode; -}; -#endif /* __FreeBSD__ */ - -enum pmap_type { - PT_X86, /* regular x86 page tables */ - PT_EPT, /* Intel's nested page tables */ - PT_RVI, /* AMD's nested page tables */ -}; - -#ifdef __FreeBSD__ -struct pmap_pcids { - uint32_t pm_pcid; - uint32_t pm_gen; -}; - -/* - * The kernel virtual address (KVA) of the level 4 page table page is always - * within the direct map (DMAP) region. 
- */ -struct pmap { - struct mtx pm_mtx; - pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ - uint64_t pm_cr3; - TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpuset_t pm_active; /* active on cpus */ - enum pmap_type pm_type; /* regular or nested tables */ - struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_radix pm_root; /* spare page table pages */ - long pm_eptgen; /* EPT pmap generation id */ - int pm_flags; - struct pmap_pcids pm_pcids[MAXCPU]; -}; -#endif /* __FreeBSD__ */ - -/* flags */ -#define PMAP_NESTED_IPIMASK 0xff -#define PMAP_PDE_SUPERPAGE (1 << 8) /* supports 2MB superpages */ -#define PMAP_EMULATE_AD_BITS (1 << 9) /* needs A/D bits emulation */ -#define PMAP_SUPPORTS_EXEC_ONLY (1 << 10) /* execute only mappings ok */ - -typedef struct pmap *pmap_t; - -#ifdef _KERNEL -extern struct pmap kernel_pmap_store; -#define kernel_pmap (&kernel_pmap_store) - -#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) -#define PMAP_LOCK_ASSERT(pmap, type) \ - mtx_assert(&(pmap)->pm_mtx, (type)) -#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) -#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ - NULL, MTX_DEF | MTX_DUPOK) -#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) -#define PMAP_MTX(pmap) (&(pmap)->pm_mtx) -#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) -#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) - -int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags); -int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype); -#endif - -#ifdef __FreeBSD__ -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry_t, the list is pv_list. - */ -typedef struct pv_entry { - vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_next; -} *pv_entry_t; - -/* - * pv_entries are allocated in chunks per-process. This avoids the - * need to track per-pmap assignments. 
- */ -#define _NPCM 3 -#define _NPCPV 168 -struct pv_chunk { - pmap_t pc_pmap; - TAILQ_ENTRY(pv_chunk) pc_list; - uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */ - TAILQ_ENTRY(pv_chunk) pc_lru; - struct pv_entry pc_pventry[_NPCPV]; -}; - -#ifdef _KERNEL - -extern caddr_t CADDR1; -extern pt_entry_t *CMAP1; -extern vm_paddr_t phys_avail[]; -extern vm_paddr_t dump_avail[]; -extern vm_offset_t virtual_avail; -extern vm_offset_t virtual_end; -extern vm_paddr_t dmaplimit; -extern int pmap_pcid_enabled; -extern int invpcid_works; - -#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) -#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) -#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) - -struct thread; - -void pmap_activate_sw(struct thread *); -void pmap_bootstrap(vm_paddr_t *); -int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde); -int pmap_change_attr(vm_offset_t, vm_size_t, int); -void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate); -void pmap_init_pat(void); -void pmap_kenter(vm_offset_t va, vm_paddr_t pa); -void *pmap_kenter_temporary(vm_paddr_t pa, int i); -vm_paddr_t pmap_kextract(vm_offset_t); -void pmap_kremove(vm_offset_t); -void *pmap_mapbios(vm_paddr_t, vm_size_t); -void *pmap_mapdev(vm_paddr_t, vm_size_t); -void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); -boolean_t pmap_page_is_mapped(vm_page_t m); -void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); -void pmap_pinit_pml4(vm_page_t); -void pmap_unmapdev(vm_offset_t, vm_size_t); -void pmap_invalidate_page(pmap_t, vm_offset_t); -void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); -void pmap_invalidate_all(pmap_t); -void pmap_invalidate_cache(void); -void pmap_invalidate_cache_pages(vm_page_t *pages, int count); -void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, - boolean_t force); -void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num); -boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); -void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); -#endif /* _KERNEL */ - -/* Return various clipped indexes for a given VA */ -static __inline vm_pindex_t -pmap_pte_index(vm_offset_t va) -{ - - return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pde_index(vm_offset_t va) -{ - - return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pdpe_index(vm_offset_t va) -{ - - return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pml4e_index(vm_offset_t va) -{ - - return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); -} - -#endif /* __FreeBSD__ */ -#endif /* !LOCORE */ - -#endif /* !_COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_ */ diff --git a/usr/src/compat/bhyve/amd64/machine/smp.h b/usr/src/compat/bhyve/amd64/machine/smp.h deleted file mode 100644 index 9c4f2d111b..0000000000 --- a/usr/src/compat/bhyve/amd64/machine/smp.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2013 Pluribus Networks Inc. - * Copyright 2018 Joyent, Inc. 
- */ - -#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_ -#define _COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_ - -#ifdef _KERNEL - -/* - * APIC-related functions are replaced with native calls rather than shims - * which attempt to replicate the FreeBSD interfaces. This is empty, but will - * remain present to appease sources which wish to include the path. - */ - -#endif /* _KERNEL */ - -#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_ */ diff --git a/usr/src/compat/bhyve/sys/smp.h b/usr/src/compat/bhyve/sys/smp.h deleted file mode 100644 index 3d6413ce16..0000000000 --- a/usr/src/compat/bhyve/sys/smp.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2014 Pluribus Networks Inc. - * Copyright 2017 Joyent, Inc. - */ - -#ifndef _COMPAT_FREEBSD_SYS_SMP_H_ -#define _COMPAT_FREEBSD_SYS_SMP_H_ - -#include <sys/cpuset.h> - -#define IPI_AST 0 - -void ipi_cpu(int cpu, u_int ipi); - -#endif /* _COMPAT_FREEBSD_SYS_SMP_H_ */ diff --git a/usr/src/contrib/bhyve/amd64/machine/vm.h b/usr/src/contrib/bhyve/amd64/machine/vm.h deleted file mode 100644 index 885c1607ea..0000000000 --- a/usr/src/contrib/bhyve/amd64/machine/vm.h +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2009 Advanced Computing Technologies LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: head/sys/amd64/include/vm.h 233671 2012-03-29 16:51:22Z jhb $ - */ - -#ifndef _MACHINE_VM_H_ -#define _MACHINE_VM_H_ - -#include <machine/specialreg.h> - -/* Memory attributes. 
*/ -#define VM_MEMATTR_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHEABLE) -#define VM_MEMATTR_WRITE_COMBINING ((vm_memattr_t)PAT_WRITE_COMBINING) -#define VM_MEMATTR_WRITE_THROUGH ((vm_memattr_t)PAT_WRITE_THROUGH) -#define VM_MEMATTR_WRITE_PROTECTED ((vm_memattr_t)PAT_WRITE_PROTECTED) -#define VM_MEMATTR_WRITE_BACK ((vm_memattr_t)PAT_WRITE_BACK) -#define VM_MEMATTR_WEAK_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHED) - -#define VM_MEMATTR_DEFAULT VM_MEMATTR_WRITE_BACK - -#endif /* !_MACHINE_VM_H_ */ diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index caa660725c..9b83a780a5 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -247,7 +247,6 @@ VMM_OBJS += vmm.o \ vmm_instruction_emul.o \ vmm_ioport.o \ vmm_lapic.o \ - vmm_mem.o \ vmm_stat.o \ vmm_util.o \ x86.o \ @@ -259,7 +258,6 @@ VMM_OBJS += vmm.o \ vlapic.o \ vrtc.o \ vpmtmr.o \ - ept.o \ vmcs.o \ vmx_msr.o \ vmx.o \ @@ -268,18 +266,17 @@ VMM_OBJS += vmm.o \ vtd_sol.o \ svm.o \ svm_msr.o \ - npt.o \ vmcb.o \ svm_support.o \ amdv.o \ vmm_gpt.o \ seg_vmm.o \ vmm_reservoir.o \ - vmm_sol_vm.o \ vmm_sol_glue.o \ vmm_sol_ept.o \ vmm_sol_rvi.o \ vmm_support.o \ + vmm_vm.o \ vmm_zsd.o VIONA_OBJS += viona_main.o \ diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c index c7b43b85ef..c381e350ed 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/pcpu.h> #include <sys/rman.h> -#include <sys/smp.h> #include <sys/sysctl.h> #include <dev/pci/pcivar.h> @@ -45,7 +44,6 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <machine/vmm.h> -#include <machine/pmap.h> #include <machine/vmparam.h> #include <machine/pci_cfgreg.h> diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.c b/usr/src/uts/i86pc/io/vmm/amd/npt.c deleted file mode 100644 index 6fc6825242..0000000000 --- a/usr/src/uts/i86pc/io/vmm/amd/npt.c +++ /dev/null @@ -1,77 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/systm.h> -#include <sys/sysctl.h> - -#include <sys/vmm_vm.h> - -#include "npt.h" - -static int npt_flags; - -#define NPT_IPIMASK 0xFF - -/* - * AMD nested page table init. - */ -int -svm_npt_init(int ipinum) -{ - int enable_superpage = 1; - - npt_flags = ipinum & NPT_IPIMASK; - TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage); - if (enable_superpage) - npt_flags |= PMAP_PDE_SUPERPAGE; - - return (0); -} - -static int -npt_pinit(pmap_t pmap) -{ - return (pmap_pinit_type(pmap, PT_RVI, npt_flags)); -} - -struct vmspace * -svm_npt_alloc(vm_offset_t min, vm_offset_t max) -{ - return (vmspace_alloc(min, max, npt_pinit)); -} - -void -svm_npt_free(struct vmspace *vmspace) -{ - vmspace_free(vmspace); -} diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.h b/usr/src/uts/i86pc/io/vmm/amd/npt.h deleted file mode 100644 index 95f3fbab9e..0000000000 --- a/usr/src/uts/i86pc/io/vmm/amd/npt.h +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _SVM_NPT_H_ -#define _SVM_NPT_H_ - -int svm_npt_init(int ipinum); -struct vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max); -void svm_npt_free(struct vmspace *vmspace); - -#endif /* _SVM_NPT_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 65fc4c3d0f..8ffc1c6557 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/smp.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/pcpu.h> @@ -60,7 +59,6 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/reg.h> #include <machine/specialreg.h> -#include <machine/smp.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> #include <sys/vmm_instruction_emul.h> @@ -79,7 +77,6 @@ __FBSDID("$FreeBSD$"); #include "svm.h" #include "svm_softc.h" #include "svm_msr.h" -#include "npt.h" SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, @@ -151,12 +148,11 @@ svm_cleanup(void) } static int -svm_init(int ipinum) +svm_init(void) { vmcb_clean &= VMCB_CACHE_DEFAULT; svm_msr_init(); - svm_npt_init(ipinum); return (0); } @@ -425,7 +421,7 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, * Initialize a virtual machine. */ static void * -svm_vminit(struct vm *vm, pmap_t pmap) +svm_vminit(struct vm *vm) { struct svm_softc *svm_sc; struct svm_vcpu *vcpu; @@ -447,7 +443,7 @@ svm_vminit(struct vm *vm, pmap_t pmap) panic("contigmalloc of SVM IO bitmap failed"); svm_sc->vm = vm; - svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4); + svm_sc->nptp = vmspace_table_root(vm_get_vmspace(vm)); /* * Intercept read and write accesses to all MSRs. @@ -1776,23 +1772,21 @@ svm_inject_recheck(struct svm_softc *sc, int vcpu, static void -check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, uint_t thiscpu) +check_asid(struct svm_softc *sc, int vcpuid, uint_t thiscpu, uint64_t nptgen) { struct svm_vcpu *vcpustate = svm_get_vcpu(sc, vcpuid); struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpuid); - long eptgen; uint8_t flush; - eptgen = pmap->pm_eptgen; flush = hma_svm_asid_update(&vcpustate->hma_asid, flush_by_asid(), - vcpustate->eptgen != eptgen); + vcpustate->nptgen != nptgen); if (flush != VMCB_TLB_FLUSH_NOTHING) { ctrl->asid = vcpustate->hma_asid.hsa_asid; svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID); } ctrl->tlb_ctrl = flush; - vcpustate->eptgen = eptgen; + vcpustate->nptgen = nptgen; } static void @@ -1810,8 +1804,8 @@ flush_asid(struct svm_softc *sc, int vcpuid) ctrl->tlb_ctrl = flush; svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID); /* - * A potential future optimization: We could choose to update the eptgen - * associated with the vCPU, since any pending eptgen change requiring a + * A potential future optimization: We could choose to update the nptgen + * associated with the vCPU, since any pending nptgen change requiring a * flush will be satisfied by the one which has just now been queued. */ } @@ -1899,7 +1893,7 @@ svm_apply_tsc_adjust(struct svm_softc *svm_sc, int vcpuid) * Start vcpu with specified RIP. 
*/ static int -svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) +svm_vmrun(void *arg, int vcpu, uint64_t rip) { struct svm_regctx *gctx; struct svm_softc *svm_sc; @@ -1908,6 +1902,7 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) struct vmcb_ctrl *ctrl; struct vm_exit *vmexit; struct vlapic *vlapic; + vm_client_t *vmc; struct vm *vm; uint64_t vmcb_pa; int handled; @@ -1921,6 +1916,7 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); vmexit = vm_exitinfo(vm, vcpu); vlapic = vm_lapic(vm, vcpu); + vmc = vm_get_vmclient(vm, vcpu); gctx = svm_get_guest_regctx(svm_sc, vcpu); vmcb_pa = svm_sc->vcpu[vcpu].vmcb_pa; @@ -1962,6 +1958,7 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) do { enum event_inject_state inject_state; + uint64_t nptgen; /* * Initial event injection is complex and may involve mutex @@ -2021,14 +2018,12 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) */ ldt_sel = sldt(); - /* Activate the nested pmap on 'curcpu' */ - CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active); - /* - * Check the pmap generation and the ASID generation to - * ensure that the vcpu does not use stale TLB mappings. + * Check the vmspace and ASID generations to ensure that the + * vcpu does not use stale TLB mappings. */ - check_asid(svm_sc, vcpu, pmap, curcpu); + nptgen = vmc_table_enter(vmc); + check_asid(svm_sc, vcpu, curcpu, nptgen); ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty; vcpustate->dirty = 0; @@ -2042,14 +2037,14 @@ svm_vmrun(void *arg, int vcpu, uint64_t rip, pmap_t pmap) svm_dr_leave_guest(gctx); vcpu_ustate_change(vm, vcpu, VU_EMU_KERN); - CPU_CLR_ATOMIC(curcpu, &pmap->pm_active); - /* Restore host LDTR. */ lldt(ldt_sel); /* #VMEXIT disables interrupts so re-enable them here. 
*/ enable_gintr(); + vmc_table_exit(vmc); + /* Update 'nextrip' */ vcpustate->nextrip = state->rip; @@ -2477,6 +2472,7 @@ struct vmm_ops vmm_ops_amd = { .init = svm_init, .cleanup = svm_cleanup, .resume = svm_restore, + .vminit = svm_vminit, .vmrun = svm_vmrun, .vmcleanup = svm_vmcleanup, @@ -2486,8 +2482,6 @@ struct vmm_ops vmm_ops_amd = { .vmsetdesc = svm_setdesc, .vmgetcap = svm_getcap, .vmsetcap = svm_setcap, - .vmspace_alloc = svm_npt_alloc, - .vmspace_free = svm_npt_free, .vlapic_init = svm_vlapic_init, .vlapic_cleanup = svm_vlapic_cleanup, diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h index e3ac603e71..adf9bb8ddd 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h @@ -50,7 +50,7 @@ struct svm_vcpu { uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ - long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ + uint64_t nptgen; /* page table gen when the vcpu last ran */ hma_svm_asid_t hma_asid; boolean_t loaded; } __aligned(PAGE_SIZE); @@ -61,7 +61,7 @@ struct svm_vcpu { struct svm_softc { uint8_t apic_page[VM_MAXCPU][PAGE_SIZE]; struct svm_vcpu vcpu[VM_MAXCPU]; - vm_offset_t nptp; /* nested page table */ + uint64_t nptp; /* nested page table (host PA) */ uint8_t *iopm_bitmap; /* shared by all vcpus */ uint8_t *msr_bitmap; /* shared by all vcpus */ struct vm *vm; diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c deleted file mode 100644 index 49b01ebd36..0000000000 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ /dev/null @@ -1,170 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. 
A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2015 Pluribus Networks Inc. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/types.h> -#include <sys/systm.h> -#include <sys/smp.h> -#include <sys/sysctl.h> -#include <sys/hma.h> - -#include <machine/specialreg.h> -#include <machine/vmm.h> -#include <sys/vmm_vm.h> - -#include "ept.h" - -#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0)) -#define EPT_PWL4(cap) ((cap) & (1UL << 6)) -#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14)) -#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */ -#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */ -#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20)) -#define AD_BITS_SUPPORTED(cap) ((cap) & (1UL << 21)) -#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32)) - -#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL -#define INVVPID_ALL_TYPES_SUPPORTED(cap) \ - (((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK) - -#define INVEPT_ALL_TYPES_MASK 0x6000000UL -#define INVEPT_ALL_TYPES_SUPPORTED(cap) \ - (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK) - -#define EPT_PWLEVELS 4 /* page walk levels */ -#define EPT_ENABLE_AD_BITS (1 << 6) - -SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, - NULL); - -static int ept_enable_ad_bits; - -static int ept_pmap_flags; - -int -ept_init(int ipinum) -{ - int use_hw_ad_bits, use_superpages, use_exec_only; - uint64_t cap; - - cap = rdmsr(MSR_VMX_EPT_VPID_CAP); - - /* - * Verify that: - * - page walk length is 4 steps - * - extended page tables can be laid out in write-back memory - * - invvpid instruction with all possible types is supported - * - invept instruction with all possible types is supported - */ - if (!EPT_PWL4(cap) || - !EPT_MEMORY_TYPE_WB(cap) || - !INVVPID_SUPPORTED(cap) || - !INVVPID_ALL_TYPES_SUPPORTED(cap) || - !INVEPT_SUPPORTED(cap) || - !INVEPT_ALL_TYPES_SUPPORTED(cap)) - return (EINVAL); - - ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK; - - use_superpages = 1; - TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages); - if (use_superpages && EPT_PDE_SUPERPAGE(cap)) - ept_pmap_flags |= PMAP_PDE_SUPERPAGE; /* 2MB superpage */ - - use_hw_ad_bits = 1; - TUNABLE_INT_FETCH("hw.vmm.ept.use_hw_ad_bits", &use_hw_ad_bits); - if (use_hw_ad_bits && AD_BITS_SUPPORTED(cap)) - ept_enable_ad_bits = 1; - else - ept_pmap_flags |= PMAP_EMULATE_AD_BITS; - - use_exec_only = 1; - TUNABLE_INT_FETCH("hw.vmm.ept.use_exec_only", &use_exec_only); - if (use_exec_only && EPT_SUPPORTS_EXEC_ONLY(cap)) - ept_pmap_flags |= PMAP_SUPPORTS_EXEC_ONLY; - - return (0); -} - -void -ept_invalidate_mappings(ulong_t eptp) -{ - hma_vmx_invept_allcpus((uintptr_t)eptp); -} - -static int -ept_pinit(pmap_t pmap) -{ - - return (pmap_pinit_type(pmap, PT_EPT, ept_pmap_flags)); -} - -struct vmspace * -ept_vmspace_alloc(vm_offset_t min, vm_offset_t max) -{ - - return (vmspace_alloc(min, max, ept_pinit)); -} - -void -ept_vmspace_free(struct vmspace *vmspace) -{ - - vmspace_free(vmspace); -} - -uint64_t -eptp(uint64_t pml4) -{ - uint64_t eptp_val; - - eptp_val = pml4 | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK; - if (ept_enable_ad_bits) - eptp_val |= EPT_ENABLE_AD_BITS; - - return (eptp_val); -} diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.h b/usr/src/uts/i86pc/io/vmm/intel/ept.h deleted file mode 100644 index e4a6d6c959..0000000000 --- 
a/usr/src/uts/i86pc/io/vmm/intel/ept.h +++ /dev/null @@ -1,41 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _EPT_H_ -#define _EPT_H_ - -struct vmx; - -int ept_init(int ipinum); -void ept_invalidate_mappings(ulong_t eptp); -struct vmspace *ept_vmspace_alloc(vm_offset_t min, vm_offset_t max); -void ept_vmspace_free(struct vmspace *vmspace); -uint64_t eptp(uint64_t pml4); -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in index d456693573..f467e7b1ca 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in +++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in @@ -19,7 +19,6 @@ #include <sys/systm.h> #include <sys/cpuvar.h> -#include <machine/pmap.h> #include <machine/vmm.h> #include <sys/vmm_vm.h> @@ -43,18 +42,6 @@ vmxctx guest_r15 VMXCTX_GUEST_R15 guest_cr2 VMXCTX_GUEST_CR2 inst_fail_status VMXCTX_INST_FAIL_STATUS - pmap VMXCTX_PMAP - -vmx - eptgen VMX_EPTGEN - eptp VMX_EPTP - -pmap - pm_active PM_ACTIVE - pm_eptgen PM_EPTGEN - -cpu - cpu_id \#define VM_SUCCESS 0 \#define VM_FAIL_INVALID 1 diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index c58ad471a1..533adcbbf2 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -48,7 +48,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/smp.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/pcpu.h> @@ -60,13 +59,13 @@ __FBSDID("$FreeBSD$"); #include <sys/smt.h> #include <sys/hma.h> #include <sys/trap.h> +#include <sys/archsystm.h> #include <machine/psl.h> #include <machine/cpufunc.h> #include <machine/md_var.h> #include <machine/reg.h> #include <machine/segments.h> -#include <machine/smp.h> #include <machine/specialreg.h> #include <machine/vmparam.h> #include <sys/vmm_vm.h> @@ -83,7 +82,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic.h" #include "vlapic_priv.h" -#include "ept.h" #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" @@ -145,6 +143,22 @@ __FBSDID("$FreeBSD$"); (VM_ENTRY_INTO_SMM | \ VM_ENTRY_DEACTIVATE_DUAL_MONITOR) +/* + * Cover the EPT capabilities used by bhyve at present: + * - 
4-level page walks + * - write-back memory type + * - INVEPT operations (all types) + * - INVVPID operations (single-context only) + */ +#define EPT_CAPS_REQUIRED \ + (IA32_VMX_EPT_VPID_PWL4 | \ + IA32_VMX_EPT_VPID_TYPE_WB | \ + IA32_VMX_EPT_VPID_INVEPT | \ + IA32_VMX_EPT_VPID_INVEPT_SINGLE | \ + IA32_VMX_EPT_VPID_INVEPT_ALL | \ + IA32_VMX_EPT_VPID_INVVPID | \ + IA32_VMX_EPT_VPID_INVVPID_SINGLE) + #define HANDLED 1 #define UNHANDLED 0 @@ -448,7 +462,7 @@ vmx_restore(void) } static int -vmx_init(int ipinum) +vmx_init(void) { int error; uint64_t fixed0, fixed1; @@ -587,11 +601,16 @@ vmx_init(int ipinum) } } - /* Initialize EPT */ - error = ept_init(ipinum); - if (error) { - printf("vmx_init: ept initialization failed (%d)\n", error); - return (error); + /* + * Check for necessary EPT capabilities + * + * TODO: Properly handle when IA32_VMX_EPT_VPID_HW_AD is missing and the + * hypervisor intends to utilize dirty page tracking. + */ + uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + if ((ept_caps & EPT_CAPS_REQUIRED) != EPT_CAPS_REQUIRED) { + cmn_err(CE_WARN, "!Inadequate EPT capabilities: %lx", ept_caps); + return (EINVAL); } #ifdef __FreeBSD__ @@ -665,7 +684,7 @@ vmx_trigger_hostintr(int vector) } static void * -vmx_vminit(struct vm *vm, pmap_t pmap) +vmx_vminit(struct vm *vm) { uint16_t vpid[VM_MAXCPU]; int i, error, datasel; @@ -682,7 +701,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) } vmx->vm = vm; - vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4)); + vmx->eptp = vmspace_table_root(vm_get_vmspace(vm)); /* * Clean up EPTP-tagged guest physical and combined mappings @@ -693,7 +712,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) * * Combined mappings for this EP4TA are also invalidated for all VPIDs. */ - ept_invalidate_mappings(vmx->eptp); + hma_vmx_invept_allcpus((uintptr_t)vmx->eptp); vmx_msr_bitmap_initialize(vmx); @@ -805,8 +824,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmcs_write(VMCS_VPID, vpid[i]); if (guest_l1d_flush && !guest_l1d_flush_sw) { - vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( - (vm_offset_t)&msr_load_list[0])); + vmcs_write(VMCS_ENTRY_MSR_LOAD, + vtophys(&msr_load_list[0])); vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, nitems(msr_load_list)); vmcs_write(VMCS_EXIT_MSR_STORE, 0); @@ -860,9 +879,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->state[i].nextrip = ~0; vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; - - - vmx->ctx[i].pmap = pmap; } return (vmx); @@ -929,14 +945,16 @@ invvpid(uint64_t type, struct invvpid_desc desc) * Invalidate guest mappings identified by its vpid from the TLB. */ static __inline void -vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) +vmx_invvpid(struct vmx *vmx, int vcpu, int running) { struct vmxstate *vmxstate; struct invvpid_desc invvpid_desc; + struct vmspace *vms; vmxstate = &vmx->state[vcpu]; if (vmxstate->vpid == 0) return; + vms = vm_get_vmspace(vmx->vm); if (!running) { /* @@ -964,7 +982,7 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) * Note also that this will invalidate mappings tagged with 'vpid' * for "all" EP4TAs. 
*/ - if (pmap->pm_eptgen == vmx->eptgen[curcpu]) { + if (vmspace_table_gen(vms) == vmx->eptgen[curcpu]) { invvpid_desc._res1 = 0; invvpid_desc._res2 = 0; invvpid_desc.vpid = vmxstate->vpid; @@ -982,8 +1000,28 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running) } } +static __inline void +invept(uint64_t type, uint64_t eptp) +{ + int error; + struct invept_desc { + uint64_t eptp; + uint64_t _resv; + } desc = { eptp, 0 }; + + __asm __volatile("invept %[desc], %[type];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [desc] "m" (desc), [type] "r" (type) + : "memory"); + + if (error != 0) { + panic("invvpid error %d", error); + } +} + static void -vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) +vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) { struct vmxstate *vmxstate; @@ -1014,7 +1052,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); - vmx_invvpid(vmx, vcpu, pmap, 1); + vmx_invvpid(vmx, vcpu, 1); } /* @@ -1582,7 +1620,7 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) const uint64_t diff = crval ^ old; /* Flush the TLB if the paging or write-protect bits are changing */ if ((diff & CR0_PG) != 0 || (diff & CR0_WP) != 0) { - vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, 1); + vmx_invvpid(vmx, vcpu, 1); } vmcs_write(VMCS_GUEST_CR0, crval); @@ -2558,24 +2596,18 @@ vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) * clear NMI blocking. */ static __inline void -vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) +vmx_exit_handle_possible_nmi(struct vm_exit *vmexit) { - uint32_t intr_info; - - KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled")); + ASSERT(!interrupts_enabled()); - if (vmexit->u.vmx.exit_reason != EXIT_REASON_EXCEPTION) - return; - - intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); - KASSERT((intr_info & VMCS_INTR_VALID) != 0, - ("VM exit interruption info invalid: %x", intr_info)); + if (vmexit->u.vmx.exit_reason == EXIT_REASON_EXCEPTION) { + uint32_t intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); + ASSERT(intr_info & VMCS_INTR_VALID); - if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { - KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due " - "to NMI has invalid vector: %x", intr_info)); - VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler"); - vmm_call_trap(T_NMIFLT); + if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { + ASSERT3U(intr_info & 0xff, ==, IDT_NMI); + vmm_call_trap(T_NMIFLT); + } } } @@ -2647,7 +2679,7 @@ vmx_dr_leave_guest(struct vmxctx *vmxctx) } static int -vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) +vmx_run(void *arg, int vcpu, uint64_t rip) { int rc, handled, launched; struct vmx *vmx; @@ -2658,6 +2690,7 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) struct vlapic *vlapic; uint32_t exit_reason; bool tpr_shadow_active; + vm_client_t *vmc; vmx = arg; vm = vmx->vm; @@ -2665,14 +2698,12 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) vmxctx = &vmx->ctx[vcpu]; vlapic = vm_lapic(vm, vcpu); vmexit = vm_exitinfo(vm, vcpu); + vmc = vm_get_vmclient(vm, vcpu); launched = 0; tpr_shadow_active = vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) && !vmx_cap_en(vmx, VMX_CAP_APICV) && (vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0; - KASSERT(vmxctx->pmap == pmap, - ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap)); - 
vmx_msr_guest_enter(vmx, vcpu); vmcs_load(vmcs_pa); @@ -2691,9 +2722,10 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) vmcs_write(VMCS_HOST_CR3, rcr3()); vmcs_write(VMCS_GUEST_RIP, rip); - vmx_set_pcpu_defaults(vmx, vcpu, pmap); + vmx_set_pcpu_defaults(vmx, vcpu); do { enum event_inject_state inject_state; + uint64_t eptgen; KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch " "%lx/%lx", __func__, vmcs_guest_rip(), rip)); @@ -2721,8 +2753,8 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) * because interrupts are disabled. The pending interrupt will * be recognized as soon as the guest state is loaded. * - * The same reasoning applies to the IPI generated by - * pmap_invalidate_ept(). + * The same reasoning applies to the IPI generated by vmspace + * invalidation. */ disable_intr(); @@ -2804,10 +2836,28 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) vmx_tpr_shadow_enter(vlapic); } + /* + * Indicate activation of vmspace (EPT) table just prior to VMX + * entry, checking for the necessity of an invept invalidation. + */ + eptgen = vmc_table_enter(vmc); + if (vmx->eptgen[vcpu] != eptgen) { + /* + * VMspace generate does not match what was previously + * used for this CPU so all mappings associated with + * this EPTP must be invalidated. + */ + invept(1, vmx->eptp); + vmx->eptgen[vcpu] = eptgen; + } + vmx_run_trace(vmx, vcpu); vcpu_ustate_change(vm, vcpu, VU_RUN); vmx_dr_enter_guest(vmxctx); + + /* Perform VMX entry */ rc = vmx_enter_guest(vmxctx, vmx, launched); + vmx_dr_leave_guest(vmxctx); vcpu_ustate_change(vm, vcpu, VU_EMU_KERN); @@ -2823,16 +2873,18 @@ vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap) vmexit->inst_length = vmexit_instruction_length(); vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason(); vmexit->u.vmx.exit_qualification = vmcs_exit_qualification(); - /* Update 'nextrip' */ vmx->state[vcpu].nextrip = rip; if (rc == VMX_GUEST_VMEXIT) { - vmx_exit_handle_nmi(vmx, vcpu, vmexit); - enable_intr(); + vmx_exit_handle_possible_nmi(vmexit); + } + enable_intr(); + vmc_table_exit(vmc); + + if (rc == VMX_GUEST_VMEXIT) { handled = vmx_exit_process(vmx, vcpu, vmexit); } else { - enable_intr(); vmx_exit_inst_error(vmxctx, rc, vmexit); } DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, rip, @@ -3077,7 +3129,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. */ - vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running); + vmx_invvpid(vmx, vcpu, running); break; case VMCS_INVALID_ENCODING: error = EINVAL; @@ -3647,6 +3699,7 @@ struct vmm_ops vmm_ops_intel = { .init = vmx_init, .cleanup = vmx_cleanup, .resume = vmx_restore, + .vminit = vmx_vminit, .vmrun = vmx_run, .vmcleanup = vmx_vmcleanup, @@ -3656,8 +3709,6 @@ struct vmm_ops vmm_ops_intel = { .vmsetdesc = vmx_setdesc, .vmgetcap = vmx_getcap, .vmsetcap = vmx_setcap, - .vmspace_alloc = ept_vmspace_alloc, - .vmspace_free = ept_vmspace_free, .vlapic_init = vmx_vlapic_init, .vlapic_cleanup = vmx_vlapic_cleanup, diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h index c0d1fdd7fb..8ca7d993f7 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h @@ -39,7 +39,7 @@ * http://www.illumos.org/license/CDDL. * * Copyright 2018 Joyent, Inc. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #ifndef _VMX_H_ @@ -47,8 +47,6 @@ #include "vmcs.h" -struct pmap; - struct vmxctx { uint64_t guest_rdi; /* Guest state */ uint64_t guest_rsi; @@ -82,12 +80,6 @@ struct vmxctx { int host_tf; int inst_fail_status; - - /* - * The pmap needs to be deactivated in vmx_enter_guest() - * so keep a copy of the 'pmap' in each vmxctx. - */ - struct pmap *pmap; }; struct vmxcap { @@ -151,7 +143,7 @@ struct vmx { uint64_t eptp; enum vmx_caps vmx_caps; struct vm *vm; - long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ + uint64_t eptgen[MAXCPU]; /* cached vmspace generation */ }; CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s index aba844e8c3..60f761d652 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s @@ -151,35 +151,7 @@ ENTRY_NP(vmx_enter_guest) movq %rdi, %r12 /* vmxctx */ movq %rsi, %r13 /* vmx */ movl %edx, %r14d /* launch state */ - movq VMXCTX_PMAP(%rdi), %rbx - /* Activate guest pmap on this cpu. */ - leaq PM_ACTIVE(%rbx), %rdi - movl %gs:CPU_ID, %esi - call cpuset_atomic_add - movq %r12, %rdi - - /* - * If 'vmx->eptgen[curcpu]' is not identical to 'pmap->pm_eptgen' - * then we must invalidate all mappings associated with this EPTP. - */ - movq PM_EPTGEN(%rbx), %r10 - movl %gs:CPU_ID, %eax - cmpq %r10, VMX_EPTGEN(%r13, %rax, 8) - je guest_restore - - /* Refresh 'vmx->eptgen[curcpu]' */ - movq %r10, VMX_EPTGEN(%r13, %rax, 8) - - /* Setup the invept descriptor on the host stack */ - pushq $0x0 - pushq VMX_EPTP(%r13) - movl $0x1, %eax /* Single context invalidate */ - invept (%rsp), %rax - leaq 0x10(%rsp), %rsp - jbe invept_error /* Check invept instruction error */ - -guest_restore: /* Write the current %rsp into the VMCS to be restored on vmexit */ movl $VMCS_HOST_RSP, %eax vmwrite %rsp, %rax @@ -217,9 +189,6 @@ do_launch: vmwrite_error: movl $VMX_VMWRITE_ERROR, %eax jmp decode_inst_error -invept_error: - movl $VMX_INVEPT_ERROR, %eax - jmp decode_inst_error decode_inst_error: movl $VM_FAIL_VALID, %r11d jz inst_error @@ -227,13 +196,6 @@ decode_inst_error: inst_error: movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi) - movq VMXCTX_PMAP(%rdi), %rdi - leaq PM_ACTIVE(%rdi), %rdi - movl %gs:CPU_ID, %esi - movq %rax, %r12 - call cpuset_atomic_del - movq %r12, %rax - movq VMXSTK_RBX(%rsp), %rbx movq VMXSTK_R12(%rsp), %r12 movq VMXSTK_R13(%rsp), %r13 @@ -256,12 +218,6 @@ ALTENTRY(vmx_exit_guest) /* Save guest state that is not automatically saved in the vmcs. */ VMX_GUEST_SAVE - /* Deactivate guest pmap on this cpu. */ - movq VMXCTX_PMAP(%rdi), %rdi - leaq PM_ACTIVE(%rdi), %rdi - movl %gs:CPU_ID, %esi - call cpuset_atomic_del - /* * This will return to the caller of 'vmx_enter_guest()' with a return * value of VMX_GUEST_VMEXIT. @@ -287,12 +243,6 @@ ALTENTRY(vmx_exit_guest_flush_rsb) /* Save guest state that is not automatically saved in the vmcs. */ VMX_GUEST_SAVE - /* Deactivate guest pmap on this cpu. 
*/ - movq VMXCTX_PMAP(%rdi), %rdi - leaq PM_ACTIVE(%rdi), %rdi - movl %gs:CPU_ID, %esi - call cpuset_atomic_del - VMX_GUEST_FLUSH_SCRATCH /* diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 8784c94b48..a3773b54f0 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -254,7 +254,7 @@ vtd_wbflush(struct vtdmap *vtdmap) { if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0) - pmap_invalidate_cache(); + invalidate_cache_all(); if (VTD_CAP_RWBF(vtdmap->cap)) { vtdmap->gcr = VTD_GCR_WBF; diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c index 3630c36680..8fec022977 100644 --- a/usr/src/uts/i86pc/io/vmm/io/iommu.c +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -48,7 +48,6 @@ __FBSDID("$FreeBSD$"); #include <sys/pci.h> #include "vmm_util.h" -#include "vmm_mem.h" #include "iommu.h" static int iommu_avail; @@ -191,6 +190,12 @@ iommu_find_device(dev_info_t *dip, void *arg) return (DDI_WALK_CONTINUE); } + +static vm_paddr_t +vmm_mem_maxaddr(void) +{ + return (ptoa(physmax + 1)); +} #endif static void diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c index 8f3a276a93..96cc728a74 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.c +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/bus.h> #include <sys/pciio.h> -#include <sys/smp.h> #include <sys/sysctl.h> #include <dev/pci/pcivar.h> diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index 8198ebfce6..06ee46c8e2 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -52,13 +52,12 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/systm.h> -#include <sys/smp.h> +#include <sys/cpuset.h> #include <x86/specialreg.h> #include <x86/apicreg.h> #include <machine/clock.h> -#include <machine/smp.h> #include <machine/vmm.h> @@ -1602,7 +1601,7 @@ vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, } void -vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) +vlapic_post_intr(struct vlapic *vlapic, int hostcpu) { /* * Post an interrupt to the vcpu currently running on 'hostcpu'. @@ -1616,7 +1615,7 @@ vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) if (vlapic->ops.post_intr) (*vlapic->ops.post_intr)(vlapic, hostcpu); else - ipi_cpu(hostcpu, ipinum); + poke_cpu(hostcpu); } bool diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h index f490eff637..a46bae9d34 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h @@ -72,9 +72,9 @@ vcpu_notify_t vlapic_set_intr_ready(struct vlapic *vlapic, int vector, /* * Post an interrupt to the vcpu running on 'hostcpu'. This will use a * hardware assist if available (e.g. Posted Interrupt) or fall back to - * sending an 'ipinum' to interrupt the 'hostcpu'. + * sending an IPI to interrupt the 'hostcpu'. 
*/ -void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum); +void vlapic_post_intr(struct vlapic *vlapic, int hostcpu); void vlapic_fire_cmci(struct vlapic *vlapic); int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); diff --git a/usr/src/uts/i86pc/io/vmm/seg_vmm.c b/usr/src/uts/i86pc/io/vmm/seg_vmm.c index 23a8da3bc5..863b283418 100644 --- a/usr/src/uts/i86pc/io/vmm/seg_vmm.c +++ b/usr/src/uts/i86pc/io/vmm/seg_vmm.c @@ -46,8 +46,9 @@ typedef struct segvmm_data { krwlock_t svmd_lock; - vm_object_t svmd_obj; - uintptr_t svmd_obj_off; + vm_object_t *svmd_vmo; + vm_client_t *svmd_vmc; + uintptr_t svmd_off; uchar_t svmd_prot; size_t svmd_softlockcnt; } segvmm_data_t; @@ -104,9 +105,41 @@ static struct seg_ops segvmm_ops = { .inherit = seg_inherit_notsup }; +/* + * Unload a region from the HAT for A/D tracking. + */ +static void +segvmm_invalidate(void *arg, uintptr_t gpa, size_t sz) +{ + struct seg *seg = arg; + segvmm_data_t *svmd = seg->s_data; + + /* + * Invalidations are only necessary (and configured) for vmspace + * mappings. Direct vm_object mappings are not involved. + */ + ASSERT3P(svmd->svmd_vmo, ==, NULL); + + /* + * The region being invalidated may overlap with all, some, or none of + * this segment. We are only concerned about that overlap. + */ + const uintptr_t start = MAX(gpa, svmd->svmd_off); + const uintptr_t end = MIN(gpa + sz, svmd->svmd_off + seg->s_size); + if (start >= end) { + return; + } + ASSERT(start >= svmd->svmd_off && end <= svmd->svmd_off + seg->s_size); + ASSERT(start >= gpa && end <= gpa + sz); + const caddr_t unload_va = seg->s_base + (start - svmd->svmd_off); + const size_t unload_sz = (end - start); + ASSERT3U(unload_sz, <=, seg->s_size); + + hat_unload(seg->s_as->a_hat, unload_va, unload_sz, HAT_UNLOAD); +} /* - * Create a kernel/user-mapped segment. ->kaddr is the segkvmm mapping. + * Create a VMM-memory-backed segment. 
*/ int segvmm_create(struct seg **segpp, void *argsp) @@ -115,17 +148,35 @@ segvmm_create(struct seg **segpp, void *argsp) segvmm_crargs_t *cra = argsp; segvmm_data_t *data; + VERIFY((cra->vmo == NULL && cra->vmc != NULL) || + (cra->vmo != NULL && cra->vmc == NULL)); + VERIFY(cra->prot & PROT_USER); + VERIFY0(cra->offset & PAGEOFFSET); + data = kmem_zalloc(sizeof (*data), KM_SLEEP); rw_init(&data->svmd_lock, NULL, RW_DEFAULT, NULL); - data->svmd_obj = cra->obj; - data->svmd_obj_off = cra->offset; - data->svmd_prot = cra->prot; - - /* Grab a hold on the VM object for the duration of this seg mapping */ - vm_object_reference(data->svmd_obj); + data->svmd_off = cra->offset; + data->svmd_prot = cra->prot & ~PROT_USER; seg->s_ops = &segvmm_ops; seg->s_data = data; + + if (cra->vmo != NULL) { + data->svmd_vmo = cra->vmo; + /* Grab a hold on the VM object for the lifetime of segment */ + vm_object_reference(data->svmd_vmo); + } else { + int err; + + data->svmd_vmc = cra->vmc; + err = vmc_set_inval_cb(data->svmd_vmc, segvmm_invalidate, seg); + if (err != 0) { + seg->s_ops = NULL; + seg->s_data = NULL; + kmem_free(data, sizeof (*data)); + return (err); + } + } return (0); } @@ -139,15 +190,34 @@ segvmm_dup(struct seg *seg, struct seg *newseg) newsvmd = kmem_zalloc(sizeof (segvmm_data_t), KM_SLEEP); rw_init(&newsvmd->svmd_lock, NULL, RW_DEFAULT, NULL); - newsvmd->svmd_obj = svmd->svmd_obj; - newsvmd->svmd_obj_off = svmd->svmd_obj_off; + newsvmd->svmd_off = svmd->svmd_off; newsvmd->svmd_prot = svmd->svmd_prot; - /* Grab another hold for the duplicate segment */ - vm_object_reference(svmd->svmd_obj); - newseg->s_ops = seg->s_ops; newseg->s_data = newsvmd; + + if (svmd->svmd_vmo != NULL) { + /* Grab another hold for the duplicate segment */ + vm_object_reference(svmd->svmd_vmo); + newsvmd->svmd_vmo = svmd->svmd_vmo; + } else { + int err; + + newsvmd->svmd_vmc = vmc_clone(svmd->svmd_vmc); + /* + * The cloned client does not inherit the invalidation + * configuration, so attempt to set it here for the new segment. + */ + err = vmc_set_inval_cb(newsvmd->svmd_vmc, segvmm_invalidate, + newseg); + if (err != 0) { + newseg->s_ops = NULL; + newseg->s_data = NULL; + kmem_free(newsvmd, sizeof (*newsvmd)); + return (err); + } + } + return (0); } @@ -169,9 +239,6 @@ segvmm_unmap(struct seg *seg, caddr_t addr, size_t len) /* Unconditionally unload the entire segment range. 
*/ hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP); - /* Release the VM object hold this segment possessed */ - vm_object_deallocate(svmd->svmd_obj); - seg_free(seg); return (0); } @@ -179,35 +246,93 @@ segvmm_unmap(struct seg *seg, caddr_t addr, size_t len) static void segvmm_free(struct seg *seg) { - segvmm_data_t *data = seg->s_data; + segvmm_data_t *svmd = seg->s_data; - ASSERT(data != NULL); + ASSERT(svmd != NULL); - rw_destroy(&data->svmd_lock); - VERIFY(data->svmd_softlockcnt == 0); - kmem_free(data, sizeof (*data)); + if (svmd->svmd_vmo != NULL) { + /* Release the VM object hold this segment possessed */ + vm_object_release(svmd->svmd_vmo); + svmd->svmd_vmo = NULL; + } else { + vmc_destroy(svmd->svmd_vmc); + svmd->svmd_vmc = NULL; + } + rw_destroy(&svmd->svmd_lock); + VERIFY(svmd->svmd_softlockcnt == 0); + kmem_free(svmd, sizeof (*svmd)); seg->s_data = NULL; } static int -segvmm_fault_in(struct hat *hat, struct seg *seg, uintptr_t va, size_t len) +segvmm_fault_obj(struct hat *hat, struct seg *seg, uintptr_t va, size_t len) { segvmm_data_t *svmd = seg->s_data; const uintptr_t end = va + len; - const uintptr_t prot = svmd->svmd_prot; + const int prot = svmd->svmd_prot; + const int uprot = prot | PROT_USER; + vm_object_t *vmo = svmd->svmd_vmo; + + ASSERT(vmo != NULL); va &= PAGEMASK; - uintptr_t off = va - (uintptr_t)seg->s_base; + uintptr_t off = va - (uintptr_t)seg->s_base + svmd->svmd_off; do { pfn_t pfn; - pfn = vm_object_pfn(svmd->svmd_obj, off); + pfn = vm_object_pfn(vmo, off); if (pfn == PFN_INVALID) { - return (-1); + return (FC_NOMAP); + } + + /* Ignore any large-page possibilities for now */ + hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, uprot, HAT_LOAD); + va += PAGESIZE; + off += PAGESIZE; + } while (va < end); + + return (0); +} + +static int +segvmm_fault_space(struct hat *hat, struct seg *seg, uintptr_t va, size_t len) +{ + segvmm_data_t *svmd = seg->s_data; + const uintptr_t end = va + len; + const int prot = svmd->svmd_prot; + const int uprot = prot | PROT_USER; + vm_client_t *vmc = svmd->svmd_vmc; + + ASSERT(vmc != NULL); + + va &= PAGEMASK; + uintptr_t off = va - (uintptr_t)seg->s_base + svmd->svmd_off; + + do { + vm_page_t *vmp; + pfn_t pfn; + + vmp = vmc_hold(vmc, off, prot); + if (vmp == NULL) { + return (FC_NOMAP); } + pfn = vmp_get_pfn(vmp); + ASSERT3U(pfn, !=, PFN_INVALID); + /* Ignore any large-page possibilities for now */ - hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, prot, HAT_LOAD); + hat_devload(hat, (caddr_t)va, PAGESIZE, pfn, uprot, HAT_LOAD); + + if (vmp_release(vmp)) { + /* + * Region was unmapped from vmspace while we were + * loading it into this AS. Communicate it as if it + * were a fault. 
+ */ + hat_unload(hat, (caddr_t)va, PAGESIZE, HAT_UNLOAD); + return (FC_NOMAP); + } + va += PAGESIZE; off += PAGESIZE; } while (va < end); @@ -218,7 +343,7 @@ segvmm_fault_in(struct hat *hat, struct seg *seg, uintptr_t va, size_t len) /* ARGSUSED */ static faultcode_t segvmm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, - enum fault_type type, enum seg_rw tw) + enum fault_type type, enum seg_rw rw) { segvmm_data_t *svmd = seg->s_data; int err = 0; @@ -244,7 +369,11 @@ segvmm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, VERIFY(type == F_INVAL || type == F_SOFTLOCK); rw_enter(&svmd->svmd_lock, RW_WRITER); - err = segvmm_fault_in(hat, seg, (uintptr_t)addr, len); + if (svmd->svmd_vmo != NULL) { + err = segvmm_fault_obj(hat, seg, (uintptr_t)addr, len); + } else { + err = segvmm_fault_space(hat, seg, (uintptr_t)addr, len); + } if (type == F_SOFTLOCK && err == 0) { size_t nval = svmd->svmd_softlockcnt + btop(len); @@ -426,8 +555,8 @@ segvmm_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) { segvmm_data_t *svmd = seg->s_data; - memidp->val[0] = (uintptr_t)svmd->svmd_obj; - memidp->val[1] = (uintptr_t)(addr - seg->s_base) + svmd->svmd_obj_off; + memidp->val[0] = (uintptr_t)svmd->svmd_vmo; + memidp->val[1] = (uintptr_t)(addr - seg->s_base) + svmd->svmd_off; return (0); } diff --git a/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h index a4f72f816e..5ba0dad5c3 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h +++ b/usr/src/uts/i86pc/io/vmm/sys/seg_vmm.h @@ -21,8 +21,9 @@ typedef struct segvmm_crargs { uchar_t prot; /* protection */ - vm_object_t obj; uintptr_t offset; + vm_object_t *vmo; + vm_client_t *vmc; } segvmm_crargs_t; int segvmm_create(struct seg **, void *); diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_gpt.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_gpt.h index 554f51bbb6..a425fb53ec 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_gpt.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_gpt.h @@ -19,20 +19,6 @@ #include <sys/types.h> -typedef struct vmm_pt_ops vmm_pt_ops_t; -struct vmm_pt_ops { - void * (*vpo_init)(uint64_t *); - void (*vpo_free)(void *); - uint64_t (*vpo_wired_cnt)(void *); - int (*vpo_is_wired)(void *, uint64_t, uint_t *); - int (*vpo_map)(void *, uint64_t, pfn_t, uint_t, uint_t, - uint8_t); - uint64_t (*vpo_unmap)(void *, uint64_t, uint64_t); -}; - -extern struct vmm_pt_ops ept_ops; -extern struct vmm_pt_ops rvi_ops; - /* * Constants for the nodes in the GPT radix tree. Note * that, in accordance with hardware page table descriptions, @@ -64,6 +50,8 @@ enum vmm_gpt_node_level { * vpeo_reset_accessed: Resets the accessed bit on the given PTE. If the * second argument is `true`, the bit will be set, otherwise it will be * cleared. Returns non-zero if the previous value of the bit was set. + * vpeo_get_pmtp: Generate a properly formatted PML4 (EPTP/nCR3), given the root + * PFN for the GPT. 
*/ typedef struct vmm_pte_ops vmm_pte_ops_t; struct vmm_pte_ops { @@ -74,30 +62,29 @@ struct vmm_pte_ops { uint_t (*vpeo_pte_prot)(uint64_t); uint_t (*vpeo_reset_dirty)(uint64_t *, bool); uint_t (*vpeo_reset_accessed)(uint64_t *, bool); + uint64_t (*vpeo_get_pmtp)(pfn_t); }; +extern vmm_pte_ops_t ept_pte_ops; +extern vmm_pte_ops_t rvi_pte_ops; + struct vmm_gpt; typedef struct vmm_gpt vmm_gpt_t; -vmm_gpt_t *ept_create(void); -vmm_gpt_t *rvi_create(void); - vmm_gpt_t *vmm_gpt_alloc(vmm_pte_ops_t *); void vmm_gpt_free(vmm_gpt_t *); -void *vmm_gpt_root_kaddr(vmm_gpt_t *); -pfn_t vmm_gpt_root_pfn(vmm_gpt_t *); uint64_t *vmm_gpt_lookup(vmm_gpt_t *, uint64_t); void vmm_gpt_walk(vmm_gpt_t *, uint64_t, uint64_t **, enum vmm_gpt_node_level); -void vmm_gpt_populate_entry(vmm_gpt_t *, uint64_t); void vmm_gpt_populate_region(vmm_gpt_t *, uint64_t, uint64_t); +bool vmm_gpt_map_at(vmm_gpt_t *, uint64_t *, pfn_t, uint_t, uint8_t); void vmm_gpt_vacate_region(vmm_gpt_t *, uint64_t, uint64_t); bool vmm_gpt_map(vmm_gpt_t *, uint64_t, pfn_t, uint_t, uint8_t); bool vmm_gpt_unmap(vmm_gpt_t *, uint64_t); size_t vmm_gpt_unmap_region(vmm_gpt_t *, uint64_t, uint64_t); +uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *); -bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t, uint_t *); -size_t vmm_gpt_mapped_count(vmm_gpt_t *); +bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t *, pfn_t *, uint_t *); uint_t vmm_gpt_reset_accessed(vmm_gpt_t *, uint64_t *, bool); uint_t vmm_gpt_reset_dirty(vmm_gpt_t *, uint64_t *, bool); diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index 3a50dafd6d..5f0ba4b875 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -48,6 +48,7 @@ #include <sys/sdt.h> #include <x86/segments.h> +#include <sys/vmm.h> SDT_PROVIDER_DECLARE(vmm); @@ -61,16 +62,15 @@ struct vhpet; struct vioapic; struct vlapic; struct vmspace; +struct vm_client; struct vm_object; struct vm_guest_paging; -struct pmap; -typedef int (*vmm_init_func_t)(int ipinum); +typedef int (*vmm_init_func_t)(void); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); -typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); -typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip, - struct pmap *pmap); +typedef void * (*vmi_init_func_t)(struct vm *vm); +typedef int (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); @@ -82,8 +82,6 @@ typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num, const struct seg_desc *desc); typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); -typedef struct vmspace *(*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); -typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); typedef struct vlapic *(*vmi_vlapic_init)(void *vmi, int vcpu); typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); typedef void (*vmi_savectx)(void *vmi, int vcpu); @@ -103,8 +101,6 @@ struct vmm_ops { vmi_set_desc_t vmsetdesc; vmi_get_cap_t vmgetcap; vmi_set_cap_t vmsetcap; - vmi_vmspace_alloc vmspace_alloc; - vmi_vmspace_free vmspace_free; vmi_vlapic_init vlapic_init; vmi_vlapic_cleanup vlapic_cleanup; @@ -148,9 +144,6 @@ int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, struct vm_object 
**objptr); vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); -void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len, - int prot, void **cookie); -void vm_gpa_release(void *cookie); bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa); int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); @@ -261,6 +254,7 @@ void *vcpu_stats(struct vm *vm, int vcpu); void vcpu_notify_event(struct vm *vm, int vcpuid); void vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t); struct vmspace *vm_get_vmspace(struct vm *vm); +struct vm_client *vm_get_vmclient(struct vm *vm, int vcpuid); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); struct vpmtmr *vm_pmtmr(struct vm *vm); @@ -312,6 +306,7 @@ enum vm_reg_name vm_segment_name(int seg_encoding); struct vm_copyinfo { uint64_t gpa; size_t len; + int prot; void *hva; void *cookie; }; @@ -332,9 +327,9 @@ struct vm_copyinfo { */ int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, - int num_copyinfo, int *is_fault); + uint_t num_copyinfo, int *is_fault); void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, - int num_copyinfo); + uint_t num_copyinfo); void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, size_t len); void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h index 76d5fec8b7..a01b909ff6 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_vm.h @@ -18,40 +18,64 @@ #ifndef _VMM_VM_H #define _VMM_VM_H -#include <sys/list.h> #include <sys/types.h> -#include <vm/hat_pte.h> -#include <machine/pmap.h> -/* - * vm_map_wire and vm_map_unwire option flags - */ -#define VM_MAP_WIRE_SYSTEM 0 /* wiring in a kernel map */ -#define VM_MAP_WIRE_USER 1 /* wiring in a user map */ - -#define VM_MAP_WIRE_NOHOLES 0 /* region must not have holes */ -#define VM_MAP_WIRE_HOLESOK 2 /* region may have holes */ - -#define VM_MAP_WIRE_WRITE 4 /* Validate writable. */ - -/* - * The following "find_space" options are supported by vm_map_find(). - * - * For VMFS_ALIGNED_SPACE, the desired alignment is specified to - * the macro argument as log base 2 of the desired alignment. 
- */ -#define VMFS_NO_SPACE 0 /* don't find; use the given range */ -#define VMFS_ANY_SPACE 1 /* find range with any alignment */ -#define VMFS_OPTIMAL_SPACE 2 /* find range with optimal alignment */ -#define VMFS_SUPER_SPACE 3 /* find superpage-aligned range */ -#define VMFS_ALIGNED_SPACE(x) ((x) << 8) /* find range with fixed alignment */ +typedef struct vmspace vmspace_t; +typedef struct vm_client vm_client_t; +typedef struct vm_page vm_page_t; +typedef struct vm_object vm_object_t; + +struct vmm_pte_ops; + +typedef void (*vmc_inval_cb_t)(void *, uintptr_t, size_t); + +/* vmspace_t operations */ +vmspace_t *vmspace_alloc(size_t, struct vmm_pte_ops *, bool); +void vmspace_destroy(vmspace_t *); +int vmspace_map(vmspace_t *, vm_object_t *, uintptr_t, uintptr_t, size_t, + uint8_t); +int vmspace_unmap(vmspace_t *, uintptr_t, uintptr_t); +int vmspace_populate(vmspace_t *, uintptr_t, uintptr_t); +vm_client_t *vmspace_client_alloc(vmspace_t *); +uint64_t vmspace_table_root(vmspace_t *); +uint64_t vmspace_table_gen(vmspace_t *); +uint64_t vmspace_resident_count(vmspace_t *); + +/* vm_client_t operations */ +vm_page_t *vmc_hold(vm_client_t *, uintptr_t, int); +uint64_t vmc_table_enter(vm_client_t *); +void vmc_table_exit(vm_client_t *); +int vmc_fault(vm_client_t *, uintptr_t, int); +vm_client_t *vmc_clone(vm_client_t *); +int vmc_set_inval_cb(vm_client_t *, vmc_inval_cb_t, void *); +void vmc_destroy(vm_client_t *); + +/* vm_object_t operations */ +vm_object_t *vm_object_mem_allocate(size_t, bool); +vm_object_t *vmm_mmio_alloc(vmspace_t *, uintptr_t, size_t, uintptr_t); +void vm_object_reference(vm_object_t *); +void vm_object_release(vm_object_t *); +pfn_t vm_object_pfn(vm_object_t *, uintptr_t); + +/* vm_page_t operations */ +const void *vmp_get_readable(const vm_page_t *); +void *vmp_get_writable(const vm_page_t *); +pfn_t vmp_get_pfn(const vm_page_t *); +void vmp_chain(vm_page_t *, vm_page_t *); +vm_page_t *vmp_next(const vm_page_t *); +bool vmp_release(vm_page_t *); +bool vmp_release_chain(vm_page_t *); + +/* seg_vmm mapping */ +struct vm; +int vm_segmap_obj(struct vm *, int, off_t, off_t, struct as *, caddr_t *, + uint_t, uint_t, uint_t); +int vm_segmap_space(struct vm *, off_t, struct as *, caddr_t *, off_t, uint_t, + uint_t, uint_t); -/* - * vm_fault option flags - */ -#define VM_FAULT_NORMAL 0 /* Nothing special */ -#define VM_FAULT_WIRE 1 /* Wire the mapped page */ -#define VM_FAULT_DIRTY 2 /* Dirty the page; use w/PROT_COPY */ +/* Glue functions */ +vm_paddr_t vtophys(void *); +void invalidate_cache_all(void); /* * The VM_MAXUSER_ADDRESS determines the upper size limit of a vmspace. @@ -61,131 +85,4 @@ */ #define VM_MAXUSER_ADDRESS 0x00003ffffffffffful -/* - * Type definitions used in the hypervisor. - */ -typedef uchar_t vm_prot_t; - -/* New type declarations. 
*/ -struct vm; -struct vmspace; -struct pmap; - -struct vm_object; -typedef struct vm_object *vm_object_t; - -struct vmm_pt_ops; - -struct vm_page; -typedef struct vm_page *vm_page_t; - -enum obj_type { OBJT_DEFAULT, OBJT_SWAP, OBJT_VNODE, OBJT_DEVICE, OBJT_PHYS, - OBJT_DEAD, OBJT_SG, OBJT_MGTDEVICE }; -typedef uchar_t objtype_t; - -union vm_map_object; -typedef union vm_map_object vm_map_object_t; - -struct vm_map_entry; -typedef struct vm_map_entry *vm_map_entry_t; - -struct vm_map; -typedef struct vm_map *vm_map_t; - -pmap_t vmspace_pmap(struct vmspace *); - -int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, - vm_offset_t, int, vm_prot_t, vm_prot_t, int); -int vm_map_remove(vm_map_t, vm_offset_t, vm_offset_t); -int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); - -long vmspace_resident_count(struct vmspace *vmspace); - -void pmap_invalidate_cache(void); -void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num); -int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype); -long pmap_wired_count(pmap_t pmap); - -struct vm_map { - struct vmspace *vmm_space; -}; - -struct pmap { - void *pm_pml4; - cpuset_t pm_active; - long pm_eptgen; - - /* Implementation private */ - enum pmap_type pm_type; - struct vmm_pt_ops *pm_ops; - void *pm_impl; -}; - -struct vmspace { - struct vm_map vm_map; - - /* Implementation private */ - kmutex_t vms_lock; - boolean_t vms_map_changing; - struct pmap vms_pmap; - uintptr_t vms_size; /* fixed after creation */ - - list_t vms_maplist; -}; - -typedef pfn_t (*vm_pager_fn_t)(vm_object_t, uintptr_t, pfn_t *, uint_t *); - -struct vm_object { - uint_t vmo_refcnt; /* manipulated with atomic ops */ - - /* This group of fields are fixed at creation time */ - objtype_t vmo_type; - size_t vmo_size; - vm_pager_fn_t vmo_pager; - void *vmo_data; - - kmutex_t vmo_lock; /* protects fields below */ - vm_memattr_t vmo_attr; -}; - -struct vm_page { - kmutex_t vmp_lock; - pfn_t vmp_pfn; - struct vm_object *vmp_obj_held; -}; - -/* illumos-specific functions for setup and operation */ -int vm_segmap_obj(vm_object_t, off_t, size_t, struct as *, caddr_t *, uint_t, - uint_t, uint_t); -int vm_segmap_space(struct vmspace *, off_t, struct as *, caddr_t *, off_t, - uint_t, uint_t, uint_t); -void *vmspace_find_kva(struct vmspace *, uintptr_t, size_t); - -typedef int (*pmap_pinit_t)(struct pmap *pmap); - -struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t, pmap_pinit_t); -void vmspace_free(struct vmspace *); - -int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); -int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, - vm_prot_t prot, vm_page_t *ma, int max_count); - -struct vm_object *vm_object_allocate(objtype_t, vm_pindex_t, bool); -void vm_object_deallocate(vm_object_t); -void vm_object_reference(vm_object_t); -int vm_object_set_memattr(vm_object_t, vm_memattr_t); -pfn_t vm_object_pfn(vm_object_t, uintptr_t); - -#define VM_OBJECT_WLOCK(vmo) mutex_enter(&(vmo)->vmo_lock) -#define VM_OBJECT_WUNLOCK(vmo) mutex_exit(&(vmo)->vmo_lock) - -#define PQ_ACTIVE 1 - -void vm_page_unwire(vm_page_t, uint8_t); - -#define VM_PAGE_TO_PHYS(page) (mmu_ptob((uintptr_t)((page)->vmp_pfn))) - -vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t, - vm_ooffset_t, void *); - #endif /* _VMM_VM_H */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index f95e415e40..998e483ecf 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c 
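The vmspace/vm_client/vm_page declarations above replace the FreeBSD-derived pmap and vm_map compatibility shims deleted here. As a rough sketch of the intended access pattern (illustrative only, not part of this change; it assumes the value being read does not straddle a page boundary):

#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/vmm_vm.h>

/*
 * Illustrative only: read a 32-bit value at guest-physical address 'gpa'
 * through a vm_client_t.
 */
static int
read_guest_u32(vm_client_t *vmc, uintptr_t gpa, uint32_t *valp)
{
	vm_page_t *vmp;
	const uint8_t *va;

	/* Hold the backing page; this may fault it into the vmspace */
	vmp = vmc_hold(vmc, gpa & PAGEMASK, PROT_READ);
	if (vmp == NULL)
		return (EFAULT);

	/* Access the page mapping only while the hold is in place */
	va = vmp_get_readable(vmp);
	*valp = *(const uint32_t *)(va + (gpa & PAGEOFFSET));

	/* Dropping the hold lets the mapping be invalidated again */
	(void) vmp_release(vmp);
	return (0);
}
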
@@ -58,12 +58,10 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/sched.h> -#include <sys/smp.h> #include <sys/systm.h> #include <sys/sunddi.h> #include <machine/pcb.h> -#include <machine/smp.h> #include <machine/md_var.h> #include <x86/psl.h> #include <x86/apicreg.h> @@ -74,11 +72,11 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> #include <sys/vmm_instruction_emul.h> #include <sys/vmm_vm.h> +#include <sys/vmm_gpt.h> #include "vmm_ioport.h" #include "vmm_ktr.h" #include "vmm_host.h" -#include "vmm_mem.h" #include "vmm_util.h" #include "vatpic.h" #include "vatpit.h" @@ -129,6 +127,7 @@ struct vcpu { struct vm_exit exitinfo; /* (x) exit reason and collateral */ uint64_t nextrip; /* (x) next instruction to execute */ struct vie *vie_ctx; /* (x) instruction emulation context */ + vm_client_t *vmclient; /* (a) VM-system client */ uint64_t tsc_offset; /* (x) offset from host TSC */ enum vcpu_ustate ustate; /* (i) microstate for the vcpu */ @@ -145,7 +144,7 @@ struct vcpu { struct mem_seg { size_t len; bool sysmem; - struct vm_object *object; + vm_object_t *object; }; #define VM_MAX_MEMSEGS 4 @@ -219,8 +218,6 @@ static struct vmm_ops vmm_ops_null = { .vmsetdesc = (vmi_set_desc_t)nullop_panic, .vmgetcap = (vmi_get_cap_t)nullop_panic, .vmsetcap = (vmi_set_cap_t)nullop_panic, - .vmspace_alloc = (vmi_vmspace_alloc)nullop_panic, - .vmspace_free = (vmi_vmspace_free)nullop_panic, .vlapic_init = (vmi_vlapic_init)nullop_panic, .vlapic_cleanup = (vmi_vlapic_cleanup)nullop_panic, .vmsavectx = (vmi_savectx)nullop_panic, @@ -228,17 +225,15 @@ static struct vmm_ops vmm_ops_null = { }; static struct vmm_ops *ops = &vmm_ops_null; +static vmm_pte_ops_t *pte_ops = NULL; -#define VMM_INIT(num) ((*ops->init)(num)) +#define VMM_INIT() ((*ops->init)()) #define VMM_CLEANUP() ((*ops->cleanup)()) #define VMM_RESUME() ((*ops->resume)()) -#define VMINIT(vm, pmap) ((*ops->vminit)(vm, pmap)) -#define VMRUN(vmi, vcpu, rip, pmap) \ - ((*ops->vmrun)(vmi, vcpu, rip, pmap)) +#define VMINIT(vm) ((*ops->vminit)(vm)) +#define VMRUN(vmi, vcpu, rip) ((*ops->vmrun)(vmi, vcpu, rip)) #define VMCLEANUP(vmi) ((*ops->vmcleanup)(vmi)) -#define VMSPACE_ALLOC(min, max) ((*ops->vmspace_alloc)(min, max)) -#define VMSPACE_FREE(vmspace) ((*ops->vmspace_free)(vmspace)) #define VMGETREG(vmi, vcpu, num, rv) ((*ops->vmgetreg)(vmi, vcpu, num, rv)) #define VMSETREG(vmi, vcpu, num, val) ((*ops->vmsetreg)(vmi, vcpu, num, val)) @@ -265,9 +260,6 @@ SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, */ static int halt_detection_enabled = 1; -/* IPI vector used for vcpu notifications */ -static int vmm_ipinum; - /* Trap into hypervisor on all guest exceptions and reflect them back */ static int trace_guest_exceptions; @@ -319,6 +311,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy) fpu_save_area_free(vcpu->guestfpu); vie_free(vcpu->vie_ctx); vcpu->vie_ctx = NULL; + vmc_destroy(vcpu->vmclient); + vcpu->vmclient = NULL; } } @@ -397,25 +391,19 @@ vm_vie_ctx(struct vm *vm, int cpuid) static int vmm_init(void) { - int error; - vmm_host_state_init(); - /* We use cpu_poke() for IPIs */ - vmm_ipinum = 0; - - error = vmm_mem_init(); - if (error) - return (error); - - if (vmm_is_intel()) + if (vmm_is_intel()) { ops = &vmm_ops_intel; - else if (vmm_is_svm()) + pte_ops = &ept_pte_ops; + } else if (vmm_is_svm()) { ops = &vmm_ops_amd; - else + pte_ops = &rvi_pte_ops; + } else { return (ENXIO); + } - return (VMM_INIT(vmm_ipinum)); + return (VMM_INIT()); } int @@ -453,7 +441,7 @@ vm_init(struct vm *vm, bool 
create) { int i; - vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = VMINIT(vm); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); @@ -492,6 +480,12 @@ vm_init(struct vm *vm, bool create) uint_t cores_per_package = 1; uint_t threads_per_core = 1; +/* + * Debugging tunable to enable dirty-page-tracking. + * (Remains off by default for now) + */ +bool gpt_track_dirty = false; + int vm_create(const char *name, uint64_t flags, struct vm **retvm) { @@ -508,14 +502,18 @@ vm_create(const char *name, uint64_t flags, struct vm **retvm) /* Name validation has already occurred */ VERIFY3U(strnlen(name, VM_MAX_NAMELEN), <, VM_MAX_NAMELEN); - vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); + vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, gpt_track_dirty); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof (struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); + vm->vmspace = vmspace; vm->mem_transient = (flags & VCF_RESERVOIR_MEM) == 0; + for (uint_t i = 0; i < VM_MAXCPU; i++) { + vm->vcpu[i].vmclient = vmspace_client_alloc(vmspace); + } vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -621,7 +619,7 @@ vm_cleanup(struct vm *vm, bool destroy) for (i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); - VMSPACE_FREE(vm->vmspace); + vmspace_destroy(vm->vmspace); vm->vmspace = NULL; } } @@ -681,7 +679,7 @@ vm_name(struct vm *vm) int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { - vm_object_t obj; + vm_object_t *obj; if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) return (ENOMEM); @@ -692,7 +690,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) { - return (vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len)); + return (vmspace_unmap(vm->vmspace, gpa, gpa + len)); } /* @@ -730,7 +728,7 @@ int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { struct mem_seg *seg; - vm_object_t obj; + vm_object_t *obj; if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); @@ -746,8 +744,7 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) return (EINVAL); } - obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT, - vm->mem_transient); + obj = vm_object_mem_allocate(len, vm->mem_transient); if (obj == NULL) return (ENOMEM); @@ -759,7 +756,7 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, - vm_object_t *objptr) + vm_object_t **objptr) { struct mem_seg *seg; @@ -786,7 +783,7 @@ vm_free_memseg(struct vm *vm, int ident) seg = &vm->mem_segs[ident]; if (seg->object != NULL) { - vm_object_deallocate(seg->object); + vm_object_release(seg->object); bzero(seg, sizeof (struct mem_seg)); } } @@ -832,18 +829,16 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (map == NULL) return (ENOSPC); - error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, - len, 0, VMFS_NO_SPACE, prot, prot, 0); + error = vmspace_map(vm->vmspace, seg->object, first, gpa, len, prot); if (error != 0) return (EFAULT); vm_object_reference(seg->object); if ((flags & VM_MEMMAP_F_WIRED) != 0) { - error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, - VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); + error = vmspace_populate(vm->vmspace, gpa, gpa + len); if (error != 0) { - vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); + vmspace_unmap(vm->vmspace, gpa, gpa + 
len); return (EFAULT); } } @@ -917,9 +912,9 @@ vm_free_memmap(struct vm *vm, int ident) mm = &vm->mem_maps[ident]; if (mm->len) { - error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, + error = vmspace_unmap(vm->vmspace, mm->gpa, mm->gpa + mm->len); - KASSERT(error == 0, ("%s: vm_map_remove error %d", + KASSERT(error == 0, ("%s: vmspace_unmap error %d", __func__, error)); bzero(mm, sizeof (struct mem_map)); } @@ -961,12 +956,14 @@ vm_iommu_modify(struct vm *vm, bool map) struct mem_map *mm; #ifdef __FreeBSD__ void *vp, *cookie, *host_domain; -#else - void *vp, *cookie, *host_domain __unused; #endif + vm_client_t *vmc; sz = PAGE_SIZE; +#ifdef __FreeBSD__ host_domain = iommu_host_domain(); +#endif + vmc = vmspace_client_alloc(vm->vmspace); for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; @@ -991,14 +988,13 @@ vm_iommu_modify(struct vm *vm, bool map) gpa = mm->gpa; while (gpa < mm->gpa + mm->len) { - vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, PROT_WRITE, - &cookie); - KASSERT(vp != NULL, ("vm(%s) could not map gpa %lx", - vm_name(vm), gpa)); + vm_page_t *vmp; - vm_gpa_release(cookie); + vmp = vmc_hold(vmc, gpa, PROT_WRITE); + ASSERT(vmp != NULL); + hpa = ((uintptr_t)vmp_get_pfn(vmp) << PAGESHIFT); + vmp_release(vmp); - hpa = DMAP_TO_PHYS((uintptr_t)vp); if (map) { iommu_create_mapping(vm->iommu, gpa, hpa, sz); #ifdef __FreeBSD__ @@ -1014,6 +1010,7 @@ vm_iommu_modify(struct vm *vm, bool map) gpa += PAGE_SIZE; } } + vmc_destroy(vmc); /* * Invalidate the cached translations associated with the domain @@ -1029,9 +1026,6 @@ vm_iommu_modify(struct vm *vm, bool map) #endif } -#define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) -#define vm_iommu_map(vm) vm_iommu_modify((vm), true) - int vm_unassign_pptdev(struct vm *vm, int pptfd) { @@ -1042,7 +1036,7 @@ vm_unassign_pptdev(struct vm *vm, int pptfd) return (error); if (ppt_assigned_devices(vm) == 0) - vm_iommu_unmap(vm); + vm_iommu_modify(vm, false); return (0); } @@ -1061,71 +1055,13 @@ vm_assign_pptdev(struct vm *vm, int pptfd) vm->iommu = iommu_create_domain(maxaddr); if (vm->iommu == NULL) return (ENXIO); - vm_iommu_map(vm); + vm_iommu_modify(vm, true); } error = ppt_assign_device(vm, pptfd); return (error); } -void * -vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, - void **cookie) -{ - int i, count, pageoff; - struct mem_map *mm; - vm_page_t m; -#ifdef INVARIANTS - /* - * All vcpus are frozen by ioctls that modify the memory map - * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is - * guaranteed if at least one vcpu is in the VCPU_FROZEN state. 
- */ - int state; - KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d", - __func__, vcpuid)); - for (i = 0; i < vm->maxcpus; i++) { - if (vcpuid != -1 && vcpuid != i) - continue; - state = vcpu_get_state(vm, i, NULL); - KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", - __func__, state)); - } -#endif - pageoff = gpa & PAGE_MASK; - if (len > PAGE_SIZE - pageoff) - panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); - - count = 0; - for (i = 0; i < VM_MAX_MEMMAPS; i++) { - mm = &vm->mem_maps[i]; - if (mm->len == 0) { - continue; - } - if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) { - count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, - trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); - break; - } - } - - if (count == 1) { - *cookie = m; - return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); - } else { - *cookie = NULL; - return (NULL); - } -} - -void -vm_gpa_release(void *cookie) -{ - vm_page_t m = cookie; - - vm_page_unwire(m, PQ_ACTIVE); -} - int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) { @@ -1478,13 +1414,10 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled) static int vm_handle_paging(struct vm *vm, int vcpuid) { + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + vm_client_t *vmc = vcpu->vmclient; + struct vm_exit *vme = &vcpu->exitinfo; int rv, ftype; - struct vm_map *map; - struct vcpu *vcpu; - struct vm_exit *vme; - - vcpu = &vm->vcpu[vcpuid]; - vme = &vcpu->exitinfo; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", __func__, vme->inst_length)); @@ -1494,26 +1427,13 @@ vm_handle_paging(struct vm *vm, int vcpuid) ftype == PROT_WRITE || ftype == PROT_EXEC, ("vm_handle_paging: invalid fault_type %d", ftype)); - if (ftype == PROT_READ || ftype == PROT_WRITE) { - rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), - vme->u.paging.gpa, ftype); - if (rv == 0) { - VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %lx", - ftype == PROT_READ ? 
"accessed" : "dirty", - vme->u.paging.gpa); - goto done; - } - } - - map = &vm->vmspace->vm_map; - rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); + rv = vmc_fault(vmc, vme->u.paging.gpa, ftype); VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != 0) return (EFAULT); -done: return (0); } @@ -2221,7 +2141,6 @@ vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry) struct vcpu *vcpu; struct vm_exit *vme; bool intr_disabled; - pmap_t pmap; vm_thread_ctx_t vtc; int affinity_type = CPU_CURRENT; @@ -2230,7 +2149,6 @@ vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry) if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; @@ -2266,9 +2184,6 @@ restart: affinity_type = CPU_CURRENT; critical_enter(); - KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), - ("vm_run: absurd pm_active")); - /* Force a trip through update_sregs to reload %fs/%gs and friends */ PCB_SET_UPDATE_SEGS(&ttolwp(curthread)->lwp_pcb); @@ -2279,7 +2194,7 @@ restart: vtc.vtc_status |= VTCS_FPU_CTX_CRITICAL; vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap); + error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); /* @@ -3355,10 +3270,9 @@ vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t ntype) KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { if (ntype == VCPU_NOTIFY_APIC) { - vlapic_post_intr(vcpu->vlapic, hostcpu, - vmm_ipinum); + vlapic_post_intr(vcpu->vlapic, hostcpu); } else { - ipi_cpu(hostcpu, vmm_ipinum); + poke_cpu(hostcpu); } } else { /* @@ -3427,6 +3341,12 @@ vm_get_vmspace(struct vm *vm) return (vm->vmspace); } +struct vm_client * +vm_get_vmclient(struct vm *vm, int vcpuid) +{ + return (vm->vcpu[vcpuid].vmclient); +} + int vm_apicid2vcpuid(struct vm *vm, int apicid) { @@ -3481,13 +3401,12 @@ vm_segment_name(int seg) void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, - int num_copyinfo) + uint_t num_copyinfo) { - int idx; - - for (idx = 0; idx < num_copyinfo; idx++) { - if (copyinfo[idx].cookie != NULL) - vm_gpa_release(copyinfo[idx].cookie); + for (uint_t idx = 0; idx < num_copyinfo; idx++) { + if (copyinfo[idx].cookie != NULL) { + vmp_release((vm_page_t *)copyinfo[idx].cookie); + } } bzero(copyinfo, num_copyinfo * sizeof (struct vm_copyinfo)); } @@ -3495,24 +3414,26 @@ vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, - int num_copyinfo, int *fault) + uint_t num_copyinfo, int *fault) { - int error, idx, nused; + uint_t idx, nused; size_t n, off, remaining; - void *hva, *cookie; - uint64_t gpa; + vm_client_t *vmc = vm_get_vmclient(vm, vcpuid); bzero(copyinfo, sizeof (struct vm_copyinfo) * num_copyinfo); nused = 0; remaining = len; while (remaining > 0) { + uint64_t gpa; + int error; + KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); if (error || *fault) return (error); - off = gpa & PAGE_MASK; - n = min(remaining, PAGE_SIZE - off); + off = gpa & PAGEOFFSET; + n = min(remaining, PAGESIZE - off); copyinfo[nused].gpa = gpa; copyinfo[nused].len = n; remaining -= n; @@ -3521,12 +3442,21 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct 
vm_guest_paging *paging, } for (idx = 0; idx < nused; idx++) { - hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa, - copyinfo[idx].len, prot, &cookie); - if (hva == NULL) + vm_page_t *vmp; + caddr_t hva; + + vmp = vmc_hold(vmc, copyinfo[idx].gpa & PAGEMASK, prot); + if (vmp == NULL) { break; - copyinfo[idx].hva = hva; - copyinfo[idx].cookie = cookie; + } + if ((prot & PROT_WRITE) != 0) { + hva = (caddr_t)vmp_get_writable(vmp); + } else { + hva = (caddr_t)vmp_get_readable(vmp); + } + copyinfo[idx].hva = hva + (copyinfo[idx].gpa & PAGEOFFSET); + copyinfo[idx].cookie = vmp; + copyinfo[idx].prot = prot; } if (idx != nused) { @@ -3548,6 +3478,8 @@ vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, dst = kaddr; idx = 0; while (len > 0) { + ASSERT(copyinfo[idx].prot & PROT_READ); + bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); len -= copyinfo[idx].len; dst += copyinfo[idx].len; @@ -3565,6 +3497,8 @@ vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, src = kaddr; idx = 0; while (len > 0) { + ASSERT(copyinfo[idx].prot & PROT_WRITE); + bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); len -= copyinfo[idx].len; src += copyinfo[idx].len; @@ -3577,30 +3511,17 @@ vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, * these are global stats, only return the values with for vCPU 0 */ VMM_STAT_DECLARE(VMM_MEM_RESIDENT); -VMM_STAT_DECLARE(VMM_MEM_WIRED); static void vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) { - if (vcpu == 0) { vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * vmspace_resident_count(vm->vmspace)); } } -static void -vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) -{ - - if (vcpu == 0) { - vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, - PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); - } -} - VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); -VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); int vm_ioport_access(struct vm *vm, int vcpuid, bool in, uint16_t port, diff --git a/usr/src/uts/i86pc/io/vmm/vmm_gpt.c b/usr/src/uts/i86pc/io/vmm/vmm_gpt.c index 9f6cc44aac..146ad958a8 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_gpt.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_gpt.c @@ -92,7 +92,8 @@ struct vmm_gpt_node { vmm_gpt_node_t *vgn_children; vmm_gpt_node_t *vgn_siblings; uint64_t *vgn_entries; - uint64_t _vgn_pad[2]; + uint64_t vgn_gpa; + uint64_t _vgn_pad; }; /* @@ -107,7 +108,6 @@ struct vmm_gpt_node { struct vmm_gpt { vmm_gpt_node_t *vgpt_root; vmm_pte_ops_t *vgpt_pte_ops; - uint64_t vgpt_mapped_page_count; }; /* @@ -153,24 +153,6 @@ vmm_gpt_alloc(vmm_pte_ops_t *pte_ops) } /* - * Retrieves the host kernel address of the GPT root. - */ -void * -vmm_gpt_root_kaddr(vmm_gpt_t *gpt) -{ - return (gpt->vgpt_root->vgn_entries); -} - -/* - * Retrieves the host PFN of the GPT root. - */ -uint64_t -vmm_gpt_root_pfn(vmm_gpt_t *gpt) -{ - return (gpt->vgpt_root->vgn_host_pfn); -} - -/* * Frees the given node, first nulling out all of its links to other nodes in * the tree, adjusting its parents reference count, and unlinking itself from * its parents page table. 
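For context on the vm_copy_setup()/vm_copyin() rework above: a caller translates a guest-linear range, takes per-page holds through the vcpu's vm_client, copies, and then tears the holds down. The wrapper below is a hedged sketch rather than part of this change; it assumes 'len' spans at most two pages and that <sys/vmm_kernel.h> is the include path for the declarations shown earlier.

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/vmm_kernel.h>	/* assumed include path */

/*
 * Illustrative only: copy 'len' bytes from guest-linear address 'gla'.
 */
static int
copy_from_guest_va(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, void *buf, size_t len)
{
	struct vm_copyinfo copyinfo[2];
	int error, fault;

	/* Translate the GLA and take per-page holds via the vcpu's client */
	error = vm_copy_setup(vm, vcpuid, paging, gla, len, PROT_READ,
	    copyinfo, 2, &fault);
	if (error != 0 || fault != 0) {
		/*
		 * On a guest fault the exception has already been injected,
		 * so the caller just resumes the vcpu; otherwise propagate
		 * the error.
		 */
		return (error);
	}

	/* Copy out of the held pages, then drop the holds */
	vm_copyin(vm, vcpuid, copyinfo, buf, len);
	vm_copy_teardown(vm, vcpuid, copyinfo, 2);
	return (0);
}
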
@@ -310,11 +292,18 @@ vmm_gpt_add_child(vmm_gpt_t *gpt, vmm_gpt_node_t *parent, vmm_gpt_node_t *child, ASSERT(gpt->vgpt_pte_ops != NULL); ASSERT(parent != NULL); ASSERT(child != NULL); + ASSERT3U(parent->vgn_level, <, LEVEL1); + const uint64_t gpa_mask[3] = { + [LEVEL4] = 0xffffff8000000000ul, /* entries cover 512G */ + [LEVEL3] = 0xffffffffc0000000ul, /* entries cover 1G */ + [LEVEL2] = 0xffffffffffe00000ul, /* entries cover 2M */ + }; const int index = vmm_gpt_node_index(gpa, parent->vgn_level); child->vgn_index = index; child->vgn_level = parent->vgn_level + 1; child->vgn_parent = parent; + child->vgn_gpa = gpa & gpa_mask[parent->vgn_level]; parent_entries = parent->vgn_entries; entry = gpt->vgpt_pte_ops->vpeo_map_table(child->vgn_host_pfn); parent_entries[index] = entry; @@ -338,12 +327,14 @@ vmm_gpt_add_child(vmm_gpt_t *gpt, vmm_gpt_node_t *parent, vmm_gpt_node_t *child, * that this does not actually map the entry, but simply ensures that the * entries exist. */ -void +static void vmm_gpt_populate_entry(vmm_gpt_t *gpt, uint64_t gpa) { vmm_gpt_node_t *node, *child; ASSERT(gpt != NULL); + ASSERT0(gpa & PAGEOFFSET); + node = gpt->vgpt_root; for (uint_t i = 0; i < LEVEL1; i++) { ASSERT(node != NULL); @@ -364,41 +355,53 @@ vmm_gpt_populate_entry(vmm_gpt_t *gpt, uint64_t gpa) void vmm_gpt_populate_region(vmm_gpt_t *gpt, uint64_t start, uint64_t end) { + ASSERT0(start & PAGEOFFSET); + ASSERT0(end & PAGEOFFSET); + for (uint64_t page = start; page < end; page += PAGESIZE) { vmm_gpt_populate_entry(gpt, page); } } /* - * Inserts an entry for a given GPA into the table. The caller must - * ensure that the entry is not currently mapped, though note that this - * can race with another thread inserting the same page into the tree. - * If we lose the race, we ensure that the page we thought we were - * inserting is the page that was inserted. + * Format a PTE and install it in the provided PTE-pointer. */ bool -vmm_gpt_map(vmm_gpt_t *gpt, uint64_t gpa, pfn_t pfn, uint_t prot, uint8_t attr) +vmm_gpt_map_at(vmm_gpt_t *gpt, uint64_t *ptep, pfn_t pfn, uint_t prot, + uint8_t attr) { - uint64_t *entries[MAX_GPT_LEVEL], entry, old_entry; - - ASSERT(gpt != NULL); - vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL); - ASSERT(entries[LEVEL1] != NULL); + uint64_t entry, old_entry; entry = gpt->vgpt_pte_ops->vpeo_map_page(pfn, prot, attr); - old_entry = atomic_cas_64(entries[LEVEL1], 0, entry); + old_entry = atomic_cas_64(ptep, 0, entry); if (old_entry != 0) { - ASSERT3U(gpt->vgpt_pte_ops->vpeo_pte_pfn(entry), - ==, + ASSERT3U(gpt->vgpt_pte_ops->vpeo_pte_pfn(entry), ==, gpt->vgpt_pte_ops->vpeo_pte_pfn(old_entry)); return (false); } - gpt->vgpt_mapped_page_count++; return (true); } /* + * Inserts an entry for a given GPA into the table. The caller must + * ensure that a conflicting PFN is not mapped at the requested location. + * Racing operations to map the same PFN at one location is acceptable and + * properly handled. + */ +bool +vmm_gpt_map(vmm_gpt_t *gpt, uint64_t gpa, pfn_t pfn, uint_t prot, uint8_t attr) +{ + uint64_t *entries[MAX_GPT_LEVEL]; + + ASSERT(gpt != NULL); + vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL); + ASSERT(entries[LEVEL1] != NULL); + + return (vmm_gpt_map_at(gpt, entries[LEVEL1], pfn, prot, attr)); +} + +/* * Removes a child node from its parent's list of children, and then frees * the now-orphaned child. */ @@ -421,9 +424,8 @@ vmm_gpt_node_remove_child(vmm_gpt_node_t *parent, vmm_gpt_node_t *child) } /* - * Cleans up unused inner nodes in the GPT. 
Asserts that the - * leaf corresponding to the entry does not map any additional - * pages. + * Cleans up unused inner nodes in the GPT. Asserts that the leaf corresponding + * to the entry does not map any additional pages. */ static void vmm_gpt_vacate_entry(vmm_gpt_t *gpt, uint64_t gpa) @@ -450,27 +452,28 @@ vmm_gpt_vacate_entry(vmm_gpt_t *gpt, uint64_t gpa) } /* - * Cleans up the unused inner nodes in the GPT for a region of guest - * physical address space bounded by [start..end). The region must - * map no pages. + * Cleans up the unused inner nodes in the GPT for a region of guest physical + * address space of [start, end). The region must map no pages. */ void vmm_gpt_vacate_region(vmm_gpt_t *gpt, uint64_t start, uint64_t end) { + ASSERT0(start & PAGEOFFSET); + ASSERT0(end & PAGEOFFSET); + for (uint64_t page = start; page < end; page += PAGESIZE) { vmm_gpt_vacate_entry(gpt, page); } } /* - * Remove a mapping from the table. Returns false if the page was not - * mapped, otherwise returns true. + * Remove a mapping from the table. Returns false if the page was not mapped, + * otherwise returns true. */ bool vmm_gpt_unmap(vmm_gpt_t *gpt, uint64_t gpa) { uint64_t *entries[MAX_GPT_LEVEL], entry; - bool was_mapped; ASSERT(gpt != NULL); vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL); @@ -479,28 +482,27 @@ vmm_gpt_unmap(vmm_gpt_t *gpt, uint64_t gpa) entry = *entries[LEVEL1]; *entries[LEVEL1] = 0; - was_mapped = gpt->vgpt_pte_ops->vpeo_pte_is_present(entry); - if (was_mapped) - gpt->vgpt_mapped_page_count--; - - return (was_mapped); + return (gpt->vgpt_pte_ops->vpeo_pte_is_present(entry)); } /* - * Un-maps the region of guest physical address space bounded by - * [start..end). Returns the number of pages that are unmapped. + * Un-maps the region of guest physical address space bounded by [start..end). + * Returns the number of pages that are unmapped. */ size_t vmm_gpt_unmap_region(vmm_gpt_t *gpt, uint64_t start, uint64_t end) { - size_t n = 0; + ASSERT0(start & PAGEOFFSET); + ASSERT0(end & PAGEOFFSET); + size_t num_unmapped = 0; for (uint64_t page = start; page < end; page += PAGESIZE) { - if (vmm_gpt_unmap(gpt, page) != 0) - n++; + if (vmm_gpt_unmap(gpt, page) != 0) { + num_unmapped++; + } } - return (n); + return (num_unmapped); } /* @@ -509,31 +511,23 @@ vmm_gpt_unmap_region(vmm_gpt_t *gpt, uint64_t start, uint64_t end) * bits of the entry. Otherwise, it will be ignored. */ bool -vmm_gpt_is_mapped(vmm_gpt_t *gpt, uint64_t gpa, uint_t *protp) +vmm_gpt_is_mapped(vmm_gpt_t *gpt, uint64_t *ptep, pfn_t *pfnp, uint_t *protp) { - uint64_t *entries[MAX_GPT_LEVEL], entry; + uint64_t entry; - vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL); - if (entries[LEVEL1] == NULL) + if (ptep == NULL) { return (false); - entry = *entries[LEVEL1]; - if (!gpt->vgpt_pte_ops->vpeo_pte_is_present(entry)) + } + entry = *ptep; + if (!gpt->vgpt_pte_ops->vpeo_pte_is_present(entry)) { return (false); + } + *pfnp = gpt->vgpt_pte_ops->vpeo_pte_pfn(entry); *protp = gpt->vgpt_pte_ops->vpeo_pte_prot(entry); - return (true); } /* - * Returns the number of pages that are mapped in by this GPT. - */ -size_t -vmm_gpt_mapped_count(vmm_gpt_t *gpt) -{ - return (gpt->vgpt_mapped_page_count); -} - -/* * Resets the accessed bit on the page table entry pointed to be `entry`. * If `on` is true, the bit will be set, otherwise it will be cleared. * The old value of the bit is returned. 
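The accessed/dirty helpers above are what the new gpt_track_dirty support builds on. A speculative sketch of harvesting dirty state over a GPA range follows; the bitmap format is an assumption, and a real consumer would also need to invalidate cached EPT/RVI translations after clearing dirty bits, which this sketch omits.

#include <sys/types.h>
#include <sys/param.h>
#include <sys/vmm_gpt.h>

/*
 * Speculative sketch: collect and clear per-page dirty state for the GPA
 * range [start, end).  'bitmap' must cover (end - start) / PAGESIZE bits.
 */
static void
harvest_dirty_pages(vmm_gpt_t *gpt, uint64_t start, uint64_t end,
    uint8_t *bitmap)
{
	for (uint64_t gpa = start; gpa < end; gpa += PAGESIZE) {
		uint64_t *ptep = vmm_gpt_lookup(gpt, gpa);

		/* Skip GPAs whose leaf entry was never populated */
		if (ptep == NULL)
			continue;

		/* Clear the dirty bit, recording its previous state */
		if (vmm_gpt_reset_dirty(gpt, ptep, false) != 0) {
			const uint64_t pageno = (gpa - start) / PAGESIZE;
			bitmap[pageno / 8] |= (uint8_t)(1 << (pageno % 8));
		}
	}
}
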
@@ -556,3 +550,12 @@ vmm_gpt_reset_dirty(vmm_gpt_t *gpt, uint64_t *entry, bool on) ASSERT(entry != NULL); return (gpt->vgpt_pte_ops->vpeo_reset_dirty(entry, on)); } + +/* + * Get properly formatted PML4 (EPTP/nCR3) for GPT. + */ +uint64_t +vmm_gpt_get_pmtp(vmm_gpt_t *gpt) +{ + return (gpt->vgpt_pte_ops->vpeo_get_pmtp(gpt->vgpt_root->vgn_host_pfn)); +} diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 1dc2616599..d2a790ec03 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -373,6 +373,27 @@ static const struct vie_op one_byte_opcodes[256] = { #define GB (1024 * 1024 * 1024) + +/* + * Paging defines, previously pulled in from machine/pmap.h + */ +#define PG_V (1 << 0) /* Present */ +#define PG_RW (1 << 1) /* Read/Write */ +#define PG_U (1 << 2) /* User/Supervisor */ +#define PG_A (1 << 5) /* Accessed */ +#define PG_M (1 << 6) /* Dirty */ +#define PG_PS (1 << 7) /* Largepage */ + +/* + * Paging except defines, previously pulled in from machine/pmap.h + */ +#define PGEX_P (1 << 0) /* Non-present/Protection */ +#define PGEX_W (1 << 1) /* Read/Write */ +#define PGEX_U (1 << 2) /* User/Supervisor */ +#define PGEX_RSV (1 << 3) /* (Non-)Reserved */ +#define PGEX_I (1 << 4) /* Instruction */ + + static enum vm_reg_name gpr_map[16] = { VM_REG_GUEST_RAX, VM_REG_GUEST_RCX, @@ -2875,43 +2896,48 @@ pf_error_code(int usermode, int prot, int rsvd, uint64_t pte) } static void -ptp_release(void **cookie) +ptp_release(vm_page_t **vmp) { - if (*cookie != NULL) { - vm_gpa_release(*cookie); - *cookie = NULL; + if (*vmp != NULL) { + vmp_release(*vmp); + *vmp = NULL; } } static void * -ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) +ptp_hold(struct vm *vm, int vcpu, uintptr_t gpa, size_t len, vm_page_t **vmp) { - void *ptr; + vm_client_t *vmc = vm_get_vmclient(vm, vcpu); + const uintptr_t hold_gpa = gpa & PAGEMASK; + + /* Hold must not cross a page boundary */ + VERIFY3U(gpa + len, <=, hold_gpa + PAGESIZE); - ptp_release(cookie); - ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, PROT_READ | PROT_WRITE, - cookie); + if (*vmp != NULL) { + vmp_release(*vmp); + } + + *vmp = vmc_hold(vmc, hold_gpa, PROT_READ | PROT_WRITE); + if (*vmp == NULL) { + return (NULL); + } - return (ptr); + return ((caddr_t)vmp_get_writable(*vmp) + (gpa - hold_gpa)); } static int _vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *guest_fault, bool check_only) { - int nlevels, pfcode, retval, usermode, writable; + int nlevels, pfcode; int ptpshift = 0, ptpindex = 0; uint64_t ptpphys; uint64_t *ptpbase = NULL, pte = 0, pgsize = 0; - uint32_t *ptpbase32, pte32; - void *cookie; + vm_page_t *cookie = NULL; + const bool usermode = paging->cpl == 3; + const bool writable = (prot & PROT_WRITE) != 0; *guest_fault = 0; - - usermode = (paging->cpl == 3 ? 1 : 0); - writable = prot & PROT_WRITE; - cookie = NULL; - retval = 0; restart: ptpphys = paging->cr3; /* root of the page tables */ ptp_release(&cookie); @@ -2923,15 +2949,18 @@ restart: */ if (!check_only) vm_inject_gp(vm, vcpuid); - goto fault; + *guest_fault = 1; + return (0); } if (paging->paging_mode == PAGING_MODE_FLAT) { *gpa = gla; - goto done; + return (0); } if (paging->paging_mode == PAGING_MODE_32) { + uint32_t *ptpbase32, pte32; + nlevels = 2; while (--nlevels >= 0) { /* Zero out the lower 12 bits. 
*/ @@ -2940,8 +2969,9 @@ restart: ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); - if (ptpbase32 == NULL) - goto error; + if (ptpbase32 == NULL) { + return (EFAULT); + } ptpshift = PAGE_SHIFT + nlevels * 10; ptpindex = (gla >> ptpshift) & 0x3FF; @@ -2957,7 +2987,10 @@ restart: 0, pte32); vm_inject_pf(vm, vcpuid, pfcode, gla); } - goto fault; + + ptp_release(&cookie); + *guest_fault = 1; + return (0); } /* @@ -2992,7 +3025,8 @@ restart: /* Zero out the lower 'ptpshift' bits */ pte32 >>= ptpshift; pte32 <<= ptpshift; *gpa = pte32 | (gla & (pgsize - 1)); - goto done; + ptp_release(&cookie); + return (0); } if (paging->paging_mode == PAGING_MODE_PAE) { @@ -3001,8 +3035,9 @@ restart: ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof (*ptpbase) * 4, &cookie); - if (ptpbase == NULL) - goto error; + if (ptpbase == NULL) { + return (EFAULT); + } ptpindex = (gla >> 30) & 0x3; @@ -3013,21 +3048,27 @@ restart: pfcode = pf_error_code(usermode, prot, 0, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } - goto fault; + + ptp_release(&cookie); + *guest_fault = 1; + return (0); } ptpphys = pte; nlevels = 2; - } else + } else { nlevels = 4; + } + while (--nlevels >= 0) { /* Zero out the lower 12 bits and the upper 12 bits */ - ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; + ptpphys &= 0x000ffffffffff000UL; ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); - if (ptpbase == NULL) - goto error; + if (ptpbase == NULL) { + return (EFAULT); + } ptpshift = PAGE_SHIFT + nlevels * 9; ptpindex = (gla >> ptpshift) & 0x1FF; @@ -3042,7 +3083,10 @@ restart: pfcode = pf_error_code(usermode, prot, 0, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } - goto fault; + + ptp_release(&cookie); + *guest_fault = 1; + return (0); } /* Set the accessed bit in the page table entry */ @@ -3060,7 +3104,10 @@ restart: 1, pte); vm_inject_pf(vm, vcpuid, pfcode, gla); } - goto fault; + + ptp_release(&cookie); + *guest_fault = 1; + return (0); } break; } @@ -3073,21 +3120,12 @@ restart: if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0) goto restart; } + ptp_release(&cookie); /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; *gpa = pte | (gla & (pgsize - 1)); -done: - ptp_release(&cookie); - KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d", - __func__, retval)); - return (retval); -error: - retval = EFAULT; - goto done; -fault: - *guest_fault = 1; - goto done; + return (0); } int diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c index a5118c15af..e95f444051 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c @@ -46,7 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/smp.h> +#include <sys/cpuset.h> #include <x86/specialreg.h> #include <x86/apicreg.h> diff --git a/usr/src/uts/i86pc/io/vmm/vmm_mem.c b/usr/src/uts/i86pc/io/vmm/vmm_mem.c deleted file mode 100644 index 4ffe5bf509..0000000000 --- a/usr/src/uts/i86pc/io/vmm/vmm_mem.c +++ /dev/null @@ -1,113 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mman.h> -#include <sys/sglist.h> -#include <sys/lock.h> -#include <sys/rwlock.h> - -#include <machine/md_var.h> -#include <machine/vm.h> -#include <sys/vmm_vm.h> - -#include "vmm_mem.h" - -int -vmm_mem_init(void) -{ - - return (0); -} - -vm_object_t -vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, - vm_paddr_t hpa) -{ - int error; - vm_object_t obj; - struct sglist *sg; - - sg = sglist_alloc(1, M_WAITOK); - error = sglist_append_phys(sg, hpa, len); - KASSERT(error == 0, ("error %d appending physaddr to sglist", error)); - - const int prot = PROT_READ | PROT_WRITE; - obj = vm_pager_allocate(OBJT_SG, sg, len, prot, 0, NULL); - if (obj != NULL) { - /* - * VT-x ignores the MTRR settings when figuring out the - * memory type for translations obtained through EPT. - * - * Therefore we explicitly force the pages provided by - * this object to be mapped as uncacheable. - */ - VM_OBJECT_WLOCK(obj); - error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE); - VM_OBJECT_WUNLOCK(obj); - if (error != 0) { - panic("vmm_mmio_alloc: vm_object_set_memattr error %d", - error); - } - error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0, - VMFS_NO_SPACE, prot, prot, 0); - if (error != 0) { - vm_object_deallocate(obj); - obj = NULL; - } - } - - /* - * Drop the reference on the sglist. - * - * If the scatter/gather object was successfully allocated then it - * has incremented the reference count on the sglist. Dropping the - * initial reference count ensures that the sglist will be freed - * when the object is deallocated. - * - * If the object could not be allocated then we end up freeing the - * sglist. - */ - sglist_free(sg); - - return (obj); -} - -vm_paddr_t -vmm_mem_maxaddr(void) -{ - - return (ptoa(Maxmem)); -} diff --git a/usr/src/uts/i86pc/io/vmm/vmm_mem.h b/usr/src/uts/i86pc/io/vmm/vmm_mem.h deleted file mode 100644 index b27501eef2..0000000000 --- a/usr/src/uts/i86pc/io/vmm/vmm_mem.h +++ /dev/null @@ -1,54 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2013 Pluribus Networks Inc. - */ - -#ifndef _VMM_MEM_H_ -#define _VMM_MEM_H_ - -struct vmspace; -struct vm_object; - -int vmm_mem_init(void); -struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, - vm_paddr_t hpa); -vm_paddr_t vmm_mem_maxaddr(void); - -#endif diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 92d1494e04..823097b285 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -100,6 +100,7 @@ struct vmm_hold { struct vmm_lease { list_node_t vml_node; struct vm *vml_vm; + vm_client_t *vml_vmclient; boolean_t vml_expired; boolean_t vml_break_deferred; boolean_t (*vml_expire_func)(void *); @@ -444,7 +445,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, lock_type = LOCK_WRITE_HOLD; break; - case VM_GET_GPA_PMAP: case VM_GET_MEMSEG: case VM_MMAP_GETNEXT: case VM_LAPIC_IRQ: @@ -465,6 +465,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, lock_type = LOCK_READ_HOLD; break; + case VM_GET_GPA_PMAP: case VM_IOAPIC_PINCOUNT: case VM_SUSPEND: default: @@ -1127,18 +1128,11 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, break; } case VM_GET_GPA_PMAP: { - struct vm_gpa_pte gpapte; - - if (ddi_copyin(datap, &gpapte, sizeof (gpapte), md)) { - error = EFAULT; - break; - } -#ifdef __FreeBSD__ - /* XXXJOY: add function? 
*/ - pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vmm_vm)), - gpapte.gpa, gpapte.pte, &gpapte.ptenum); -#endif - error = 0; + /* + * Until there is a necessity to leak EPT/RVI PTE values to + * userspace, this will remain unimplemented + */ + error = EINVAL; break; } case VM_GET_HPET_CAPABILITIES: { @@ -1690,6 +1684,7 @@ vmm_drv_lease_sign(vmm_hold_t *hold, boolean_t (*expiref)(void *), void *arg) lease->vml_hold = hold; /* cache the VM pointer for one less pointer chase */ lease->vml_vm = sc->vmm_vm; + lease->vml_vmclient = vmspace_client_alloc(vm_get_vmspace(sc->vmm_vm)); mutex_enter(&sc->vmm_lease_lock); while (sc->vmm_lease_blocker != 0) { @@ -1709,6 +1704,7 @@ vmm_lease_break_locked(vmm_softc_t *sc, vmm_lease_t *lease) list_remove(&sc->vmm_lease_list, lease); vmm_read_unlock(sc); + vmc_destroy(lease->vml_vmclient); kmem_free(lease, sizeof (*lease)); } @@ -1841,9 +1837,30 @@ vmm_drv_lease_expired(vmm_lease_t *lease) void * vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz) { + vm_page_t *vmp; + void *res = NULL; + ASSERT(lease != NULL); + ASSERT3U(sz, ==, PAGESIZE); + ASSERT0(gpa & PAGEOFFSET); + + vmp = vmc_hold(lease->vml_vmclient, gpa, PROT_READ | PROT_WRITE); + /* + * Break the rules for now and just extract the pointer. This is + * nominally safe, since holding a driver lease on the VM read-locks it. + * + * A pointer which would otherwise be at risk of being a use-after-free + * vector is made safe since actions such as vmspace_unmap() require + * acquisition of the VM write-lock, (causing all driver leases to be + * broken) allowing the consumers to cease their access prior to + * modification of the vmspace. + */ + if (vmp != NULL) { + res = vmp_get_writable(vmp); + vmp_release(vmp); + } - return (vmspace_find_kva(vm_get_vmspace(lease->vml_vm), gpa, sz)); + return (res); } int @@ -2191,6 +2208,14 @@ vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp) minor_t minor; vmm_softc_t *sc; + /* + * Forbid running bhyve in a 32-bit process until it has been tested and + * verified to be safe. + */ + if (curproc->p_model != DATAMODEL_LP64) { + return (EFBIG); + } + minor = getminor(*devp); if (minor == VMM_CTL_MINOR) { /* @@ -2330,6 +2355,14 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, vmm_softc_t *sc; minor_t minor; + /* + * Forbid running bhyve in a 32-bit process until it has been tested and + * verified to be safe. 
+ */ + if (curproc->p_model != DATAMODEL_LP64) { + return (EFBIG); + } + /* The structs in bhyve ioctls assume a 64-bit datamodel */ if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) { return (ENOTSUP); @@ -2356,10 +2389,7 @@ vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, { vmm_softc_t *sc; const minor_t minor = getminor(dev); - struct vm *vm; int err; - vm_object_t vmo = NULL; - struct vmspace *vms; if (minor == VMM_CTL_MINOR) { return (ENODEV); @@ -2380,31 +2410,23 @@ vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, /* Grab read lock on the VM to prevent any changes to the memory map */ vmm_read_lock(sc); - vm = sc->vmm_vm; - vms = vm_get_vmspace(vm); if (off >= VM_DEVMEM_START) { int segid; - off_t map_off = 0; + off_t segoff; /* Mapping a devmem "device" */ - if (!vmmdev_devmem_segid(sc, off, len, &segid, &map_off)) { + if (!vmmdev_devmem_segid(sc, off, len, &segid, &segoff)) { err = ENODEV; - goto out; - } - err = vm_get_memseg(vm, segid, NULL, NULL, &vmo); - if (err != 0) { - goto out; + } else { + err = vm_segmap_obj(sc->vmm_vm, segid, segoff, len, as, + addrp, prot, maxprot, flags); } - err = vm_segmap_obj(vmo, map_off, len, as, addrp, prot, maxprot, - flags); } else { /* Mapping a part of the guest physical space */ - err = vm_segmap_space(vms, off, as, addrp, len, prot, maxprot, - flags); + err = vm_segmap_space(sc->vmm_vm, off, as, addrp, len, prot, + maxprot, flags); } - -out: vmm_read_unlock(sc); return (err); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_ept.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_ept.c index 3d357f37d2..fde4a030ce 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_ept.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_ept.c @@ -21,17 +21,12 @@ #include <sys/kmem.h> #include <sys/machsystm.h> #include <sys/mman.h> +#include <sys/x86_archext.h> +#include <vm/hat_pte.h> #include <sys/vmm_gpt.h> #include <sys/vmm_vm.h> - -typedef struct ept_map ept_map_t; -struct ept_map { - vmm_gpt_t *em_gpt; - kmutex_t em_lock; -}; - #define EPT_R (1 << 0) #define EPT_W (1 << 1) #define EPT_X (1 << 2) @@ -42,6 +37,9 @@ struct ept_map { #define EPT_PA_MASK (0x000ffffffffff000ull) +#define EPT_MAX_LEVELS 4 +CTASSERT(EPT_MAX_LEVELS <= MAX_GPT_LEVEL); + CTASSERT(EPT_R == PROT_READ); CTASSERT(EPT_W == PROT_WRITE); CTASSERT(EPT_X == PROT_EXEC); @@ -121,7 +119,15 @@ ept_reset_accessed(uint64_t *entry, bool on) on ? 
EPT_ACCESSED : 0)); } -static vmm_pte_ops_t ept_pte_ops = { +static uint64_t +ept_get_pmtp(pfn_t root_pfn) +{ + /* TODO: enable AD tracking when required */ + return ((root_pfn << PAGESHIFT | + (EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB)); +} + +vmm_pte_ops_t ept_pte_ops = { .vpeo_map_table = ept_map_table, .vpeo_map_page = ept_map_page, .vpeo_pte_pfn = ept_pte_pfn, @@ -129,100 +135,5 @@ static vmm_pte_ops_t ept_pte_ops = { .vpeo_pte_prot = ept_pte_prot, .vpeo_reset_dirty = ept_reset_dirty, .vpeo_reset_accessed = ept_reset_accessed, -}; - -vmm_gpt_t * -ept_create(void) -{ - return (vmm_gpt_alloc(&ept_pte_ops)); -} - -static void * -ept_ops_create(uintptr_t *root_kaddr) -{ - ept_map_t *map; - - map = kmem_zalloc(sizeof (*map), KM_SLEEP); - mutex_init(&map->em_lock, NULL, MUTEX_DEFAULT, NULL); - map->em_gpt = ept_create(); - *root_kaddr = (uintptr_t)vmm_gpt_root_kaddr(map->em_gpt); - - return (map); -} - -static void -ept_ops_destroy(void *arg) -{ - ept_map_t *map = arg; - - if (map != NULL) { - vmm_gpt_free(map->em_gpt); - mutex_destroy(&map->em_lock); - kmem_free(map, sizeof (*map)); - } -} - -static uint64_t -ept_ops_wired_count(void *arg) -{ - ept_map_t *map = arg; - uint64_t res; - - mutex_enter(&map->em_lock); - res = vmm_gpt_mapped_count(map->em_gpt); - mutex_exit(&map->em_lock); - - return (res); -} - -static int -ept_ops_is_wired(void *arg, uint64_t gpa, uint_t *protp) -{ - ept_map_t *map = arg; - bool mapped; - - mutex_enter(&map->em_lock); - mapped = vmm_gpt_is_mapped(map->em_gpt, gpa, protp); - mutex_exit(&map->em_lock); - - return (mapped ? 0 : -1); -} - -static int -ept_ops_map(void *arg, uint64_t gpa, pfn_t pfn, uint_t _lvl, uint_t prot, - uint8_t attr) -{ - ept_map_t *map = arg; - - ASSERT((prot & EPT_RWX) != 0 && (prot & ~EPT_RWX) == 0); - - mutex_enter(&map->em_lock); - vmm_gpt_populate_entry(map->em_gpt, gpa); - (void) vmm_gpt_map(map->em_gpt, gpa, pfn, prot, attr); - mutex_exit(&map->em_lock); - - return (0); -} - -static uint64_t -ept_ops_unmap(void *arg, uint64_t start, uint64_t end) -{ - ept_map_t *map = arg; - size_t unmapped = 0; - - mutex_enter(&map->em_lock); - unmapped = vmm_gpt_unmap_region(map->em_gpt, start, end); - vmm_gpt_vacate_region(map->em_gpt, start, end); - mutex_exit(&map->em_lock); - - return ((uint64_t)unmapped); -} - -struct vmm_pt_ops ept_ops = { - .vpo_init = ept_ops_create, - .vpo_free = ept_ops_destroy, - .vpo_wired_cnt = ept_ops_wired_count, - .vpo_is_wired = ept_ops_is_wired, - .vpo_map = ept_ops_map, - .vpo_unmap = ept_ops_unmap, + .vpeo_get_pmtp = ept_get_pmtp, }; diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index afd686f197..f78db731d6 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -60,7 +60,6 @@ #include <machine/cpufunc.h> #include <machine/fpu.h> #include <machine/md_var.h> -#include <machine/pmap.h> #include <machine/specialreg.h> #include <machine/vmm.h> #include <machine/vmparam.h> @@ -95,7 +94,7 @@ uint8_t const bin2bcd_data[] = { }; void -pmap_invalidate_cache(void) +invalidate_cache_all(void) { cpuset_t cpuset; @@ -108,7 +107,7 @@ pmap_invalidate_cache(void) } vm_paddr_t -pmap_kextract(vm_offset_t va) +vtophys(void *va) { pfn_t pfn; @@ -411,18 +410,6 @@ vmm_glue_callout_localize(struct callout *c) mutex_exit(&cpu_lock); } -void -ipi_cpu(int cpu, uint_t ipi) -{ - /* - * This was previously implemented as an invocation of asynchronous - * no-op crosscalls to interrupt the target CPU. 
Since even nowait - * crosscalls can block in certain circumstances, a direct poke_cpu() - * is safer when called from delicate contexts. - */ - poke_cpu(cpu); -} - uint_t cpu_high; /* Highest arg to CPUID */ uint_t cpu_exthigh; /* Highest arg to extended CPUID */ uint_t cpu_id; /* Stepping ID */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c index c66a4e7962..8b45782d25 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_rvi.c @@ -28,12 +28,6 @@ #include <sys/vmm_gpt.h> #include <sys/vmm_vm.h> -typedef struct rvi_map rvi_map_t; -struct rvi_map { - vmm_gpt_t *rm_gpt; - kmutex_t rm_lock; -}; - static inline uint64_t rvi_prot(uint_t prot) { @@ -145,7 +139,13 @@ rvi_reset_accessed(uint64_t *entry, bool on) return (rvi_reset_bits(entry, (PT_MOD | PT_REF), on ? PT_REF : 0)); } -static vmm_pte_ops_t rvi_pte_ops = { +static uint64_t +rvi_get_pmtp(pfn_t root_pfn) +{ + return (root_pfn << PAGESHIFT); +} + +vmm_pte_ops_t rvi_pte_ops = { .vpeo_map_table = rvi_map_table, .vpeo_map_page = rvi_map_page, .vpeo_pte_pfn = rvi_pte_pfn, @@ -153,101 +153,5 @@ static vmm_pte_ops_t rvi_pte_ops = { .vpeo_pte_prot = rvi_pte_prot, .vpeo_reset_dirty = rvi_reset_dirty, .vpeo_reset_accessed = rvi_reset_accessed, -}; - -vmm_gpt_t * -rvi_create(void) -{ - return (vmm_gpt_alloc(&rvi_pte_ops)); -} - -static void * -rvi_ops_create(uintptr_t *root_kaddr) -{ - rvi_map_t *map; - - map = kmem_zalloc(sizeof (*map), KM_SLEEP); - mutex_init(&map->rm_lock, NULL, MUTEX_DEFAULT, NULL); - map->rm_gpt = rvi_create(); - *root_kaddr = (uintptr_t)vmm_gpt_root_kaddr(map->rm_gpt); - - return (map); -} - -static void -rvi_ops_destroy(void *arg) -{ - rvi_map_t *map = arg; - - if (map != NULL) { - vmm_gpt_free(map->rm_gpt); - mutex_destroy(&map->rm_lock); - kmem_free(map, sizeof (*map)); - } -} - -static uint64_t -rvi_ops_wired_count(void *arg) -{ - rvi_map_t *map = arg; - uint64_t res; - - mutex_enter(&map->rm_lock); - res = vmm_gpt_mapped_count(map->rm_gpt); - mutex_exit(&map->rm_lock); - - return (res); -} - -static int -rvi_ops_is_wired(void *arg, uint64_t gpa, uint_t *protp) -{ - rvi_map_t *map = arg; - bool mapped; - - mutex_enter(&map->rm_lock); - mapped = vmm_gpt_is_mapped(map->rm_gpt, gpa, protp); - mutex_exit(&map->rm_lock); - - return (mapped ? 
0 : -1); -} - -static int -rvi_ops_map(void *arg, uint64_t gpa, pfn_t pfn, uint_t _lvl, uint_t prot, - uint8_t attr) -{ - rvi_map_t *map = arg; - - ASSERT((prot & PROT_READ) != 0); - ASSERT3U((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)), ==, 0); - - mutex_enter(&map->rm_lock); - vmm_gpt_populate_entry(map->rm_gpt, gpa); - (void) vmm_gpt_map(map->rm_gpt, gpa, pfn, prot, attr); - mutex_exit(&map->rm_lock); - - return (0); -} - -static uint64_t -rvi_ops_unmap(void *arg, uint64_t start, uint64_t end) -{ - rvi_map_t *map = arg; - size_t unmapped = 0; - - mutex_enter(&map->rm_lock); - unmapped = vmm_gpt_unmap_region(map->rm_gpt, start, end); - vmm_gpt_vacate_region(map->rm_gpt, start, end); - mutex_exit(&map->rm_lock); - - return ((uint64_t)unmapped); -} - -struct vmm_pt_ops rvi_ops = { - .vpo_init = rvi_ops_create, - .vpo_free = rvi_ops_destroy, - .vpo_wired_cnt = rvi_ops_wired_count, - .vpo_is_wired = rvi_ops_is_wired, - .vpo_map = rvi_ops_map, - .vpo_unmap = rvi_ops_unmap, + .vpeo_get_pmtp = rvi_get_pmtp, }; diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c deleted file mode 100644 index bd1f1890d4..0000000000 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c +++ /dev/null @@ -1,932 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ -/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */ - -/* - * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company - * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. - */ - -#include <sys/param.h> -#include <sys/kmem.h> -#include <sys/thread.h> -#include <sys/list.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/ddi.h> -#include <sys/sysmacros.h> -#include <sys/machsystm.h> -#include <sys/vmsystm.h> -#include <sys/malloc.h> -#include <sys/x86_archext.h> -#include <vm/as.h> -#include <vm/hat_i86.h> -#include <vm/seg_vn.h> -#include <vm/seg_kmem.h> - -#include <machine/vm.h> -#include <sys/vmm_gpt.h> -#include <sys/vmm_vm.h> -#include <sys/seg_vmm.h> -#include <sys/vmm_reservoir.h> - -#define PMAP_TO_VMMAP(pm) ((vm_map_t) \ - ((caddr_t)(pm) - offsetof(struct vmspace, vms_pmap))) -#define VMMAP_TO_VMSPACE(vmmap) ((struct vmspace *) \ - ((caddr_t)(vmmap) - offsetof(struct vmspace, vm_map))) - - -struct vmspace_mapping { - list_node_t vmsm_node; - vm_object_t vmsm_object; - uintptr_t vmsm_addr; - size_t vmsm_len; - off_t vmsm_offset; - uint_t vmsm_prot; -}; -typedef struct vmspace_mapping vmspace_mapping_t; - -#define VMSM_OFFSET(vmsm, addr) ( \ - (vmsm)->vmsm_offset + \ - ((addr) - (uintptr_t)(vmsm)->vmsm_addr)) - - -/* Private glue interfaces */ -static void pmap_free(pmap_t); -static vmspace_mapping_t *vm_mapping_find(struct vmspace *, uintptr_t, size_t, - boolean_t); -static void vm_mapping_remove(struct vmspace *, vmspace_mapping_t *); - -struct vmspace * -vmspace_alloc(vm_offset_t start, vm_offset_t end, pmap_pinit_t pinit) -{ - struct vmspace *vms; - const uintptr_t size = end + 1; - - /* - * This whole mess is built on the assumption that a 64-bit address - * space is available to work with for the various pagetable tricks. 
- */ - VERIFY(ttoproc(curthread)->p_model == DATAMODEL_LP64); - VERIFY(start == 0 && size > 0 && (size & PAGEOFFSET) == 0 && - size <= (uintptr_t)USERLIMIT); - - vms = kmem_zalloc(sizeof (*vms), KM_SLEEP); - vms->vms_size = size; - list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t), - offsetof(vmspace_mapping_t, vmsm_node)); - - if (pinit(&vms->vms_pmap) == 0) { - kmem_free(vms, sizeof (*vms)); - return (NULL); - } - - return (vms); -} - -void -vmspace_free(struct vmspace *vms) -{ - VERIFY(list_is_empty(&vms->vms_maplist)); - - pmap_free(&vms->vms_pmap); - kmem_free(vms, sizeof (*vms)); -} - -pmap_t -vmspace_pmap(struct vmspace *vms) -{ - return (&vms->vms_pmap); -} - -long -vmspace_resident_count(struct vmspace *vms) -{ - /* XXXJOY: finish */ - return (0); -} - -void * -vmspace_find_kva(struct vmspace *vms, uintptr_t addr, size_t size) -{ - vmspace_mapping_t *vmsm; - void *result = NULL; - - /* - * Since vmspace_find_kva is provided so that vmm_drv consumers can do - * GPA2KVA translations, it is expected to be called when there is a - * read lock preventing vmspace alterations. As such, it can do the - * lockless vm_mapping_find() lookup. - */ - vmsm = vm_mapping_find(vms, addr, size, B_TRUE); - if (vmsm != NULL) { - struct vm_object *vmo = vmsm->vmsm_object; - - switch (vmo->vmo_type) { - case OBJT_DEFAULT: - result = vmmr_region_mem_at( - (vmmr_region_t *)vmo->vmo_data, - VMSM_OFFSET(vmsm, addr) & PAGEMASK); - break; - default: - break; - } - } - - return (result); -} - -static int -vmspace_pmap_iswired(struct vmspace *vms, uintptr_t addr, uint_t *prot) -{ - pmap_t pmap = &vms->vms_pmap; - int rv; - - ASSERT(MUTEX_HELD(&vms->vms_lock)); - - rv = pmap->pm_ops->vpo_is_wired(pmap->pm_impl, addr, prot); - return (rv); -} - -static void -pmap_free(pmap_t pmap) -{ - void *pmi = pmap->pm_impl; - struct vmm_pt_ops *ops = pmap->pm_ops; - - pmap->pm_pml4 = NULL; - pmap->pm_impl = NULL; - pmap->pm_ops = NULL; - - ops->vpo_free(pmi); -} - -int -pmap_pinit_type(pmap_t pmap, enum pmap_type type, int flags) -{ - /* For use in vmm only */ - pmap->pm_type = type; - switch (type) { - case PT_EPT: { - struct vmm_pt_ops *ops = &ept_ops; - void *pml4, *pmi; - - pmi = ops->vpo_init((uintptr_t *)&pml4); - - pmap->pm_ops = ops; - pmap->pm_impl = pmi; - pmap->pm_pml4 = pml4; - return (1); - } - case PT_RVI: { - struct vmm_pt_ops *ops = &rvi_ops; - void *pml4, *pmi; - - pmi = ops->vpo_init((uintptr_t *)&pml4); - - pmap->pm_ops = ops; - pmap->pm_impl = pmi; - pmap->pm_pml4 = pml4; - return (1); - } - default: - panic("unsupported pmap type: %x", type); - break; - } - - return (1); -} - -long -pmap_wired_count(pmap_t pmap) -{ - long val; - - val = pmap->pm_ops->vpo_wired_cnt(pmap->pm_impl); - VERIFY3S(val, >=, 0); - - return (val); -} - -int -pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) -{ - /* Allow the fallback to vm_fault to handle this */ - return (-1); -} - - - -struct sglist_ent { - vm_paddr_t sge_pa; - size_t sge_len; -}; -struct sglist { - kmutex_t sg_lock; - uint_t sg_refcnt; - uint_t sg_len; - uint_t sg_next; - struct sglist_ent sg_entries[]; -}; - -#define SG_SIZE(cnt) (sizeof (struct sglist) + \ - (sizeof (struct sglist_ent) * (cnt))) - -struct sglist * -sglist_alloc(int nseg, int flags) -{ - const size_t sz = SG_SIZE(nseg); - const int flag = (flags & M_WAITOK) ? 
KM_SLEEP : KM_NOSLEEP; - struct sglist *sg; - - ASSERT(nseg > 0); - - sg = kmem_zalloc(sz, flag); - if (sg != NULL) { - sg->sg_len = nseg; - sg->sg_refcnt = 1; - } - return (sg); -} - -void -sglist_free(struct sglist *sg) -{ - size_t sz; - - mutex_enter(&sg->sg_lock); - if (sg->sg_refcnt > 1) { - sg->sg_refcnt--; - mutex_exit(&sg->sg_lock); - return; - } - - VERIFY(sg->sg_refcnt == 1); - sg->sg_refcnt = 0; - sz = SG_SIZE(sg->sg_len); - mutex_exit(&sg->sg_lock); - kmem_free(sg, sz); -} - -int -sglist_append_phys(struct sglist *sg, vm_paddr_t pa, size_t len) -{ - uint_t idx; - struct sglist_ent *ent; - - /* Restrict to page-aligned entries */ - if ((pa & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0 || len == 0) { - return (EINVAL); - } - - mutex_enter(&sg->sg_lock); - idx = sg->sg_next; - if (idx >= sg->sg_len) { - mutex_exit(&sg->sg_lock); - return (ENOSPC); - } - - ent = &sg->sg_entries[idx]; - ASSERT(ent->sge_pa == 0 && ent->sge_len == 0); - ent->sge_pa = pa; - ent->sge_len = len; - sg->sg_next++; - - mutex_exit(&sg->sg_lock); - return (0); -} - - -static pfn_t -vm_object_pager_none(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl) -{ - panic("bad vm_object pager"); - return (PFN_INVALID); -} - -static pfn_t -vm_object_pager_reservoir(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, - uint_t *lvl) -{ - vmmr_region_t *region; - pfn_t pfn; - - ASSERT(vmo->vmo_type == OBJT_DEFAULT); - - region = vmo->vmo_data; - pfn = vmmr_region_pfn_at(region, off & PAGEMASK); - - /* TODO: handle large pages */ - if (lpfn != NULL) { - *lpfn = pfn; - } - if (lvl != NULL) { - *lvl = 0; - } - return (pfn); -} - -static pfn_t -vm_object_pager_sg(vm_object_t vmo, uintptr_t off, pfn_t *lpfn, uint_t *lvl) -{ - const uintptr_t aoff = ALIGN2PAGE(off); - uint_t level = 0; - uintptr_t pos = 0; - struct sglist *sg; - struct sglist_ent *ent; - pfn_t pfn = PFN_INVALID; - - ASSERT(vmo->vmo_type == OBJT_SG); - ASSERT(off < vmo->vmo_size); - - sg = vmo->vmo_data; - if (sg == NULL) { - return (PFN_INVALID); - } - - ent = &sg->sg_entries[0]; - for (uint_t i = 0; i < sg->sg_next; i++, ent++) { - if (aoff >= pos && aoff < (pos + ent->sge_len)) { - /* XXXJOY: Punt on large pages for now */ - level = 0; - pfn = mmu_btop(ent->sge_pa + (aoff - pos)); - break; - } - pos += ent->sge_len; - } - - if (lpfn != 0) { - *lpfn = pfn; - } - if (lvl != 0) { - *lvl = level; - } - return (pfn); -} - -vm_object_t -vm_object_allocate(objtype_t type, vm_pindex_t psize, bool transient) -{ - vm_object_t vmo; - const size_t size = ptob((size_t)psize); - - vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP); - mutex_init(&vmo->vmo_lock, NULL, MUTEX_DEFAULT, NULL); - - /* For now, these are to stay fixed after allocation */ - vmo->vmo_type = type; - vmo->vmo_size = size; - vmo->vmo_attr = VM_MEMATTR_DEFAULT; - - switch (type) { - case OBJT_DEFAULT: { - - /* TODO: opt-in to larger pages? 
*/ - int err; - vmmr_region_t *region = NULL; - - err = vmmr_alloc(size, transient, ®ion); - if (err != 0) { - mutex_destroy(&vmo->vmo_lock); - kmem_free(vmo, sizeof (*vmo)); - return (NULL); - } - vmo->vmo_data = region; - vmo->vmo_pager = vm_object_pager_reservoir; - } - break; - case OBJT_SG: - vmo->vmo_data = NULL; - vmo->vmo_pager = vm_object_pager_sg; - break; - default: - panic("Unsupported vm_object type"); - break; - } - - vmo->vmo_refcnt = 1; - return (vmo); -} - -vm_object_t -vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, - vm_prot_t prot, vm_ooffset_t off, void *cred) -{ - struct vm_object *vmo; - struct sglist *sg = (struct sglist *)handle; - - /* XXXJOY: be very restrictive for now */ - VERIFY(type == OBJT_SG); - VERIFY(off == 0); - - vmo = vm_object_allocate(type, size, false); - vmo->vmo_data = sg; - - mutex_enter(&sg->sg_lock); - VERIFY(sg->sg_refcnt++ >= 1); - mutex_exit(&sg->sg_lock); - - return (vmo); -} - -void -vm_object_deallocate(vm_object_t vmo) -{ - ASSERT(vmo != NULL); - - uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt); - /* underflow would be a deadly serious mistake */ - VERIFY3U(ref, !=, UINT_MAX); - if (ref != 0) { - return; - } - - switch (vmo->vmo_type) { - case OBJT_DEFAULT: - vmmr_free((vmmr_region_t *)vmo->vmo_data); - break; - case OBJT_SG: - sglist_free((struct sglist *)vmo->vmo_data); - break; - default: - panic("Unsupported vm_object type"); - break; - } - - vmo->vmo_pager = vm_object_pager_none; - vmo->vmo_data = NULL; - vmo->vmo_size = 0; - mutex_destroy(&vmo->vmo_lock); - kmem_free(vmo, sizeof (*vmo)); -} - -CTASSERT(VM_MEMATTR_UNCACHEABLE == MTRR_TYPE_UC); -CTASSERT(VM_MEMATTR_WRITE_BACK == MTRR_TYPE_WB); -int -vm_object_set_memattr(vm_object_t vmo, vm_memattr_t attr) -{ - ASSERT(MUTEX_HELD(&vmo->vmo_lock)); - - switch (attr) { - case VM_MEMATTR_UNCACHEABLE: - case VM_MEMATTR_WRITE_BACK: - vmo->vmo_attr = attr; - return (0); - default: - break; - } - return (EINVAL); -} - -void -vm_object_reference(vm_object_t vmo) -{ - ASSERT(vmo != NULL); - - uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt); - /* overflow would be a deadly serious mistake */ - VERIFY3U(ref, !=, 0); -} - -pfn_t -vm_object_pfn(vm_object_t vmo, uintptr_t off) -{ - /* This is expected to be used only on reservoir-backed memory */ - if (vmo->vmo_type != OBJT_DEFAULT) { - return (PFN_INVALID); - } - - return (vmo->vmo_pager(vmo, off, NULL, NULL)); -} - -static vmspace_mapping_t * -vm_mapping_find(struct vmspace *vms, uintptr_t addr, size_t size, - boolean_t no_lock) -{ - vmspace_mapping_t *vmsm; - list_t *ml = &vms->vms_maplist; - const uintptr_t range_end = addr + size; - - ASSERT(addr <= range_end); - - if (no_lock) { - /* - * This check should be superflous with the protections - * promised by the bhyve logic which calls into the VM shim. - * All the same, it is cheap to be paranoid. 
- */ - VERIFY(!vms->vms_map_changing); - } else { - VERIFY(MUTEX_HELD(&vms->vms_lock)); - } - - if (addr >= vms->vms_size) { - return (NULL); - } - for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) { - const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len; - - if (addr >= vmsm->vmsm_addr && addr < seg_end) { - if (range_end <= seg_end) { - return (vmsm); - } else { - return (NULL); - } - } - } - return (NULL); -} - -static boolean_t -vm_mapping_gap(struct vmspace *vms, uintptr_t addr, size_t size) -{ - vmspace_mapping_t *vmsm; - list_t *ml = &vms->vms_maplist; - const uintptr_t range_end = addr + size - 1; - - ASSERT(MUTEX_HELD(&vms->vms_lock)); - ASSERT(size > 0); - - for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) { - const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1; - - /* - * The two ranges do not overlap if the start of either of - * them is after the end of the other. - */ - if (vmsm->vmsm_addr > range_end || addr > seg_end) - continue; - return (B_FALSE); - } - return (B_TRUE); -} - -static void -vm_mapping_remove(struct vmspace *vms, vmspace_mapping_t *vmsm) -{ - list_t *ml = &vms->vms_maplist; - - ASSERT(MUTEX_HELD(&vms->vms_lock)); - ASSERT(vms->vms_map_changing); - - list_remove(ml, vmsm); - vm_object_deallocate(vmsm->vmsm_object); - kmem_free(vmsm, sizeof (*vmsm)); -} - -int -vm_fault(vm_map_t map, vm_offset_t off, vm_prot_t type, int flag) -{ - struct vmspace *vms = VMMAP_TO_VMSPACE(map); - pmap_t pmap = &vms->vms_pmap; - void *pmi = pmap->pm_impl; - const uintptr_t addr = off; - vmspace_mapping_t *vmsm; - struct vm_object *vmo; - uint_t prot, map_lvl; - pfn_t pfn; - uintptr_t map_addr; - - mutex_enter(&vms->vms_lock); - if (vmspace_pmap_iswired(vms, addr, &prot) == 0) { - int err = 0; - - /* - * It is possible that multiple vCPUs will race to fault-in a - * given address. In such cases, the race loser(s) will - * encounter the already-mapped page, needing to do nothing - * more than consider it a success. - * - * If the fault exceeds protection, it is an obvious error. - */ - if ((prot & type) != type) { - err = FC_PROT; - } - - mutex_exit(&vms->vms_lock); - return (err); - } - - /* Try to wire up the address */ - if ((vmsm = vm_mapping_find(vms, addr, 0, B_FALSE)) == NULL) { - mutex_exit(&vms->vms_lock); - return (FC_NOMAP); - } - vmo = vmsm->vmsm_object; - prot = vmsm->vmsm_prot; - - /* XXXJOY: punt on large pages for now */ - pfn = vmo->vmo_pager(vmo, VMSM_OFFSET(vmsm, addr), NULL, NULL); - map_lvl = 0; - map_addr = P2ALIGN((uintptr_t)addr, LEVEL_SIZE(map_lvl)); - VERIFY(pfn != PFN_INVALID); - - /* - * If pmap failure is to be handled, the previously acquired page locks - * would need to be released. - */ - VERIFY0(pmap->pm_ops->vpo_map(pmi, map_addr, pfn, map_lvl, prot, - vmo->vmo_attr)); - pmap->pm_eptgen++; - - mutex_exit(&vms->vms_lock); - return (0); -} - -int -vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, - vm_prot_t prot, vm_page_t *ma, int max_count) -{ - struct vmspace *vms = VMMAP_TO_VMSPACE(map); - const uintptr_t vaddr = addr; - vmspace_mapping_t *vmsm; - struct vm_object *vmo; - vm_page_t vmp; - - ASSERT0(addr & PAGEOFFSET); - ASSERT(len == PAGESIZE); - ASSERT(max_count == 1); - - /* - * Unlike practically all of the other logic that queries or - * manipulates vmspace objects, vm_fault_quick_hold_pages() does so - * without holding vms_lock. 
This is safe because bhyve ensures that - * changes to the vmspace map occur only when all other threads have - * been excluded from running. - * - * Since this task can count on vms_maplist remaining static and does - * not need to modify the pmap (like vm_fault might), it can proceed - * without the lock. The vm_object has independent refcount and lock - * protection, while the vmo_pager methods do not rely on vms_lock for - * safety. - * - * Performing this work without locks is critical in cases where - * multiple vCPUs require simultaneous instruction emulation, such as - * for frequent guest APIC accesses on a host that lacks hardware - * acceleration for that behavior. - */ - if ((vmsm = vm_mapping_find(vms, vaddr, PAGESIZE, B_TRUE)) == NULL || - (prot & ~vmsm->vmsm_prot) != 0) { - return (-1); - } - - vmp = kmem_zalloc(sizeof (struct vm_page), KM_SLEEP); - - vmo = vmsm->vmsm_object; - vm_object_reference(vmo); - vmp->vmp_obj_held = vmo; - vmp->vmp_pfn = vmo->vmo_pager(vmo, VMSM_OFFSET(vmsm, vaddr), NULL, - NULL); - - *ma = vmp; - return (1); -} - -/* - * Find a suitable location for a mapping (and install it). - */ -int -vm_map_find(vm_map_t map, vm_object_t vmo, vm_ooffset_t off, vm_offset_t *addr, - vm_size_t len, vm_offset_t max_addr, int find_flags, vm_prot_t prot, - vm_prot_t prot_max, int cow) -{ - struct vmspace *vms = VMMAP_TO_VMSPACE(map); - const size_t size = (size_t)len; - const uintptr_t uoff = (uintptr_t)off; - uintptr_t base = *addr; - vmspace_mapping_t *vmsm; - int res = 0; - - /* For use in vmm only */ - VERIFY(find_flags == VMFS_NO_SPACE); /* essentially MAP_FIXED */ - VERIFY(max_addr == 0); - - if (size == 0 || off < 0 || - uoff >= (uoff + size) || vmo->vmo_size < (uoff + size)) { - return (EINVAL); - } - - if (*addr >= vms->vms_size) { - return (ENOMEM); - } - - vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP); - - mutex_enter(&vms->vms_lock); - vms->vms_map_changing = B_TRUE; - if (!vm_mapping_gap(vms, base, size)) { - res = ENOMEM; - goto out; - } - - if (res == 0) { - vmsm->vmsm_object = vmo; - vmsm->vmsm_addr = base; - vmsm->vmsm_len = len; - vmsm->vmsm_offset = (off_t)uoff; - vmsm->vmsm_prot = prot; - list_insert_tail(&vms->vms_maplist, vmsm); - - /* Communicate out the chosen address. 
*/ - *addr = (vm_offset_t)base; - } -out: - vms->vms_map_changing = B_FALSE; - mutex_exit(&vms->vms_lock); - if (res != 0) { - kmem_free(vmsm, sizeof (*vmsm)); - } - return (res); -} - -int -vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) -{ - struct vmspace *vms = VMMAP_TO_VMSPACE(map); - pmap_t pmap = &vms->vms_pmap; - void *pmi = pmap->pm_impl; - const uintptr_t addr = start; - const size_t size = (size_t)(end - start); - vmspace_mapping_t *vmsm; - - ASSERT(start < end); - - mutex_enter(&vms->vms_lock); - vms->vms_map_changing = B_TRUE; - /* expect to match existing mapping exactly */ - if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL || - vmsm->vmsm_addr != addr || vmsm->vmsm_len != size) { - vms->vms_map_changing = B_FALSE; - mutex_exit(&vms->vms_lock); - return (ENOENT); - } - - (void) pmap->pm_ops->vpo_unmap(pmi, addr, end); - pmap->pm_eptgen++; - - vm_mapping_remove(vms, vmsm); - vms->vms_map_changing = B_FALSE; - mutex_exit(&vms->vms_lock); - return (0); -} - -int -vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) -{ - struct vmspace *vms = VMMAP_TO_VMSPACE(map); - pmap_t pmap = &vms->vms_pmap; - void *pmi = pmap->pm_impl; - const uintptr_t addr = start; - const size_t size = end - start; - vmspace_mapping_t *vmsm; - struct vm_object *vmo; - uint_t prot; - - mutex_enter(&vms->vms_lock); - - /* For the time being, only exact-match mappings are expected */ - if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL) { - mutex_exit(&vms->vms_lock); - return (FC_NOMAP); - } - vmo = vmsm->vmsm_object; - prot = vmsm->vmsm_prot; - - for (uintptr_t pos = addr; pos < end; ) { - pfn_t pfn; - uintptr_t pg_size, map_addr; - uint_t map_lvl = 0; - - /* XXXJOY: punt on large pages for now */ - pfn = vmo->vmo_pager(vmo, VMSM_OFFSET(vmsm, pos), NULL, NULL); - pg_size = LEVEL_SIZE(map_lvl); - map_addr = P2ALIGN(pos, pg_size); - VERIFY(pfn != PFN_INVALID); - - VERIFY0(pmap->pm_ops->vpo_map(pmi, map_addr, pfn, map_lvl, - prot, vmo->vmo_attr)); - vms->vms_pmap.pm_eptgen++; - - pos += pg_size; - } - - mutex_exit(&vms->vms_lock); - - return (0); -} - -/* Provided custom for bhyve 'devmem' segment mapping */ -int -vm_segmap_obj(vm_object_t vmo, off_t map_off, size_t size, struct as *as, - caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags) -{ - int err; - - VERIFY(map_off >= 0); - VERIFY(size <= vmo->vmo_size); - VERIFY((size + map_off) <= vmo->vmo_size); - - if (vmo->vmo_type != OBJT_DEFAULT) { - /* Only support default objects for now */ - return (ENOTSUP); - } - - as_rangelock(as); - - err = choose_addr(as, addrp, size, 0, ADDR_VACALIGN, flags); - if (err == 0) { - segvmm_crargs_t svma; - - svma.obj = vmo; - svma.offset = map_off; - svma.prot = prot; - - err = as_map(as, *addrp, size, segvmm_create, &svma); - } - - as_rangeunlock(as); - return (err); -} - -int -vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp, - off_t len, uint_t prot, uint_t maxprot, uint_t flags) -{ - const uintptr_t addr = (uintptr_t)off; - const size_t size = (uintptr_t)len; - vmspace_mapping_t *vmsm; - vm_object_t vmo; - int err; - - if (off < 0 || len <= 0 || - (addr & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) { - return (EINVAL); - } - - mutex_enter(&vms->vms_lock); - if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL) { - mutex_exit(&vms->vms_lock); - return (ENXIO); - } - if ((prot & ~(vmsm->vmsm_prot | PROT_USER)) != 0) { - mutex_exit(&vms->vms_lock); - return (EACCES); - } - vmo = vmsm->vmsm_object; - if 
(vmo->vmo_type != OBJT_DEFAULT) { - /* Only support default objects for now */ - mutex_exit(&vms->vms_lock); - return (ENOTSUP); - } - - as_rangelock(as); - - err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags); - if (err == 0) { - segvmm_crargs_t svma; - const uintptr_t addroff = addr - vmsm->vmsm_addr; - const uintptr_t mapoff = addroff + vmsm->vmsm_offset; - - VERIFY(addroff < vmsm->vmsm_len); - VERIFY((vmsm->vmsm_len - addroff) >= size); - VERIFY(mapoff < vmo->vmo_size); - VERIFY((mapoff + size) <= vmo->vmo_size); - - svma.obj = vmo; - svma.offset = mapoff; - svma.prot = prot; - - err = as_map(as, *addrp, len, segvmm_create, &svma); - } - - as_rangeunlock(as); - mutex_exit(&vms->vms_lock); - return (err); -} - -void -vm_page_unwire(vm_page_t vmp, uint8_t nqueue __unused) -{ - ASSERT(!MUTEX_HELD(&vmp->vmp_lock)); - mutex_enter(&vmp->vmp_lock); - - VERIFY(vmp->vmp_pfn != PFN_INVALID); - - vm_object_deallocate(vmp->vmp_obj_held); - vmp->vmp_obj_held = NULL; - vmp->vmp_pfn = PFN_INVALID; - - mutex_exit(&vmp->vmp_lock); - - mutex_destroy(&vmp->vmp_lock); - kmem_free(vmp, sizeof (*vmp)); -} diff --git a/usr/src/uts/i86pc/io/vmm/vmm_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_vm.c new file mode 100644 index 0000000000..debeec605a --- /dev/null +++ b/usr/src/uts/i86pc/io/vmm/vmm_vm.c @@ -0,0 +1,1430 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */ + +/* + * Copyright 2019 Joyent, Inc. + * Copyright 2021 Oxide Computer Company + * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. + */ + +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/list.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/ddi.h> +#include <sys/sysmacros.h> +#include <sys/machsystm.h> +#include <sys/vmsystm.h> +#include <sys/malloc.h> +#include <sys/x86_archext.h> +#include <vm/as.h> +#include <vm/hat_i86.h> +#include <vm/seg_vn.h> +#include <vm/seg_kmem.h> + +#include <sys/vmm_vm.h> +#include <sys/seg_vmm.h> +#include <sys/vmm_kernel.h> +#include <sys/vmm_reservoir.h> +#include <sys/vmm_gpt.h> + + +/* + * VMM Virtual Memory + * + * History + * + * When bhyve was ported to illumos, one significant hole was handling guest + * memory and memory accesses. In the original Pluribus port, bhyve itself + * manually handled the EPT structures for guest memory. The updated sources + * (from FreeBSD 11) took a different approach, using the native FreeBSD VM + * system for memory allocations and management of the EPT structures. Keeping + * source differences to a minimum was a priority, so illumos-bhyve implemented + * a makeshift "VM shim" which exposed the bare minimum of those interfaces to + * boot and run guests. + * + * While the VM shim was successful in getting illumos-bhyve to a functional + * state on Intel (and later AMD) gear, the FreeBSD-specific nature of the + * compatibility interfaces made it awkward to use. 
As source differences with + * the upstream kernel code became less of a concern, and upcoming features + * (such as live migration) would demand more of those VM interfaces, it became + * clear that an overhaul was prudent. + * + * Design + * + * The new VM system for bhyve retains a number of the same concepts as what it + * replaces: + * + * - `vmspace_t` is the top-level entity for a guest memory space + * - `vm_object_t` represents a memory object which can be mapped into a vmspace + * - `vm_page_t` represents a page hold within a given vmspace, providing access + * to the underlying memory page + * + * Unlike the old code, where most of the involved structures were exposed via + * public definitions, this replacement VM interface keeps all involved + * structures opaque to consumers. Furthermore, there is a clear delineation + * between infrequent administrative operations (such as mapping/unmapping + * regions) and common data-path operations (attempting a page hold at a given + * guest-physical address). Those administrative operations are performed + * directly against the vmspace, whereas the data-path operations are performed + * through a `vm_client_t` handle. That VM client abstraction is meant to + * reduce contention and overhead for frequent access operations and provide + * debugging insight into how different subcomponents are accessing the vmspace. + * A VM client is allocated for each vCPU, each viona ring (via the vmm_drv + * interface) and each VMM userspace segment mapping. + * + * Exclusion + * + * Making changes to the vmspace (such as mapping or unmapping regions) requires + * other accessors be excluded while the change is underway to prevent them from + * observing invalid intermediate states. A simple approach could use a mutex + * or rwlock to achieve this, but that risks contention when the rate of access + * to the vmspace is high. + * + * Since vmspace changes (map/unmap) are rare, we can instead do the exclusion + * at a per-vm_client_t basis. While this raises the cost for vmspace changes, + * it means that the much more common page accesses through the vm_client can + * normally proceed unimpeded and independently. + * + * When a change to the vmspace is required, the caller will put the vmspace in + * a 'hold' state, iterating over all associated vm_client instances, waiting + * for them to complete any in-flight lookup (indicated by VCS_ACTIVE) before + * setting VCS_HOLD in their state flag fields. With VCS_HOLD set, any call on + * the vm_client which would access the vmspace state (vmc_hold or vmc_fault) + * will block until the hold condition is cleared. Once the hold is asserted + * for all clients, the vmspace change can proceed with confidence. Upon + * completion of that operation, VCS_HOLD is cleared from the clients, and they + * are released to resume vmspace accesses. + * + * vCPU Consumers + * + * Access to the vmspace for vCPUs running in guest context is different from + * emulation-related vm_client activity: they solely rely on the contents of the + * page tables. Furthermore, the existing VCS_HOLD mechanism used to exclude + * client access is not feasible when entering guest context, since interrupts + * are disabled, making it impossible to block entry. This is not a concern as + * long as vmspace modifications never place the page tables in invalid states + * (either intermediate, or final). 
The vm_client hold mechanism does provide + * the means to IPI vCPU consumers which will trigger a notification once they + * report their exit from guest context. This can be used to ensure that page + * table modifications are made visible to those vCPUs within a certain + * time frame. + */ + +typedef struct vmspace_mapping { + list_node_t vmsm_node; + vm_object_t *vmsm_object; /* object backing this mapping */ + uintptr_t vmsm_addr; /* start addr in vmspace for mapping */ + size_t vmsm_len; /* length (in bytes) of mapping */ + off_t vmsm_offset; /* byte offset into object */ + uint_t vmsm_prot; +} vmspace_mapping_t; + +#define VMSM_OFFSET(vmsm, addr) ( \ + (vmsm)->vmsm_offset + \ + ((addr) - (uintptr_t)(vmsm)->vmsm_addr)) + +typedef enum vm_client_state { + VCS_IDLE = 0, + /* currently accessing vmspace for client operation (hold or fault) */ + VCS_ACTIVE = (1 << 0), + /* client hold requested/asserted */ + VCS_HOLD = (1 << 1), + /* vCPU is accessing page tables in guest context */ + VCS_ON_CPU = (1 << 2), + /* client has been orphaned (no more access to vmspace) */ + VCS_ORPHANED = (1 << 3), + /* client undergoing destroy operation */ + VCS_DESTROY = (1 << 4), +} vm_client_state_t; + +struct vmspace { + kmutex_t vms_lock; + kcondvar_t vms_cv; + bool vms_held; + uintptr_t vms_size; /* immutable after creation */ + + /* (nested) page table state */ + vmm_gpt_t *vms_gpt; + uint64_t vms_pt_gen; + uint64_t vms_pages_mapped; + bool vms_track_dirty; + + list_t vms_maplist; + list_t vms_clients; +}; + +struct vm_client { + vmspace_t *vmc_space; + list_node_t vmc_node; + + kmutex_t vmc_lock; + kcondvar_t vmc_cv; + vm_client_state_t vmc_state; + int vmc_cpu_active; + uint64_t vmc_cpu_gen; + bool vmc_track_dirty; + vmc_inval_cb_t vmc_inval_func; + void *vmc_inval_data; + + list_t vmc_held_pages; +}; + +typedef enum vm_object_type { + VMOT_NONE, + VMOT_MEM, + VMOT_MMIO, +} vm_object_type_t; + +struct vm_object { + uint_t vmo_refcnt; /* manipulated with atomic ops */ + + /* Fields below are fixed at creation time */ + vm_object_type_t vmo_type; + size_t vmo_size; + void *vmo_data; + uint8_t vmo_attr; +}; + +struct vm_page { + vm_client_t *vmp_client; + list_node_t vmp_node; + vm_page_t *vmp_chain; + uintptr_t vmp_gpa; + pfn_t vmp_pfn; + uint64_t *vmp_ptep; + vm_object_t *vmp_obj_ref; + int vmp_prot; +}; + +#define VMC_IS_ACTIVE(vmc) (((vmc)->vmc_state & VCS_ACTIVE) != 0) + +static vmspace_mapping_t *vm_mapping_find(vmspace_t *, uintptr_t, size_t); +static void vmc_space_hold(vm_client_t *); +static void vmc_space_release(vm_client_t *, bool); +static void vmc_space_invalidate(vm_client_t *, uintptr_t, size_t, uint64_t); +static void vmc_space_unmap(vm_client_t *, uintptr_t, size_t, vm_object_t *); +static vm_client_t *vmc_space_orphan(vm_client_t *, vmspace_t *); + + +/* + * Create a new vmspace with a maximum address of `end`. + */ +vmspace_t * +vmspace_alloc(size_t end, vmm_pte_ops_t *pte_ops, bool track_dirty) +{ + vmspace_t *vms; + const uintptr_t size = end + 1; + + /* + * This whole mess is built on the assumption that a 64-bit address + * space is available to work with for the various pagetable tricks. 
+ */ + VERIFY(size > 0 && (size & PAGEOFFSET) == 0 && + size <= (uintptr_t)USERLIMIT); + + vms = kmem_zalloc(sizeof (*vms), KM_SLEEP); + vms->vms_size = size; + list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t), + offsetof(vmspace_mapping_t, vmsm_node)); + list_create(&vms->vms_clients, sizeof (vm_client_t), + offsetof(vm_client_t, vmc_node)); + + vms->vms_gpt = vmm_gpt_alloc(pte_ops); + vms->vms_pt_gen = 1; + vms->vms_track_dirty = track_dirty; + + return (vms); +} + +/* + * Destroy a vmspace. All regions in the space must be unmapped. Any remaining + * clients will be orphaned. + */ +void +vmspace_destroy(vmspace_t *vms) +{ + mutex_enter(&vms->vms_lock); + VERIFY(list_is_empty(&vms->vms_maplist)); + + if (!list_is_empty(&vms->vms_clients)) { + vm_client_t *vmc = list_head(&vms->vms_clients); + while (vmc != NULL) { + vmc = vmc_space_orphan(vmc, vms); + } + /* + * Wait for any clients which were in the process of destroying + * themselves to disappear. + */ + while (!list_is_empty(&vms->vms_clients)) { + cv_wait(&vms->vms_cv, &vms->vms_lock); + } + } + VERIFY(list_is_empty(&vms->vms_clients)); + + vmm_gpt_free(vms->vms_gpt); + mutex_exit(&vms->vms_lock); + + mutex_destroy(&vms->vms_lock); + cv_destroy(&vms->vms_cv); + list_destroy(&vms->vms_maplist); + list_destroy(&vms->vms_clients); + + kmem_free(vms, sizeof (*vms)); +} + +/* + * Retrieve the count of resident (mapped into the page tables) pages. + */ +uint64_t +vmspace_resident_count(vmspace_t *vms) +{ + return (vms->vms_pages_mapped); +} + +static pfn_t +vm_object_pager_reservoir(vm_object_t *vmo, uintptr_t off) +{ + vmmr_region_t *region; + pfn_t pfn; + + ASSERT3U(vmo->vmo_type, ==, VMOT_MEM); + + region = vmo->vmo_data; + pfn = vmmr_region_pfn_at(region, off); + + return (pfn); +} + +static pfn_t +vm_object_pager_mmio(vm_object_t *vmo, uintptr_t off) +{ + pfn_t pfn; + + ASSERT3U(vmo->vmo_type, ==, VMOT_MMIO); + ASSERT3P(vmo->vmo_data, !=, NULL); + ASSERT3U(off, <, vmo->vmo_size); + + pfn = ((uintptr_t)vmo->vmo_data + off) >> PAGESHIFT; + + return (pfn); +} + +/* + * Allocate a VM object backed by VMM reservoir memory. + */ +vm_object_t * +vm_object_mem_allocate(size_t size, bool transient) +{ + int err; + vmmr_region_t *region = NULL; + vm_object_t *vmo; + + ASSERT3U(size, !=, 0); + ASSERT3U(size & PAGEOFFSET, ==, 0); + + err = vmmr_alloc(size, transient, ®ion); + if (err != 0) { + return (NULL); + } + + vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP); + + /* For now, these are to stay fixed after allocation */ + vmo->vmo_type = VMOT_MEM; + vmo->vmo_size = size; + vmo->vmo_attr = MTRR_TYPE_WB; + vmo->vmo_data = region; + vmo->vmo_refcnt = 1; + + return (vmo); +} + +static vm_object_t * +vm_object_mmio_allocate(size_t size, uintptr_t hpa) +{ + vm_object_t *vmo; + + ASSERT3U(size, !=, 0); + ASSERT3U(size & PAGEOFFSET, ==, 0); + ASSERT3U(hpa & PAGEOFFSET, ==, 0); + + vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP); + + /* For now, these are to stay fixed after allocation */ + vmo->vmo_type = VMOT_MMIO; + vmo->vmo_size = size; + vmo->vmo_attr = MTRR_TYPE_UC; + vmo->vmo_data = (void *)hpa; + vmo->vmo_refcnt = 1; + + return (vmo); +} + +/* + * Allocate a VM object backed by an existing range of physical memory. 
+ */ +vm_object_t * +vmm_mmio_alloc(vmspace_t *vmspace, uintptr_t gpa, size_t len, uintptr_t hpa) +{ + int error; + vm_object_t *obj; + + obj = vm_object_mmio_allocate(len, hpa); + if (obj != NULL) { + error = vmspace_map(vmspace, obj, 0, gpa, len, + PROT_READ | PROT_WRITE); + if (error != 0) { + vm_object_release(obj); + obj = NULL; + } + } + + return (obj); +} + +/* + * Release a vm_object reference + */ +void +vm_object_release(vm_object_t *vmo) +{ + ASSERT(vmo != NULL); + + uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt); + /* underflow would be a deadly serious mistake */ + VERIFY3U(ref, !=, UINT_MAX); + if (ref != 0) { + return; + } + + switch (vmo->vmo_type) { + case VMOT_MEM: + vmmr_free((vmmr_region_t *)vmo->vmo_data); + break; + case VMOT_MMIO: + break; + default: + panic("unexpected object type %u", vmo->vmo_type); + break; + } + + vmo->vmo_data = NULL; + vmo->vmo_size = 0; + kmem_free(vmo, sizeof (*vmo)); +} + +/* + * Increase refcount for vm_object reference + */ +void +vm_object_reference(vm_object_t *vmo) +{ + ASSERT(vmo != NULL); + + uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt); + /* overflow would be a deadly serious mistake */ + VERIFY3U(ref, !=, 0); +} + +/* + * Get the host-physical PFN for a given offset into a vm_object. + * + * The provided `off` must be within the allocated size of the vm_object. + */ +pfn_t +vm_object_pfn(vm_object_t *vmo, uintptr_t off) +{ + const uintptr_t aligned_off = off & PAGEMASK; + + switch (vmo->vmo_type) { + case VMOT_MEM: + return (vm_object_pager_reservoir(vmo, aligned_off)); + case VMOT_MMIO: + return (vm_object_pager_mmio(vmo, aligned_off)); + case VMOT_NONE: + break; + } + panic("unexpected object type %u", vmo->vmo_type); +} + +static vmspace_mapping_t * +vm_mapping_find(vmspace_t *vms, uintptr_t addr, size_t size) +{ + vmspace_mapping_t *vmsm; + list_t *ml = &vms->vms_maplist; + const uintptr_t range_end = addr + size; + + ASSERT3U(addr, <=, range_end); + + if (addr >= vms->vms_size) { + return (NULL); + } + for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) { + const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len; + + if (addr >= vmsm->vmsm_addr && addr < seg_end) { + if (range_end <= seg_end) { + return (vmsm); + } else { + return (NULL); + } + } + } + return (NULL); +} + +/* + * Check to see if any mappings reside within [addr, addr + size) span in the + * vmspace, returning true if that span is indeed empty. + */ +static bool +vm_mapping_gap(vmspace_t *vms, uintptr_t addr, size_t size) +{ + vmspace_mapping_t *vmsm; + list_t *ml = &vms->vms_maplist; + const uintptr_t range_end = addr + size - 1; + + ASSERT(MUTEX_HELD(&vms->vms_lock)); + ASSERT(size > 0); + + for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) { + const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1; + + /* + * The two ranges do not overlap if the start of either of + * them is after the end of the other. + */ + if (vmsm->vmsm_addr > range_end || addr > seg_end) + continue; + return (false); + } + return (true); +} + +static void +vm_mapping_remove(vmspace_t *vms, vmspace_mapping_t *vmsm) +{ + list_t *ml = &vms->vms_maplist; + + ASSERT(MUTEX_HELD(&vms->vms_lock)); + ASSERT(vms->vms_held); + + list_remove(ml, vmsm); + vm_object_release(vmsm->vmsm_object); + kmem_free(vmsm, sizeof (*vmsm)); +} + +/* + * Enter a hold state on the vmspace. 
This ensures that all VM clients + * associated with the vmspace are excluded from establishing new page holds, + * or any other actions which would require accessing vmspace state subject to + * potential change. + * + * Returns with vmspace_t`vms_lock held. + */ +static void +vmspace_hold_enter(vmspace_t *vms) +{ + mutex_enter(&vms->vms_lock); + VERIFY(!vms->vms_held); + + vm_client_t *vmc = list_head(&vms->vms_clients); + for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) { + vmc_space_hold(vmc); + } + vms->vms_held = true; +} + +/* + * Exit a hold state on the vmspace. This releases all VM clients associated + * with the vmspace to be able to establish new page holds, and partake in other + * actions which require accessing changed vmspace state. If `kick_on_cpu` is + * true, then any CPUs actively using the page tables will be IPIed, and the + * call will block until they have acknowledged being ready to use the latest + * state of the tables. + * + * Requires vmspace_t`vms_lock be held, which is released as part of the call. + */ +static void +vmspace_hold_exit(vmspace_t *vms, bool kick_on_cpu) +{ + ASSERT(MUTEX_HELD(&vms->vms_lock)); + VERIFY(vms->vms_held); + + vm_client_t *vmc = list_head(&vms->vms_clients); + for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) { + vmc_space_release(vmc, kick_on_cpu); + } + vms->vms_held = false; + mutex_exit(&vms->vms_lock); +} + +/* + * Attempt to map a vm_object span into the vmspace. + * + * Requirements: + * - `obj_off`, `addr`, and `len` must be page-aligned + * - `obj_off` cannot be greater than the allocated size of the object + * - [`obj_off`, `obj_off` + `len`) span cannot extend beyond the allocated + * size of the object + * - [`addr`, `addr` + `len`) span cannot reside beyond the maximum address + * of the vmspace + */ +int +vmspace_map(vmspace_t *vms, vm_object_t *vmo, uintptr_t obj_off, uintptr_t addr, + size_t len, uint8_t prot) +{ + vmspace_mapping_t *vmsm; + int res = 0; + + if (len == 0 || (addr + len) < addr || + obj_off >= (obj_off + len) || vmo->vmo_size < (obj_off + len)) { + return (EINVAL); + } + if ((addr + len) >= vms->vms_size) { + return (ENOMEM); + } + + vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP); + + vmspace_hold_enter(vms); + if (!vm_mapping_gap(vms, addr, len)) { + kmem_free(vmsm, sizeof (*vmsm)); + res = ENOMEM; + } else { + vmsm->vmsm_object = vmo; + vmsm->vmsm_addr = addr; + vmsm->vmsm_len = len; + vmsm->vmsm_offset = (off_t)obj_off; + vmsm->vmsm_prot = prot; + list_insert_tail(&vms->vms_maplist, vmsm); + + /* + * Make sure the GPT has tables ready for leaf entries across + * the entire new mapping. + */ + vmm_gpt_populate_region(vms->vms_gpt, addr, addr + len); + } + vmspace_hold_exit(vms, false); + return (res); +} + +/* + * Unmap a region of the vmspace. + * + * Presently the [start, end) span must equal a region previously mapped by a + * call to vmspace_map(). + */ +int +vmspace_unmap(vmspace_t *vms, uintptr_t start, uintptr_t end) +{ + const size_t size = (size_t)(end - start); + vmspace_mapping_t *vmsm; + vm_client_t *vmc; + uint64_t gen = 0; + + ASSERT(start < end); + + vmspace_hold_enter(vms); + /* expect to match existing mapping exactly */ + if ((vmsm = vm_mapping_find(vms, start, size)) == NULL || + vmsm->vmsm_addr != start || vmsm->vmsm_len != size) { + vmspace_hold_exit(vms, false); + return (ENOENT); + } + + /* Prepare clients (and their held pages) for the unmap. 
*/ + for (vmc = list_head(&vms->vms_clients); vmc != NULL; + vmc = list_next(&vms->vms_clients, vmc)) { + vmc_space_unmap(vmc, start, size, vmsm->vmsm_object); + } + + /* Clear all PTEs for region */ + if (vmm_gpt_unmap_region(vms->vms_gpt, start, end) != 0) { + vms->vms_pt_gen++; + gen = vms->vms_pt_gen; + } + /* ... and the intermediate (directory) PTEs as well */ + vmm_gpt_vacate_region(vms->vms_gpt, start, end); + + /* + * If pages were actually unmapped from the GPT, provide clients with + * an invalidation notice. + */ + if (gen != 0) { + for (vmc = list_head(&vms->vms_clients); vmc != NULL; + vmc = list_next(&vms->vms_clients, vmc)) { + vmc_space_invalidate(vmc, start, size, vms->vms_pt_gen); + } + } + + vm_mapping_remove(vms, vmsm); + vmspace_hold_exit(vms, true); + return (0); +} + +static int +vmspace_lookup_map(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp, + uint64_t **ptepp) +{ + vmm_gpt_t *gpt = vms->vms_gpt; + uint64_t *entries[MAX_GPT_LEVEL], *leaf; + pfn_t pfn = PFN_INVALID; + uint_t prot; + + ASSERT0(gpa & PAGEOFFSET); + ASSERT((req_prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) != PROT_NONE); + + vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL); + leaf = entries[LEVEL1]; + if (leaf == NULL) { + /* + * Since we populated the intermediate tables for any regions + * mapped in the GPT, an empty leaf entry indicates there is no + * mapping, populated or not, at this GPT. + */ + return (FC_NOMAP); + } + + if (vmm_gpt_is_mapped(gpt, leaf, &pfn, &prot)) { + if ((req_prot & prot) != req_prot) { + return (FC_PROT); + } + } else { + vmspace_mapping_t *vmsm; + vm_object_t *vmo; + + /* + * Because of the prior leaf check, we should be confident that + * _some_ mapping covers this GPA + */ + vmsm = vm_mapping_find(vms, gpa, PAGESIZE); + VERIFY(vmsm != NULL); + + if ((req_prot & vmsm->vmsm_prot) != req_prot) { + return (FC_PROT); + } + vmo = vmsm->vmsm_object; + pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa)); + VERIFY(pfn != PFN_INVALID); + + if (vmm_gpt_map_at(gpt, leaf, pfn, vmsm->vmsm_prot, + vmo->vmo_attr)) { + atomic_inc_64(&vms->vms_pages_mapped); + } + } + + ASSERT(pfn != PFN_INVALID && leaf != NULL); + if (pfnp != NULL) { + *pfnp = pfn; + } + if (ptepp != NULL) { + *ptepp = leaf; + } + return (0); +} + +/* + * Populate (make resident in the page tables) a region of the vmspace. + * + * Presently the [start, end) span must equal a region previously mapped by a + * call to vmspace_map(). + */ +int +vmspace_populate(vmspace_t *vms, uintptr_t start, uintptr_t end) +{ + const size_t size = end - start; + vmspace_mapping_t *vmsm; + + mutex_enter(&vms->vms_lock); + + /* For the time being, only exact-match mappings are expected */ + if ((vmsm = vm_mapping_find(vms, start, size)) == NULL) { + mutex_exit(&vms->vms_lock); + return (FC_NOMAP); + } + + vm_object_t *vmo = vmsm->vmsm_object; + const int prot = vmsm->vmsm_prot; + const uint8_t attr = vmo->vmo_attr; + size_t populated = 0; + for (uintptr_t gpa = start & PAGEMASK; gpa < end; gpa += PAGESIZE) { + const pfn_t pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa)); + VERIFY(pfn != PFN_INVALID); + + if (vmm_gpt_map(vms->vms_gpt, gpa, pfn, prot, attr)) { + populated++; + } + } + atomic_add_64(&vms->vms_pages_mapped, populated); + + mutex_exit(&vms->vms_lock); + return (0); +} + +/* + * Allocate a client from a given vmspace. 
+ */
+vm_client_t *
+vmspace_client_alloc(vmspace_t *vms)
+{
+	vm_client_t *vmc;
+
+	vmc = kmem_zalloc(sizeof (vm_client_t), KM_SLEEP);
+	vmc->vmc_space = vms;
+	mutex_init(&vmc->vmc_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&vmc->vmc_cv, NULL, CV_DRIVER, NULL);
+	vmc->vmc_state = VCS_IDLE;
+	vmc->vmc_cpu_active = -1;
+	list_create(&vmc->vmc_held_pages, sizeof (vm_page_t),
+	    offsetof(vm_page_t, vmp_node));
+	vmc->vmc_track_dirty = vms->vms_track_dirty;
+
+	mutex_enter(&vms->vms_lock);
+	list_insert_tail(&vms->vms_clients, vmc);
+	mutex_exit(&vms->vms_lock);
+
+	return (vmc);
+}
+
+/*
+ * Get the nested page table root pointer (EPTP/NCR3) value.
+ */
+uint64_t
+vmspace_table_root(vmspace_t *vms)
+{
+	return (vmm_gpt_get_pmtp(vms->vms_gpt));
+}
+
+/*
+ * Get the current generation number of the nested page table.
+ */
+uint64_t
+vmspace_table_gen(vmspace_t *vms)
+{
+	return (vms->vms_pt_gen);
+}
+
+/*
+ * Mark a vm_client as active. This will block if/while the client is held by
+ * the vmspace. On success, it returns with vm_client_t`vmc_lock held. It will
+ * fail if the vm_client has been orphaned.
+ */
+static int
+vmc_activate(vm_client_t *vmc)
+{
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY0(vmc->vmc_state & VCS_ACTIVE);
+	if ((vmc->vmc_state & VCS_ORPHANED) != 0) {
+		mutex_exit(&vmc->vmc_lock);
+		return (ENXIO);
+	}
+	while ((vmc->vmc_state & VCS_HOLD) != 0) {
+		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
+	}
+	vmc->vmc_state |= VCS_ACTIVE;
+	return (0);
+}
+
+/*
+ * Mark a vm_client as no longer active. It must be called with
+ * vm_client_t`vmc_lock already held, and will return with it released.
+ */
+static void
+vmc_deactivate(vm_client_t *vmc)
+{
+	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
+	VERIFY(vmc->vmc_state & VCS_ACTIVE);
+
+	vmc->vmc_state ^= VCS_ACTIVE;
+	if ((vmc->vmc_state & VCS_HOLD) != 0) {
+		cv_broadcast(&vmc->vmc_cv);
+	}
+	mutex_exit(&vmc->vmc_lock);
+}
+
+/*
+ * Indicate that a CPU will be utilizing the nested page tables through this VM
+ * client. Interrupts (and/or the GIF) are expected to be disabled when calling
+ * this function. Returns the generation number of the nested page table (to be
+ * used for TLB invalidations).
+ */
+uint64_t
+vmc_table_enter(vm_client_t *vmc)
+{
+	vmspace_t *vms = vmc->vmc_space;
+	uint64_t gen;
+
+	ASSERT0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
+	ASSERT3S(vmc->vmc_cpu_active, ==, -1);
+
+	/*
+	 * Since the NPT activation occurs with interrupts disabled, this must
+	 * be done without taking vmc_lock like normal.
+	 */
+	gen = vms->vms_pt_gen;
+	vmc->vmc_cpu_active = CPU->cpu_id;
+	vmc->vmc_cpu_gen = gen;
+	atomic_or_uint(&vmc->vmc_state, VCS_ON_CPU);
+
+	return (gen);
+}
+
+/*
+ * Indicate that this VM client is no longer (directly) using the underlying
+ * page tables. Interrupts (and/or the GIF) must be enabled prior to calling
+ * this function.
+ */
+void
+vmc_table_exit(vm_client_t *vmc)
+{
+	mutex_enter(&vmc->vmc_lock);
+
+	ASSERT(vmc->vmc_state & VCS_ON_CPU);
+	vmc->vmc_state ^= VCS_ON_CPU;
+	vmc->vmc_cpu_active = -1;
+	if ((vmc->vmc_state & VCS_HOLD) != 0) {
+		cv_broadcast(&vmc->vmc_cv);
+	}
+
+	mutex_exit(&vmc->vmc_lock);
+}
+
+static void
+vmc_space_hold(vm_client_t *vmc)
+{
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY0(vmc->vmc_state & VCS_HOLD);
+
+	/*
+	 * Because vmc_table_enter() alters vmc_state from a context where
+	 * interrupts are disabled, it cannot pay heed to vmc_lock, so setting
+	 * VCS_HOLD must be done atomically here.
+	 */
+	atomic_or_uint(&vmc->vmc_state, VCS_HOLD);
+
+	/* Wait for client to go inactive */
+	while ((vmc->vmc_state & VCS_ACTIVE) != 0) {
+		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
+	}
+	mutex_exit(&vmc->vmc_lock);
+}
+
+static void
+vmc_space_release(vm_client_t *vmc, bool kick_on_cpu)
+{
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY(vmc->vmc_state & VCS_HOLD);
+
+	if (kick_on_cpu && (vmc->vmc_state & VCS_ON_CPU) != 0) {
+		poke_cpu(vmc->vmc_cpu_active);
+
+		while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
+			cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
+		}
+	}
+
+	/*
+	 * Because vmc_table_enter() alters vmc_state from a context where
+	 * interrupts are disabled, it cannot pay heed to vmc_lock, so clearing
+	 * VCS_HOLD must be done atomically here.
+	 */
+	atomic_and_uint(&vmc->vmc_state, ~VCS_HOLD);
+	mutex_exit(&vmc->vmc_lock);
+}
+
+static void
+vmc_space_invalidate(vm_client_t *vmc, uintptr_t addr, size_t size,
+    uint64_t gen)
+{
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY(vmc->vmc_state & VCS_HOLD);
+	if ((vmc->vmc_state & VCS_ON_CPU) != 0) {
+		/*
+		 * Wait for clients using an old generation of the page tables
+		 * to exit guest context, where they subsequently flush the TLB
+		 * for the new generation.
+		 */
+		if (vmc->vmc_cpu_gen < gen) {
+			poke_cpu(vmc->vmc_cpu_active);
+
+			while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
+				cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
+			}
+		}
+	}
+	if (vmc->vmc_inval_func != NULL) {
+		vmc_inval_cb_t func = vmc->vmc_inval_func;
+		void *data = vmc->vmc_inval_data;
+
+		/*
+		 * Perform the actual invalidation call outside vmc_lock to
+		 * avoid lock ordering issues in the consumer. Since the client
+		 * is under VCS_HOLD, this is safe.
+		 */
+		mutex_exit(&vmc->vmc_lock);
+		func(data, addr, size);
+		mutex_enter(&vmc->vmc_lock);
+	}
+	mutex_exit(&vmc->vmc_lock);
+}
+
+static void
+vmc_space_unmap(vm_client_t *vmc, uintptr_t addr, size_t size,
+    vm_object_t *vmo)
+{
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY(vmc->vmc_state & VCS_HOLD);
+
+	/*
+	 * With the current vCPU exclusion invariants in place, we do not expect
+	 * a vCPU to be in guest context during an unmap.
+	 */
+	VERIFY0(vmc->vmc_state & VCS_ON_CPU);
+
+	/*
+	 * Any holds against the unmapped region need to establish their own
+	 * reference to the underlying object to avoid a potential
+	 * use-after-free.
+	 */
+	for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
+	    vmp != NULL;
+	    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
+		if (vmp->vmp_gpa < addr ||
+		    vmp->vmp_gpa >= (addr + size)) {
+			/* Hold outside region in question */
+			continue;
+		}
+		if (vmp->vmp_obj_ref == NULL) {
+			vm_object_reference(vmo);
+			vmp->vmp_obj_ref = vmo;
+			/* For an unmapped region, PTE is now meaningless */
+			vmp->vmp_ptep = NULL;
+		} else {
+			/*
+			 * Object could have gone through cycle of
+			 * unmap-map-unmap before the hold was released.
+			 */
+			VERIFY3P(vmp->vmp_ptep, ==, NULL);
+		}
+	}
+	mutex_exit(&vmc->vmc_lock);
+}
+
+static vm_client_t *
+vmc_space_orphan(vm_client_t *vmc, vmspace_t *vms)
+{
+	vm_client_t *next;
+
+	ASSERT(MUTEX_HELD(&vms->vms_lock));
+
+	mutex_enter(&vmc->vmc_lock);
+	VERIFY3P(vmc->vmc_space, ==, vms);
+	VERIFY0(vmc->vmc_state & VCS_ORPHANED);
+	if (vmc->vmc_state & VCS_DESTROY) {
+		/*
+		 * This vm_client is currently undergoing destruction, so it
+		 * does not need to be orphaned. Let it proceed with its own
+		 * clean-up task.
+		 */
+		next = list_next(&vms->vms_clients, vmc);
+	} else {
+		/*
+		 * Clients are only orphaned when the containing vmspace is
+		 * being torn down. All mappings from the vmspace should
+		 * already be gone, meaning any remaining held pages should have
+		 * direct references to the object.
+		 */
+		for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
+		    vmp != NULL;
+		    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
+			ASSERT3P(vmp->vmp_ptep, ==, NULL);
+			ASSERT3P(vmp->vmp_obj_ref, !=, NULL);
+		}
+
+		/*
+		 * After this point, the client will be orphaned, unable to
+		 * establish new page holds (or access any vmspace-related
+		 * resources) and is in charge of cleaning up after itself.
+		 */
+		vmc->vmc_state |= VCS_ORPHANED;
+		next = list_next(&vms->vms_clients, vmc);
+		list_remove(&vms->vms_clients, vmc);
+		vmc->vmc_space = NULL;
+	}
+	mutex_exit(&vmc->vmc_lock);
+	return (next);
+}
+
+/*
+ * Attempt to hold a page at `gpa` inside the referenced vmspace.
+ */
+vm_page_t *
+vmc_hold(vm_client_t *vmc, uintptr_t gpa, int prot)
+{
+	vmspace_t *vms = vmc->vmc_space;
+	vm_page_t *vmp;
+	pfn_t pfn = PFN_INVALID;
+	uint64_t *ptep = NULL;
+
+	ASSERT0(gpa & PAGEOFFSET);
+	ASSERT((prot & (PROT_READ | PROT_WRITE)) != PROT_NONE);
+
+	vmp = kmem_alloc(sizeof (*vmp), KM_SLEEP);
+	if (vmc_activate(vmc) != 0) {
+		kmem_free(vmp, sizeof (*vmp));
+		return (NULL);
+	}
+
+	if (vmspace_lookup_map(vms, gpa, prot, &pfn, &ptep) != 0) {
+		vmc_deactivate(vmc);
+		kmem_free(vmp, sizeof (*vmp));
+		return (NULL);
+	}
+	ASSERT(pfn != PFN_INVALID && ptep != NULL);
+
+	vmp->vmp_client = vmc;
+	vmp->vmp_chain = NULL;
+	vmp->vmp_gpa = gpa;
+	vmp->vmp_pfn = pfn;
+	vmp->vmp_ptep = ptep;
+	vmp->vmp_obj_ref = NULL;
+	vmp->vmp_prot = prot;
+	list_insert_tail(&vmc->vmc_held_pages, vmp);
+	vmc_deactivate(vmc);
+
+	return (vmp);
+}
+
+int
+vmc_fault(vm_client_t *vmc, uintptr_t gpa, int prot)
+{
+	vmspace_t *vms = vmc->vmc_space;
+	int err;
+
+	err = vmc_activate(vmc);
+	if (err == 0) {
+		err = vmspace_lookup_map(vms, gpa & PAGEMASK, prot, NULL, NULL);
+		vmc_deactivate(vmc);
+	}
+
+	return (err);
+}
+
+/*
+ * Allocate an additional vm_client_t, based on an existing one. Only the
+ * association with the vmspace is cloned, not existing holds or any
+ * configured invalidation function.
+ */
+vm_client_t *
+vmc_clone(vm_client_t *vmc)
+{
+	vmspace_t *vms = vmc->vmc_space;
+
+	return (vmspace_client_alloc(vms));
+}
+
+/*
+ * Register a function (and associated data pointer) to be called when an
+ * address range in the vmspace is invalidated.
+ */
+int
+vmc_set_inval_cb(vm_client_t *vmc, vmc_inval_cb_t func, void *data)
+{
+	int err;
+
+	err = vmc_activate(vmc);
+	if (err == 0) {
+		vmc->vmc_inval_func = func;
+		vmc->vmc_inval_data = data;
+		vmc_deactivate(vmc);
+	}
+
+	return (err);
+}
+
+/*
+ * Destroy a vm_client_t instance.
+ *
+ * No pages held through this vm_client_t may be outstanding when performing a
+ * vmc_destroy(). For vCPU clients, the client cannot be on-CPU (a call to
+ * vmc_table_exit() has been made).
+ */
+void
+vmc_destroy(vm_client_t *vmc)
+{
+	mutex_enter(&vmc->vmc_lock);
+
+	VERIFY(list_is_empty(&vmc->vmc_held_pages));
+	VERIFY0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
+
+	if ((vmc->vmc_state & VCS_ORPHANED) == 0) {
+		vmspace_t *vms;
+
+		/*
+		 * Deassociation with the parent vmspace must be done carefully:
+		 * The vmspace could attempt to orphan this vm_client while we
+		 * release vmc_lock in order to take vms_lock (the required
+		 * order). The client is marked to indicate that destruction is
+		 * under way. Doing so prevents any racing orphan operation
+		 * from applying to this client, allowing us to deassociate from
+		 * the vmspace safely.
+ */ + vmc->vmc_state |= VCS_DESTROY; + vms = vmc->vmc_space; + mutex_exit(&vmc->vmc_lock); + + mutex_enter(&vms->vms_lock); + mutex_enter(&vmc->vmc_lock); + list_remove(&vms->vms_clients, vmc); + /* + * If the vmspace began its own destruction operation while we + * were navigating the locks, be sure to notify it about this + * vm_client being deassociated. + */ + cv_signal(&vms->vms_cv); + mutex_exit(&vmc->vmc_lock); + mutex_exit(&vms->vms_lock); + } else { + VERIFY3P(vmc->vmc_space, ==, NULL); + mutex_exit(&vmc->vmc_lock); + } + + mutex_destroy(&vmc->vmc_lock); + cv_destroy(&vmc->vmc_cv); + list_destroy(&vmc->vmc_held_pages); + + kmem_free(vmc, sizeof (*vmc)); +} + +static __inline void * +vmp_ptr(const vm_page_t *vmp) +{ + ASSERT3U(vmp->vmp_pfn, !=, PFN_INVALID); + + const uintptr_t paddr = (vmp->vmp_pfn << PAGESHIFT); + return ((void *)((uintptr_t)kpm_vbase + paddr)); +} + +/* + * Get a readable kernel-virtual pointer for a held page. + * + * Only legal to call if PROT_READ was specified in `prot` for the vmc_hold() + * call to acquire this page reference. + */ +const void * +vmp_get_readable(const vm_page_t *vmp) +{ + ASSERT(vmp->vmp_prot & PROT_READ); + + return (vmp_ptr(vmp)); +} + +/* + * Get a writable kernel-virtual pointer for a held page. + * + * Only legal to call if PROT_WRITE was specified in `prot` for the vmc_hold() + * call to acquire this page reference. + */ +void * +vmp_get_writable(const vm_page_t *vmp) +{ + ASSERT(vmp->vmp_prot & PROT_WRITE); + + return (vmp_ptr(vmp)); +} + +/* + * Get the host-physical PFN for a held page. + */ +pfn_t +vmp_get_pfn(const vm_page_t *vmp) +{ + return (vmp->vmp_pfn); +} + +/* + * Store a pointer to `to_chain` in the page-chaining slot of `vmp`. + */ +void +vmp_chain(vm_page_t *vmp, vm_page_t *to_chain) +{ + ASSERT3P(vmp->vmp_chain, ==, NULL); + + vmp->vmp_chain = to_chain; +} + +/* + * Retrieve the pointer from the page-chaining in `vmp`. + */ +vm_page_t * +vmp_next(const vm_page_t *vmp) +{ + return (vmp->vmp_chain); +} + +static __inline bool +vmp_release_inner(vm_page_t *vmp, vm_client_t *vmc) +{ + ASSERT(MUTEX_HELD(&vmc->vmc_lock)); + + bool was_unmapped = false; + + list_remove(&vmc->vmc_held_pages, vmp); + if (vmp->vmp_obj_ref != NULL) { + ASSERT3P(vmp->vmp_ptep, ==, NULL); + + vm_object_release(vmp->vmp_obj_ref); + was_unmapped = true; + } else { + ASSERT3P(vmp->vmp_ptep, !=, NULL); + + if ((vmp->vmp_prot & PROT_WRITE) != 0 && vmc->vmc_track_dirty) { + vmm_gpt_t *gpt = vmc->vmc_space->vms_gpt; + vmm_gpt_reset_dirty(gpt, vmp->vmp_ptep, true); + } + } + kmem_free(vmp, sizeof (*vmp)); + return (was_unmapped); +} + +/* + * Release held page. Returns true if page resided on region which was + * subsequently unmapped. + */ +bool +vmp_release(vm_page_t *vmp) +{ + vm_client_t *vmc = vmp->vmp_client; + + VERIFY(vmc != NULL); + + mutex_enter(&vmc->vmc_lock); + const bool was_unmapped = vmp_release_inner(vmp, vmc); + mutex_exit(&vmc->vmc_lock); + return (was_unmapped); +} + +/* + * Release a chain of pages which were associated via vmp_chain() (setting + * page-chaining pointer). Returns true if any pages resided upon a region + * which was subsequently unmapped. + * + * All of those pages must have been held through the same vm_client_t. 
+ */ +bool +vmp_release_chain(vm_page_t *vmp) +{ + vm_client_t *vmc = vmp->vmp_client; + bool any_unmapped = false; + + ASSERT(vmp != NULL); + + mutex_enter(&vmc->vmc_lock); + while (vmp != NULL) { + vm_page_t *next = vmp->vmp_chain; + + /* We expect all pages in chain to be from same client */ + ASSERT3P(vmp->vmp_client, ==, vmc); + + if (vmp_release_inner(vmp, vmc)) { + any_unmapped = true; + } + vmp = next; + } + mutex_exit(&vmc->vmc_lock); + return (any_unmapped); +} + + +int +vm_segmap_obj(struct vm *vm, int segid, off_t segoff, off_t len, + struct as *as, caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags) +{ + vm_object_t *vmo; + int err; + + if (segoff < 0 || len <= 0 || + (segoff & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0) { + return (EINVAL); + } + if ((prot & PROT_USER) == 0) { + return (ENOTSUP); + } + err = vm_get_memseg(vm, segid, NULL, NULL, &vmo); + if (err != 0) { + return (err); + } + + VERIFY(segoff >= 0); + VERIFY(len <= vmo->vmo_size); + VERIFY((len + segoff) <= vmo->vmo_size); + + if (vmo->vmo_type != VMOT_MEM) { + /* Only support memory objects for now */ + return (ENOTSUP); + } + + as_rangelock(as); + + err = choose_addr(as, addrp, (size_t)len, 0, ADDR_VACALIGN, flags); + if (err == 0) { + segvmm_crargs_t svma; + + svma.prot = prot; + svma.offset = segoff; + svma.vmo = vmo; + svma.vmc = NULL; + + err = as_map(as, *addrp, (size_t)len, segvmm_create, &svma); + } + + as_rangeunlock(as); + return (err); +} + +int +vm_segmap_space(struct vm *vm, off_t off, struct as *as, caddr_t *addrp, + off_t len, uint_t prot, uint_t maxprot, uint_t flags) +{ + + const uintptr_t gpa = (uintptr_t)off; + const size_t size = (uintptr_t)len; + int err; + + if (off < 0 || len <= 0 || + (gpa & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) { + return (EINVAL); + } + if ((prot & PROT_USER) == 0) { + return (ENOTSUP); + } + + as_rangelock(as); + + err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags); + if (err == 0) { + segvmm_crargs_t svma; + + svma.prot = prot; + svma.offset = gpa; + svma.vmo = NULL; + svma.vmc = vmspace_client_alloc(vm_get_vmspace(vm)); + + err = as_map(as, *addrp, len, segvmm_create, &svma); + } + + as_rangeunlock(as); + return (err); +} diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 31b63dfe69..f1241a9183 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -526,9 +526,21 @@ extern "C" { #define IA32_VMX_PROCBASED2_VPID (1UL << 5) #define MSR_IA32_VMX_EPT_VPID_CAP 0x48c -#define IA32_VMX_EPT_VPID_INVEPT (1UL << 20) -#define IA32_VMX_EPT_VPID_INVEPT_SINGLE (1UL << 25) -#define IA32_VMX_EPT_VPID_INVEPT_ALL (1UL << 26) +#define IA32_VMX_EPT_VPID_EXEC_ONLY (1UL << 0) +#define IA32_VMX_EPT_VPID_PWL4 (1UL << 6) +#define IA32_VMX_EPT_VPID_TYPE_UC (1UL << 8) +#define IA32_VMX_EPT_VPID_TYPE_WB (1UL << 14) +#define IA32_VMX_EPT_VPID_MAP_2M (1UL << 16) +#define IA32_VMX_EPT_VPID_MAP_1G (1UL << 17) +#define IA32_VMX_EPT_VPID_HW_AD (1UL << 21) +#define IA32_VMX_EPT_VPID_INVEPT (1UL << 20) +#define IA32_VMX_EPT_VPID_INVEPT_SINGLE (1UL << 25) +#define IA32_VMX_EPT_VPID_INVEPT_ALL (1UL << 26) +#define IA32_VMX_EPT_VPID_INVVPID (1UL << 32) +#define IA32_VMX_EPT_VPID_INVVPID_ADDR (1UL << 40) +#define IA32_VMX_EPT_VPID_INVVPID_SINGLE (1UL << 41) +#define IA32_VMX_EPT_VPID_INVVPID_ALL (1UL << 42) +#define IA32_VMX_EPT_VPID_INVVPID_RETAIN (1UL << 43) /* * Intel TSX Control MSRs |
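The IA32_VMX_EPT_VPID_* additions above name capability bits reported through MSR_IA32_VMX_EPT_VPID_CAP. As a rough, hypothetical sketch (not code from this commit), a VMX backend could gate EPT dirty-bit tracking on those bits along these lines; the helper name vmx_ept_dirty_tracking_ok() is invented here, and the availability of rdmsr() via the included headers is assumed:

	/*
	 * Hypothetical sketch, not part of this change: consult the EPT/VPID
	 * capability MSR for baseline EPT support plus hardware accessed/dirty
	 * bit support before enabling dirty tracking.
	 */
	#include <sys/types.h>
	#include <sys/archsystm.h>	/* rdmsr() -- assumed location */
	#include <sys/x86_archext.h>

	static boolean_t
	vmx_ept_dirty_tracking_ok(void)
	{
		const uint64_t cap = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);

		/* Require 4-level walks, write-back paging structures, INVEPT */
		if ((cap & IA32_VMX_EPT_VPID_PWL4) == 0 ||
		    (cap & IA32_VMX_EPT_VPID_TYPE_WB) == 0 ||
		    (cap & IA32_VMX_EPT_VPID_INVEPT) == 0) {
			return (B_FALSE);
		}

		/* Hardware A/D bits are what make EPT dirty tracking possible */
		return ((cap & IA32_VMX_EPT_VPID_HW_AD) != 0);
	}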
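For the vm_client/vm_page interfaces added earlier in this change, a consumer follows a hold/access/release pattern. The sketch below is illustrative only: guest_write_page() is an invented name, the usual bhyve kernel-module headers are assumed to be in scope, and error handling is minimal.

	/*
	 * Hypothetical usage sketch, not part of this change: hold a
	 * page-aligned guest physical page for writing, copy data into it
	 * through the pointer returned by vmp_get_writable(), then release
	 * the hold.
	 */
	static int
	guest_write_page(vm_client_t *vmc, uintptr_t gpa, const void *src,
	    size_t len)
	{
		vm_page_t *vmp;

		ASSERT0(gpa & PAGEOFFSET);
		ASSERT3U(len, <=, PAGESIZE);

		vmp = vmc_hold(vmc, gpa, PROT_WRITE);
		if (vmp == NULL) {
			return (EFAULT);
		}
		bcopy(src, vmp_get_writable(vmp), len);
		(void) vmp_release(vmp);
		return (0);
	}

Note that when vmc_track_dirty is set, vmp_release() (via vmp_release_inner()) handles the dirty-bit bookkeeping for a PROT_WRITE hold, so a caller like this does not touch the GPT directly.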