OS-6409 import Pluribus bhyve port

Authored by: Krupal Joshi <krupal.joshi@pluribusnetworks.com>
Contributed by: Pluribus Networks Inc.
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Mike Gerdts <mike.gerdts@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Mike Gerdts <mike.gerdts@joyent.com>
author: Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> 2017-09-26 12:19:41 +0200
committer: Patrick Mooney <pmooney@pfmooney.com> 2018-02-22 15:57:20 +0000
commit: 43f85cd4da7e7860e4d240f14e6b5dd45700c7b6 (patch)
tree: fcf7f982094418a90da65ed16d48689bbc72ca5b
parent: 44db5f1c904128c3fd7c7ec37e9d894a10e93f8c (diff)
download: illumos-joyent-43f85cd4da7e7860e4d240f14e6b5dd45700c7b6.tar.gz
215 files changed, 52310 insertions, 2 deletions
diff --git a/usr/contrib/freebsd/amd64/machine/_types.h b/usr/contrib/freebsd/amd64/machine/_types.h
new file mode 100644
index 0000000000..59994352b5
--- /dev/null
+++ b/usr/contrib/freebsd/amd64/machine/_types.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/_types.h 232261 2012-02-28 18:15:28Z tijl $ */
+
+#include <x86/_types.h>
diff --git a/usr/contrib/freebsd/amd64/machine/psl.h b/usr/contrib/freebsd/amd64/machine/psl.h
new file mode 100644
index 0000000000..c660bfbab0
--- /dev/null
+++ b/usr/contrib/freebsd/amd64/machine/psl.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/psl.h 233204 2012-03-19 21:29:57Z tijl $ */
+
+#include <x86/psl.h>
diff --git a/usr/contrib/freebsd/amd64/machine/specialreg.h b/usr/contrib/freebsd/amd64/machine/specialreg.h
new file mode 100644
index 0000000000..41d4125cb9
--- /dev/null
+++ b/usr/contrib/freebsd/amd64/machine/specialreg.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/specialreg.h 233207 2012-03-19 21:34:11Z tijl $ */
+
+#include <x86/specialreg.h>
diff --git a/usr/contrib/freebsd/amd64/machine/timerreg.h b/usr/contrib/freebsd/amd64/machine/timerreg.h
new file mode 100644
index 0000000000..bca7b4dd19
--- /dev/null
+++ b/usr/contrib/freebsd/amd64/machine/timerreg.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (C) 2005 TAKAHASHI Yoshihiro. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/timerreg.h 177642 2008-03-26 20:09:21Z phk $
+ */
+
+/*
+ * The outputs of the three timers are connected as follows:
+ *
+ *	 timer 0 -> irq 0
+ *	 timer 1 -> dma chan 0 (for dram refresh)
+ * 	 timer 2 -> speaker (via keyboard controller)
+ *
+ * Timer 0 is used to call hardclock.
+ * Timer 2 is used to generate console beeps.
+ */
+
+#ifndef _MACHINE_TIMERREG_H_
+#define _MACHINE_TIMERREG_H_
+
+#ifdef _KERNEL
+
+#include <dev/ic/i8253reg.h>
+
+#define	IO_TIMER1	0x40		/* 8253 Timer #1 */
+#define	TIMER_CNTR0	(IO_TIMER1 + TIMER_REG_CNTR0)
+#define	TIMER_CNTR1	(IO_TIMER1 + TIMER_REG_CNTR1)
+#define	TIMER_CNTR2	(IO_TIMER1 + TIMER_REG_CNTR2)
+#define	TIMER_MODE	(IO_TIMER1 + TIMER_REG_MODE)
+
+#endif /* _KERNEL */
+
+#endif /* _MACHINE_TIMERREG_H_ */
diff --git a/usr/contrib/freebsd/amd64/machine/vm.h b/usr/contrib/freebsd/amd64/machine/vm.h
new file mode 100644
index 0000000000..885c1607ea
--- /dev/null
+++ b/usr/contrib/freebsd/amd64/machine/vm.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/vm.h 233671 2012-03-29 16:51:22Z jhb $
+ */
+
+#ifndef _MACHINE_VM_H_
+#define	_MACHINE_VM_H_
+
+#include <machine/specialreg.h>
+
+/* Memory attributes. */
+#define	VM_MEMATTR_UNCACHEABLE		((vm_memattr_t)PAT_UNCACHEABLE)
+#define	VM_MEMATTR_WRITE_COMBINING	((vm_memattr_t)PAT_WRITE_COMBINING)
+#define	VM_MEMATTR_WRITE_THROUGH	((vm_memattr_t)PAT_WRITE_THROUGH)
+#define	VM_MEMATTR_WRITE_PROTECTED	((vm_memattr_t)PAT_WRITE_PROTECTED)
+#define	VM_MEMATTR_WRITE_BACK		((vm_memattr_t)PAT_WRITE_BACK)
+#define	VM_MEMATTR_WEAK_UNCACHEABLE	((vm_memattr_t)PAT_UNCACHED)
+
+#define	VM_MEMATTR_DEFAULT		VM_MEMATTR_WRITE_BACK
+
+#endif /* !_MACHINE_VM_H_ */
diff --git a/usr/contrib/freebsd/dev/acpica/acpi_hpet.h b/usr/contrib/freebsd/dev/acpica/acpi_hpet.h
new file mode 100644
index 0000000000..df817b7a2b
--- /dev/null
+++ b/usr/contrib/freebsd/dev/acpica/acpi_hpet.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 2005 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/acpica/acpi_hpet.h 224919 2011-08-16 21:51:29Z mav $
+ */
+
+#ifndef __ACPI_HPET_H__
+#define	__ACPI_HPET_H__
+
+#define HPET_MEM_WIDTH		0x400	/* Expected memory region size */
+
+/* General registers */
+#define HPET_CAPABILITIES	0x0	/* General capabilities and ID */
+#define	HPET_CAP_VENDOR_ID	0xffff0000
+#define	HPET_CAP_LEG_RT		0x00008000
+#define	HPET_CAP_COUNT_SIZE	0x00002000 /* 1 = 64-bit, 0 = 32-bit */
+#define	HPET_CAP_NUM_TIM	0x00001f00
+#define	HPET_CAP_REV_ID		0x000000ff
+#define HPET_PERIOD		0x4	/* Period (1/hz) of timer */
+#define HPET_CONFIG		0x10	/* General configuration register */
+#define	HPET_CNF_LEG_RT		0x00000002
+#define	HPET_CNF_ENABLE		0x00000001
+#define	HPET_ISR		0x20	/* General interrupt status register */
+#define HPET_MAIN_COUNTER	0xf0	/* Main counter register */
+
+/* Timer registers */
+#define	HPET_TIMER_CAP_CNF(x)	((x) * 0x20 + 0x100)
+#define	HPET_TCAP_INT_ROUTE	0xffffffff00000000
+#define	HPET_TCAP_FSB_INT_DEL	0x00008000
+#define	HPET_TCNF_FSB_EN	0x00004000
+#define	HPET_TCNF_INT_ROUTE	0x00003e00
+#define	HPET_TCNF_32MODE	0x00000100
+#define	HPET_TCNF_VAL_SET	0x00000040
+#define	HPET_TCAP_SIZE		0x00000020 /* 1 = 64-bit, 0 = 32-bit */
+#define	HPET_TCAP_PER_INT	0x00000010 /* Supports periodic interrupts */
+#define	HPET_TCNF_TYPE		0x00000008 /* 1 = periodic, 0 = one-shot */
+#define	HPET_TCNF_INT_ENB	0x00000004
+#define	HPET_TCNF_INT_TYPE	0x00000002 /* 1 = level triggered, 0 = edge */
+#define	HPET_TIMER_COMPARATOR(x) ((x) * 0x20 + 0x108)
+#define	HPET_TIMER_FSB_VAL(x)	((x) * 0x20 + 0x110)
+#define	HPET_TIMER_FSB_ADDR(x)	((x) * 0x20 + 0x114)
+
+#define	HPET_MIN_CYCLES		128	/* Period considered reliable. */
+
+#endif /* !__ACPI_HPET_H__ */
diff --git a/usr/contrib/freebsd/dev/ic/i8253reg.h b/usr/contrib/freebsd/dev/ic/i8253reg.h
new file mode 100644
index 0000000000..47568b3436
--- /dev/null
+++ b/usr/contrib/freebsd/dev/ic/i8253reg.h
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: Header: timerreg.h,v 1.2 93/02/28 15:08:58 mccanne Exp
+ * $FreeBSD: head/sys/dev/ic/i8253reg.h 146215 2005-05-14 10:26:31Z nyan $
+ */
+
+/*
+ * Register definitions for the Intel 8253 Programmable Interval Timer.
+ *
+ * This chip has three independent 16-bit down counters that can be
+ * read on the fly.  There are three mode registers and three countdown
+ * registers.  The countdown registers are addressed directly, via the
+ * first three I/O ports.  The three mode registers are accessed via
+ * the fourth I/O port, with two bits in the mode byte indicating the
+ * register.  (Why are hardware interfaces always so braindead?).
+ *
+ * To write a value into the countdown register, the mode register
+ * is first programmed with a command indicating which byte of
+ * the two byte register is to be modified.  The three possibilities
+ * are load msb (TIMER_MSB), load lsb (TIMER_LSB), or load lsb then
+ * msb (TIMER_16BIT).
+ *
+ * To read the current value ("on the fly") from the countdown register,
+ * you write a "latch" command into the mode register, then read the stable
+ * value from the corresponding I/O port.  For example, you write
+ * TIMER_LATCH into the corresponding mode register.  Presumably,
+ * after doing this, a write operation to the I/O port would result
+ * in undefined behavior (but hopefully not fry the chip).
+ * Reading in this manner has no side effects.
+ */
+
+/*
+ * Macros for the port offsets and for values written into a mode register.
+ */
+#define	TIMER_REG_CNTR0	0	/* timer 0 counter port */
+#define	TIMER_REG_CNTR1	1	/* timer 1 counter port */
+#define	TIMER_REG_CNTR2	2	/* timer 2 counter port */
+#define	TIMER_REG_MODE	3	/* timer mode port */
+#define		TIMER_SEL0	0x00	/* select counter 0 */
+#define		TIMER_SEL1	0x40	/* select counter 1 */
+#define		TIMER_SEL2	0x80	/* select counter 2 */
+#define		TIMER_INTTC	0x00	/* mode 0, intr on terminal cnt */
+#define		TIMER_ONESHOT	0x02	/* mode 1, one shot */
+#define		TIMER_RATEGEN	0x04	/* mode 2, rate generator */
+#define		TIMER_SQWAVE	0x06	/* mode 3, square wave */
+#define		TIMER_SWSTROBE	0x08	/* mode 4, s/w triggered strobe */
+#define		TIMER_HWSTROBE	0x0a	/* mode 5, h/w triggered strobe */
+#define		TIMER_LATCH	0x00	/* latch counter for reading */
+#define		TIMER_LSB	0x10	/* r/w counter LSB */
+#define		TIMER_MSB	0x20	/* r/w counter MSB */
+#define		TIMER_16BIT	0x30	/* r/w counter 16 bits, LSB first */
+#define		TIMER_BCD	0x01	/* count in BCD */
diff --git a/usr/contrib/freebsd/dev/ic/i8259.h b/usr/contrib/freebsd/dev/ic/i8259.h
new file mode 100644
index 0000000000..be523c1df4
--- /dev/null
+++ b/usr/contrib/freebsd/dev/ic/i8259.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/ic/i8259.h 151580 2005-10-23 09:05:51Z glebius $
+ */
+
+/*
+ * Register definitions for the i8259A programmable interrupt controller.
+ */
+
+#ifndef _DEV_IC_I8259_H_
+#define	_DEV_IC_I8259_H_
+
+/* Initialization control word 1. Written to even address. */
+#define	ICW1_IC4	0x01		/* ICW4 present */
+#define	ICW1_SNGL	0x02		/* 1 = single, 0 = cascaded */
+#define	ICW1_ADI	0x04		/* 1 = 4, 0 = 8 byte vectors */
+#define	ICW1_LTIM	0x08		/* 1 = level trigger, 0 = edge */
+#define	ICW1_RESET	0x10		/* must be 1 */
+/* 0x20 - 0x80 - in 8080/8085 mode only */
+
+/* Initialization control word 2. Written to the odd address. */
+/* No definitions, it is the base vector of the IDT for 8086 mode */
+
+/* Initialization control word 3. Written to the odd address. */
+/* For a master PIC, bitfield indicating a slave 8259 on given input */
+/* For a slave, lower 3 bits are the slave's binary ID on the master */
+
+/* Initialization control word 4. Written to the odd address. */
+#define	ICW4_8086	0x01		/* 1 = 8086, 0 = 8080 */
+#define	ICW4_AEOI	0x02		/* 1 = Auto EOI */
+#define	ICW4_MS		0x04		/* 1 = buffered master, 0 = slave */
+#define	ICW4_BUF	0x08		/* 1 = enable buffer mode */
+#define	ICW4_SFNM	0x10		/* 1 = special fully nested mode */
+
+/* Operation control words.  Written after initialization. */
+
+/* Operation control word type 1 */
+/*
+ * No definitions.  Written to the odd address.  Bitmask for interrupts.
+ * 1 = disabled.
+ */
+
+/* Operation control word type 2.  Bit 3 (0x08) must be zero. Even address. */
+#define	OCW2_L0		0x01		/* Level */
+#define	OCW2_L1		0x02
+#define	OCW2_L2		0x04
+/* 0x08 must be 0 to select OCW2 vs OCW3 */
+/* 0x10 must be 0 to select OCW2 vs ICW1 */
+#define	OCW2_EOI	0x20		/* 1 = EOI */
+#define	OCW2_SL		0x40		/* EOI mode */
+#define	OCW2_R		0x80		/* EOI mode */
+
+/* Operation control word type 3.  Bit 3 (0x08) must be set. Even address. */
+#define	OCW3_RIS	0x01		/* 1 = read IS, 0 = read IR */
+#define	OCW3_RR		0x02		/* register read */
+#define	OCW3_P		0x04		/* poll mode command */
+/* 0x08 must be 1 to select OCW3 vs OCW2 */
+#define	OCW3_SEL	0x08		/* must be 1 */
+/* 0x10 must be 0 to select OCW3 vs ICW1 */
+#define	OCW3_SMM	0x20		/* special mode mask */
+#define	OCW3_ESMM	0x40		/* enable SMM */
+
+#endif /* !_DEV_IC_I8259_H_ */
diff --git a/usr/contrib/freebsd/dev/ic/ns16550.h b/usr/contrib/freebsd/dev/ic/ns16550.h
new file mode 100644
index 0000000000..5e8f30e3e8
--- /dev/null
+++ b/usr/contrib/freebsd/dev/ic/ns16550.h
@@ -0,0 +1,240 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)ns16550.h	7.1 (Berkeley) 5/9/91
+ * $FreeBSD: head/sys/dev/ic/ns16550.h 257170 2013-10-26 17:24:59Z zbb $
+ */
+
+/*
+ * NS8250... UART registers.
+ */
+
+/* 8250 registers #[0-6]. */
+
+#define	com_data	0	/* data register (R/W) */
+#define	REG_DATA	com_data
+
+#define	com_ier		1	/* interrupt enable register (W) */
+#define	REG_IER		com_ier
+#define	IER_ERXRDY	0x1
+#define	IER_ETXRDY	0x2
+#define	IER_ERLS	0x4
+#define	IER_EMSC	0x8
+
+#define	IER_BITS	"\20\1ERXRDY\2ETXRDY\3ERLS\4EMSC"
+
+#define	com_iir		2	/* interrupt identification register (R) */
+#define	REG_IIR		com_iir
+#define	IIR_IMASK	0xf
+#define	IIR_RXTOUT	0xc
+#define	IIR_BUSY	0x7
+#define	IIR_RLS		0x6
+#define	IIR_RXRDY	0x4
+#define	IIR_TXRDY	0x2
+#define	IIR_NOPEND	0x1
+#define	IIR_MLSC	0x0
+#define	IIR_FIFO_MASK	0xc0	/* set if FIFOs are enabled */
+
+#define	IIR_BITS	"\20\1NOPEND\2TXRDY\3RXRDY"
+
+#define	com_lcr		3	/* line control register (R/W) */
+#define	com_cfcr	com_lcr	/* character format control register (R/W) */
+#define	REG_LCR		com_lcr
+#define	LCR_DLAB	0x80
+#define	CFCR_DLAB	LCR_DLAB
+#define	LCR_EFR_ENABLE	0xbf	/* magic to enable EFR on 16650 up */
+#define	CFCR_EFR_ENABLE	LCR_EFR_ENABLE
+#define	LCR_SBREAK	0x40
+#define	CFCR_SBREAK	LCR_SBREAK
+#define	LCR_PZERO	0x30
+#define	CFCR_PZERO	LCR_PZERO
+#define	LCR_PONE	0x20
+#define	CFCR_PONE	LCR_PONE
+#define	LCR_PEVEN	0x10
+#define	CFCR_PEVEN	LCR_PEVEN
+#define	LCR_PODD	0x00
+#define	CFCR_PODD	LCR_PODD
+#define	LCR_PENAB	0x08
+#define	CFCR_PENAB	LCR_PENAB
+#define	LCR_STOPB	0x04
+#define	CFCR_STOPB	LCR_STOPB
+#define	LCR_8BITS	0x03
+#define	CFCR_8BITS	LCR_8BITS
+#define	LCR_7BITS	0x02
+#define	CFCR_7BITS	LCR_7BITS
+#define	LCR_6BITS	0x01
+#define	CFCR_6BITS	LCR_6BITS
+#define	LCR_5BITS	0x00
+#define	CFCR_5BITS	LCR_5BITS
+
+#define	com_mcr		4	/* modem control register (R/W) */
+#define	REG_MCR		com_mcr
+#define	MCR_PRESCALE	0x80	/* only available on 16650 up */
+#define	MCR_LOOPBACK	0x10
+#define	MCR_IE		0x08
+#define	MCR_IENABLE	MCR_IE
+#define	MCR_DRS		0x04
+#define	MCR_RTS		0x02
+#define	MCR_DTR		0x01
+
+#define	MCR_BITS	"\20\1DTR\2RTS\3DRS\4IE\5LOOPBACK\10PRESCALE"
+
+#define	com_lsr		5	/* line status register (R/W) */
+#define	REG_LSR		com_lsr
+#define	LSR_RCV_FIFO	0x80
+#define	LSR_TEMT	0x40
+#define	LSR_TSRE	LSR_TEMT
+#define	LSR_THRE	0x20
+#define	LSR_TXRDY	LSR_THRE
+#define	LSR_BI		0x10
+#define	LSR_FE		0x08
+#define	LSR_PE		0x04
+#define	LSR_OE		0x02
+#define	LSR_RXRDY	0x01
+#define	LSR_RCV_MASK	0x1f
+
+#define	LSR_BITS	"\20\1RXRDY\2OE\3PE\4FE\5BI\6THRE\7TEMT\10RCV_FIFO"
+
+#define	com_msr		6	/* modem status register (R/W) */
+#define	REG_MSR		com_msr
+#define	MSR_DCD		0x80
+#define	MSR_RI		0x40
+#define	MSR_DSR		0x20
+#define	MSR_CTS		0x10
+#define	MSR_DDCD	0x08
+#define	MSR_TERI	0x04
+#define	MSR_DDSR	0x02
+#define	MSR_DCTS	0x01
+
+#define	MSR_BITS	"\20\1DCTS\2DDSR\3TERI\4DDCD\5CTS\6DSR\7RI\10DCD"
+
+/* 8250 multiplexed registers #[0-1].  Access enabled by LCR[7]. */
+#define	com_dll		0	/* divisor latch low (R/W) */
+#define	com_dlbl	com_dll
+#define	com_dlm		1	/* divisor latch high (R/W) */
+#define	com_dlbh	com_dlm
+#define	REG_DLL		com_dll
+#define	REG_DLH		com_dlm
+
+/* 16450 register #7.  Not multiplexed. */
+#define	com_scr		7	/* scratch register (R/W) */
+
+/* 16550 register #2.  Not multiplexed. */
+#define	com_fcr		2	/* FIFO control register (W) */
+#define	com_fifo	com_fcr
+#define	REG_FCR		com_fcr
+#define	FCR_ENABLE	0x01
+#define	FIFO_ENABLE	FCR_ENABLE
+#define	FCR_RCV_RST	0x02
+#define	FIFO_RCV_RST	FCR_RCV_RST
+#define	FCR_XMT_RST	0x04
+#define	FIFO_XMT_RST	FCR_XMT_RST
+#define	FCR_DMA		0x08
+#define	FIFO_DMA_MODE	FCR_DMA
+#define	FCR_RX_LOW	0x00
+#define	FIFO_RX_LOW	FCR_RX_LOW
+#define	FCR_RX_MEDL	0x40
+#define	FIFO_RX_MEDL	FCR_RX_MEDL
+#define	FCR_RX_MEDH	0x80
+#define	FIFO_RX_MEDH	FCR_RX_MEDH
+#define	FCR_RX_HIGH	0xc0
+#define	FIFO_RX_HIGH	FCR_RX_HIGH
+
+#define	FCR_BITS	"\20\1ENABLE\2RCV_RST\3XMT_RST\4DMA"
+
+/* 16650 registers #2,[4-7].  Access enabled by LCR_EFR_ENABLE. */
+
+#define	com_efr		2	/* enhanced features register (R/W) */
+#define	REG_EFR		com_efr
+#define	EFR_CTS		0x80
+#define	EFR_AUTOCTS	EFR_CTS
+#define	EFR_RTS		0x40
+#define	EFR_AUTORTS	EFR_RTS
+#define	EFR_EFE		0x10	/* enhanced functions enable */
+
+#define	com_xon1	4	/* XON 1 character (R/W) */
+#define	com_xon2	5	/* XON 2 character (R/W) */
+#define	com_xoff1	6	/* XOFF 1 character (R/W) */
+#define	com_xoff2	7	/* XOFF 2 character (R/W) */
+
+#define DW_REG_USR	31	/* DesignWare derived Uart Status Reg */
+#define com_usr		39	/* Octeon 16750/16550 Uart Status Reg */
+#define REG_USR		com_usr
+#define USR_BUSY	1	/* Uart Busy. Serial transfer in progress */
+#define USR_TXFIFO_NOTFULL 2    /* Uart TX FIFO Not full */
+
+/* 16950 register #1.  Access enabled by ACR[7].  Also requires !LCR[7]. */
+#define	com_asr		1	/* additional status register (R[0-7]/W[0-1]) */
+
+/* 16950 register #3.  R/W access enabled by ACR[7]. */
+#define	com_rfl		3	/* receiver fifo level (R) */
+
+/*
+ * 16950 register #4.  Access enabled by ACR[7].  Also requires
+ * !LCR_EFR_ENABLE.
+ */
+#define	com_tfl		4	/* transmitter fifo level (R) */
+
+/*
+ * 16950 register #5.  Accessible if !LCR_EFR_ENABLE.  Read access also
+ * requires ACR[6].
+ */
+#define	com_icr		5	/* index control register (R/W) */
+
+/*
+ * 16950 register #7.  It is the same as com_scr except it has a different
+ * abbreviation in the manufacturer's data sheet and it also serves as an
+ * index into the Indexed Control register set.
+ */
+#define	com_spr		com_scr	/* scratch pad (and index) register (R/W) */
+#define	REG_SPR		com_scr
+
+/*
+ * 16950 indexed control registers #[0-0x13].  Access is via index in SPR,
+ * data in ICR (if ICR is accessible).
+ */
+
+#define	com_acr		0	/* additional control register (R/W) */
+#define	ACR_ASE		0x80	/* ASR/RFL/TFL enable */
+#define	ACR_ICRE	0x40	/* ICR enable */
+#define	ACR_TLE		0x20	/* TTL/RTL enable */
+
+#define	com_cpr		1	/* clock prescaler register (R/W) */
+#define	com_tcr		2	/* times clock register (R/W) */
+#define	com_ttl		4	/* transmitter trigger level (R/W) */
+#define	com_rtl		5	/* receiver trigger level (R/W) */
+/* ... */
+
+/* Hardware extension mode register for RSB-2000/3000. */
+#define	com_emr		com_msr
+#define	EMR_EXBUFF	0x04
+#define	EMR_CTSFLW	0x08
+#define	EMR_DSRFLW	0x10
+#define	EMR_RTSFLW	0x20
+#define	EMR_DTRFLW	0x40
+#define	EMR_EFMODE	0x80
diff --git a/usr/contrib/freebsd/dev/pci/pcireg.h b/usr/contrib/freebsd/dev/pci/pcireg.h
new file mode 100644
index 0000000000..32a569dbd4
--- /dev/null
+++ b/usr/contrib/freebsd/dev/pci/pcireg.h
@@ -0,0 +1,922 @@
+/*-
+ * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/pci/pcireg.h 266468 2014-05-20 14:39:22Z mav $
+ *
+ */
+
+/*
+ * PCIM_xxx: mask to locate subfield in register
+ * PCIR_xxx: config register offset
+ * PCIC_xxx: device class
+ * PCIS_xxx: device subclass
+ * PCIP_xxx: device programming interface
+ * PCIV_xxx: PCI vendor ID (only required to fixup ancient devices)
+ * PCID_xxx: device ID
+ * PCIY_xxx: capability identification number
+ * PCIZ_xxx: extended capability identification number
+ */
+
+/* some PCI bus constants */
+#define	PCI_DOMAINMAX	65535	/* highest supported domain number */
+#define	PCI_BUSMAX	255	/* highest supported bus number */
+#define	PCI_SLOTMAX	31	/* highest supported slot number */
+#define	PCI_FUNCMAX	7	/* highest supported function number */
+#define	PCI_REGMAX	255	/* highest supported config register addr. */
+#define	PCIE_REGMAX	4095	/* highest supported config register addr. */
+#define	PCI_MAXHDRTYPE	2
+
+#define	PCIE_ARI_SLOTMAX 0
+#define	PCIE_ARI_FUNCMAX 255
+
+#define	PCI_RID_BUS_SHIFT	8
+#define	PCI_RID_SLOT_SHIFT	3
+#define	PCI_RID_FUNC_SHIFT	0
+
+#define PCI_RID(bus, slot, func) \
+    ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+    (((slot) & PCI_SLOTMAX) << PCI_RID_SLOT_SHIFT) | \
+    (((func) & PCI_FUNCMAX) << PCI_RID_FUNC_SHIFT))	/* RID = bus[15:8] | slot[7:3] | func[2:0] */
+
+#define PCI_ARI_RID(bus, func) \
+    ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+    (((func) & PCIE_ARI_FUNCMAX) << PCI_RID_FUNC_SHIFT))	/* ARI RID = bus[15:8] | func[7:0]; no slot field */
+
+#define PCI_RID2BUS(rid) (((rid) >> PCI_RID_BUS_SHIFT) & PCI_BUSMAX)	/* RID bits 15:8 */
+#define PCI_RID2SLOT(rid) (((rid) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)	/* RID bits 7:3 */
+#define PCI_RID2FUNC(rid) (((rid) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)	/* RID bits 2:0 */
+
+#define PCIE_ARI_SLOT(func) (((func) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)	/* bits 7:3 of an ARI function number */
+#define PCIE_ARI_FUNC(func) (((func) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)	/* bits 2:0 of an ARI function number */
+
+/* PCI config header registers for all devices */
+
+#define	PCIR_DEVVENDOR	0x00
+#define	PCIR_VENDOR	0x00
+#define	PCIR_DEVICE	0x02
+#define	PCIR_COMMAND	0x04
+#define	PCIM_CMD_PORTEN		0x0001
+#define	PCIM_CMD_MEMEN		0x0002
+#define	PCIM_CMD_BUSMASTEREN	0x0004
+#define	PCIM_CMD_SPECIALEN	0x0008
+#define	PCIM_CMD_MWRICEN	0x0010
+#define	PCIM_CMD_PERRESPEN	0x0040
+#define	PCIM_CMD_SERRESPEN	0x0100
+#define	PCIM_CMD_BACKTOBACK	0x0200
+#define	PCIM_CMD_INTxDIS	0x0400
+#define	PCIR_STATUS	0x06
+#define	PCIM_STATUS_INTxSTATE	0x0008
+#define	PCIM_STATUS_CAPPRESENT	0x0010
+#define	PCIM_STATUS_66CAPABLE	0x0020
+#define	PCIM_STATUS_BACKTOBACK	0x0080
+#define	PCIM_STATUS_MDPERR	0x0100
+#define	PCIM_STATUS_SEL_FAST	0x0000
+#define	PCIM_STATUS_SEL_MEDIMUM	0x0200	/* sic: identifier misspells "MEDIUM" */
+#define	PCIM_STATUS_SEL_SLOW	0x0400
+#define	PCIM_STATUS_SEL_MASK	0x0600
+#define	PCIM_STATUS_STABORT	0x0800
+#define	PCIM_STATUS_RTABORT	0x1000
+#define	PCIM_STATUS_RMABORT	0x2000
+#define	PCIM_STATUS_SERR	0x4000
+#define	PCIM_STATUS_PERR	0x8000
+#define	PCIR_REVID	0x08
+#define	PCIR_PROGIF	0x09
+#define	PCIR_SUBCLASS	0x0a
+#define	PCIR_CLASS	0x0b
+#define	PCIR_CACHELNSZ	0x0c
+#define	PCIR_LATTIMER	0x0d
+#define	PCIR_HDRTYPE	0x0e
+#define	PCIM_HDRTYPE		0x7f
+#define	PCIM_HDRTYPE_NORMAL	0x00
+#define	PCIM_HDRTYPE_BRIDGE	0x01
+#define	PCIM_HDRTYPE_CARDBUS	0x02
+#define	PCIM_MFDEV		0x80
+#define	PCIR_BIST	0x0f
+
+/* Capability Register Offsets */
+
+#define	PCICAP_ID	0x0
+#define	PCICAP_NEXTPTR	0x1
+
+/* Capability Identification Numbers */
+
+#define	PCIY_PMG	0x01	/* PCI Power Management */
+#define	PCIY_AGP	0x02	/* AGP */
+#define	PCIY_VPD	0x03	/* Vital Product Data */
+#define	PCIY_SLOTID	0x04	/* Slot Identification */
+#define	PCIY_MSI	0x05	/* Message Signaled Interrupts */
+#define	PCIY_CHSWP	0x06	/* CompactPCI Hot Swap */
+#define	PCIY_PCIX	0x07	/* PCI-X */
+#define	PCIY_HT		0x08	/* HyperTransport */
+#define	PCIY_VENDOR	0x09	/* Vendor Unique */
+#define	PCIY_DEBUG	0x0a	/* Debug port */
+#define	PCIY_CRES	0x0b	/* CompactPCI central resource control */
+#define	PCIY_HOTPLUG	0x0c	/* PCI Hot-Plug */
+#define	PCIY_SUBVENDOR	0x0d	/* PCI-PCI bridge subvendor ID */
+#define	PCIY_AGP8X	0x0e	/* AGP 8x */
+#define	PCIY_SECDEV	0x0f	/* Secure Device */
+#define	PCIY_EXPRESS	0x10	/* PCI Express */
+#define	PCIY_MSIX	0x11	/* MSI-X */
+#define	PCIY_SATA	0x12	/* SATA */
+#define	PCIY_PCIAF	0x13	/* PCI Advanced Features */
+
+/* Extended Capability Register Fields */
+
+#define	PCIR_EXTCAP	0x100
+#define	PCIM_EXTCAP_ID		0x0000ffff
+#define	PCIM_EXTCAP_VER		0x000f0000
+#define	PCIM_EXTCAP_NEXTPTR	0xfff00000
+#define	PCI_EXTCAP_ID(ecap)	((ecap) & PCIM_EXTCAP_ID)
+#define	PCI_EXTCAP_VER(ecap)	(((ecap) & PCIM_EXTCAP_VER) >> 16)
+#define	PCI_EXTCAP_NEXTPTR(ecap) (((ecap) & PCIM_EXTCAP_NEXTPTR) >> 20)
+
+/* Extended Capability Identification Numbers */
+
+#define	PCIZ_AER	0x0001	/* Advanced Error Reporting */
+#define	PCIZ_VC		0x0002	/* Virtual Channel if MFVC Ext Cap not set */
+#define	PCIZ_SERNUM	0x0003	/* Device Serial Number */
+#define	PCIZ_PWRBDGT	0x0004	/* Power Budgeting */
+#define	PCIZ_RCLINK_DCL	0x0005	/* Root Complex Link Declaration */
+#define	PCIZ_RCLINK_CTL	0x0006	/* Root Complex Internal Link Control */
+#define	PCIZ_RCEC_ASSOC	0x0007	/* Root Complex Event Collector Association */
+#define	PCIZ_MFVC	0x0008	/* Multi-Function Virtual Channel */
+#define	PCIZ_VC2	0x0009	/* Virtual Channel if MFVC Ext Cap set */
+#define	PCIZ_RCRB	0x000a	/* RCRB Header */
+#define	PCIZ_VENDOR	0x000b	/* Vendor Unique */
+#define	PCIZ_CAC	0x000c	/* Configuration Access Correction -- obsolete */
+#define	PCIZ_ACS	0x000d	/* Access Control Services */
+#define	PCIZ_ARI	0x000e	/* Alternative Routing-ID Interpretation */
+#define	PCIZ_ATS	0x000f	/* Address Translation Services */
+#define	PCIZ_SRIOV	0x0010	/* Single Root IO Virtualization */
+#define	PCIZ_MRIOV	0x0011	/* Multiple Root IO Virtualization */
+#define	PCIZ_MULTICAST	0x0012	/* Multicast */
+#define	PCIZ_PAGE_REQ	0x0013	/* Page Request */
+#define	PCIZ_AMD	0x0014	/* Reserved for AMD */
+#define	PCIZ_RESIZE_BAR	0x0015	/* Resizable BAR */
+#define	PCIZ_DPA	0x0016	/* Dynamic Power Allocation */
+#define	PCIZ_TPH_REQ	0x0017	/* TPH Requester */
+#define	PCIZ_LTR	0x0018	/* Latency Tolerance Reporting */
+#define	PCIZ_SEC_PCIE	0x0019	/* Secondary PCI Express */
+#define	PCIZ_PMUX	0x001a	/* Protocol Multiplexing */
+#define	PCIZ_PASID	0x001b	/* Process Address Space ID */
+#define	PCIZ_LN_REQ	0x001c	/* LN Requester */
+#define	PCIZ_DPC	0x001d	/* Downstream Port Containment */
+#define	PCIZ_L1PM	0x001e	/* L1 PM Substates */
+
+/* config registers for header type 0 devices */
+
+#define	PCIR_BARS	0x10
+#define	PCIR_BAR(x)		(PCIR_BARS + (x) * 4)
+#define	PCIR_MAX_BAR_0		5
+#define	PCI_RID2BAR(rid)	(((rid) - PCIR_BARS) / 4)
+#define	PCI_BAR_IO(x)		(((x) & PCIM_BAR_SPACE) == PCIM_BAR_IO_SPACE)
+#define	PCI_BAR_MEM(x)		(((x) & PCIM_BAR_SPACE) == PCIM_BAR_MEM_SPACE)
+#define	PCIM_BAR_SPACE		0x00000001
+#define	PCIM_BAR_MEM_SPACE	0
+#define	PCIM_BAR_IO_SPACE	1
+#define	PCIM_BAR_MEM_TYPE	0x00000006
+#define	PCIM_BAR_MEM_32		0
+#define	PCIM_BAR_MEM_1MB	2	/* Locate below 1MB in PCI <= 2.1 */
+#define	PCIM_BAR_MEM_64		4
+#define	PCIM_BAR_MEM_PREFETCH	0x00000008
+#define	PCIM_BAR_MEM_BASE	0xfffffffffffffff0ULL
+#define	PCIM_BAR_IO_RESERVED	0x00000002
+#define	PCIM_BAR_IO_BASE	0xfffffffc
+#define	PCIR_CIS	0x28
+#define	PCIM_CIS_ASI_MASK	0x00000007
+#define	PCIM_CIS_ASI_CONFIG	0
+#define	PCIM_CIS_ASI_BAR0	1
+#define	PCIM_CIS_ASI_BAR1	2
+#define	PCIM_CIS_ASI_BAR2	3
+#define	PCIM_CIS_ASI_BAR3	4
+#define	PCIM_CIS_ASI_BAR4	5
+#define	PCIM_CIS_ASI_BAR5	6
+#define	PCIM_CIS_ASI_ROM	7
+#define	PCIM_CIS_ADDR_MASK	0x0ffffff8
+#define	PCIM_CIS_ROM_MASK	0xf0000000
+#define	PCIM_CIS_CONFIG_MASK	0xff
+#define	PCIR_SUBVEND_0	0x2c
+#define	PCIR_SUBDEV_0	0x2e
+#define	PCIR_BIOS	0x30
+#define	PCIM_BIOS_ENABLE	0x01
+#define	PCIM_BIOS_ADDR_MASK	0xfffff800
+#define	PCIR_CAP_PTR	0x34
+#define	PCIR_INTLINE	0x3c
+#define	PCIR_INTPIN	0x3d
+#define	PCIR_MINGNT	0x3e
+#define	PCIR_MAXLAT	0x3f
+
+/* config registers for header type 1 (PCI-to-PCI bridge) devices */
+
+#define	PCIR_MAX_BAR_1	1
+#define	PCIR_SECSTAT_1	0x1e
+
+#define	PCIR_PRIBUS_1	0x18
+#define	PCIR_SECBUS_1	0x19
+#define	PCIR_SUBBUS_1	0x1a
+#define	PCIR_SECLAT_1	0x1b
+
+#define	PCIR_IOBASEL_1	0x1c
+#define	PCIR_IOLIMITL_1	0x1d
+#define	PCIR_IOBASEH_1	0x30
+#define	PCIR_IOLIMITH_1	0x32
+#define	PCIM_BRIO_16		0x0
+#define	PCIM_BRIO_32		0x1
+#define	PCIM_BRIO_MASK		0xf
+
+#define	PCIR_MEMBASE_1	0x20
+#define	PCIR_MEMLIMIT_1	0x22
+
+#define	PCIR_PMBASEL_1	0x24
+#define	PCIR_PMLIMITL_1	0x26
+#define	PCIR_PMBASEH_1	0x28
+#define	PCIR_PMLIMITH_1	0x2c
+#define	PCIM_BRPM_32		0x0
+#define	PCIM_BRPM_64		0x1
+#define	PCIM_BRPM_MASK		0xf
+
+#define	PCIR_BIOS_1	0x38
+#define	PCIR_BRIDGECTL_1 0x3e
+
+/* config registers for header type 2 (CardBus) devices */
+
+#define	PCIR_MAX_BAR_2	0
+#define	PCIR_CAP_PTR_2	0x14
+#define	PCIR_SECSTAT_2	0x16
+
+#define	PCIR_PRIBUS_2	0x18
+#define	PCIR_SECBUS_2	0x19
+#define	PCIR_SUBBUS_2	0x1a
+#define	PCIR_SECLAT_2	0x1b
+
+#define	PCIR_MEMBASE0_2	0x1c
+#define	PCIR_MEMLIMIT0_2 0x20
+#define	PCIR_MEMBASE1_2	0x24
+#define	PCIR_MEMLIMIT1_2 0x28
+#define	PCIR_IOBASE0_2	0x2c
+#define	PCIR_IOLIMIT0_2	0x30
+#define	PCIR_IOBASE1_2	0x34
+#define	PCIR_IOLIMIT1_2	0x38
+
+#define	PCIR_BRIDGECTL_2 0x3e
+
+#define	PCIR_SUBVEND_2	0x40
+#define	PCIR_SUBDEV_2	0x42
+
+#define	PCIR_PCCARDIF_2	0x44
+
+/* PCI device class, subclass and programming interface definitions */
+
+#define	PCIC_OLD	0x00
+#define	PCIS_OLD_NONVGA		0x00
+#define	PCIS_OLD_VGA		0x01
+
+#define	PCIC_STORAGE	0x01
+#define	PCIS_STORAGE_SCSI	0x00
+#define	PCIS_STORAGE_IDE	0x01
+#define	PCIP_STORAGE_IDE_MODEPRIM	0x01
+#define	PCIP_STORAGE_IDE_PROGINDPRIM	0x02
+#define	PCIP_STORAGE_IDE_MODESEC	0x04
+#define	PCIP_STORAGE_IDE_PROGINDSEC	0x08
+#define	PCIP_STORAGE_IDE_MASTERDEV	0x80
+#define	PCIS_STORAGE_FLOPPY	0x02
+#define	PCIS_STORAGE_IPI	0x03
+#define	PCIS_STORAGE_RAID	0x04
+#define	PCIS_STORAGE_ATA_ADMA	0x05
+#define	PCIS_STORAGE_SATA	0x06
+#define	PCIP_STORAGE_SATA_AHCI_1_0	0x01
+#define	PCIS_STORAGE_SAS	0x07
+#define	PCIS_STORAGE_NVM	0x08
+#define	PCIP_STORAGE_NVM_NVMHCI_1_0	0x01
+#define	PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0	0x02
+#define	PCIS_STORAGE_OTHER	0x80
+
+#define	PCIC_NETWORK	0x02
+#define	PCIS_NETWORK_ETHERNET	0x00
+#define	PCIS_NETWORK_TOKENRING	0x01
+#define	PCIS_NETWORK_FDDI	0x02
+#define	PCIS_NETWORK_ATM	0x03
+#define	PCIS_NETWORK_ISDN	0x04
+#define	PCIS_NETWORK_WORLDFIP	0x05
+#define	PCIS_NETWORK_PICMG	0x06
+#define	PCIS_NETWORK_OTHER	0x80
+
+#define	PCIC_DISPLAY	0x03
+#define	PCIS_DISPLAY_VGA	0x00
+#define	PCIS_DISPLAY_XGA	0x01
+#define	PCIS_DISPLAY_3D		0x02
+#define	PCIS_DISPLAY_OTHER	0x80
+
+#define	PCIC_MULTIMEDIA	0x04
+#define	PCIS_MULTIMEDIA_VIDEO	0x00
+#define	PCIS_MULTIMEDIA_AUDIO	0x01
+#define	PCIS_MULTIMEDIA_TELE	0x02
+#define	PCIS_MULTIMEDIA_HDA	0x03
+#define	PCIS_MULTIMEDIA_OTHER	0x80
+
+#define	PCIC_MEMORY	0x05
+#define	PCIS_MEMORY_RAM		0x00
+#define	PCIS_MEMORY_FLASH	0x01
+#define	PCIS_MEMORY_OTHER	0x80
+
+#define	PCIC_BRIDGE	0x06
+#define	PCIS_BRIDGE_HOST	0x00
+#define	PCIS_BRIDGE_ISA		0x01
+#define	PCIS_BRIDGE_EISA	0x02
+#define	PCIS_BRIDGE_MCA		0x03
+#define	PCIS_BRIDGE_PCI		0x04
+#define	PCIP_BRIDGE_PCI_SUBTRACTIVE	0x01
+#define	PCIS_BRIDGE_PCMCIA	0x05
+#define	PCIS_BRIDGE_NUBUS	0x06
+#define	PCIS_BRIDGE_CARDBUS	0x07
+#define	PCIS_BRIDGE_RACEWAY	0x08
+#define	PCIS_BRIDGE_PCI_TRANSPARENT 0x09
+#define	PCIS_BRIDGE_INFINIBAND	0x0a
+#define	PCIS_BRIDGE_OTHER	0x80
+
+#define	PCIC_SIMPLECOMM	0x07
+#define	PCIS_SIMPLECOMM_UART	0x00
+#define	PCIP_SIMPLECOMM_UART_8250	0x00
+#define	PCIP_SIMPLECOMM_UART_16450A	0x01
+#define	PCIP_SIMPLECOMM_UART_16550A	0x02
+#define	PCIP_SIMPLECOMM_UART_16650A	0x03
+#define	PCIP_SIMPLECOMM_UART_16750A	0x04
+#define	PCIP_SIMPLECOMM_UART_16850A	0x05
+#define	PCIP_SIMPLECOMM_UART_16950A	0x06
+#define	PCIS_SIMPLECOMM_PAR	0x01
+#define	PCIS_SIMPLECOMM_MULSER	0x02
+#define	PCIS_SIMPLECOMM_MODEM	0x03
+#define	PCIS_SIMPLECOMM_GPIB	0x04
+#define	PCIS_SIMPLECOMM_SMART_CARD 0x05
+#define	PCIS_SIMPLECOMM_OTHER	0x80
+
+#define	PCIC_BASEPERIPH	0x08
+#define	PCIS_BASEPERIPH_PIC	0x00
+#define	PCIP_BASEPERIPH_PIC_8259A	0x00
+#define	PCIP_BASEPERIPH_PIC_ISA		0x01
+#define	PCIP_BASEPERIPH_PIC_EISA	0x02
+#define	PCIP_BASEPERIPH_PIC_IO_APIC	0x10
+#define	PCIP_BASEPERIPH_PIC_IOX_APIC	0x20
+#define	PCIS_BASEPERIPH_DMA	0x01
+#define	PCIS_BASEPERIPH_TIMER	0x02
+#define	PCIS_BASEPERIPH_RTC	0x03
+#define	PCIS_BASEPERIPH_PCIHOT	0x04
+#define	PCIS_BASEPERIPH_SDHC	0x05
+#define	PCIS_BASEPERIPH_IOMMU	0x06
+#define	PCIS_BASEPERIPH_OTHER	0x80
+
+#define	PCIC_INPUTDEV	0x09
+#define	PCIS_INPUTDEV_KEYBOARD	0x00
+#define	PCIS_INPUTDEV_DIGITIZER	0x01
+#define	PCIS_INPUTDEV_MOUSE	0x02
+#define	PCIS_INPUTDEV_SCANNER	0x03
+#define	PCIS_INPUTDEV_GAMEPORT	0x04
+#define	PCIS_INPUTDEV_OTHER	0x80
+
+#define	PCIC_DOCKING	0x0a
+#define	PCIS_DOCKING_GENERIC	0x00
+#define	PCIS_DOCKING_OTHER	0x80
+
+#define	PCIC_PROCESSOR	0x0b
+#define	PCIS_PROCESSOR_386	0x00
+#define	PCIS_PROCESSOR_486	0x01
+#define	PCIS_PROCESSOR_PENTIUM	0x02
+#define	PCIS_PROCESSOR_ALPHA	0x10
+#define	PCIS_PROCESSOR_POWERPC	0x20
+#define	PCIS_PROCESSOR_MIPS	0x30
+#define	PCIS_PROCESSOR_COPROC	0x40
+
+#define	PCIC_SERIALBUS	0x0c
+#define	PCIS_SERIALBUS_FW	0x00
+#define	PCIS_SERIALBUS_ACCESS	0x01
+#define	PCIS_SERIALBUS_SSA	0x02
+#define	PCIS_SERIALBUS_USB	0x03
+#define	PCIP_SERIALBUS_USB_UHCI		0x00
+#define	PCIP_SERIALBUS_USB_OHCI		0x10
+#define	PCIP_SERIALBUS_USB_EHCI		0x20
+#define	PCIP_SERIALBUS_USB_XHCI		0x30
+#define	PCIP_SERIALBUS_USB_DEVICE	0xfe
+#define	PCIS_SERIALBUS_FC	0x04
+#define	PCIS_SERIALBUS_SMBUS	0x05
+#define	PCIS_SERIALBUS_INFINIBAND 0x06
+#define	PCIS_SERIALBUS_IPMI	0x07
+#define	PCIP_SERIALBUS_IPMI_SMIC	0x00
+#define	PCIP_SERIALBUS_IPMI_KCS		0x01
+#define	PCIP_SERIALBUS_IPMI_BT		0x02
+#define	PCIS_SERIALBUS_SERCOS	0x08
+#define	PCIS_SERIALBUS_CANBUS	0x09
+
+#define	PCIC_WIRELESS	0x0d
+#define	PCIS_WIRELESS_IRDA	0x00
+#define	PCIS_WIRELESS_IR	0x01
+#define	PCIS_WIRELESS_RF	0x10
+#define	PCIS_WIRELESS_BLUETOOTH	0x11
+#define	PCIS_WIRELESS_BROADBAND	0x12
+#define	PCIS_WIRELESS_80211A	0x20
+#define	PCIS_WIRELESS_80211B	0x21
+#define	PCIS_WIRELESS_OTHER	0x80
+
+#define	PCIC_INTELLIIO	0x0e
+#define	PCIS_INTELLIIO_I2O	0x00
+
+#define	PCIC_SATCOM	0x0f
+#define	PCIS_SATCOM_TV		0x01
+#define	PCIS_SATCOM_AUDIO	0x02
+#define	PCIS_SATCOM_VOICE	0x03
+#define	PCIS_SATCOM_DATA	0x04
+
+#define	PCIC_CRYPTO	0x10
+#define	PCIS_CRYPTO_NETCOMP	0x00
+#define	PCIS_CRYPTO_ENTERTAIN	0x10
+#define	PCIS_CRYPTO_OTHER	0x80
+
+#define	PCIC_DASP	0x11
+#define	PCIS_DASP_DPIO		0x00
+#define	PCIS_DASP_PERFCNTRS	0x01
+#define	PCIS_DASP_COMM_SYNC	0x10
+#define	PCIS_DASP_MGMT_CARD	0x20
+#define	PCIS_DASP_OTHER		0x80
+
+#define	PCIC_OTHER	0xff
+
+/* Bridge Control Values. */
+#define	PCIB_BCR_PERR_ENABLE		0x0001
+#define	PCIB_BCR_SERR_ENABLE		0x0002
+#define	PCIB_BCR_ISA_ENABLE		0x0004
+#define	PCIB_BCR_VGA_ENABLE		0x0008
+#define	PCIB_BCR_MASTER_ABORT_MODE	0x0020
+#define	PCIB_BCR_SECBUS_RESET		0x0040
+#define	PCIB_BCR_SECBUS_BACKTOBACK	0x0080
+#define	PCIB_BCR_PRI_DISCARD_TIMEOUT	0x0100
+#define	PCIB_BCR_SEC_DISCARD_TIMEOUT	0x0200
+#define	PCIB_BCR_DISCARD_TIMER_STATUS	0x0400
+#define	PCIB_BCR_DISCARD_TIMER_SERREN	0x0800
+
+/* PCI power management */
+#define	PCIR_POWER_CAP		0x2
+#define	PCIM_PCAP_SPEC			0x0007
+#define	PCIM_PCAP_PMEREQCLK		0x0008
+#define	PCIM_PCAP_DEVSPECINIT		0x0020
+#define	PCIM_PCAP_AUXPWR_0		0x0000
+#define	PCIM_PCAP_AUXPWR_55		0x0040
+#define	PCIM_PCAP_AUXPWR_100		0x0080
+#define	PCIM_PCAP_AUXPWR_160		0x00c0
+#define	PCIM_PCAP_AUXPWR_220		0x0100
+#define	PCIM_PCAP_AUXPWR_270		0x0140
+#define	PCIM_PCAP_AUXPWR_320		0x0180
+#define	PCIM_PCAP_AUXPWR_375		0x01c0
+#define	PCIM_PCAP_AUXPWRMASK		0x01c0
+#define	PCIM_PCAP_D1SUPP		0x0200
+#define	PCIM_PCAP_D2SUPP		0x0400
+#define	PCIM_PCAP_D0PME			0x0800
+#define	PCIM_PCAP_D1PME			0x1000
+#define	PCIM_PCAP_D2PME			0x2000
+#define	PCIM_PCAP_D3PME_HOT		0x4000
+#define	PCIM_PCAP_D3PME_COLD		0x8000
+
+#define	PCIR_POWER_STATUS	0x4
+#define	PCIM_PSTAT_D0			0x0000
+#define	PCIM_PSTAT_D1			0x0001
+#define	PCIM_PSTAT_D2			0x0002
+#define	PCIM_PSTAT_D3			0x0003
+#define	PCIM_PSTAT_DMASK		0x0003
+#define	PCIM_PSTAT_NOSOFTRESET		0x0008
+#define	PCIM_PSTAT_PMEENABLE		0x0100
+#define	PCIM_PSTAT_D0POWER		0x0000
+#define	PCIM_PSTAT_D1POWER		0x0200
+#define	PCIM_PSTAT_D2POWER		0x0400
+#define	PCIM_PSTAT_D3POWER		0x0600
+#define	PCIM_PSTAT_D0HEAT		0x0800
+#define	PCIM_PSTAT_D1HEAT		0x0a00
+#define	PCIM_PSTAT_D2HEAT		0x0c00
+#define	PCIM_PSTAT_D3HEAT		0x0e00
+#define	PCIM_PSTAT_DATASELMASK		0x1e00
+#define	PCIM_PSTAT_DATAUNKN		0x0000
+#define	PCIM_PSTAT_DATADIV10		0x2000
+#define	PCIM_PSTAT_DATADIV100		0x4000
+#define	PCIM_PSTAT_DATADIV1000		0x6000
+#define	PCIM_PSTAT_DATADIVMASK		0x6000
+#define	PCIM_PSTAT_PME			0x8000
+
+#define	PCIR_POWER_BSE		0x6
+#define	PCIM_PMCSR_BSE_D3B3		0x00
+#define	PCIM_PMCSR_BSE_D3B2		0x40
+#define	PCIM_PMCSR_BSE_BPCCE		0x80
+
+#define	PCIR_POWER_DATA		0x7
+
+/* VPD capability registers */
+#define	PCIR_VPD_ADDR		0x2
+#define	PCIR_VPD_DATA		0x4
+
+/* PCI Message Signalled Interrupts (MSI) */
+#define	PCIR_MSI_CTRL		0x2
+#define	PCIM_MSICTRL_VECTOR		0x0100
+#define	PCIM_MSICTRL_64BIT		0x0080
+#define	PCIM_MSICTRL_MME_MASK		0x0070
+#define	PCIM_MSICTRL_MME_1		0x0000
+#define	PCIM_MSICTRL_MME_2		0x0010
+#define	PCIM_MSICTRL_MME_4		0x0020
+#define	PCIM_MSICTRL_MME_8		0x0030
+#define	PCIM_MSICTRL_MME_16		0x0040
+#define	PCIM_MSICTRL_MME_32		0x0050
+#define	PCIM_MSICTRL_MMC_MASK		0x000E
+#define	PCIM_MSICTRL_MMC_1		0x0000
+#define	PCIM_MSICTRL_MMC_2		0x0002
+#define	PCIM_MSICTRL_MMC_4		0x0004
+#define	PCIM_MSICTRL_MMC_8		0x0006
+#define	PCIM_MSICTRL_MMC_16		0x0008
+#define	PCIM_MSICTRL_MMC_32		0x000A
+#define	PCIM_MSICTRL_MSI_ENABLE		0x0001
+#define	PCIR_MSI_ADDR		0x4	/* message address (low 32 bits) */
+#define	PCIR_MSI_ADDR_HIGH	0x8	/* NOTE(review): presumably upper address bits, 64-bit format only -- confirm */
+#define	PCIR_MSI_DATA		0x8	/* same offset as ADDR_HIGH; presumably data for the 32-bit address format */
+#define	PCIR_MSI_DATA_64BIT	0xc	/* data offset when the 64-bit address format is in use */
+#define	PCIR_MSI_MASK		0x10
+#define	PCIR_MSI_PENDING	0x14
+
+/* PCI-X definitions */
+
+/* For header type 0 devices */
+#define	PCIXR_COMMAND		0x2
+#define	PCIXM_COMMAND_DPERR_E		0x0001	/* Data Parity Error Recovery */
+#define	PCIXM_COMMAND_ERO		0x0002	/* Enable Relaxed Ordering */
+#define	PCIXM_COMMAND_MAX_READ		0x000c	/* Maximum Burst Read Count */
+#define	PCIXM_COMMAND_MAX_READ_512	0x0000
+#define	PCIXM_COMMAND_MAX_READ_1024	0x0004
+#define	PCIXM_COMMAND_MAX_READ_2048	0x0008
+#define	PCIXM_COMMAND_MAX_READ_4096	0x000c
+#define	PCIXM_COMMAND_MAX_SPLITS 	0x0070	/* Maximum Split Transactions */
+#define	PCIXM_COMMAND_MAX_SPLITS_1	0x0000
+#define	PCIXM_COMMAND_MAX_SPLITS_2	0x0010
+#define	PCIXM_COMMAND_MAX_SPLITS_3	0x0020
+#define	PCIXM_COMMAND_MAX_SPLITS_4	0x0030
+#define	PCIXM_COMMAND_MAX_SPLITS_8	0x0040
+#define	PCIXM_COMMAND_MAX_SPLITS_12	0x0050
+#define	PCIXM_COMMAND_MAX_SPLITS_16	0x0060
+#define	PCIXM_COMMAND_MAX_SPLITS_32	0x0070
+#define	PCIXM_COMMAND_VERSION		0x3000
+#define	PCIXR_STATUS		0x4
+#define	PCIXM_STATUS_DEVFN		0x000000FF
+#define	PCIXM_STATUS_BUS		0x0000FF00
+#define	PCIXM_STATUS_64BIT		0x00010000
+#define	PCIXM_STATUS_133CAP		0x00020000
+#define	PCIXM_STATUS_SC_DISCARDED	0x00040000
+#define	PCIXM_STATUS_UNEXP_SC		0x00080000
+#define	PCIXM_STATUS_COMPLEX_DEV	0x00100000
+#define	PCIXM_STATUS_MAX_READ		0x00600000
+#define	PCIXM_STATUS_MAX_READ_512	0x00000000
+#define	PCIXM_STATUS_MAX_READ_1024	0x00200000
+#define	PCIXM_STATUS_MAX_READ_2048	0x00400000
+#define	PCIXM_STATUS_MAX_READ_4096	0x00600000
+#define	PCIXM_STATUS_MAX_SPLITS		0x03800000
+#define	PCIXM_STATUS_MAX_SPLITS_1	0x00000000
+#define	PCIXM_STATUS_MAX_SPLITS_2	0x00800000
+#define	PCIXM_STATUS_MAX_SPLITS_3	0x01000000
+#define	PCIXM_STATUS_MAX_SPLITS_4	0x01800000
+#define	PCIXM_STATUS_MAX_SPLITS_8	0x02000000
+#define	PCIXM_STATUS_MAX_SPLITS_12	0x02800000
+#define	PCIXM_STATUS_MAX_SPLITS_16	0x03000000
+#define	PCIXM_STATUS_MAX_SPLITS_32	0x03800000
+#define	PCIXM_STATUS_MAX_CUM_READ	0x1C000000
+#define	PCIXM_STATUS_RCVD_SC_ERR	0x20000000
+#define	PCIXM_STATUS_266CAP		0x40000000
+#define	PCIXM_STATUS_533CAP		0x80000000
+
+/* For header type 1 devices (PCI-X bridges) */
+#define	PCIXR_SEC_STATUS	0x2
+#define	PCIXM_SEC_STATUS_64BIT		0x0001
+#define	PCIXM_SEC_STATUS_133CAP		0x0002
+#define	PCIXM_SEC_STATUS_SC_DISC	0x0004
+#define	PCIXM_SEC_STATUS_UNEXP_SC	0x0008
+#define	PCIXM_SEC_STATUS_SC_OVERRUN	0x0010
+#define	PCIXM_SEC_STATUS_SR_DELAYED	0x0020
+#define	PCIXM_SEC_STATUS_BUS_MODE	0x03c0
+#define	PCIXM_SEC_STATUS_VERSION	0x3000
+#define	PCIXM_SEC_STATUS_266CAP		0x4000
+#define	PCIXM_SEC_STATUS_533CAP		0x8000
+#define	PCIXR_BRIDGE_STATUS	0x4
+#define	PCIXM_BRIDGE_STATUS_DEVFN	0x000000FF
+#define	PCIXM_BRIDGE_STATUS_BUS		0x0000FF00
+#define	PCIXM_BRIDGE_STATUS_64BIT	0x00010000
+#define	PCIXM_BRIDGE_STATUS_133CAP	0x00020000
+#define	PCIXM_BRIDGE_STATUS_SC_DISCARDED 0x00040000
+#define	PCIXM_BRIDGE_STATUS_UNEXP_SC	0x00080000
+#define	PCIXM_BRIDGE_STATUS_SC_OVERRUN	0x00100000
+#define	PCIXM_BRIDGE_STATUS_SR_DELAYED	0x00200000
+#define	PCIXM_BRIDGE_STATUS_DEVID_MSGCAP 0x20000000
+#define	PCIXM_BRIDGE_STATUS_266CAP	0x40000000
+#define	PCIXM_BRIDGE_STATUS_533CAP	0x80000000
+
+/* HT (HyperTransport) Capability definitions */
+#define	PCIR_HT_COMMAND		0x2
+#define	PCIM_HTCMD_CAP_MASK		0xf800	/* Capability type. */
+#define	PCIM_HTCAP_SLAVE		0x0000	/* 000xx */
+#define	PCIM_HTCAP_HOST			0x2000	/* 001xx */
+#define	PCIM_HTCAP_SWITCH		0x4000	/* 01000 */
+#define	PCIM_HTCAP_INTERRUPT		0x8000	/* 10000 */
+#define	PCIM_HTCAP_REVISION_ID		0x8800	/* 10001 */
+#define	PCIM_HTCAP_UNITID_CLUMPING	0x9000	/* 10010 */
+#define	PCIM_HTCAP_EXT_CONFIG_SPACE	0x9800	/* 10011 */
+#define	PCIM_HTCAP_ADDRESS_MAPPING	0xa000	/* 10100 */
+#define	PCIM_HTCAP_MSI_MAPPING		0xa800	/* 10101 */
+#define	PCIM_HTCAP_DIRECT_ROUTE		0xb000	/* 10110 */
+#define	PCIM_HTCAP_VCSET		0xb800	/* 10111 */
+#define	PCIM_HTCAP_RETRY_MODE		0xc000	/* 11000 */
+#define	PCIM_HTCAP_X86_ENCODING		0xc800	/* 11001 */
+#define	PCIM_HTCAP_GEN3			0xd000	/* 11010 */
+#define	PCIM_HTCAP_FLE			0xd800	/* 11011 */
+#define	PCIM_HTCAP_PM			0xe000	/* 11100 */
+#define	PCIM_HTCAP_HIGH_NODE_COUNT	0xe800	/* 11101 */
+
+/* HT MSI Mapping Capability definitions. */
+#define	PCIM_HTCMD_MSI_ENABLE		0x0001
+#define	PCIM_HTCMD_MSI_FIXED		0x0002
+#define	PCIR_HTMSI_ADDRESS_LO	0x4
+#define	PCIR_HTMSI_ADDRESS_HI	0x8
+
+/* PCI Vendor capability definitions */
+#define	PCIR_VENDOR_LENGTH	0x2
+#define	PCIR_VENDOR_DATA	0x3
+
+/* PCI EHCI Debug Port definitions */
+#define	PCIR_DEBUG_PORT		0x2
+#define	PCIM_DEBUG_PORT_OFFSET		0x1FFF
+#define	PCIM_DEBUG_PORT_BAR		0xe000
+
+/* PCI-PCI Bridge Subvendor definitions */
+#define	PCIR_SUBVENDCAP_ID	0x4
+
+/* PCI Express definitions */
+#define	PCIER_FLAGS		0x2
+#define	PCIEM_FLAGS_VERSION		0x000F
+#define	PCIEM_FLAGS_TYPE		0x00F0
+#define	PCIEM_TYPE_ENDPOINT		0x0000
+#define	PCIEM_TYPE_LEGACY_ENDPOINT	0x0010
+#define	PCIEM_TYPE_ROOT_PORT		0x0040
+#define	PCIEM_TYPE_UPSTREAM_PORT	0x0050
+#define	PCIEM_TYPE_DOWNSTREAM_PORT	0x0060
+#define	PCIEM_TYPE_PCI_BRIDGE		0x0070
+#define	PCIEM_TYPE_PCIE_BRIDGE		0x0080
+#define	PCIEM_TYPE_ROOT_INT_EP		0x0090
+#define	PCIEM_TYPE_ROOT_EC		0x00a0
+#define	PCIEM_FLAGS_SLOT		0x0100
+#define	PCIEM_FLAGS_IRQ			0x3e00
+#define	PCIER_DEVICE_CAP	0x4
+#define	PCIEM_CAP_MAX_PAYLOAD		0x00000007
+#define	PCIEM_CAP_PHANTHOM_FUNCS	0x00000018
+#define	PCIEM_CAP_EXT_TAG_FIELD		0x00000020
+#define	PCIEM_CAP_L0S_LATENCY		0x000001c0
+#define	PCIEM_CAP_L1_LATENCY		0x00000e00
+#define	PCIEM_CAP_ROLE_ERR_RPT		0x00008000
+#define	PCIEM_CAP_SLOT_PWR_LIM_VAL	0x03fc0000
+#define	PCIEM_CAP_SLOT_PWR_LIM_SCALE	0x0c000000
+#define	PCIEM_CAP_FLR			0x10000000
+#define	PCIER_DEVICE_CTL	0x8
+#define	PCIEM_CTL_COR_ENABLE		0x0001
+#define	PCIEM_CTL_NFER_ENABLE		0x0002
+#define	PCIEM_CTL_FER_ENABLE		0x0004
+#define	PCIEM_CTL_URR_ENABLE		0x0008
+#define	PCIEM_CTL_RELAXED_ORD_ENABLE	0x0010
+#define	PCIEM_CTL_MAX_PAYLOAD		0x00e0
+#define	PCIEM_CTL_EXT_TAG_FIELD		0x0100
+#define	PCIEM_CTL_PHANTHOM_FUNCS	0x0200
+#define	PCIEM_CTL_AUX_POWER_PM		0x0400
+#define	PCIEM_CTL_NOSNOOP_ENABLE	0x0800
+#define	PCIEM_CTL_MAX_READ_REQUEST	0x7000
+#define	PCIEM_CTL_BRDG_CFG_RETRY	0x8000	/* PCI-E - PCI/PCI-X bridges */
+#define	PCIEM_CTL_INITIATE_FLR		0x8000	/* FLR capable endpoints */
+#define	PCIER_DEVICE_STA	0xa
+#define	PCIEM_STA_CORRECTABLE_ERROR	0x0001
+#define	PCIEM_STA_NON_FATAL_ERROR	0x0002
+#define	PCIEM_STA_FATAL_ERROR		0x0004
+#define	PCIEM_STA_UNSUPPORTED_REQ	0x0008
+#define	PCIEM_STA_AUX_POWER		0x0010
+#define	PCIEM_STA_TRANSACTION_PND	0x0020
+#define	PCIER_LINK_CAP		0xc
+#define	PCIEM_LINK_CAP_MAX_SPEED	0x0000000f
+#define	PCIEM_LINK_CAP_MAX_WIDTH	0x000003f0
+#define	PCIEM_LINK_CAP_ASPM		0x00000c00
+#define	PCIEM_LINK_CAP_L0S_EXIT		0x00007000
+#define	PCIEM_LINK_CAP_L1_EXIT		0x00038000
+#define	PCIEM_LINK_CAP_CLOCK_PM		0x00040000
+#define	PCIEM_LINK_CAP_SURPRISE_DOWN	0x00080000
+#define	PCIEM_LINK_CAP_DL_ACTIVE	0x00100000
+#define	PCIEM_LINK_CAP_LINK_BW_NOTIFY	0x00200000
+#define	PCIEM_LINK_CAP_ASPM_COMPLIANCE	0x00400000
+#define	PCIEM_LINK_CAP_PORT		0xff000000
+#define	PCIER_LINK_CTL		0x10
+#define	PCIEM_LINK_CTL_ASPMC_DIS	0x0000
+#define	PCIEM_LINK_CTL_ASPMC_L0S	0x0001
+#define	PCIEM_LINK_CTL_ASPMC_L1		0x0002
+#define	PCIEM_LINK_CTL_ASPMC		0x0003
+#define	PCIEM_LINK_CTL_RCB		0x0008
+#define	PCIEM_LINK_CTL_LINK_DIS		0x0010
+#define	PCIEM_LINK_CTL_RETRAIN_LINK	0x0020
+#define	PCIEM_LINK_CTL_COMMON_CLOCK	0x0040
+#define	PCIEM_LINK_CTL_EXTENDED_SYNC	0x0080
+#define	PCIEM_LINK_CTL_ECPM		0x0100
+#define	PCIEM_LINK_CTL_HAWD		0x0200
+#define	PCIEM_LINK_CTL_LBMIE		0x0400
+#define	PCIEM_LINK_CTL_LABIE		0x0800
+#define	PCIER_LINK_STA		0x12
+#define	PCIEM_LINK_STA_SPEED		0x000f
+#define	PCIEM_LINK_STA_WIDTH		0x03f0
+#define	PCIEM_LINK_STA_TRAINING_ERROR	0x0400
+#define	PCIEM_LINK_STA_TRAINING		0x0800
+#define	PCIEM_LINK_STA_SLOT_CLOCK	0x1000
+#define	PCIEM_LINK_STA_DL_ACTIVE	0x2000
+#define	PCIEM_LINK_STA_LINK_BW_MGMT	0x4000
+#define	PCIEM_LINK_STA_LINK_AUTO_BW	0x8000
+#define	PCIER_SLOT_CAP		0x14
+#define	PCIEM_SLOT_CAP_APB		0x00000001
+#define	PCIEM_SLOT_CAP_PCP		0x00000002
+#define	PCIEM_SLOT_CAP_MRLSP		0x00000004
+#define	PCIEM_SLOT_CAP_AIP		0x00000008
+#define	PCIEM_SLOT_CAP_PIP		0x00000010
+#define	PCIEM_SLOT_CAP_HPS		0x00000020
+#define	PCIEM_SLOT_CAP_HPC		0x00000040
+#define	PCIEM_SLOT_CAP_SPLV		0x00007f80
+#define	PCIEM_SLOT_CAP_SPLS		0x00018000
+#define	PCIEM_SLOT_CAP_EIP		0x00020000
+#define	PCIEM_SLOT_CAP_NCCS		0x00040000
+#define	PCIEM_SLOT_CAP_PSN		0xfff80000
+#define	PCIER_SLOT_CTL		0x18
+#define	PCIEM_SLOT_CTL_ABPE		0x0001
+#define	PCIEM_SLOT_CTL_PFDE		0x0002
+#define	PCIEM_SLOT_CTL_MRLSCE		0x0004
+#define	PCIEM_SLOT_CTL_PDCE		0x0008
+#define	PCIEM_SLOT_CTL_CCIE		0x0010
+#define	PCIEM_SLOT_CTL_HPIE		0x0020
+#define	PCIEM_SLOT_CTL_AIC		0x00c0
+#define	PCIEM_SLOT_CTL_PIC		0x0300
+#define	PCIEM_SLOT_CTL_PCC		0x0400
+#define	PCIEM_SLOT_CTL_EIC		0x0800
+#define	PCIEM_SLOT_CTL_DLLSCE		0x1000
+#define	PCIER_SLOT_STA		0x1a
+#define	PCIEM_SLOT_STA_ABP		0x0001
+#define	PCIEM_SLOT_STA_PFD		0x0002
+#define	PCIEM_SLOT_STA_MRLSC		0x0004
+#define	PCIEM_SLOT_STA_PDC		0x0008
+#define	PCIEM_SLOT_STA_CC		0x0010
+#define	PCIEM_SLOT_STA_MRLSS		0x0020
+#define	PCIEM_SLOT_STA_PDS		0x0040
+#define	PCIEM_SLOT_STA_EIS		0x0080
+#define	PCIEM_SLOT_STA_DLLSC		0x0100
+#define	PCIER_ROOT_CTL		0x1c
+#define	PCIEM_ROOT_CTL_SERR_CORR	0x0001
+#define	PCIEM_ROOT_CTL_SERR_NONFATAL	0x0002
+#define	PCIEM_ROOT_CTL_SERR_FATAL	0x0004
+#define	PCIEM_ROOT_CTL_PME		0x0008
+#define	PCIEM_ROOT_CTL_CRS_VIS		0x0010
+#define	PCIER_ROOT_CAP		0x1e
+#define	PCIEM_ROOT_CAP_CRS_VIS		0x0001
+#define	PCIER_ROOT_STA		0x20
+#define	PCIEM_ROOT_STA_PME_REQID_MASK	0x0000ffff
+#define	PCIEM_ROOT_STA_PME_STATUS	0x00010000
+#define	PCIEM_ROOT_STA_PME_PEND		0x00020000
+#define	PCIER_DEVICE_CAP2	0x24
+#define	PCIEM_CAP2_ARI		0x20
+#define	PCIER_DEVICE_CTL2	0x28
+#define	PCIEM_CTL2_COMP_TIMEOUT_VAL	0x000f
+#define	PCIEM_CTL2_COMP_TIMEOUT_DIS	0x0010
+#define	PCIEM_CTL2_ARI			0x0020
+#define	PCIEM_CTL2_ATOMIC_REQ_ENABLE	0x0040
+#define	PCIEM_CTL2_ATOMIC_EGR_BLOCK	0x0080
+#define	PCIEM_CTL2_ID_ORDERED_REQ_EN	0x0100
+#define	PCIEM_CTL2_ID_ORDERED_CMP_EN	0x0200
+#define	PCIEM_CTL2_LTR_ENABLE		0x0400
+#define	PCIEM_CTL2_OBFF			0x6000
+#define	PCIEM_OBFF_DISABLE		0x0000
+#define	PCIEM_OBFF_MSGA_ENABLE		0x2000
+#define	PCIEM_OBFF_MSGB_ENABLE		0x4000
+#define	PCIEM_OBFF_WAKE_ENABLE		0x6000
+#define	PCIEM_CTL2_END2END_TLP		0x8000
+#define	PCIER_DEVICE_STA2	0x2a
+#define	PCIER_LINK_CAP2		0x2c
+#define	PCIER_LINK_CTL2		0x30
+#define	PCIER_LINK_STA2		0x32
+#define	PCIER_SLOT_CAP2		0x34
+#define	PCIER_SLOT_CTL2		0x38
+#define	PCIER_SLOT_STA2		0x3a
+
+/* MSI-X definitions */
+#define	PCIR_MSIX_CTRL		0x2
+#define	PCIM_MSIXCTRL_MSIX_ENABLE	0x8000
+#define	PCIM_MSIXCTRL_FUNCTION_MASK	0x4000
+#define	PCIM_MSIXCTRL_TABLE_SIZE	0x07FF
+#define	PCIR_MSIX_TABLE		0x4
+#define	PCIR_MSIX_PBA		0x8
+#define	PCIM_MSIX_BIR_MASK		0x7
+#define	PCIM_MSIX_BIR_BAR_10		0
+#define	PCIM_MSIX_BIR_BAR_14		1
+#define	PCIM_MSIX_BIR_BAR_18		2
+#define	PCIM_MSIX_BIR_BAR_1C		3
+#define	PCIM_MSIX_BIR_BAR_20		4
+#define	PCIM_MSIX_BIR_BAR_24		5
+#define	PCIM_MSIX_VCTRL_MASK		0x1
+
+/* PCI Advanced Features definitions */
+#define	PCIR_PCIAF_CAP		0x3
+#define	PCIM_PCIAFCAP_TP	0x01
+#define	PCIM_PCIAFCAP_FLR	0x02
+#define	PCIR_PCIAF_CTRL		0x4
+#define	PCIR_PCIAFCTRL_FLR	0x01
+#define	PCIR_PCIAF_STATUS	0x5
+#define	PCIR_PCIAFSTATUS_TP	0x01
+
+/* Advanced Error Reporting */
+#define	PCIR_AER_UC_STATUS	0x04
+#define	PCIM_AER_UC_TRAINING_ERROR	0x00000001
+#define	PCIM_AER_UC_DL_PROTOCOL_ERROR	0x00000010
+#define	PCIM_AER_UC_SURPRISE_LINK_DOWN	0x00000020
+#define	PCIM_AER_UC_POISONED_TLP	0x00001000
+#define	PCIM_AER_UC_FC_PROTOCOL_ERROR	0x00002000
+#define	PCIM_AER_UC_COMPLETION_TIMEOUT	0x00004000
+#define	PCIM_AER_UC_COMPLETER_ABORT	0x00008000
+#define	PCIM_AER_UC_UNEXPECTED_COMPLETION 0x00010000
+#define	PCIM_AER_UC_RECEIVER_OVERFLOW	0x00020000
+#define	PCIM_AER_UC_MALFORMED_TLP	0x00040000
+#define	PCIM_AER_UC_ECRC_ERROR		0x00080000
+#define	PCIM_AER_UC_UNSUPPORTED_REQUEST	0x00100000
+#define	PCIM_AER_UC_ACS_VIOLATION	0x00200000
+#define	PCIM_AER_UC_INTERNAL_ERROR	0x00400000
+#define	PCIM_AER_UC_MC_BLOCKED_TLP	0x00800000
+#define	PCIM_AER_UC_ATOMIC_EGRESS_BLK	0x01000000
+#define	PCIM_AER_UC_TLP_PREFIX_BLOCKED	0x02000000
+#define	PCIR_AER_UC_MASK	0x08	/* Shares bits with UC_STATUS */
+#define	PCIR_AER_UC_SEVERITY	0x0c	/* Shares bits with UC_STATUS */
+#define	PCIR_AER_COR_STATUS	0x10
+#define	PCIM_AER_COR_RECEIVER_ERROR	0x00000001
+#define	PCIM_AER_COR_BAD_TLP		0x00000040
+#define	PCIM_AER_COR_BAD_DLLP		0x00000080
+#define	PCIM_AER_COR_REPLAY_ROLLOVER	0x00000100
+#define	PCIM_AER_COR_REPLAY_TIMEOUT	0x00001000
+#define	PCIM_AER_COR_ADVISORY_NF_ERROR	0x00002000
+#define	PCIM_AER_COR_INTERNAL_ERROR	0x00004000
+#define	PCIM_AER_COR_HEADER_LOG_OVFLOW	0x00008000
+#define	PCIR_AER_COR_MASK	0x14	/* Shares bits with COR_STATUS */
+#define	PCIR_AER_CAP_CONTROL	0x18
+#define	PCIM_AER_FIRST_ERROR_PTR	0x0000001f
+#define	PCIM_AER_ECRC_GEN_CAPABLE	0x00000020
+#define	PCIM_AER_ECRC_GEN_ENABLE	0x00000040
+#define	PCIM_AER_ECRC_CHECK_CAPABLE	0x00000080
+#define	PCIM_AER_ECRC_CHECK_ENABLE	0x00000100
+#define	PCIM_AER_MULT_HDR_CAPABLE	0x00000200
+#define	PCIM_AER_MULT_HDR_ENABLE	0x00000400
+#define	PCIM_AER_TLP_PREFIX_LOG_PRESENT	0x00000800
+#define	PCIR_AER_HEADER_LOG	0x1c
+#define	PCIR_AER_ROOTERR_CMD	0x2c	/* Only for root complex ports */
+#define	PCIM_AER_ROOTERR_COR_ENABLE	0x00000001
+#define	PCIM_AER_ROOTERR_NF_ENABLE	0x00000002
+#define	PCIM_AER_ROOTERR_F_ENABLE	0x00000004
+#define	PCIR_AER_ROOTERR_STATUS	0x30	/* Only for root complex ports */
+#define	PCIM_AER_ROOTERR_COR_ERR	0x00000001
+#define	PCIM_AER_ROOTERR_MULTI_COR_ERR	0x00000002
+#define	PCIM_AER_ROOTERR_UC_ERR		0x00000004
+#define	PCIM_AER_ROOTERR_MULTI_UC_ERR	0x00000008
+#define	PCIM_AER_ROOTERR_FIRST_UC_FATAL	0x00000010
+#define	PCIM_AER_ROOTERR_NF_ERR		0x00000020
+#define	PCIM_AER_ROOTERR_F_ERR		0x00000040
+#define	PCIM_AER_ROOTERR_INT_MESSAGE	0xf8000000
+#define	PCIR_AER_COR_SOURCE_ID	0x34	/* Only for root complex ports */
+#define	PCIR_AER_ERR_SOURCE_ID	0x36	/* Only for root complex ports */
+#define	PCIR_AER_TLP_PREFIX_LOG	0x38	/* Only for TLP prefix functions */
+
+/* Virtual Channel definitions */
+#define	PCIR_VC_CAP1		0x04
+#define	PCIM_VC_CAP1_EXT_COUNT		0x00000007
+#define	PCIM_VC_CAP1_LOWPRI_EXT_COUNT	0x00000070
+#define	PCIR_VC_CAP2		0x08
+#define	PCIR_VC_CONTROL		0x0C
+#define	PCIR_VC_STATUS		0x0E
+#define	PCIR_VC_RESOURCE_CAP(n)	(0x10 + (n) * 0x0C)
+#define	PCIR_VC_RESOURCE_CTL(n)	(0x14 + (n) * 0x0C)
+#define	PCIR_VC_RESOURCE_STA(n)	(0x18 + (n) * 0x0C)
+
+/* Serial Number definitions */
+#define	PCIR_SERIAL_LOW		0x04
+#define	PCIR_SERIAL_HIGH	0x08
+
diff --git a/usr/contrib/freebsd/isa/isareg.h b/usr/contrib/freebsd/isa/isareg.h
new file mode 100644
index 0000000000..e83e34674f
--- /dev/null
+++ b/usr/contrib/freebsd/isa/isareg.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)isa.h	5.7 (Berkeley) 5/9/91
+ * $FreeBSD: head/sys/isa/isareg.h 263379 2014-03-19 21:03:04Z imp $
+ */
+
+#ifdef PC98
+#error isareg.h is included from PC-9801 source
+#endif
+
+#ifndef _ISA_ISA_H_
+#define	_ISA_ISA_H_
+
+/*
+ * ISA Bus conventions
+ */
+
+/*
+ * Input / Output Port Assignments
+ */
+#ifndef IO_ISABEGIN	/* skipped when IO_ISABEGIN is already defined */
+#define	IO_ISABEGIN	0x000		/* 0x000 - Beginning of I/O Registers */
+#define	IO_ICU1		0x020		/* 8259A Interrupt Controller #1 */
+#define	IO_KBD		0x060		/* 8042 Keyboard */
+#define	IO_RTC		0x070		/* RTC */
+#define	IO_ICU2		0x0A0		/* 8259A Interrupt Controller #2 */
+
+#define	IO_MDA		0x3B0		/* Monochrome Adapter */
+#define	IO_VGA		0x3C0		/* E/VGA Ports */
+#define	IO_CGA		0x3D0		/* CGA Ports */
+
+#endif /* !IO_ISABEGIN */
+
+/*
+ * Input / Output Port Sizes
+ */
+#define	IO_CGASIZE	12		/* CGA controllers */
+#define	IO_MDASIZE	12		/* Monochrome display controllers */
+#define	IO_VGASIZE	16		/* VGA controllers */
+
+#endif /* !_ISA_ISA_H_ */
diff --git a/usr/contrib/freebsd/lib/libutil/expand_number.c b/usr/contrib/freebsd/lib/libutil/expand_number.c
new file mode 100644
index 0000000000..f3b4da89f9
--- /dev/null
+++ b/usr/contrib/freebsd/lib/libutil/expand_number.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2007 Eric Anderson <anderson@FreeBSD.org>
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/lib/libutil/expand_number.c 255069 2013-08-30 11:21:52Z pluknet $");
+
+#include <sys/types.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <libutil.h>
+#include <stdint.h>
+
+/*
+ * Parse a number with an optional unit suffix into a 64-bit value.
+ *
+ * The digits are converted by strtoumax() with base 0.  The first character
+ * after the digits selects a power-of-two multiplier, compared without
+ * regard to case: k (2^10), m (2^20), g (2^30), t (2^40), p (2^50) or
+ * e (2^60).  A 'b' suffix or the end of the string means no scaling.
+ * Characters following the unit letter are not examined.
+ *
+ * Returns 0 and stores the result in *num on success, with errno restored
+ * to its value on entry.  On failure returns -1 with *num untouched and
+ * errno set to EINVAL (no digits, or an unrecognized unit) or ERANGE (the
+ * value does not fit in 64 bits).
+ */
+int
+expand_number(const char *buf, uint64_t *num)
+{
+	char *endptr;
+	uintmax_t umaxval;
+	uint64_t number;
+	unsigned shift;
+	int serrno;
+
+	serrno = errno;
+	errno = 0;
+	umaxval = strtoumax(buf, &endptr, 0);
+	if (umaxval > UINT64_MAX)
+		errno = ERANGE;
+	if (errno != 0)
+		return (-1);
+	if (endptr == buf) {
+		/*
+		 * No digits were consumed.  strtoumax() may, but need not,
+		 * report this through errno, so reject it explicitly
+		 * instead of treating the input as zero.
+		 */
+		errno = EINVAL;
+		return (-1);
+	}
+	errno = serrno;
+	number = umaxval;
+
+	switch (tolower((unsigned char)*endptr)) {
+	case 'e':
+		shift = 60;
+		break;
+	case 'p':
+		shift = 50;
+		break;
+	case 't':
+		shift = 40;
+		break;
+	case 'g':
+		shift = 30;
+		break;
+	case 'm':
+		shift = 20;
+		break;
+	case 'k':
+		shift = 10;
+		break;
+	case 'b':
+	case '\0': /* No unit. */
+		*num = number;
+		return (0);
+	default:
+		/* Unrecognized unit. */
+		errno = EINVAL;
+		return (-1);
+	}
+
+	/* Shifting up and back down must round-trip, or high bits were lost. */
+	if ((number << shift) >> shift != number) {
+		/* Overflow */
+		errno = ERANGE;
+		return (-1);
+	}
+	*num = number << shift;
+	return (0);
+}
diff --git a/usr/contrib/freebsd/sys/ata.h b/usr/contrib/freebsd/sys/ata.h
new file mode 100644
index 0000000000..705460355f
--- /dev/null
+++ b/usr/contrib/freebsd/sys/ata.h
@@ -0,0 +1,635 @@
+/*-
+ * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/sys/ata.h 264853 2014-04-24 01:28:14Z smh $
+ */
+
+#ifndef _SYS_ATA_H_
+#define _SYS_ATA_H_
+
+#include <sys/ioccom.h>
+
+/* ATA/ATAPI device parameters; leading comments give 16-bit word offsets */
+struct ata_params {
+/*000*/ u_int16_t       config;         /* configuration info */
+#define ATA_PROTO_MASK                  0x8003
+#define ATA_PROTO_ATAPI                 0x8000
+#define ATA_PROTO_ATAPI_12              0x8000
+#define ATA_PROTO_ATAPI_16              0x8001
+#define ATA_PROTO_CFA                   0x848a
+#define ATA_ATAPI_TYPE_MASK             0x1f00
+#define ATA_ATAPI_TYPE_DIRECT           0x0000  /* disk/floppy */
+#define ATA_ATAPI_TYPE_TAPE             0x0100  /* streaming tape */
+#define ATA_ATAPI_TYPE_CDROM            0x0500  /* CD-ROM device */
+#define ATA_ATAPI_TYPE_OPTICAL          0x0700  /* optical disk */
+#define ATA_DRQ_MASK                    0x0060
+#define ATA_DRQ_SLOW                    0x0000  /* cpu 3 ms delay */
+#define ATA_DRQ_INTR                    0x0020  /* interrupt 10 ms delay */
+#define ATA_DRQ_FAST                    0x0040  /* accel 50 us delay */
+#define ATA_RESP_INCOMPLETE             0x0004
+
+/*001*/ u_int16_t       cylinders;              /* # of cylinders */
+/*002*/ u_int16_t       specconf;		/* specific configuration */
+/*003*/ u_int16_t       heads;                  /* # heads */
+	u_int16_t       obsolete4;
+	u_int16_t       obsolete5;
+/*006*/ u_int16_t       sectors;                /* # sectors/track */
+/*007*/ u_int16_t       vendor7[3];
+/*010*/ u_int8_t        serial[20];             /* serial number */
+/*020*/ u_int16_t       retired20;
+	u_int16_t       retired21;
+	u_int16_t       obsolete22;
+/*023*/ u_int8_t        revision[8];            /* firmware revision */
+/*027*/ u_int8_t        model[40];              /* model name */
+/*047*/ u_int16_t       sectors_intr;           /* sectors per interrupt */
+/*048*/ u_int16_t       usedmovsd;              /* double word read/write? */
+/*049*/ u_int16_t       capabilities1;
+#define ATA_SUPPORT_DMA                 0x0100
+#define ATA_SUPPORT_LBA                 0x0200
+#define ATA_SUPPORT_IORDY               0x0400
+#define ATA_SUPPORT_IORDYDIS            0x0800
+#define ATA_SUPPORT_OVERLAP             0x4000
+
+/*050*/ u_int16_t       capabilities2;
+/*051*/ u_int16_t       retired_piomode;        /* PIO modes 0-2 */
+#define ATA_RETIRED_PIO_MASK            0x0300
+
+/*052*/ u_int16_t       retired_dmamode;        /* DMA modes */
+#define ATA_RETIRED_DMA_MASK            0x0003
+
+/*053*/ u_int16_t       atavalid;               /* fields valid */
+#define ATA_FLAG_54_58                  0x0001  /* words 54-58 valid */
+#define ATA_FLAG_64_70                  0x0002  /* words 64-70 valid */
+#define ATA_FLAG_88                     0x0004  /* word 88 valid */
+
+/*054*/ u_int16_t       current_cylinders;
+/*055*/ u_int16_t       current_heads;
+/*056*/ u_int16_t       current_sectors;
+/*057*/ u_int16_t       current_size_1;
+/*058*/ u_int16_t       current_size_2;
+/*059*/ u_int16_t       multi;
+#define ATA_MULTI_VALID                 0x0100
+
+/*060*/ u_int16_t       lba_size_1;
+	u_int16_t       lba_size_2;
+	u_int16_t       obsolete62;
+/*063*/ u_int16_t       mwdmamodes;             /* multiword DMA modes */
+/*064*/ u_int16_t       apiomodes;              /* advanced PIO modes */
+
+/*065*/ u_int16_t       mwdmamin;               /* min. M/W DMA time/word ns */
+/*066*/ u_int16_t       mwdmarec;               /* rec. M/W DMA time ns */
+/*067*/ u_int16_t       pioblind;               /* min. PIO cycle w/o flow */
+/*068*/ u_int16_t       pioiordy;               /* min. PIO cycle IORDY flow */
+/*069*/ u_int16_t       support3;
+#define ATA_SUPPORT_RZAT                0x0020
+#define ATA_SUPPORT_DRAT                0x4000
+	u_int16_t       reserved70;
+/*071*/ u_int16_t       rlsovlap;               /* rel time (us) for overlap */
+/*072*/ u_int16_t       rlsservice;             /* rel time (us) for service */
+	u_int16_t       reserved73;
+	u_int16_t       reserved74;
+/*075*/ u_int16_t       queue;
+#define ATA_QUEUE_LEN(x)                ((x) & 0x001f)
+
+/*076*/ u_int16_t       satacapabilities;
+#define ATA_SATA_GEN1                   0x0002
+#define ATA_SATA_GEN2                   0x0004
+#define ATA_SATA_GEN3                   0x0008
+#define ATA_SUPPORT_NCQ                 0x0100
+#define ATA_SUPPORT_IFPWRMNGTRCV        0x0200
+#define ATA_SUPPORT_PHYEVENTCNT         0x0400
+#define ATA_SUPPORT_NCQ_UNLOAD          0x0800
+#define ATA_SUPPORT_NCQ_PRIO            0x1000
+#define ATA_SUPPORT_HAPST               0x2000
+#define ATA_SUPPORT_DAPST               0x4000
+#define ATA_SUPPORT_READLOGDMAEXT       0x8000
+
+/*077*/ u_int16_t       satacapabilities2;
+#define ATA_SATA_CURR_GEN_MASK          0x0006
+#define ATA_SUPPORT_NCQ_STREAM          0x0010
+#define ATA_SUPPORT_NCQ_QMANAGEMENT     0x0020
+#define ATA_SUPPORT_RCVSND_FPDMA_QUEUED 0x0040
+/*078*/ u_int16_t       satasupport;
+#define ATA_SUPPORT_NONZERO             0x0002
+#define ATA_SUPPORT_AUTOACTIVATE        0x0004
+#define ATA_SUPPORT_IFPWRMNGT           0x0008
+#define ATA_SUPPORT_INORDERDATA         0x0010
+#define ATA_SUPPORT_ASYNCNOTIF          0x0020
+#define ATA_SUPPORT_SOFTSETPRESERVE     0x0040
+/*079*/ u_int16_t       sataenabled;
+#define ATA_ENABLED_DAPST               0x0080
+
+/*080*/ u_int16_t       version_major;
+/*081*/ u_int16_t       version_minor;
+
+	struct {
+/*082/085*/ u_int16_t   command1;
+#define ATA_SUPPORT_SMART               0x0001
+#define ATA_SUPPORT_SECURITY            0x0002
+#define ATA_SUPPORT_REMOVABLE           0x0004
+#define ATA_SUPPORT_POWERMGT            0x0008
+#define ATA_SUPPORT_PACKET              0x0010
+#define ATA_SUPPORT_WRITECACHE          0x0020
+#define ATA_SUPPORT_LOOKAHEAD           0x0040
+#define ATA_SUPPORT_RELEASEIRQ          0x0080
+#define ATA_SUPPORT_SERVICEIRQ          0x0100
+#define ATA_SUPPORT_RESET               0x0200
+#define ATA_SUPPORT_PROTECTED           0x0400
+#define ATA_SUPPORT_WRITEBUFFER         0x1000
+#define ATA_SUPPORT_READBUFFER          0x2000
+#define ATA_SUPPORT_NOP                 0x4000
+
+/*083/086*/ u_int16_t   command2;
+#define ATA_SUPPORT_MICROCODE           0x0001
+#define ATA_SUPPORT_QUEUED              0x0002
+#define ATA_SUPPORT_CFA                 0x0004
+#define ATA_SUPPORT_APM                 0x0008
+#define ATA_SUPPORT_NOTIFY              0x0010
+#define ATA_SUPPORT_STANDBY             0x0020
+#define ATA_SUPPORT_SPINUP              0x0040
+#define ATA_SUPPORT_MAXSECURITY         0x0100
+#define ATA_SUPPORT_AUTOACOUSTIC        0x0200
+#define ATA_SUPPORT_ADDRESS48           0x0400
+#define ATA_SUPPORT_OVERLAY             0x0800
+#define ATA_SUPPORT_FLUSHCACHE          0x1000
+#define ATA_SUPPORT_FLUSHCACHE48        0x2000
+
+/*084/087*/ u_int16_t   extension;
+#define ATA_SUPPORT_SMARTLOG		0x0001
+#define ATA_SUPPORT_SMARTTEST		0x0002
+#define ATA_SUPPORT_MEDIASN		0x0004
+#define ATA_SUPPORT_MEDIAPASS		0x0008
+#define ATA_SUPPORT_STREAMING		0x0010
+#define ATA_SUPPORT_GENLOG		0x0020
+#define ATA_SUPPORT_WRITEDMAFUAEXT	0x0040
+#define ATA_SUPPORT_WRITEDMAQFUAEXT	0x0080
+#define ATA_SUPPORT_64BITWWN		0x0100
+#define ATA_SUPPORT_UNLOAD		0x2000
+	} __packed support, enabled;
+
+/*088*/ u_int16_t       udmamodes;              /* UltraDMA modes */
+/*089*/ u_int16_t       erase_time;             /* time req'd in 2min units */
+/*090*/ u_int16_t       enhanced_erase_time;    /* time req'd in 2min units */
+/*091*/ u_int16_t       apm_value;
+/*092*/ u_int16_t       master_passwd_revision; /* password revision code */
+/*093*/ u_int16_t       hwres;
+#define ATA_CABLE_ID                    0x2000
+
+/*094*/ u_int16_t       acoustic;
+#define ATA_ACOUSTIC_CURRENT(x)         ((x) & 0x00ff)
+#define ATA_ACOUSTIC_VENDOR(x)          (((x) & 0xff00) >> 8)
+
+/*095*/ u_int16_t       stream_min_req_size;
+/*096*/ u_int16_t       stream_transfer_time;
+/*097*/ u_int16_t       stream_access_latency;
+/*098*/ u_int32_t       stream_granularity;
+/*100*/ u_int16_t       lba_size48_1;
+	u_int16_t       lba_size48_2;
+	u_int16_t       lba_size48_3;
+	u_int16_t       lba_size48_4;
+	u_int16_t       reserved104;
+/*105*/	u_int16_t       max_dsm_blocks;
+/*106*/	u_int16_t       pss;
+#define ATA_PSS_LSPPS			0x000F
+#define ATA_PSS_LSSABOVE512		0x1000
+#define ATA_PSS_MULTLS			0x2000
+#define ATA_PSS_VALID_MASK		0xC000
+#define ATA_PSS_VALID_VALUE		0x4000
+/*107*/ u_int16_t       isd;
+/*108*/ u_int16_t       wwn[4];
+	u_int16_t       reserved112[5];
+/*117*/ u_int16_t       lss_1;
+/*118*/ u_int16_t       lss_2;
+/*119*/ u_int16_t       support2;
+#define ATA_SUPPORT_WRITEREADVERIFY	0x0002
+#define ATA_SUPPORT_WRITEUNCORREXT	0x0004
+#define ATA_SUPPORT_RWLOGDMAEXT		0x0008
+#define ATA_SUPPORT_MICROCODE3		0x0010
+#define ATA_SUPPORT_FREEFALL		0x0020
+/*120*/ u_int16_t       enabled2;
+	u_int16_t       reserved121[6];
+/*127*/ u_int16_t       removable_status;
+/*128*/ u_int16_t       security_status;
+#define ATA_SECURITY_LEVEL		0x0100	/* 0: high, 1: maximum */
+#define ATA_SECURITY_ENH_SUPP		0x0020	/* enhanced erase supported */
+#define ATA_SECURITY_COUNT_EXP		0x0010	/* count expired */
+#define ATA_SECURITY_FROZEN		0x0008	/* security config is frozen */
+#define ATA_SECURITY_LOCKED		0x0004	/* drive is locked */
+#define ATA_SECURITY_ENABLED		0x0002	/* ATA Security is enabled */
+#define ATA_SECURITY_SUPPORTED		0x0001	/* ATA Security is supported */
+
+	u_int16_t       reserved129[31];
+/*160*/ u_int16_t       cfa_powermode1;
+	u_int16_t       reserved161;
+/*162*/ u_int16_t       cfa_kms_support;
+/*163*/ u_int16_t       cfa_trueide_modes;
+/*164*/ u_int16_t       cfa_memory_modes;
+	u_int16_t       reserved165[4];
+/*169*/	u_int16_t       support_dsm;
+#define ATA_SUPPORT_DSM_TRIM		0x0001
+	u_int16_t       reserved170[6];
+/*176*/ u_int8_t        media_serial[60];
+/*206*/ u_int16_t       sct;
+	u_int16_t       reserved206[2];	/* words 207-208, despite the name */
+/*209*/ u_int16_t       lsalign;
+/*210*/ u_int16_t       wrv_sectors_m3_1;
+	u_int16_t       wrv_sectors_m3_2;
+/*212*/ u_int16_t       wrv_sectors_m2_1;
+	u_int16_t       wrv_sectors_m2_2;
+/*214*/ u_int16_t       nv_cache_caps;
+/*215*/ u_int16_t       nv_cache_size_1;
+	u_int16_t       nv_cache_size_2;
+/*217*/ u_int16_t       media_rotation_rate;
+#define ATA_RATE_NOT_REPORTED		0x0000
+#define ATA_RATE_NON_ROTATING		0x0001
+	u_int16_t       reserved218;
+/*219*/ u_int16_t       nv_cache_opt;
+/*220*/ u_int16_t       wrv_mode;
+	u_int16_t       reserved221;
+/*222*/ u_int16_t       transport_major;
+/*223*/ u_int16_t       transport_minor;
+	u_int16_t       reserved224[31];
+/*255*/ u_int16_t       integrity;
+} __packed;
+
+/* ATA Dataset Management */
+#define ATA_DSM_BLK_SIZE	512
+#define ATA_DSM_BLK_RANGES	64	/* 512 / 8: BLK_SIZE / RANGE_SIZE */
+#define ATA_DSM_RANGE_SIZE	8
+#define ATA_DSM_RANGE_MAX	65535
+
+/*
+ * ATA Device Register
+ *
+ * bit 7 Obsolete (was 1 in early ATA specs)
+ * bit 6 Sets LBA/CHS mode. 1=LBA, 0=CHS
+ * bit 5 Obsolete (was 1 in early ATA specs)
+ * bit 4 1 = Slave Drive, 0 = Master Drive
+ * bit 3-0 In LBA mode, 27-24 of address. In CHS mode, head number
+ */
+
+#define ATA_DEV_MASTER		0x00	/* bit 4 clear */
+#define ATA_DEV_SLAVE		0x10	/* bit 4 set */
+#define ATA_DEV_LBA		0x40	/* bit 6 set */
+
+/* ATA limits */
+#define ATA_MAX_28BIT_LBA	268435455UL	/* 2^28 - 1 */
+
+/* ATA Status Register */
+#define ATA_STATUS_ERROR	0x01
+#define ATA_STATUS_DEVICE_FAULT	0x20
+
+/* ATA Error Register */
+#define ATA_ERROR_ABORT		0x04
+#define ATA_ERROR_ID_NOT_FOUND	0x10
+
+/* ATA HPA Features */
+#define ATA_HPA_FEAT_MAX_ADDR	0x00
+#define ATA_HPA_FEAT_SET_PWD	0x01
+#define ATA_HPA_FEAT_LOCK	0x02
+#define ATA_HPA_FEAT_UNLOCK	0x03
+#define ATA_HPA_FEAT_FREEZE	0x04
+
+/* ATA transfer modes */
+#define ATA_MODE_MASK           0x0f	/* low nibble */
+#define ATA_DMA_MASK            0xf0	/* high nibble */
+#define ATA_PIO                 0x00
+#define ATA_PIO0                0x08
+#define ATA_PIO1                0x09
+#define ATA_PIO2                0x0a
+#define ATA_PIO3                0x0b
+#define ATA_PIO4                0x0c
+#define ATA_PIO_MAX             0x0f
+#define ATA_DMA                 0x10
+#define ATA_WDMA0               0x20
+#define ATA_WDMA1               0x21
+#define ATA_WDMA2               0x22
+#define ATA_UDMA0               0x40
+#define ATA_UDMA1               0x41
+#define ATA_UDMA2               0x42
+#define ATA_UDMA3               0x43
+#define ATA_UDMA4               0x44
+#define ATA_UDMA5               0x45
+#define ATA_UDMA6               0x46
+#define ATA_SA150               0x47
+#define ATA_SA300               0x48
+#define ATA_DMA_MAX             0x4f
+
+
+/* ATA commands */
+#define ATA_NOP                         0x00    /* NOP */
+#define         ATA_NF_FLUSHQUEUE       0x00    /* flush queued cmd's */
+#define         ATA_NF_AUTOPOLL         0x01    /* start autopoll function */
+#define ATA_DATA_SET_MANAGEMENT		0x06
+#define 	ATA_DSM_TRIM		0x01
+#define ATA_DEVICE_RESET                0x08    /* reset device */
+#define ATA_READ                        0x20    /* read */
+#define ATA_READ48                      0x24    /* read 48bit LBA */
+#define ATA_READ_DMA48                  0x25    /* read DMA 48bit LBA */
+#define ATA_READ_DMA_QUEUED48           0x26    /* read DMA QUEUED 48bit LBA */
+#define ATA_READ_NATIVE_MAX_ADDRESS48   0x27    /* read native max addr 48bit */
+#define ATA_READ_MUL48                  0x29    /* read multi 48bit LBA */
+#define ATA_READ_STREAM_DMA48           0x2a    /* read DMA stream 48bit LBA */
+#define ATA_READ_LOG_EXT                0x2f    /* read log ext - PIO Data-In */
+#define ATA_READ_STREAM48               0x2b    /* read stream 48bit LBA */
+#define ATA_WRITE                       0x30    /* write */
+#define ATA_WRITE48                     0x34    /* write 48bit LBA */
+#define ATA_WRITE_DMA48                 0x35    /* write DMA 48bit LBA */
+#define ATA_WRITE_DMA_QUEUED48          0x36    /* write DMA QUEUED 48bit LBA */
+#define ATA_SET_MAX_ADDRESS48           0x37    /* set max address 48bit */
+#define ATA_WRITE_MUL48                 0x39    /* write multi 48bit LBA */
+#define ATA_WRITE_STREAM_DMA48          0x3a
+#define ATA_WRITE_STREAM48              0x3b
+#define ATA_WRITE_DMA_FUA48             0x3d
+#define ATA_WRITE_DMA_QUEUED_FUA48      0x3e
+#define ATA_WRITE_LOG_EXT               0x3f
+#define ATA_READ_VERIFY                 0x40
+#define ATA_READ_VERIFY48               0x42
+#define ATA_READ_LOG_DMA_EXT            0x47    /* read log DMA ext */
+#define ATA_READ_FPDMA_QUEUED           0x60    /* read DMA NCQ */
+#define ATA_WRITE_FPDMA_QUEUED          0x61    /* write DMA NCQ */
+#define ATA_SEND_FPDMA_QUEUED           0x64    /* send DMA NCQ */
+#define ATA_RECV_FPDMA_QUEUED           0x65    /* receive DMA NCQ */
+#define ATA_SEP_ATTN                    0x67    /* SEP request */
+#define ATA_SEEK                        0x70    /* seek */
+#define ATA_PACKET_CMD                  0xa0    /* packet command */
+#define ATA_ATAPI_IDENTIFY              0xa1    /* get ATAPI params */
+#define ATA_SERVICE                     0xa2    /* service command */
+#define ATA_SMART_CMD                   0xb0    /* SMART command */
+#define ATA_CFA_ERASE                   0xc0    /* CFA erase */
+#define ATA_READ_MUL                    0xc4    /* read multi */
+#define ATA_WRITE_MUL                   0xc5    /* write multi */
+#define ATA_SET_MULTI                   0xc6    /* set multi size */
+#define ATA_READ_DMA_QUEUED             0xc7    /* read DMA QUEUED */
+#define ATA_READ_DMA                    0xc8    /* read DMA */
+#define ATA_WRITE_DMA                   0xca    /* write DMA */
+#define ATA_WRITE_DMA_QUEUED            0xcc    /* write DMA QUEUED */
+#define ATA_WRITE_MUL_FUA48             0xce
+#define ATA_STANDBY_IMMEDIATE           0xe0    /* standby immediate */
+#define ATA_IDLE_IMMEDIATE              0xe1    /* idle immediate */
+#define ATA_STANDBY_CMD                 0xe2    /* standby */
+#define ATA_IDLE_CMD                    0xe3    /* idle */
+#define ATA_READ_BUFFER                 0xe4    /* read buffer */
+#define ATA_READ_PM                     0xe4    /* read portmultiplier */
+#define ATA_SLEEP                       0xe6    /* sleep */
+#define ATA_FLUSHCACHE                  0xe7    /* flush cache to disk */
+#define ATA_WRITE_PM                    0xe8    /* write portmultiplier */
+#define ATA_FLUSHCACHE48                0xea    /* flush cache to disk */
+#define ATA_ATA_IDENTIFY                0xec    /* get ATA params */
+#define ATA_SETFEATURES                 0xef    /* features command */
+#define         ATA_SF_SETXFER          0x03    /* set transfer mode */
+#define         ATA_SF_ENAB_WCACHE      0x02    /* enable write cache */
+#define         ATA_SF_DIS_WCACHE       0x82    /* disable write cache */
+#define         ATA_SF_ENAB_PUIS        0x06    /* enable PUIS */
+#define         ATA_SF_DIS_PUIS         0x86    /* disable PUIS */
+#define         ATA_SF_PUIS_SPINUP      0x07    /* PUIS spin-up */
+#define         ATA_SF_ENAB_RCACHE      0xaa    /* enable readahead cache */
+#define         ATA_SF_DIS_RCACHE       0x55    /* disable readahead cache */
+#define         ATA_SF_ENAB_RELIRQ      0x5d    /* enable release interrupt */
+#define         ATA_SF_DIS_RELIRQ       0xdd    /* disable release interrupt */
+#define         ATA_SF_ENAB_SRVIRQ      0x5e    /* enable service interrupt */
+#define         ATA_SF_DIS_SRVIRQ       0xde    /* disable service interrupt */
+#define ATA_SECURITY_SET_PASSWORD       0xf1    /* set drive password */
+#define ATA_SECURITY_UNLOCK             0xf2    /* unlock drive using passwd */
+#define ATA_SECURITY_ERASE_PREPARE      0xf3    /* prepare to erase drive */
+#define ATA_SECURITY_ERASE_UNIT         0xf4    /* erase all blocks on drive */
+#define ATA_SECURITY_FREEZE_LOCK        0xf5    /* freeze security config */
+#define ATA_SECURITY_DISABLE_PASSWORD   0xf6    /* disable drive password */
+#define ATA_READ_NATIVE_MAX_ADDRESS     0xf8    /* read native max address */
+#define ATA_SET_MAX_ADDRESS             0xf9    /* set max address */
+
+
+/* ATAPI commands */
+#define ATAPI_TEST_UNIT_READY           0x00    /* check if device is ready */
+#define ATAPI_REZERO                    0x01    /* rewind */
+#define ATAPI_REQUEST_SENSE             0x03    /* get sense data */
+#define ATAPI_FORMAT                    0x04    /* format unit */
+#define ATAPI_READ                      0x08    /* read data */
+#define ATAPI_WRITE                     0x0a    /* write data */
+#define ATAPI_WEOF                      0x10    /* write filemark */
+#define         ATAPI_WF_WRITE          0x01
+#define ATAPI_SPACE                     0x11    /* space command */
+#define         ATAPI_SP_FM             0x01
+#define         ATAPI_SP_EOD            0x03
+#define ATAPI_INQUIRY			0x12	/* get inquiry data */
+#define ATAPI_MODE_SELECT               0x15    /* mode select */
+#define ATAPI_ERASE                     0x19    /* erase */
+#define ATAPI_MODE_SENSE                0x1a    /* mode sense */
+#define ATAPI_START_STOP                0x1b    /* start/stop unit */
+#define         ATAPI_SS_LOAD           0x01
+#define         ATAPI_SS_RETENSION      0x02
+#define         ATAPI_SS_EJECT          0x04
+#define ATAPI_PREVENT_ALLOW             0x1e    /* media removal */
+#define ATAPI_READ_FORMAT_CAPACITIES    0x23    /* get format capacities */
+#define ATAPI_READ_CAPACITY             0x25    /* get volume capacity */
+#define ATAPI_READ_BIG                  0x28    /* read data */
+#define ATAPI_WRITE_BIG                 0x2a    /* write data */
+#define ATAPI_LOCATE                    0x2b    /* locate to position */
+#define ATAPI_READ_POSITION             0x34    /* read position */
+#define ATAPI_SYNCHRONIZE_CACHE         0x35    /* flush buf, close channel */
+#define ATAPI_WRITE_BUFFER              0x3b    /* write device buffer */
+#define ATAPI_READ_BUFFER               0x3c    /* read device buffer */
+#define ATAPI_READ_SUBCHANNEL           0x42    /* get subchannel info */
+#define ATAPI_READ_TOC                  0x43    /* get table of contents */
+#define ATAPI_PLAY_10                   0x45    /* play by lba */
+#define ATAPI_PLAY_MSF                  0x47    /* play by MSF address */
+#define ATAPI_PLAY_TRACK                0x48    /* play by track number */
+#define ATAPI_PAUSE                     0x4b    /* pause audio operation */
+#define ATAPI_READ_DISK_INFO            0x51    /* get disk info structure */
+#define ATAPI_READ_TRACK_INFO           0x52    /* get track info structure */
+#define ATAPI_RESERVE_TRACK             0x53    /* reserve track */
+#define ATAPI_SEND_OPC_INFO             0x54    /* send OPC structure */
+#define ATAPI_MODE_SELECT_BIG           0x55    /* set device parameters */
+#define ATAPI_REPAIR_TRACK              0x58    /* repair track */
+#define ATAPI_READ_MASTER_CUE           0x59    /* read master CUE info */
+#define ATAPI_MODE_SENSE_BIG            0x5a    /* get device parameters */
+#define ATAPI_CLOSE_TRACK               0x5b    /* close track/session */
+#define ATAPI_READ_BUFFER_CAPACITY      0x5c    /* get buffer capacity */
+#define ATAPI_SEND_CUE_SHEET            0x5d    /* send CUE sheet */
+#define ATAPI_SERVICE_ACTION_IN         0x96	/* get service data */
+#define ATAPI_BLANK                     0xa1    /* blank the media */
+#define ATAPI_SEND_KEY                  0xa3    /* send DVD key structure */
+#define ATAPI_REPORT_KEY                0xa4    /* get DVD key structure */
+#define ATAPI_PLAY_12                   0xa5    /* play by lba */
+#define ATAPI_LOAD_UNLOAD               0xa6    /* changer control command */
+#define ATAPI_READ_STRUCTURE            0xad    /* get DVD structure */
+#define ATAPI_PLAY_CD                   0xb4    /* universal play command */
+#define ATAPI_SET_SPEED                 0xbb    /* set drive speed */
+#define ATAPI_MECH_STATUS               0xbd    /* get changer status */
+#define ATAPI_READ_CD                   0xbe    /* read data */
+#define ATAPI_POLL_DSC                  0xff    /* poll DSC status bit */
+
+
+struct ata_ioc_devices {	/* argument of IOCATADEVICES */
+    int                 channel;
+    char                name[2][32];	/* two name slots of 32 bytes each */
+    struct ata_params   params[2];	/* two entries, like name[] */
+};
+
+/* per-channel ATA ioctl calls */
+#define IOCATAGMAXCHANNEL       _IOR('a',  1, int)
+#define IOCATAREINIT            _IOW('a',  2, int)
+#define IOCATAATTACH            _IOW('a',  3, int)
+#define IOCATADETACH            _IOW('a',  4, int)
+#define IOCATADEVICES           _IOWR('a',  5, struct ata_ioc_devices)
+
+/* ATAPI request sense structure */
+struct atapi_sense {
+    u_int8_t	error;				/* current or deferred errors */
+#define	ATA_SENSE_VALID			0x80
+
+    u_int8_t	segment;			/* segment number */
+    u_int8_t	key;				/* sense key */
+#define ATA_SENSE_KEY_MASK		0x0f    /* sense key mask */
+#define ATA_SENSE_NO_SENSE		0x00    /* no specific sense key info */
+#define ATA_SENSE_RECOVERED_ERROR 	0x01    /* command OK, data recovered */
+#define ATA_SENSE_NOT_READY		0x02    /* no access to drive */
+#define ATA_SENSE_MEDIUM_ERROR		0x03    /* non-recovered data error */
+#define ATA_SENSE_HARDWARE_ERROR	0x04    /* non-recoverable HW failure */
+#define ATA_SENSE_ILLEGAL_REQUEST	0x05    /* invalid command param(s) */
+#define ATA_SENSE_UNIT_ATTENTION	0x06    /* media changed */
+#define ATA_SENSE_DATA_PROTECT		0x07    /* write protect */
+#define ATA_SENSE_BLANK_CHECK		0x08    /* blank check */
+#define ATA_SENSE_VENDOR_SPECIFIC	0x09    /* vendor specific skey */
+#define ATA_SENSE_COPY_ABORTED		0x0a    /* copy aborted */
+#define ATA_SENSE_ABORTED_COMMAND	0x0b    /* command aborted, try again */
+#define ATA_SENSE_EQUAL			0x0c    /* equal */
+#define ATA_SENSE_VOLUME_OVERFLOW	0x0d    /* volume overflow */
+#define ATA_SENSE_MISCOMPARE		0x0e    /* data dont match the medium */
+#define ATA_SENSE_RESERVED		0x0f
+#define	ATA_SENSE_ILI			0x20	/* flag bit outside key mask */
+#define	ATA_SENSE_EOM			0x40	/* flag bit outside key mask */
+#define	ATA_SENSE_FILEMARK		0x80	/* flag bit outside key mask */
+
+    u_int32_t   cmd_info;		/* cmd information */
+    u_int8_t	sense_length;		/* additional sense len (n-7) */
+    u_int32_t   cmd_specific_info;	/* additional cmd spec info */
+    u_int8_t    asc;			/* additional sense code */
+    u_int8_t    ascq;			/* additional sense code qual */
+    u_int8_t    replaceable_unit_code;	/* replaceable unit code */
+    u_int8_t	specific;		/* sense key specific */
+#define	ATA_SENSE_SPEC_VALID	0x80
+#define	ATA_SENSE_SPEC_MASK	0x7f
+
+    u_int8_t	specific1;		/* sense key specific */
+    u_int8_t	specific2;		/* sense key specific */
+} __packed;
+
+struct ata_ioc_request {	/* argument of IOCATAREQUEST */
+    union {
+	struct {
+	    u_int8_t            command;
+	    u_int8_t            feature;
+	    u_int64_t           lba;
+	    u_int16_t           count;
+	} ata;
+	struct {
+	    char                ccb[16];
+	    struct atapi_sense	sense;
+	} atapi;
+    } u;				/* ata and atapi forms share storage */
+    caddr_t             data;
+    int                 count;
+    int                 flags;	/* presumably ATA_CMD_* bits, below - confirm */
+#define ATA_CMD_CONTROL                 0x01
+#define ATA_CMD_READ                    0x02
+#define ATA_CMD_WRITE                   0x04
+#define ATA_CMD_ATAPI                   0x08
+
+    int                 timeout;
+    int                 error;
+};
+
+struct ata_security_password {
+	u_int16_t		ctrl;
+#define ATA_SECURITY_PASSWORD_USER	0x0000
+#define ATA_SECURITY_PASSWORD_MASTER	0x0001
+#define ATA_SECURITY_ERASE_NORMAL	0x0000
+#define ATA_SECURITY_ERASE_ENHANCED	0x0002
+#define ATA_SECURITY_LEVEL_HIGH		0x0000
+#define ATA_SECURITY_LEVEL_MAXIMUM	0x0100
+
+	u_int8_t		password[32];
+	u_int16_t		revision;
+	u_int16_t		reserved[238];	/* fields total 512 bytes */
+};
+
+/* per-device ATA ioctl calls */
+#define IOCATAREQUEST           _IOWR('a', 100, struct ata_ioc_request)
+#define IOCATAGPARM             _IOR('a', 101, struct ata_params)
+#define IOCATAGMODE             _IOR('a', 102, int)
+#define IOCATASMODE             _IOW('a', 103, int)
+
+#define IOCATAGSPINDOWN		_IOR('a', 104, int)
+#define IOCATASSPINDOWN		_IOW('a', 105, int)
+
+
+struct ata_ioc_raid_config {	/* used by IOCATARAIDCREATE / ADDSPARE */
+	    int                 lun;
+	    int                 type;	/* presumably an AR_* type, below */
+#define AR_JBOD                         0x0001
+#define AR_SPAN                         0x0002
+#define AR_RAID0                        0x0004
+#define AR_RAID1                        0x0008
+#define AR_RAID01                       0x0010
+#define AR_RAID3                        0x0020
+#define AR_RAID4                        0x0040
+#define AR_RAID5                        0x0080
+
+	    int                 interleave;
+	    int                 status;	/* presumably AR_READY etc., below */
+#define AR_READY                        1
+#define AR_DEGRADED                     2
+#define AR_REBUILDING                   4
+
+	    int                 progress;
+	    int                 total_disks;
+	    int                 disks[16];
+};
+
+struct ata_ioc_raid_status {	/* used by IOCATARAIDSTATUS */
+	    int                 lun;
+	    int                 type;
+	    int                 interleave;
+	    int                 status;
+	    int                 progress;
+	    int                 total_disks;
+	    struct {
+		    int		state;	/* presumably AR_DISK_* bits, below */
+#define AR_DISK_ONLINE			0x01
+#define AR_DISK_PRESENT			0x02
+#define AR_DISK_SPARE			0x04
+		    int		lun;
+	    } disks[16];
+};
+
+/* ATA RAID ioctl calls */
+#define IOCATARAIDCREATE        _IOWR('a', 200, struct ata_ioc_raid_config)
+#define IOCATARAIDDELETE        _IOW('a', 201, int)
+#define IOCATARAIDSTATUS        _IOWR('a', 202, struct ata_ioc_raid_status)
+#define IOCATARAIDADDSPARE      _IOW('a', 203, struct ata_ioc_raid_config)
+#define IOCATARAIDREBUILD       _IOW('a', 204, int)
+
+#endif /* _SYS_ATA_H_ */
diff --git a/usr/contrib/freebsd/sys/linker_set.h b/usr/contrib/freebsd/sys/linker_set.h
new file mode 100644
index 0000000000..393dfbc131
--- /dev/null
+++ b/usr/contrib/freebsd/sys/linker_set.h
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 1999 John D. Polstra
+ * Copyright (c) 1999,2001 Peter Wemm <peter@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/sys/linker_set.h 215701 2010-11-22 19:32:54Z dim $
+ */
+
+#ifndef _SYS_LINKER_SET_H_
+#define _SYS_LINKER_SET_H_
+
+#ifdef	__FreeBSD__
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+#else
+#ifndef	_COMPAT_FREEBSD_SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+#endif
+
+/*
+ * The following macros are used to declare global sets of objects, which
+ * are collected by the linker into a `linker_set' as defined below.
+ * For ELF, this is done by constructing a separate section for each set.
+ */
+
+/*
+ * Private macros, not to be used outside this header file.
+ */
+#ifdef __GNUCLIKE___SECTION
+#ifdef	__FreeBSD__
+#define __MAKE_SET(set, sym)						\
+	__GLOBL(__CONCAT(__start_set_,set));				\
+	__GLOBL(__CONCAT(__stop_set_,set));				\
+	static void const * const __set_##set##_sym_##sym 		\
+	__section("set_" #set) __used = &sym
+#else
+#define __MAKE_SET(set, sym)						\
+	static void const * const __set_##set##_sym_##sym 		\
+	__section("set_" #set) __used = &sym
+#endif
+#else /* !__GNUCLIKE___SECTION */
+#ifndef lint
+#error this file needs to be ported to your compiler
+#endif /* lint */
+#define __MAKE_SET(set, sym)	extern void const * const (__set_##set##_sym_##sym)
+#endif /* __GNUCLIKE___SECTION */
+
+/*
+ * Public macros.
+ */
+#define TEXT_SET(set, sym)	__MAKE_SET(set, sym)
+#define DATA_SET(set, sym)	__MAKE_SET(set, sym)
+#define BSS_SET(set, sym)	__MAKE_SET(set, sym)
+#define ABS_SET(set, sym)	__MAKE_SET(set, sym)
+#define SET_ENTRY(set, sym)	__MAKE_SET(set, sym)
+
+/*
+ * Declare the start/stop symbols of a linker set before referring to it.
+ */
+#ifdef	__FreeBSD__
+#define SET_DECLARE(set, ptype)						\
+	extern ptype *__CONCAT(__start_set_,set);			\
+	extern ptype *__CONCAT(__stop_set_,set)
+#else
+#define	SET_DECLARE(set, ptype)						\
+	_Pragma(__XSTRING(weak __CONCAT(__start_set_,set)))		\
+	_Pragma(__XSTRING(weak __CONCAT(__stop_set_,set)))		\
+	extern ptype *__CONCAT(__start_set_,set);			\
+	extern ptype *__CONCAT(__stop_set_,set)
+#endif
+
+#define SET_BEGIN(set)							\
+	(&__CONCAT(__start_set_,set))
+#define SET_LIMIT(set)							\
+	(&__CONCAT(__stop_set_,set))
+
+/*
+ * Iterate over all the elements of a set.
+ *
+ * Sets always contain addresses of things, and "pvar" points to words
+ * containing those addresses.  Thus it must be declared as "type **pvar",
+ * and the address of each set item is obtained inside the loop by "*pvar".
+ */
+#define SET_FOREACH(pvar, set)						\
+	for (pvar = SET_BEGIN(set); pvar < SET_LIMIT(set); pvar++)
+
+#define SET_ITEM(set, i)						\
+	((SET_BEGIN(set))[i])
+
+/*
+ * Provide a count of the items in a set.
+ */
+#define SET_COUNT(set)							\
+	(SET_LIMIT(set) - SET_BEGIN(set))
+
+#endif	/* _SYS_LINKER_SET_H_ */
diff --git a/usr/contrib/freebsd/sys/tree.h b/usr/contrib/freebsd/sys/tree.h
new file mode 100644
index 0000000000..6b47e247bb
--- /dev/null
+++ b/usr/contrib/freebsd/sys/tree.h
@@ -0,0 +1,765 @@
+/*	$NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $	*/
+/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/
+/* $FreeBSD: head/sys/sys/tree.h 189204 2009-03-01 04:57:23Z bms $ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_TREE_H_
+#define	_SYS_TREE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost of each access to a splay tree is therefore O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *	- every search path from the root to a leaf consists of the
+ *	  same number of black nodes,
+ *	- each red node (except for the root) has a black parent,
+ *	- each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)						\
+struct name {								\
+	struct type *sph_root; /* root of the tree */			\
+}
+
+#define SPLAY_INITIALIZER(root)						\
+	{ NULL }
+
+#define SPLAY_INIT(root) do {						\
+	(root)->sph_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)						\
+struct {								\
+	struct type *spe_left; /* left element */			\
+	struct type *spe_right; /* right element */			\
+}
+
+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
+#define SPLAY_ROOT(head)		(head)->sph_root
+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+	
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do {				\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do {				\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *, struct type *);			\
+void name##_SPLAY_MINMAX(struct name *, int);				\
+struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\
+									\
+/* Finds the node with the same key as elm */				\
+static __inline struct type *						\
+name##_SPLAY_FIND(struct name *head, struct type *elm)			\
+{									\
+	if (SPLAY_EMPTY(head))						\
+		return(NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0)				\
+		return (head->sph_root);				\
+	return (NULL);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
+{									\
+	name##_SPLAY(head, elm);					\
+	if (SPLAY_RIGHT(elm, field) != NULL) {				\
+		elm = SPLAY_RIGHT(elm, field);				\
+		while (SPLAY_LEFT(elm, field) != NULL) {		\
+			elm = SPLAY_LEFT(elm, field);			\
+		}							\
+	} else								\
+		elm = NULL;						\
+	return (elm);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_MIN_MAX(struct name *head, int val)			\
+{									\
+	name##_SPLAY_MINMAX(head, val);					\
+        return (SPLAY_ROOT(head));					\
+}
+
+/* Generates the splay tree functions.  The main splay operation,
+ * name##_SPLAY, moves the node close to the key of elm to the top.
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)				\
+struct type *								\
+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
+{									\
+    if (SPLAY_EMPTY(head)) {						\
+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;	\
+    } else {								\
+	    int __comp;							\
+	    name##_SPLAY(head, elm);					\
+	    __comp = (cmp)(elm, (head)->sph_root);			\
+	    if(__comp < 0) {						\
+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
+		    SPLAY_LEFT((head)->sph_root, field) = NULL;		\
+	    } else if (__comp > 0) {					\
+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
+		    SPLAY_RIGHT((head)->sph_root, field) = NULL;	\
+	    } else							\
+		    return ((head)->sph_root);				\
+    }									\
+    (head)->sph_root = (elm);						\
+    return (NULL);							\
+}									\
+									\
+struct type *								\
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
+{									\
+	struct type *__tmp;						\
+	if (SPLAY_EMPTY(head))						\
+		return (NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0) {			\
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) {	\
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else {						\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm);			\
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
+		}							\
+		return (elm);						\
+	}								\
+	return (NULL);							\
+}									\
+									\
+void									\
+name##_SPLAY(struct name *head, struct type *elm)			\
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+	int __comp;							\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {		\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) < 0){			\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) > 0){			\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}									\
+									\
+/* Splay the minimum (__comp < 0) or maximum (__comp > 0) element to	\
+ * the root.  A zero __comp leaves the tree untouched.			\
+ */									\
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while (__comp != 0) {						\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp < 0){				\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp > 0) {				\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}
+
+#define SPLAY_NEGINF	-1
+#define SPLAY_INF	1
+
+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)					\
+	for ((x) = SPLAY_MIN(name, head);				\
+	     (x) != NULL;						\
+	     (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)						\
+struct name {								\
+	struct type *rbh_root; /* root of the tree */			\
+}
+
+#define RB_INITIALIZER(root)						\
+	{ NULL }
+
+#define RB_INIT(root) do {						\
+	(root)->rbh_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK	0
+#define RB_RED		1
+#define RB_ENTRY(type)							\
+struct {								\
+	struct type *rbe_left;		/* left element */		\
+	struct type *rbe_right;		/* right element */		\
+	struct type *rbe_parent;	/* parent element */		\
+	int rbe_color;			/* node color */		\
+}
+
+#define RB_LEFT(elm, field)		(elm)->field.rbe_left
+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
+#define RB_PARENT(elm, field)		(elm)->field.rbe_parent
+#define RB_COLOR(elm, field)		(elm)->field.rbe_color
+#define RB_ROOT(head)			(head)->rbh_root
+#define RB_EMPTY(head)			(RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do {					\
+	RB_PARENT(elm, field) = parent;					\
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;		\
+	RB_COLOR(elm, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) do {				\
+	RB_COLOR(black, field) = RB_BLACK;				\
+	RB_COLOR(red, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)	do {} while (0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {			\
+	(tmp) = RB_RIGHT(elm, field);					\
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) {	\
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {	\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_LEFT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {			\
+	(tmp) = RB_LEFT(elm, field);					\
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) {	\
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {	\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_RIGHT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes for the functions defined by RB_GENERATE */
+#define	RB_PROTOTYPE(name, type, field, cmp)				\
+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define	RB_PROTOTYPE_STATIC(name, type, field, cmp)			\
+	RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)		\
+attr void name##_RB_INSERT_COLOR(struct name *, struct type *);		\
+attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+attr struct type *name##_RB_REMOVE(struct name *, struct type *);	\
+attr struct type *name##_RB_INSERT(struct name *, struct type *);	\
+attr struct type *name##_RB_FIND(struct name *, struct type *);		\
+attr struct type *name##_RB_NFIND(struct name *, struct type *);	\
+attr struct type *name##_RB_NEXT(struct type *);			\
+attr struct type *name##_RB_PREV(struct type *);			\
+attr struct type *name##_RB_MINMAX(struct name *, int);			\
+									\
+
+/* Generates the red-black tree functions declared by RB_PROTOTYPE.
+ * The _STATIC variant makes them file-local (static) and __unused.
+ */
+#define	RB_GENERATE(name, type, field, cmp)				\
+	RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define	RB_GENERATE_STATIC(name, type, field, cmp)			\
+	RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)		\
+attr void								\
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
+{									\
+	struct type *parent, *gparent, *tmp;				\
+	while ((parent = RB_PARENT(elm, field)) != NULL &&		\
+	    RB_COLOR(parent, field) == RB_RED) {			\
+		gparent = RB_PARENT(parent, field);			\
+		if (parent == RB_LEFT(gparent, field)) {		\
+			tmp = RB_RIGHT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_RIGHT(parent, field) == elm) {		\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\
+		} else {						\
+			tmp = RB_LEFT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_LEFT(parent, field) == elm) {		\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_LEFT(head, gparent, tmp, field);	\
+		}							\
+	}								\
+	RB_COLOR(head->rbh_root, field) = RB_BLACK;			\
+}									\
+									\
+attr void								\
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{									\
+	struct type *tmp;						\
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&	\
+	    elm != RB_ROOT(head)) {					\
+		if (RB_LEFT(parent, field) == elm) {			\
+			tmp = RB_RIGHT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_RIGHT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft;		\
+					if ((oleft = RB_LEFT(tmp, field)) \
+					    != NULL)			\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_RIGHT(tmp, field))		\
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		} else {						\
+			tmp = RB_LEFT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_LEFT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright;		\
+					if ((oright = RB_RIGHT(tmp, field)) \
+					    != NULL)			\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_LEFT(tmp, field))		\
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		}							\
+	}								\
+	if (elm)							\
+		RB_COLOR(elm, field) = RB_BLACK;			\
+}									\
+									\
+attr struct type *							\
+name##_RB_REMOVE(struct name *head, struct type *elm)			\
+{									\
+	struct type *child, *parent, *old = elm;			\
+	int color;							\
+	if (RB_LEFT(elm, field) == NULL)				\
+		child = RB_RIGHT(elm, field);				\
+	else if (RB_RIGHT(elm, field) == NULL)				\
+		child = RB_LEFT(elm, field);				\
+	else {								\
+		struct type *left;					\
+		elm = RB_RIGHT(elm, field);				\
+		while ((left = RB_LEFT(elm, field)) != NULL)		\
+			elm = left;					\
+		child = RB_RIGHT(elm, field);				\
+		parent = RB_PARENT(elm, field);				\
+		color = RB_COLOR(elm, field);				\
+		if (child)						\
+			RB_PARENT(child, field) = parent;		\
+		if (parent) {						\
+			if (RB_LEFT(parent, field) == elm)		\
+				RB_LEFT(parent, field) = child;		\
+			else						\
+				RB_RIGHT(parent, field) = child;	\
+			RB_AUGMENT(parent);				\
+		} else							\
+			RB_ROOT(head) = child;				\
+		if (RB_PARENT(elm, field) == old)			\
+			parent = elm;					\
+		(elm)->field = (old)->field;				\
+		if (RB_PARENT(old, field)) {				\
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else						\
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field));		\
+		} else							\
+			RB_ROOT(head) = elm;				\
+		RB_PARENT(RB_LEFT(old, field), field) = elm;		\
+		if (RB_RIGHT(old, field))				\
+			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\
+		if (parent) {						\
+			left = parent;					\
+			do {						\
+				RB_AUGMENT(left);			\
+			} while ((left = RB_PARENT(left, field)) != NULL); \
+		}							\
+		goto color;						\
+	}								\
+	parent = RB_PARENT(elm, field);					\
+	color = RB_COLOR(elm, field);					\
+	if (child)							\
+		RB_PARENT(child, field) = parent;			\
+	if (parent) {							\
+		if (RB_LEFT(parent, field) == elm)			\
+			RB_LEFT(parent, field) = child;			\
+		else							\
+			RB_RIGHT(parent, field) = child;		\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = child;					\
+color:									\
+	if (color == RB_BLACK)						\
+		name##_RB_REMOVE_COLOR(head, parent, child);		\
+	return (old);							\
+}									\
+									\
+/* Inserts a node into the RB tree */					\
+attr struct type *							\
+name##_RB_INSERT(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp;						\
+	struct type *parent = NULL;					\
+	int comp = 0;							\
+	tmp = RB_ROOT(head);						\
+	while (tmp) {							\
+		parent = tmp;						\
+		comp = (cmp)(elm, parent);				\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	RB_SET(elm, parent, field);					\
+	if (parent != NULL) {						\
+		if (comp < 0)						\
+			RB_LEFT(parent, field) = elm;			\
+		else							\
+			RB_RIGHT(parent, field) = elm;			\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = elm;					\
+	name##_RB_INSERT_COLOR(head, elm);				\
+	return (NULL);							\
+}									\
+									\
+/* Looks up the node whose key compares equal to elm */			\
+attr struct type *							\
+name##_RB_FIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *cur;						\
+	int order;							\
+	for (cur = RB_ROOT(head); cur != NULL; ) {			\
+		order = cmp(elm, cur);					\
+		if (order == 0)						\
+			return (cur);					\
+		if (order < 0)						\
+			cur = RB_LEFT(cur, field);			\
+		else							\
+			cur = RB_RIGHT(cur, field);			\
+	}								\
+	return (NULL);							\
+}									\
+									\
+/* Finds the first node greater than or equal to the search key */	\
+attr struct type *							\
+name##_RB_NFIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *res = NULL;					\
+	int comp;							\
+	while (tmp) {							\
+		comp = cmp(elm, tmp);					\
+		if (comp < 0) {						\
+			res = tmp;					\
+			tmp = RB_LEFT(tmp, field);			\
+		}							\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (res);							\
+}									\
+									\
+/* ARGSUSED */								\
+attr struct type *							\
+name##_RB_NEXT(struct type *elm)					\
+{									\
+	if (RB_RIGHT(elm, field)) {					\
+		elm = RB_RIGHT(elm, field);				\
+		while (RB_LEFT(elm, field))				\
+			elm = RB_LEFT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+									\
+/* ARGSUSED */								\
+attr struct type *							\
+name##_RB_PREV(struct type *elm)					\
+{									\
+	if (RB_LEFT(elm, field)) {					\
+		elm = RB_LEFT(elm, field);				\
+		while (RB_RIGHT(elm, field))				\
+			elm = RB_RIGHT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+									\
+attr struct type *							\
+name##_RB_MINMAX(struct name *head, int val)				\
+{									\
+	struct type *cur, *last = NULL;					\
+	/* Negative val descends left, anything else right. */		\
+	for (cur = RB_ROOT(head); cur != NULL; ) {			\
+		last = cur;						\
+		if (val < 0)						\
+			cur = RB_LEFT(cur, field);			\
+		else							\
+			cur = RB_RIGHT(cur, field);			\
+	}								\
+	return (last);							\
+}
+
+#define RB_NEGINF	-1
+#define RB_INF	1
+
+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y)	name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y)	name##_RB_NEXT(y)
+#define RB_PREV(name, x, y)	name##_RB_PREV(y)
+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head)					\
+	for ((x) = RB_MIN(name, head);					\
+	     (x) != NULL;						\
+	     (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_FROM(x, name, y)					\
+	for ((x) = (y);							\
+	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);	\
+	     (x) = (y))
+
+#define RB_FOREACH_SAFE(x, name, head, y)				\
+	for ((x) = RB_MIN(name, head);					\
+	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);	\
+	     (x) = (y))
+
+#define RB_FOREACH_REVERSE(x, name, head)				\
+	for ((x) = RB_MAX(name, head);					\
+	     (x) != NULL;						\
+	     (x) = name##_RB_PREV(x))
+
+#define RB_FOREACH_REVERSE_FROM(x, name, y)				\
+	for ((x) = (y);							\
+	    ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);	\
+	     (x) = (y))
+
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y)			\
+	for ((x) = RB_MAX(name, head);					\
+	    ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);	\
+	     (x) = (y))
+
+#endif	/* _SYS_TREE_H_ */
diff --git a/usr/contrib/freebsd/x86/apicreg.h b/usr/contrib/freebsd/x86/apicreg.h
new file mode 100644
index 0000000000..24006e2733
--- /dev/null
+++ b/usr/contrib/freebsd/x86/apicreg.h
@@ -0,0 +1,455 @@
+/*-
+ * Copyright (c) 1996, by Peter Wemm and Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/x86/include/apicreg.h 259140 2013-12-09 21:08:52Z jhb $
+ */
+
+#ifndef _X86_APICREG_H_
+#define _X86_APICREG_H_
+
+/*
+ * Local && I/O APIC definitions.
+ */
+
+/*
+ * Pentium P54C+ Built-in APIC
+ * (Advanced Programmable Interrupt Controller)
+ * 
+ * Base Address of Built-in APIC in memory location
+ * is 0xfee00000.
+ * 
+ * Map of APIC Registers:
+ * 
+ * Offset (hex)    Description                     Read/Write state
+ * 000             Reserved
+ * 010             Reserved
+ * 020 ID          Local APIC ID                   R/W
+ * 030 VER         Local APIC Version              R
+ * 040             Reserved
+ * 050             Reserved
+ * 060             Reserved
+ * 070             Reserved
+ * 080             Task Priority Register          R/W
+ * 090             Arbitration Priority Register   R
+ * 0A0             Processor Priority Register     R
+ * 0B0             EOI Register                    W
+ * 0C0 RRR         Remote read                     R
+ * 0D0             Logical Destination             R/W
+ * 0E0             Destination Format Register     0..27 R;  28..31 R/W
+ * 0F0 SVR         Spurious Interrupt Vector Reg.  0..3  R;  4..9   R/W
+ * 100             ISR  000-031                    R
+ * 110             ISR  032-063                    R
+ * 120             ISR  064-095                    R
+ * 130             ISR  096-127                    R
+ * 140             ISR  128-159                    R
+ * 150             ISR  160-191                    R
+ * 160             ISR  192-223                    R
+ * 170             ISR  224-255                    R
+ * 180             TMR  000-031                    R
+ * 190             TMR  032-063                    R
+ * 1A0             TMR  064-095                    R
+ * 1B0             TMR  096-127                    R
+ * 1C0             TMR  128-159                    R
+ * 1D0             TMR  160-191                    R
+ * 1E0             TMR  192-223                    R
+ * 1F0             TMR  224-255                    R
+ * 200             IRR  000-031                    R
+ * 210             IRR  032-063                    R
+ * 220             IRR  064-095                    R
+ * 230             IRR  096-127                    R
+ * 240             IRR  128-159                    R
+ * 250             IRR  160-191                    R
+ * 260             IRR  192-223                    R
+ * 270             IRR  224-255                    R
+ * 280             Error Status Register           R
+ * 290             Reserved
+ * 2A0             Reserved
+ * 2B0             Reserved
+ * 2C0             Reserved
+ * 2D0             Reserved
+ * 2E0             Reserved
+ * 2F0             Local Vector Table (CMCI)       R/W
+ * 300 ICR_LOW     Interrupt Command Reg. (0-31)   R/W
+ * 310 ICR_HI      Interrupt Command Reg. (32-63)  R/W
+ * 320             Local Vector Table (Timer)      R/W
+ * 330             Local Vector Table (Thermal)    R/W (PIV+)
+ * 340             Local Vector Table (Performance) R/W (P6+)
+ * 350 LVT1        Local Vector Table (LINT0)      R/W
+ * 360 LVT2        Local Vector Table (LINT1)      R/W
+ * 370 LVT3        Local Vector Table (ERROR)      R/W
+ * 380             Initial Count Reg. for Timer    R/W
+ * 390             Current Count of Timer          R
+ * 3A0             Reserved
+ * 3B0             Reserved
+ * 3C0             Reserved
+ * 3D0             Reserved
+ * 3E0             Timer Divide Configuration Reg. R/W
+ * 3F0             Reserved
+ */
+
+
+/******************************************************************************
+ * global defines, etc.
+ */
+
+
+/******************************************************************************
+ * LOCAL APIC structure
+ */
+
+#ifndef LOCORE
+#include <sys/types.h>
+
+#define PAD3	int : 32; int : 32; int : 32	/* three unnamed 32-bit bit-fields (96 bits); caller supplies the final ';' */
+#define PAD4	int : 32; int : 32; int : 32; int : 32	/* four unnamed 32-bit bit-fields (128 bits); caller supplies the final ';' */
+
+struct LAPIC {	/* each 32-bit member plus PAD3, or a lone PAD4, fills one 128-bit slot */
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	u_int32_t id;		PAD3;	/* 0x020 */
+	u_int32_t version;	PAD3;	/* 0x030 */
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	u_int32_t tpr;		PAD3;	/* 0x080 */
+	u_int32_t apr;		PAD3;	/* 0x090 */
+	u_int32_t ppr;		PAD3;	/* 0x0a0 */
+	u_int32_t eoi;		PAD3;	/* 0x0b0 */
+	/* reserved */		PAD4;
+	u_int32_t ldr;		PAD3;	/* 0x0d0 */
+	u_int32_t dfr;		PAD3;	/* 0x0e0 */
+	u_int32_t svr;		PAD3;	/* 0x0f0 */
+	u_int32_t isr0;		PAD3;	/* 0x100 */
+	u_int32_t isr1;		PAD3;	/* 0x110 */
+	u_int32_t isr2;		PAD3;	/* 0x120 */
+	u_int32_t isr3;		PAD3;	/* 0x130 */
+	u_int32_t isr4;		PAD3;	/* 0x140 */
+	u_int32_t isr5;		PAD3;	/* 0x150 */
+	u_int32_t isr6;		PAD3;	/* 0x160 */
+	u_int32_t isr7;		PAD3;	/* 0x170 */
+	u_int32_t tmr0;		PAD3;	/* 0x180 */
+	u_int32_t tmr1;		PAD3;	/* 0x190 */
+	u_int32_t tmr2;		PAD3;	/* 0x1a0 */
+	u_int32_t tmr3;		PAD3;	/* 0x1b0 */
+	u_int32_t tmr4;		PAD3;	/* 0x1c0 */
+	u_int32_t tmr5;		PAD3;	/* 0x1d0 */
+	u_int32_t tmr6;		PAD3;	/* 0x1e0 */
+	u_int32_t tmr7;		PAD3;	/* 0x1f0 */
+	u_int32_t irr0;		PAD3;	/* 0x200 */
+	u_int32_t irr1;		PAD3;	/* 0x210 */
+	u_int32_t irr2;		PAD3;	/* 0x220 */
+	u_int32_t irr3;		PAD3;	/* 0x230 */
+	u_int32_t irr4;		PAD3;	/* 0x240 */
+	u_int32_t irr5;		PAD3;	/* 0x250 */
+	u_int32_t irr6;		PAD3;	/* 0x260 */
+	u_int32_t irr7;		PAD3;	/* 0x270 */
+	u_int32_t esr;		PAD3;	/* 0x280 */
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	u_int32_t lvt_cmci;	PAD3;	/* 0x2f0 */
+	u_int32_t icr_lo;	PAD3;	/* 0x300 */
+	u_int32_t icr_hi;	PAD3;	/* 0x310 */
+	u_int32_t lvt_timer;	PAD3;	/* 0x320 */
+	u_int32_t lvt_thermal;	PAD3;	/* 0x330 */
+	u_int32_t lvt_pcint;	PAD3;	/* 0x340 */
+	u_int32_t lvt_lint0;	PAD3;	/* 0x350 */
+	u_int32_t lvt_lint1;	PAD3;	/* 0x360 */
+	u_int32_t lvt_error;	PAD3;	/* 0x370 */
+	u_int32_t icr_timer;	PAD3;	/* 0x380 */
+	u_int32_t ccr_timer;	PAD3;	/* 0x390 */
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	/* reserved */		PAD4;
+	u_int32_t dcr_timer;	PAD3;	/* 0x3e0 */
+	/* reserved */		PAD4;
+};
+
+typedef struct LAPIC lapic_t;
+
+/******************************************************************************
+ * I/O APIC structure
+ */
+
+struct IOAPIC {	/* two 32-bit registers, each padded out to 128 bits by PAD3 */
+	u_int32_t ioregsel;	PAD3;	/* offset 0x00 */
+	u_int32_t iowin;	PAD3;	/* offset 0x10, same value as IOAPIC_WINDOW */
+};
+
+typedef struct IOAPIC ioapic_t;
+
+#undef PAD4
+#undef PAD3
+
+#endif  /* !LOCORE */
+
+
+/******************************************************************************
+ * various code 'logical' values
+ */
+
+/******************************************************************************
+ * LOCAL APIC defines
+ */
+
+/* default physical locations of LOCAL (CPU) APICs */
+#define DEFAULT_APIC_BASE	0xfee00000
+
+/* constants relating to APIC ID registers */
+#define APIC_ID_MASK		0xff000000
+#define	APIC_ID_SHIFT		24
+#define	APIC_ID_CLUSTER		0xf0
+#define	APIC_ID_CLUSTER_ID	0x0f
+#define	APIC_MAX_CLUSTER	0xe
+#define	APIC_MAX_INTRACLUSTER_ID 3
+#define	APIC_ID_CLUSTER_SHIFT	4
+
+/* fields in VER */
+#define APIC_VER_VERSION	0x000000ff
+#define APIC_VER_MAXLVT		0x00ff0000
+#define MAXLVTSHIFT		16
+#define APIC_VER_EOI_SUPPRESSION 0x01000000
+
+/* fields in LDR */
+#define	APIC_LDR_RESERVED	0x00ffffff
+
+/* fields in DFR */
+#define	APIC_DFR_RESERVED	0x0fffffff
+#define	APIC_DFR_MODEL_MASK	0xf0000000
+#define	APIC_DFR_MODEL_FLAT	0xf0000000
+#define	APIC_DFR_MODEL_CLUSTER	0x00000000
+
+/* fields in SVR */
+#define APIC_SVR_VECTOR		0x000000ff
+#define APIC_SVR_VEC_PROG	0x000000f0
+#define APIC_SVR_VEC_FIX	0x0000000f
+#define APIC_SVR_ENABLE		0x00000100
+# define APIC_SVR_SWDIS		0x00000000
+# define APIC_SVR_SWEN		0x00000100
+#define APIC_SVR_FOCUS		0x00000200
+# define APIC_SVR_FEN		0x00000000
+# define APIC_SVR_FDIS		0x00000200
+#define APIC_SVR_EOI_SUPPRESSION 0x00001000
+
+/* fields in TPR */
+#define APIC_TPR_PRIO		0x000000ff
+# define APIC_TPR_INT		0x000000f0
+# define APIC_TPR_SUB		0x0000000f
+
+/* fields in ESR */
+#define	APIC_ESR_SEND_CS_ERROR		0x00000001
+#define	APIC_ESR_RECEIVE_CS_ERROR	0x00000002
+#define	APIC_ESR_SEND_ACCEPT		0x00000004
+#define	APIC_ESR_RECEIVE_ACCEPT		0x00000008
+#define	APIC_ESR_SEND_ILLEGAL_VECTOR	0x00000020
+#define	APIC_ESR_RECEIVE_ILLEGAL_VECTOR	0x00000040
+#define	APIC_ESR_ILLEGAL_REGISTER	0x00000080
+
+/* fields in ICR_LOW */
+#define APIC_VECTOR_MASK	0x000000ff
+
+#define APIC_DELMODE_MASK	0x00000700
+# define APIC_DELMODE_FIXED	0x00000000
+# define APIC_DELMODE_LOWPRIO	0x00000100
+# define APIC_DELMODE_SMI	0x00000200
+# define APIC_DELMODE_RR	0x00000300
+# define APIC_DELMODE_NMI	0x00000400
+# define APIC_DELMODE_INIT	0x00000500
+# define APIC_DELMODE_STARTUP	0x00000600
+# define APIC_DELMODE_RESV	0x00000700
+
+#define APIC_DESTMODE_MASK	0x00000800
+# define APIC_DESTMODE_PHY	0x00000000
+# define APIC_DESTMODE_LOG	0x00000800
+
+#define APIC_DELSTAT_MASK	0x00001000
+# define APIC_DELSTAT_IDLE	0x00000000
+# define APIC_DELSTAT_PEND	0x00001000
+
+#define APIC_RESV1_MASK		0x00002000
+
+#define APIC_LEVEL_MASK		0x00004000
+# define APIC_LEVEL_DEASSERT	0x00000000
+# define APIC_LEVEL_ASSERT	0x00004000
+
+#define APIC_TRIGMOD_MASK	0x00008000
+# define APIC_TRIGMOD_EDGE	0x00000000
+# define APIC_TRIGMOD_LEVEL	0x00008000
+
+#define APIC_RRSTAT_MASK	0x00030000
+# define APIC_RRSTAT_INVALID	0x00000000
+# define APIC_RRSTAT_INPROG	0x00010000
+# define APIC_RRSTAT_VALID	0x00020000
+# define APIC_RRSTAT_RESV	0x00030000
+
+#define APIC_DEST_MASK		0x000c0000
+# define APIC_DEST_DESTFLD	0x00000000
+# define APIC_DEST_SELF		0x00040000
+# define APIC_DEST_ALLISELF	0x00080000
+# define APIC_DEST_ALLESELF	0x000c0000
+
+#define APIC_RESV2_MASK		0xfff00000
+
+#define	APIC_ICRLO_RESV_MASK	(APIC_RESV1_MASK | APIC_RESV2_MASK)
+
+/* fields in LVT1/2 */
+#define APIC_LVT_VECTOR		0x000000ff
+#define APIC_LVT_DM		0x00000700
+# define APIC_LVT_DM_FIXED	0x00000000
+# define APIC_LVT_DM_SMI	0x00000200
+# define APIC_LVT_DM_NMI	0x00000400
+# define APIC_LVT_DM_INIT	0x00000500
+# define APIC_LVT_DM_EXTINT	0x00000700
+#define APIC_LVT_DS		0x00001000
+#define APIC_LVT_IIPP		0x00002000
+#define APIC_LVT_IIPP_INTALO	0x00002000
+#define APIC_LVT_IIPP_INTAHI	0x00000000
+#define APIC_LVT_RIRR		0x00004000
+#define APIC_LVT_TM		0x00008000
+#define APIC_LVT_M		0x00010000
+
+
+/* fields in LVT Timer */
+#define APIC_LVTT_VECTOR	0x000000ff
+#define APIC_LVTT_DS		0x00001000
+#define APIC_LVTT_M		0x00010000
+#define APIC_LVTT_TM		0x00020000
+# define APIC_LVTT_TM_ONE_SHOT	0x00000000
+# define APIC_LVTT_TM_PERIODIC	0x00020000
+
+
+/* APIC timer current count */
+#define	APIC_TIMER_MAX_COUNT	0xffffffff
+
+/* fields in TDCR */
+#define APIC_TDCR_2		0x00
+#define APIC_TDCR_4		0x01
+#define APIC_TDCR_8		0x02
+#define APIC_TDCR_16		0x03
+#define APIC_TDCR_32		0x08
+#define APIC_TDCR_64		0x09
+#define APIC_TDCR_128		0x0a
+#define APIC_TDCR_1		0x0b
+
+/* LVT table indices */
+#define	APIC_LVT_LINT0		0
+#define	APIC_LVT_LINT1		1
+#define	APIC_LVT_TIMER		2
+#define	APIC_LVT_ERROR		3
+#define	APIC_LVT_PMC		4
+#define	APIC_LVT_THERMAL	5
+#define	APIC_LVT_CMCI		6
+#define	APIC_LVT_MAX		APIC_LVT_CMCI
+
+/******************************************************************************
+ * I/O APIC defines
+ */
+
+/* default physical locations of an IO APIC */
+#define DEFAULT_IO_APIC_BASE	0xfec00000
+
+/* window register offset */
+#define IOAPIC_WINDOW		0x10
+#define IOAPIC_EOIR		0x40
+
+/* indexes into IO APIC */
+#define IOAPIC_ID		0x00
+#define IOAPIC_VER		0x01
+#define IOAPIC_ARB		0x02
+#define IOAPIC_REDTBL		0x10
+#define IOAPIC_REDTBL0		IOAPIC_REDTBL
+#define IOAPIC_REDTBL1		(IOAPIC_REDTBL+0x02)
+#define IOAPIC_REDTBL2		(IOAPIC_REDTBL+0x04)
+#define IOAPIC_REDTBL3		(IOAPIC_REDTBL+0x06)
+#define IOAPIC_REDTBL4		(IOAPIC_REDTBL+0x08)
+#define IOAPIC_REDTBL5		(IOAPIC_REDTBL+0x0a)
+#define IOAPIC_REDTBL6		(IOAPIC_REDTBL+0x0c)
+#define IOAPIC_REDTBL7		(IOAPIC_REDTBL+0x0e)
+#define IOAPIC_REDTBL8		(IOAPIC_REDTBL+0x10)
+#define IOAPIC_REDTBL9		(IOAPIC_REDTBL+0x12)
+#define IOAPIC_REDTBL10		(IOAPIC_REDTBL+0x14)
+#define IOAPIC_REDTBL11		(IOAPIC_REDTBL+0x16)
+#define IOAPIC_REDTBL12		(IOAPIC_REDTBL+0x18)
+#define IOAPIC_REDTBL13		(IOAPIC_REDTBL+0x1a)
+#define IOAPIC_REDTBL14		(IOAPIC_REDTBL+0x1c)
+#define IOAPIC_REDTBL15		(IOAPIC_REDTBL+0x1e)
+#define IOAPIC_REDTBL16		(IOAPIC_REDTBL+0x20)
+#define IOAPIC_REDTBL17		(IOAPIC_REDTBL+0x22)
+#define IOAPIC_REDTBL18		(IOAPIC_REDTBL+0x24)
+#define IOAPIC_REDTBL19		(IOAPIC_REDTBL+0x26)
+#define IOAPIC_REDTBL20		(IOAPIC_REDTBL+0x28)
+#define IOAPIC_REDTBL21		(IOAPIC_REDTBL+0x2a)
+#define IOAPIC_REDTBL22		(IOAPIC_REDTBL+0x2c)
+#define IOAPIC_REDTBL23		(IOAPIC_REDTBL+0x2e)
+
+/* fields in VER */
+#define IOART_VER_VERSION	0x000000ff
+#define IOART_VER_MAXREDIR	0x00ff0000
+#define MAXREDIRSHIFT		16
+
+/*
+ * fields in the IO APIC's redirection table entries
+ */
+#define IOART_DEST	APIC_ID_MASK	/* broadcast addr: all APICs */
+
+#define IOART_RESV	0x00fe0000	/* reserved */
+
+#define IOART_INTMASK	0x00010000	/* R/W: INTerrupt mask */
+# define IOART_INTMCLR	0x00000000	/*       clear, allow INTs */
+# define IOART_INTMSET	0x00010000	/*       set, inhibit INTs */
+
+#define IOART_TRGRMOD	0x00008000	/* R/W: trigger mode */
+# define IOART_TRGREDG	0x00000000	/*       edge */
+# define IOART_TRGRLVL	0x00008000	/*       level */
+
+#define IOART_REM_IRR	0x00004000	/* RO: remote IRR */
+
+#define IOART_INTPOL	0x00002000	/* R/W: INT input pin polarity */
+# define IOART_INTAHI	0x00000000	/*      active high */
+# define IOART_INTALO	0x00002000	/*      active low */
+
+#define IOART_DELIVS	0x00001000	/* RO: delivery status */
+
+#define IOART_DESTMOD	0x00000800	/* R/W: destination mode */
+# define IOART_DESTPHY	0x00000000	/*      physical */
+# define IOART_DESTLOG	0x00000800	/*      logical */
+
+#define IOART_DELMOD	0x00000700	/* R/W: delivery mode */
+# define IOART_DELFIXED	0x00000000	/*       fixed */
+# define IOART_DELLOPRI	0x00000100	/*       lowest priority */
+# define IOART_DELSMI	0x00000200	/*       System Management INT */
+# define IOART_DELRSV1	0x00000300	/*       reserved */
+# define IOART_DELNMI	0x00000400	/*       NMI signal */
+# define IOART_DELINIT	0x00000500	/*       INIT signal */
+# define IOART_DELRSV2	0x00000600	/*       reserved */
+# define IOART_DELEXINT	0x00000700	/*       External INTerrupt */
+
+#define IOART_INTVEC	0x000000ff	/* R/W: INTerrupt vector field */
+
+#endif /* _X86_APICREG_H_ */
diff --git a/usr/contrib/freebsd/x86/mptable.h b/usr/contrib/freebsd/x86/mptable.h
new file mode 100644
index 0000000000..8f3c62a295
--- /dev/null
+++ b/usr/contrib/freebsd/x86/mptable.h
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/x86/include/mptable.h 259228 2013-12-11 21:19:04Z jhb $
+ */
+
+#ifndef __MACHINE_MPTABLE_H__
+#define	__MACHINE_MPTABLE_H__
+
+enum busTypes {
+    NOBUS = 0,
+    CBUS = 1,
+    CBUSII = 2,
+    EISA = 3,
+    ISA = 6,
+    MCA = 9,
+    PCI = 13,
+    XPRESS = 18,
+    MAX_BUSTYPE = 18,
+    UNKNOWN_BUSTYPE = 0xff
+};
+
+/* MP Floating Pointer Structure */
+typedef struct MPFPS {	/* members sum to 16 bytes */
+	uint8_t	signature[4];
+	uint32_t pap;
+	uint8_t	length;
+	uint8_t	spec_rev;
+	uint8_t	checksum;
+	uint8_t	config_type;
+	uint8_t	mpfb2;	/* presumably tested with the MPFB2_* masks - confirm */
+	uint8_t	mpfb3;
+	uint8_t	mpfb4;
+	uint8_t	mpfb5;
+} __packed *mpfps_t;	/* note: mpfps_t is a POINTER to struct MPFPS */
+
+#define	MPFB2_IMCR_PRESENT	0x80
+#define	MPFB2_MUL_CLK_SRCS	0x40
+
+/* MP Configuration Table Header */
+typedef struct MPCTH {	/* members sum to 44 bytes */
+	uint8_t	signature[4];
+	uint16_t base_table_length;
+	uint8_t	spec_rev;
+	uint8_t	checksum;
+	uint8_t	oem_id[8];
+	uint8_t	product_id[12];
+	uint32_t oem_table_pointer;
+	uint16_t oem_table_size;
+	uint16_t entry_count;
+	uint32_t apic_address;
+	uint16_t extended_table_length;
+	uint8_t	extended_table_checksum;
+	uint8_t	reserved;
+} __packed *mpcth_t;	/* note: mpcth_t is a POINTER to struct MPCTH */
+
+/* Base table entries */
+
+#define	MPCT_ENTRY_PROCESSOR	0
+#define	MPCT_ENTRY_BUS		1
+#define	MPCT_ENTRY_IOAPIC	2
+#define	MPCT_ENTRY_INT		3
+#define	MPCT_ENTRY_LOCAL_INT	4
+
+typedef struct PROCENTRY {	/* members sum to 20 bytes */
+	uint8_t	type;
+	uint8_t	apic_id;
+	uint8_t	apic_version;
+	uint8_t	cpu_flags;	/* presumably PROCENTRY_FLAG_* bits - confirm */
+	uint32_t cpu_signature;
+	uint32_t feature_flags;
+	uint32_t reserved1;
+	uint32_t reserved2;
+} __packed *proc_entry_ptr;	/* note: a POINTER to struct PROCENTRY */
+
+#define PROCENTRY_FLAG_EN	0x01
+#define PROCENTRY_FLAG_BP	0x02
+
+typedef struct BUSENTRY {	/* members sum to 8 bytes */
+	uint8_t	type;
+	uint8_t	bus_id;
+	uint8_t	bus_type[6];
+} __packed *bus_entry_ptr;	/* note: a POINTER to struct BUSENTRY */
+
+typedef struct IOAPICENTRY {	/* members sum to 8 bytes */
+	uint8_t	type;
+	uint8_t	apic_id;
+	uint8_t	apic_version;
+	uint8_t	apic_flags;	/* presumably IOAPICENTRY_FLAG_* bits - confirm */
+	uint32_t apic_address;
+} __packed *io_apic_entry_ptr;	/* note: a POINTER to struct IOAPICENTRY */
+
+#define IOAPICENTRY_FLAG_EN	0x01
+
+typedef struct INTENTRY {	/* members sum to 8 bytes */
+	uint8_t	type;
+	uint8_t	int_type;	/* presumably an INTENTRY_TYPE_* value - confirm */
+	uint16_t int_flags;	/* presumably INTENTRY_FLAGS_* bits - confirm */
+	uint8_t	src_bus_id;
+	uint8_t	src_bus_irq;
+	uint8_t	dst_apic_id;
+	uint8_t	dst_apic_int;
+} __packed *int_entry_ptr;	/* note: a POINTER to struct INTENTRY */
+
+#define	INTENTRY_TYPE_INT  	0
+#define	INTENTRY_TYPE_NMI	1
+#define	INTENTRY_TYPE_SMI	2
+#define	INTENTRY_TYPE_EXTINT	3
+
+#define	INTENTRY_FLAGS_POLARITY			0x3
+#define	INTENTRY_FLAGS_POLARITY_CONFORM		0x0
+#define	INTENTRY_FLAGS_POLARITY_ACTIVEHI	0x1
+#define	INTENTRY_FLAGS_POLARITY_ACTIVELO	0x3
+#define	INTENTRY_FLAGS_TRIGGER			0xc
+#define	INTENTRY_FLAGS_TRIGGER_CONFORM		0x0
+#define	INTENTRY_FLAGS_TRIGGER_EDGE		0x4
+#define	INTENTRY_FLAGS_TRIGGER_LEVEL		0xc
+
+/* Extended table entries */
+
+typedef	struct EXTENTRY {	/* same first two members as SASENTRY, BHDENTRY, CBASMENTRY */
+	uint8_t	type;
+	uint8_t	length;
+} __packed *ext_entry_ptr;	/* note: a POINTER to struct EXTENTRY */
+
+#define	MPCT_EXTENTRY_SAS	0x80
+#define	MPCT_EXTENTRY_BHD	0x81
+#define	MPCT_EXTENTRY_CBASM	0x82
+
+typedef struct SASENTRY {	/* members sum to 20 bytes */
+	uint8_t	type;
+	uint8_t	length;
+	uint8_t	bus_id;
+	uint8_t	address_type;	/* presumably a SASENTRY_TYPE_* value - confirm */
+	uint64_t address_base;	/* at byte 4 if __packed removes padding - confirm */
+	uint64_t address_length;
+} __packed *sas_entry_ptr;	/* note: a POINTER to struct SASENTRY */
+
+#define	SASENTRY_TYPE_IO	0
+#define	SASENTRY_TYPE_MEMORY	1
+#define	SASENTRY_TYPE_PREFETCH	2
+
+typedef struct BHDENTRY {	/* members sum to 8 bytes */
+	uint8_t	type;
+	uint8_t	length;
+	uint8_t	bus_id;
+	uint8_t	bus_info;	/* presumably BHDENTRY_INFO_* bits - confirm */
+	uint8_t	parent_bus;
+	uint8_t	reserved[3];
+} __packed *bhd_entry_ptr;	/* note: a POINTER to struct BHDENTRY */
+
+#define	BHDENTRY_INFO_SUBTRACTIVE_DECODE	0x1
+
+typedef struct CBASMENTRY {	/* members sum to 8 bytes */
+	uint8_t	type;
+	uint8_t	length;
+	uint8_t	bus_id;
+	uint8_t	address_mod;	/* presumably CBASMENTRY_ADDRESS_MOD_* - confirm */
+	uint32_t predefined_range;	/* presumably CBASMENTRY_RANGE_* - confirm */
+} __packed *cbasm_entry_ptr;	/* note: a POINTER to struct CBASMENTRY */
+
+#define	CBASMENTRY_ADDRESS_MOD_ADD		0x0
+#define	CBASMENTRY_ADDRESS_MOD_SUBTRACT		0x1
+
+#define	CBASMENTRY_RANGE_ISA_IO		0
+#define	CBASMENTRY_RANGE_VGA_IO		1
+
+#ifdef _KERNEL
+struct mptable_hostb_softc {	/* has no members unless NEW_PCIB is defined */
+#ifdef NEW_PCIB
+	struct pcib_host_resources sc_host_res;
+	int		sc_decodes_vga_io;
+	int		sc_decodes_isa_io;
+#endif /* NEW_PCIB */
+};
+
+#ifdef NEW_PCIB
+void	mptable_pci_host_res_init(device_t pcib);
+#endif /* NEW_PCIB */
+int	mptable_pci_probe_table(int bus);
+int	mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin);
+#endif /* _KERNEL */
+#endif /* !__MACHINE_MPTABLE_H__ */
diff --git a/usr/contrib/freebsd/x86/psl.h b/usr/contrib/freebsd/x86/psl.h
new file mode 100644
index 0000000000..6934b4feb7
--- /dev/null
+++ b/usr/contrib/freebsd/x86/psl.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)psl.h	5.2 (Berkeley) 1/18/91
+ * $FreeBSD: head/sys/x86/include/psl.h 258135 2013-11-14 15:37:20Z emaste $
+ */
+
+#ifndef _MACHINE_PSL_H_
+#define	_MACHINE_PSL_H_
+
+/*
+ * 386 processor status longword.
+ */
+#define	PSL_C		0x00000001	/* carry bit */
+#define	PSL_PF		0x00000004	/* parity bit */
+#define	PSL_AF		0x00000010	/* bcd carry bit */
+#define	PSL_Z		0x00000040	/* zero bit */
+#define	PSL_N		0x00000080	/* negative bit */
+#define	PSL_T		0x00000100	/* trace enable bit */
+#define	PSL_I		0x00000200	/* interrupt enable bit */
+#define	PSL_D		0x00000400	/* string instruction direction bit */
+#define	PSL_V		0x00000800	/* overflow bit */
+#define	PSL_IOPL	0x00003000	/* i/o privilege level */
+#define	PSL_NT		0x00004000	/* nested task bit */
+#define	PSL_RF		0x00010000	/* resume flag bit */
+#define	PSL_VM		0x00020000	/* virtual 8086 mode bit */
+#define	PSL_AC		0x00040000	/* alignment checking */
+#define	PSL_VIF		0x00080000	/* virtual interrupt enable */
+#define	PSL_VIP		0x00100000	/* virtual interrupt pending */
+#define	PSL_ID		0x00200000	/* identification bit */
+
+/*
+ * The i486 manual says that we are not supposed to change reserved flags,
+ * but this is too much trouble since the reserved flags depend on the cpu
+ * and setting them to their historical values works in practice.
+ */
+#define	PSL_RESERVED_DEFAULT	0x00000002
+
+/*
+ * Initial flags for kernel and user mode.  The kernel later inherits
+ * PSL_I and some other flags from user mode.
+ */
+#define	PSL_KERNEL	PSL_RESERVED_DEFAULT
+#define	PSL_USER	(PSL_RESERVED_DEFAULT | PSL_I)
+
+/*
+ * Bits that can be changed in user mode on 486's.  We allow these bits
+ * to be changed using ptrace(), sigreturn() and procfs.  Setting PSL_NT
+ * is undesirable but it may as well be allowed since users can inflict
+ * it on the kernel directly.  Changes to PSL_AC are silently ignored on
+ * 386's.
+ *
+ * Users are allowed to change the privileged flag PSL_RF.  The cpu sets PSL_RF
+ * in tf_eflags for faults.  Debuggers should sometimes set it there too.
+ * tf_eflags is kept in the signal context during signal handling and there is
+ * no other place to remember it, so the PSL_RF bit may be corrupted by the
+ * signal handler without us knowing.  Corruption of the PSL_RF bit at worst
+ * causes one more or one less debugger trap, so allowing it is fairly
+ * harmless.   
+ */
+#define	PSL_USERCHANGE (PSL_ID | PSL_AC | PSL_RF | PSL_NT | PSL_V | PSL_D | \
+			PSL_T | PSL_N | PSL_Z | PSL_AF | PSL_PF | PSL_C)	/* listed high bit to low */
+
+#endif /* !_MACHINE_PSL_H_ */
diff --git a/usr/contrib/freebsd/x86/specialreg.h b/usr/contrib/freebsd/x86/specialreg.h
new file mode 100644
index 0000000000..bea3122423
--- /dev/null
+++ b/usr/contrib/freebsd/x86/specialreg.h
@@ -0,0 +1,839 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)specialreg.h	7.1 (Berkeley) 5/9/91
+ * $FreeBSD: head/sys/x86/include/specialreg.h 273338 2014-10-20 18:09:33Z neel $
+ */
+
+#ifndef _MACHINE_SPECIALREG_H_
+#define	_MACHINE_SPECIALREG_H_
+
+/*
+ * Bits in 386 special registers:
+ */
+#define	CR0_PE	0x00000001	/* Protected mode Enable */
+#define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
+#define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
+#define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
+#define	CR0_PG	0x80000000	/* PaGing enable */
+
+/*
+ * Bits in 486 special registers:
+ */
+#define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
+#define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
+							   all modes) */
+#define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
+#define	CR0_NW  0x20000000	/* Not Write-through */
+#define	CR0_CD  0x40000000	/* Cache Disable */
+
+#define	CR3_PCID_SAVE 0x8000000000000000
+
+/*
+ * Bits in PPro special registers
+ */
+#define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
+#define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
+#define	CR4_TSD	0x00000004	/* Time stamp disable */
+#define	CR4_DE	0x00000008	/* Debugging extensions */
+#define	CR4_PSE	0x00000010	/* Page size extensions */
+#define	CR4_PAE	0x00000020	/* Physical address extension */
+#define	CR4_MCE	0x00000040	/* Machine check enable */
+#define	CR4_PGE	0x00000080	/* Page global enable */
+#define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
+#define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
+#define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
+#define	CR4_VMXE 0x00002000	/* enable VMX operation (Intel-specific) */
+#define	CR4_FSGSBASE 0x00010000	/* Enable FS/GS BASE accessing instructions */
+#define	CR4_PCIDE 0x00020000	/* Enable Context ID */
+#define	CR4_XSAVE 0x00040000	/* XSETBV/XGETBV */
+#define	CR4_SMEP 0x00100000	/* Supervisor-Mode Execution Prevention */
+
+/*
+ * Bits in AMD64 special registers.  EFER is 64 bits wide.
+ */
+#define	EFER_SCE 0x000000001	/* System Call Extensions (R/W) */
+#define	EFER_LME 0x000000100	/* Long mode enable (R/W) */
+#define	EFER_LMA 0x000000400	/* Long mode active (R) */
+#define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
+#define	EFER_SVM 0x000001000	/* SVM enable bit for AMD, reserved for Intel */
+
+/*
+ * Intel Extended Features registers
+ */
+#define	XCR0	0		/* XFEATURE_ENABLED_MASK register */
+
+#define	XFEATURE_ENABLED_X87		0x00000001
+#define	XFEATURE_ENABLED_SSE		0x00000002
+#define	XFEATURE_ENABLED_YMM_HI128	0x00000004
+#define	XFEATURE_ENABLED_AVX		XFEATURE_ENABLED_YMM_HI128
+#define	XFEATURE_ENABLED_BNDREGS	0x00000008
+#define	XFEATURE_ENABLED_BNDCSR		0x00000010
+#define	XFEATURE_ENABLED_OPMASK		0x00000020
+#define	XFEATURE_ENABLED_ZMM_HI256	0x00000040
+#define	XFEATURE_ENABLED_HI16_ZMM	0x00000080
+
+#define	XFEATURE_AVX					\
+    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
+#define	XFEATURE_AVX512						\
+    (XFEATURE_ENABLED_OPMASK | XFEATURE_ENABLED_ZMM_HI256 |	\
+    XFEATURE_ENABLED_HI16_ZMM)
+#define	XFEATURE_MPX					\
+    (XFEATURE_ENABLED_BNDREGS | XFEATURE_ENABLED_BNDCSR)
+
+/*
+ * CPUID instruction features register
+ */
+#define	CPUID_FPU	0x00000001
+#define	CPUID_VME	0x00000002
+#define	CPUID_DE	0x00000004
+#define	CPUID_PSE	0x00000008
+#define	CPUID_TSC	0x00000010
+#define	CPUID_MSR	0x00000020
+#define	CPUID_PAE	0x00000040
+#define	CPUID_MCE	0x00000080
+#define	CPUID_CX8	0x00000100
+#define	CPUID_APIC	0x00000200
+#define	CPUID_B10	0x00000400
+#define	CPUID_SEP	0x00000800
+#define	CPUID_MTRR	0x00001000
+#define	CPUID_PGE	0x00002000
+#define	CPUID_MCA	0x00004000
+#define	CPUID_CMOV	0x00008000
+#define	CPUID_PAT	0x00010000
+#define	CPUID_PSE36	0x00020000
+#define	CPUID_PSN	0x00040000
+#define	CPUID_CLFSH	0x00080000
+#define	CPUID_B20	0x00100000
+#define	CPUID_DS	0x00200000
+#define	CPUID_ACPI	0x00400000
+#define	CPUID_MMX	0x00800000
+#define	CPUID_FXSR	0x01000000
+#define	CPUID_SSE	0x02000000
+#define	CPUID_XMM	0x02000000
+#define	CPUID_SSE2	0x04000000
+#define	CPUID_SS	0x08000000
+#define	CPUID_HTT	0x10000000
+#define	CPUID_TM	0x20000000
+#define	CPUID_IA64	0x40000000
+#define	CPUID_PBE	0x80000000
+
+#define	CPUID2_SSE3	0x00000001
+#define	CPUID2_PCLMULQDQ 0x00000002
+#define	CPUID2_DTES64	0x00000004
+#define	CPUID2_MON	0x00000008
+#define	CPUID2_DS_CPL	0x00000010
+#define	CPUID2_VMX	0x00000020
+#define	CPUID2_SMX	0x00000040
+#define	CPUID2_EST	0x00000080
+#define	CPUID2_TM2	0x00000100
+#define	CPUID2_SSSE3	0x00000200
+#define	CPUID2_CNXTID	0x00000400
+#define	CPUID2_FMA	0x00001000
+#define	CPUID2_CX16	0x00002000
+#define	CPUID2_XTPR	0x00004000
+#define	CPUID2_PDCM	0x00008000
+#define	CPUID2_PCID	0x00020000
+#define	CPUID2_DCA	0x00040000
+#define	CPUID2_SSE41	0x00080000
+#define	CPUID2_SSE42	0x00100000
+#define	CPUID2_X2APIC	0x00200000
+#define	CPUID2_MOVBE	0x00400000
+#define	CPUID2_POPCNT	0x00800000
+#define	CPUID2_TSCDLT	0x01000000
+#define	CPUID2_AESNI	0x02000000
+#define	CPUID2_XSAVE	0x04000000
+#define	CPUID2_OSXSAVE	0x08000000
+#define	CPUID2_AVX	0x10000000
+#define	CPUID2_F16C	0x20000000
+#define	CPUID2_RDRAND	0x40000000
+#define	CPUID2_HV	0x80000000
+
+/*
+ * Important bits in the Thermal and Power Management flags
+ * CPUID.6 EAX and ECX.
+ */
+#define	CPUTPM1_SENSOR	0x00000001
+#define	CPUTPM1_TURBO	0x00000002
+#define	CPUTPM1_ARAT	0x00000004
+#define	CPUTPM2_EFFREQ	0x00000001
+
+/*
+ * Important bits in the AMD extended cpuid flags
+ */
+#define	AMDID_SYSCALL	0x00000800
+#define	AMDID_MP	0x00080000
+#define	AMDID_NX	0x00100000
+#define	AMDID_EXT_MMX	0x00400000
+#define	AMDID_FFXSR	0x01000000
+#define	AMDID_PAGE1GB	0x04000000
+#define	AMDID_RDTSCP	0x08000000
+#define	AMDID_LM	0x20000000
+#define	AMDID_EXT_3DNOW	0x40000000
+#define	AMDID_3DNOW	0x80000000
+
+#define	AMDID2_LAHF	0x00000001
+#define	AMDID2_CMP	0x00000002
+#define	AMDID2_SVM	0x00000004
+#define	AMDID2_EXT_APIC	0x00000008
+#define	AMDID2_CR8	0x00000010
+#define	AMDID2_ABM	0x00000020
+#define	AMDID2_SSE4A	0x00000040
+#define	AMDID2_MAS	0x00000080
+#define	AMDID2_PREFETCH	0x00000100
+#define	AMDID2_OSVW	0x00000200
+#define	AMDID2_IBS	0x00000400
+#define	AMDID2_XOP	0x00000800
+#define	AMDID2_SKINIT	0x00001000
+#define	AMDID2_WDT	0x00002000
+#define	AMDID2_LWP	0x00008000
+#define	AMDID2_FMA4	0x00010000
+#define	AMDID2_TCE	0x00020000
+#define	AMDID2_NODE_ID	0x00080000
+#define	AMDID2_TBM	0x00200000
+#define	AMDID2_TOPOLOGY	0x00400000
+#define	AMDID2_PCXC	0x00800000
+#define	AMDID2_PNXC	0x01000000
+#define	AMDID2_DBE	0x04000000
+#define	AMDID2_PTSC	0x08000000
+#define	AMDID2_PTSCEL2I	0x10000000
+
+/*
+ * CPUID instruction 1 eax info
+ */
+#define	CPUID_STEPPING		0x0000000f
+#define	CPUID_MODEL		0x000000f0
+#define	CPUID_FAMILY		0x00000f00
+#define	CPUID_EXT_MODEL		0x000f0000
+#define	CPUID_EXT_FAMILY	0x0ff00000
+#ifdef __i386__
+#define	CPUID_TO_MODEL(id) \
+    ((((id) & CPUID_MODEL) >> 4) | \
+    ((((id) & CPUID_FAMILY) >= 0x600) ? \
+    (((id) & CPUID_EXT_MODEL) >> 12) : 0))
+#define	CPUID_TO_FAMILY(id) \
+    ((((id) & CPUID_FAMILY) >> 8) + \
+    ((((id) & CPUID_FAMILY) == 0xf00) ? \
+    (((id) & CPUID_EXT_FAMILY) >> 20) : 0))
+#else
+#define	CPUID_TO_MODEL(id) \
+    ((((id) & CPUID_MODEL) >> 4) | \
+    (((id) & CPUID_EXT_MODEL) >> 12))
+#define	CPUID_TO_FAMILY(id) \
+    ((((id) & CPUID_FAMILY) >> 8) + \
+    (((id) & CPUID_EXT_FAMILY) >> 20))
+#endif
+
+/*
+ * CPUID instruction 1 ebx info
+ */
+#define	CPUID_BRAND_INDEX	0x000000ff
+#define	CPUID_CLFUSH_SIZE	0x0000ff00
+#define	CPUID_HTT_CORES		0x00ff0000
+#define	CPUID_LOCAL_APIC_ID	0xff000000
+
+/*
+ * CPUID instruction 5 info
+ */
+#define	CPUID5_MON_MIN_SIZE	0x0000ffff	/* eax */
+#define	CPUID5_MON_MAX_SIZE	0x0000ffff	/* ebx */
+#define	CPUID5_MON_MWAIT_EXT	0x00000001	/* ecx */
+#define	CPUID5_MWAIT_INTRBREAK	0x00000002	/* ecx */
+
+/*
+ * MWAIT cpu power states.  Lower 4 bits are sub-states.
+ */
+#define	MWAIT_C0	0xf0
+#define	MWAIT_C1	0x00
+#define	MWAIT_C2	0x10
+#define	MWAIT_C3	0x20
+#define	MWAIT_C4	0x30
+
+/*
+ * MWAIT extensions.
+ */
+/* Interrupt breaks MWAIT even when masked. */
+#define	MWAIT_INTRBREAK		0x00000001
+
+/*
+ * CPUID instruction 6 ecx info
+ */
+#define	CPUID_PERF_STAT		0x00000001
+#define	CPUID_PERF_BIAS		0x00000008
+
+/*
+ * CPUID instruction 0xb ebx info.
+ */
+#define	CPUID_TYPE_INVAL	0
+#define	CPUID_TYPE_SMT		1
+#define	CPUID_TYPE_CORE		2
+
+/*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define	CPUID_EXTSTATE_XSAVEOPT	0x00000001
+#define	CPUID_EXTSTATE_XSAVEC	0x00000002
+#define	CPUID_EXTSTATE_XINUSE	0x00000004
+#define	CPUID_EXTSTATE_XSAVES	0x00000008
+
+/*
+ * AMD extended function 8000_0007h edx info
+ */
+#define	AMDPM_TS		0x00000001
+#define	AMDPM_FID		0x00000002
+#define	AMDPM_VID		0x00000004
+#define	AMDPM_TTP		0x00000008
+#define	AMDPM_TM		0x00000010
+#define	AMDPM_STC		0x00000020
+#define	AMDPM_100MHZ_STEPS	0x00000040
+#define	AMDPM_HW_PSTATE		0x00000080
+#define	AMDPM_TSC_INVARIANT	0x00000100
+#define	AMDPM_CPB		0x00000200
+
+/*
+ * AMD extended function 8000_0008h ecx info
+ */
+#define	AMDID_CMP_CORES		0x000000ff
+#define	AMDID_COREID_SIZE	0x0000f000
+#define	AMDID_COREID_SIZE_SHIFT	12
+
+/*
+ * CPUID instruction 7 Structured Extended Features, leaf 0 ebx info
+ */
+#define	CPUID_STDEXT_FSGSBASE	0x00000001
+#define	CPUID_STDEXT_TSC_ADJUST	0x00000002
+#define	CPUID_STDEXT_BMI1	0x00000008
+#define	CPUID_STDEXT_HLE	0x00000010
+#define	CPUID_STDEXT_AVX2	0x00000020
+#define	CPUID_STDEXT_SMEP	0x00000080
+#define	CPUID_STDEXT_BMI2	0x00000100
+#define	CPUID_STDEXT_ERMS	0x00000200
+#define	CPUID_STDEXT_INVPCID	0x00000400
+#define	CPUID_STDEXT_RTM	0x00000800
+#define	CPUID_STDEXT_MPX	0x00004000
+#define	CPUID_STDEXT_AVX512F	0x00010000
+#define	CPUID_STDEXT_RDSEED	0x00040000
+#define	CPUID_STDEXT_ADX	0x00080000
+#define	CPUID_STDEXT_SMAP	0x00100000
+#define	CPUID_STDEXT_CLFLUSHOPT	0x00800000
+#define	CPUID_STDEXT_PROCTRACE	0x02000000
+#define	CPUID_STDEXT_AVX512PF	0x04000000
+#define	CPUID_STDEXT_AVX512ER	0x08000000
+#define	CPUID_STDEXT_AVX512CD	0x10000000
+#define	CPUID_STDEXT_SHA	0x20000000
+
+/*
+ * CPUID manufacturers identifiers
+ */
+#define	AMD_VENDOR_ID		"AuthenticAMD"
+#define	CENTAUR_VENDOR_ID	"CentaurHauls"
+#define	CYRIX_VENDOR_ID		"CyrixInstead"
+#define	INTEL_VENDOR_ID		"GenuineIntel"
+#define	NEXGEN_VENDOR_ID	"NexGenDriven"
+#define	NSC_VENDOR_ID		"Geode by NSC"
+#define	RISE_VENDOR_ID		"RiseRiseRise"
+#define	SIS_VENDOR_ID		"SiS SiS SiS "
+#define	TRANSMETA_VENDOR_ID	"GenuineTMx86"
+#define	UMC_VENDOR_ID		"UMC UMC UMC "
+
+/*
+ * Model-specific registers for the i386 family
+ */
+#define	MSR_P5_MC_ADDR		0x000
+#define	MSR_P5_MC_TYPE		0x001
+#define	MSR_TSC			0x010
+#define	MSR_P5_CESR		0x011
+#define	MSR_P5_CTR0		0x012
+#define	MSR_P5_CTR1		0x013
+#define	MSR_IA32_PLATFORM_ID	0x017
+#define	MSR_APICBASE		0x01b
+#define	MSR_EBL_CR_POWERON	0x02a
+#define	MSR_TEST_CTL		0x033
+#define	MSR_IA32_FEATURE_CONTROL 0x03a
+#define	MSR_BIOS_UPDT_TRIG	0x079
+#define	MSR_BBL_CR_D0		0x088
+#define	MSR_BBL_CR_D1		0x089
+#define	MSR_BBL_CR_D2		0x08a
+#define	MSR_BIOS_SIGN		0x08b
+#define	MSR_PERFCTR0		0x0c1
+#define	MSR_PERFCTR1		0x0c2
+#define	MSR_PLATFORM_INFO	0x0ce
+#define	MSR_MPERF		0x0e7
+#define	MSR_APERF		0x0e8
+#define	MSR_IA32_EXT_CONFIG	0x0ee	/* Undocumented. Core Solo/Duo only */
+#define	MSR_MTRRcap		0x0fe
+#define	MSR_BBL_CR_ADDR		0x116
+#define	MSR_BBL_CR_DECC		0x118
+#define	MSR_BBL_CR_CTL		0x119
+#define	MSR_BBL_CR_TRIG		0x11a
+#define	MSR_BBL_CR_BUSY		0x11b
+#define	MSR_BBL_CR_CTL3		0x11e
+#define	MSR_SYSENTER_CS_MSR	0x174
+#define	MSR_SYSENTER_ESP_MSR	0x175
+#define	MSR_SYSENTER_EIP_MSR	0x176
+#define	MSR_MCG_CAP		0x179
+#define	MSR_MCG_STATUS		0x17a
+#define	MSR_MCG_CTL		0x17b
+#define	MSR_EVNTSEL0		0x186
+#define	MSR_EVNTSEL1		0x187
+#define	MSR_THERM_CONTROL	0x19a
+#define	MSR_THERM_INTERRUPT	0x19b
+#define	MSR_THERM_STATUS	0x19c
+#define	MSR_IA32_MISC_ENABLE	0x1a0
+#define	MSR_IA32_TEMPERATURE_TARGET	0x1a2
+#define	MSR_TURBO_RATIO_LIMIT	0x1ad
+#define	MSR_TURBO_RATIO_LIMIT1	0x1ae
+#define	MSR_DEBUGCTLMSR		0x1d9
+#define	MSR_LASTBRANCHFROMIP	0x1db
+#define	MSR_LASTBRANCHTOIP	0x1dc
+#define	MSR_LASTINTFROMIP	0x1dd
+#define	MSR_LASTINTTOIP		0x1de
+#define	MSR_ROB_CR_BKUPTMPDR6	0x1e0
+#define	MSR_MTRRVarBase		0x200
+#define	MSR_MTRR64kBase		0x250
+#define	MSR_MTRR16kBase		0x258
+#define	MSR_MTRR4kBase		0x268
+#define	MSR_PAT			0x277
+#define	MSR_MC0_CTL2		0x280
+#define	MSR_MTRRdefType		0x2ff
+#define	MSR_MC0_CTL		0x400
+#define	MSR_MC0_STATUS		0x401
+#define	MSR_MC0_ADDR		0x402
+#define	MSR_MC0_MISC		0x403
+#define	MSR_MC1_CTL		0x404
+#define	MSR_MC1_STATUS		0x405
+#define	MSR_MC1_ADDR		0x406
+#define	MSR_MC1_MISC		0x407
+#define	MSR_MC2_CTL		0x408
+#define	MSR_MC2_STATUS		0x409
+#define	MSR_MC2_ADDR		0x40a
+#define	MSR_MC2_MISC		0x40b
+#define	MSR_MC3_CTL		0x40c
+#define	MSR_MC3_STATUS		0x40d
+#define	MSR_MC3_ADDR		0x40e
+#define	MSR_MC3_MISC		0x40f
+#define	MSR_MC4_CTL		0x410
+#define	MSR_MC4_STATUS		0x411
+#define	MSR_MC4_ADDR		0x412
+#define	MSR_MC4_MISC		0x413
+#define	MSR_RAPL_POWER_UNIT	0x606
+#define	MSR_PKG_ENERGY_STATUS	0x611
+#define	MSR_DRAM_ENERGY_STATUS	0x619
+#define	MSR_PP0_ENERGY_STATUS	0x639
+#define	MSR_PP1_ENERGY_STATUS	0x641
+
+/*
+ * VMX MSRs
+ */
+#define	MSR_VMX_BASIC		0x480
+#define	MSR_VMX_PINBASED_CTLS	0x481
+#define	MSR_VMX_PROCBASED_CTLS	0x482
+#define	MSR_VMX_EXIT_CTLS	0x483
+#define	MSR_VMX_ENTRY_CTLS	0x484
+#define	MSR_VMX_CR0_FIXED0	0x486
+#define	MSR_VMX_CR0_FIXED1	0x487
+#define	MSR_VMX_CR4_FIXED0	0x488
+#define	MSR_VMX_CR4_FIXED1	0x489
+#define	MSR_VMX_PROCBASED_CTLS2	0x48b
+#define	MSR_VMX_EPT_VPID_CAP	0x48c
+#define	MSR_VMX_TRUE_PINBASED_CTLS	0x48d
+#define	MSR_VMX_TRUE_PROCBASED_CTLS	0x48e
+#define	MSR_VMX_TRUE_EXIT_CTLS	0x48f
+#define	MSR_VMX_TRUE_ENTRY_CTLS	0x490
+
+/*
+ * X2APIC MSRs
+ */
+#define	MSR_APIC_ID		0x802
+#define	MSR_APIC_VERSION	0x803
+#define	MSR_APIC_TPR		0x808
+#define	MSR_APIC_EOI		0x80b
+#define	MSR_APIC_LDR		0x80d
+#define	MSR_APIC_SVR		0x80f
+#define	MSR_APIC_ISR0		0x810
+#define	MSR_APIC_ISR1		0x811
+#define	MSR_APIC_ISR2		0x812
+#define	MSR_APIC_ISR3		0x813
+#define	MSR_APIC_ISR4		0x814
+#define	MSR_APIC_ISR5		0x815
+#define	MSR_APIC_ISR6		0x816
+#define	MSR_APIC_ISR7		0x817
+#define	MSR_APIC_TMR0		0x818
+#define	MSR_APIC_IRR0		0x820
+#define	MSR_APIC_ESR		0x828
+#define	MSR_APIC_LVT_CMCI	0x82F
+#define	MSR_APIC_ICR		0x830
+#define	MSR_APIC_LVT_TIMER	0x832
+#define	MSR_APIC_LVT_THERMAL	0x833
+#define	MSR_APIC_LVT_PCINT	0x834
+#define	MSR_APIC_LVT_LINT0	0x835
+#define	MSR_APIC_LVT_LINT1	0x836
+#define	MSR_APIC_LVT_ERROR	0x837
+#define	MSR_APIC_ICR_TIMER	0x838
+#define	MSR_APIC_CCR_TIMER	0x839
+#define	MSR_APIC_DCR_TIMER	0x83e
+#define	MSR_APIC_SELF_IPI	0x83f
+
+#define	MSR_IA32_XSS		0xda0
+
+/*
+ * Constants related to MSR's.
+ */
+#define	APICBASE_RESERVED	0x000002ff
+#define	APICBASE_BSP		0x00000100
+#define	APICBASE_X2APIC		0x00000400
+#define	APICBASE_ENABLED	0x00000800
+#define	APICBASE_ADDRESS	0xfffff000
+
+/* MSR_IA32_FEATURE_CONTROL related */
+#define	IA32_FEATURE_CONTROL_LOCK	0x01	/* lock bit */
+#define	IA32_FEATURE_CONTROL_SMX_EN	0x02	/* enable VMX inside SMX */
+#define	IA32_FEATURE_CONTROL_VMX_EN	0x04	/* enable VMX outside SMX */
+
+/*
+ * PAT modes.
+ */
+#define	PAT_UNCACHEABLE		0x00
+#define	PAT_WRITE_COMBINING	0x01
+#define	PAT_WRITE_THROUGH	0x04
+#define	PAT_WRITE_PROTECTED	0x05
+#define	PAT_WRITE_BACK		0x06
+#define	PAT_UNCACHED		0x07
+#define	PAT_VALUE(i, m)		((long long)(m) << (8 * (i)))
+#define	PAT_MASK(i)		PAT_VALUE(i, 0xff)
+
+/*
+ * Constants related to MTRRs
+ */
+#define	MTRR_UNCACHEABLE	0x00
+#define	MTRR_WRITE_COMBINING	0x01
+#define	MTRR_WRITE_THROUGH	0x04
+#define	MTRR_WRITE_PROTECTED	0x05
+#define	MTRR_WRITE_BACK		0x06
+#define	MTRR_N64K		8	/* numbers of fixed-size entries */
+#define	MTRR_N16K		16
+#define	MTRR_N4K		64
+#define	MTRR_CAP_WC		0x0000000000000400
+#define	MTRR_CAP_FIXED		0x0000000000000100
+#define	MTRR_CAP_VCNT		0x00000000000000ff
+#define	MTRR_DEF_ENABLE		0x0000000000000800
+#define	MTRR_DEF_FIXED_ENABLE	0x0000000000000400
+#define	MTRR_DEF_TYPE		0x00000000000000ff
+#define	MTRR_PHYSBASE_PHYSBASE	0x000ffffffffff000
+#define	MTRR_PHYSBASE_TYPE	0x00000000000000ff
+#define	MTRR_PHYSMASK_PHYSMASK	0x000ffffffffff000
+#define	MTRR_PHYSMASK_VALID	0x0000000000000800
+
+/*
+ * Cyrix configuration registers, accessible as IO ports.
+ */
+#define	CCR0			0xc0	/* Configuration control register 0 */
+#define	CCR0_NC0		0x01	/* First 64K of each 1M memory region is
+								   non-cacheable */
+#define	CCR0_NC1		0x02	/* 640K-1M region is non-cacheable */
+#define	CCR0_A20M		0x04	/* Enables A20M# input pin */
+#define	CCR0_KEN		0x08	/* Enables KEN# input pin */
+#define	CCR0_FLUSH		0x10	/* Enables FLUSH# input pin */
+#define	CCR0_BARB		0x20	/* Flushes internal cache when entering hold
+								   state */
+#define	CCR0_CO			0x40	/* Cache org: 1=direct mapped, 0=2x set
+								   assoc */
+#define	CCR0_SUSPEND	0x80	/* Enables SUSP# and SUSPA# pins */
+
+#define	CCR1			0xc1	/* Configuration control register 1 */
+#define	CCR1_RPL		0x01	/* Enables RPLSET and RPLVAL# pins */
+#define	CCR1_SMI		0x02	/* Enables SMM pins */
+#define	CCR1_SMAC		0x04	/* System management memory access */
+#define	CCR1_MMAC		0x08	/* Main memory access */
+#define	CCR1_NO_LOCK	0x10	/* Negate LOCK# */
+#define	CCR1_SM3		0x80	/* SMM address space address region 3 */
+
+#define	CCR2			0xc2
+#define	CCR2_WB			0x02	/* Enables WB cache interface pins */
+#define	CCR2_SADS		0x02	/* Slow ADS */
+#define	CCR2_LOCK_NW	0x04	/* LOCK NW Bit */
+#define	CCR2_SUSP_HLT	0x08	/* Suspend on HALT */
+#define	CCR2_WT1		0x10	/* WT region 1 */
+#define	CCR2_WPR1		0x10	/* Write-protect region 1 */
+#define	CCR2_BARB		0x20	/* Flushes write-back cache when entering
+								   hold state. */
+#define	CCR2_BWRT		0x40	/* Enables burst write cycles */
+#define	CCR2_USE_SUSP	0x80	/* Enables suspend pins */
+
+#define	CCR3			0xc3
+#define	CCR3_SMILOCK	0x01	/* SMM register lock */
+#define	CCR3_NMI		0x02	/* Enables NMI during SMM */
+#define	CCR3_LINBRST	0x04	/* Linear address burst cycles */
+#define	CCR3_SMMMODE	0x08	/* SMM Mode */
+#define	CCR3_MAPEN0		0x10	/* Enables Map0 */
+#define	CCR3_MAPEN1		0x20	/* Enables Map1 */
+#define	CCR3_MAPEN2		0x40	/* Enables Map2 */
+#define	CCR3_MAPEN3		0x80	/* Enables Map3 */
+
+#define	CCR4			0xe8
+#define	CCR4_IOMASK		0x07
+#define	CCR4_MEM		0x08	/* Enables memory bypassing */
+#define	CCR4_DTE		0x10	/* Enables directory table entry cache */
+#define	CCR4_FASTFPE	0x20	/* Fast FPU exception */
+#define	CCR4_CPUID		0x80	/* Enables CPUID instruction */
+
+#define	CCR5			0xe9
+#define	CCR5_WT_ALLOC	0x01	/* Write-through allocate */
+#define	CCR5_SLOP		0x02	/* LOOP instruction slowed down */
+#define	CCR5_LBR1		0x10	/* Local bus region 1 */
+#define	CCR5_ARREN		0x20	/* Enables ARR region */
+
+#define	CCR6			0xea
+
+#define	CCR7			0xeb
+
+/* Performance Control Register (5x86 only). */
+#define	PCR0			0x20
+#define	PCR0_RSTK		0x01	/* Enables return stack */
+#define	PCR0_BTB		0x02	/* Enables branch target buffer */
+#define	PCR0_LOOP		0x04	/* Enables loop */
+#define	PCR0_AIS		0x08	/* Enables all instructions stalled to
+								   serialize pipe. */
+#define	PCR0_MLR		0x10	/* Enables reordering of misaligned loads */
+#define	PCR0_BTBRT		0x40	/* Enables BTB test register. */
+#define	PCR0_LSSER		0x80	/* Disable reorder */
+
+/* Device Identification Registers */
+#define	DIR0			0xfe
+#define	DIR1			0xff
+
+/*
+ * Machine Check register constants.
+ */
+#define	MCG_CAP_COUNT		0x000000ff
+#define	MCG_CAP_CTL_P		0x00000100
+#define	MCG_CAP_EXT_P		0x00000200
+#define	MCG_CAP_CMCI_P		0x00000400
+#define	MCG_CAP_TES_P		0x00000800
+#define	MCG_CAP_EXT_CNT		0x00ff0000
+#define	MCG_CAP_SER_P		0x01000000
+#define	MCG_STATUS_RIPV		0x00000001
+#define	MCG_STATUS_EIPV		0x00000002
+#define	MCG_STATUS_MCIP		0x00000004
+#define	MCG_CTL_ENABLE		0xffffffffffffffff
+#define	MCG_CTL_DISABLE		0x0000000000000000
+#define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
+#define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
+#define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
+#define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
+#define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_MCA_ERROR	0x000000000000ffff
+#define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
+#define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
+#define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_CMCI_P */
+#define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_TES_P */
+#define	MC_STATUS_PCC		0x0200000000000000
+#define	MC_STATUS_ADDRV		0x0400000000000000
+#define	MC_STATUS_MISCV		0x0800000000000000
+#define	MC_STATUS_EN		0x1000000000000000
+#define	MC_STATUS_UC		0x2000000000000000
+#define	MC_STATUS_OVER		0x4000000000000000
+#define	MC_STATUS_VAL		0x8000000000000000
+#define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
+#define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
+#define	MC_CTL2_THRESHOLD	0x0000000000007fff
+#define	MC_CTL2_CMCI_EN		0x0000000040000000
+
+/*
+ * The following four 3-byte registers control the non-cacheable regions.
+ * These registers must be written as three separate bytes.
+ *
+ * NCRx+0: A31-A24 of starting address
+ * NCRx+1: A23-A16 of starting address
+ * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
+ *
+ * The non-cacheable region's starting address must be aligned to the
+ * size indicated by the NCR_SIZE_xx field.
+ */
+#define	NCR1	0xc4
+#define	NCR2	0xc7
+#define	NCR3	0xca
+#define	NCR4	0xcd
+
+#define	NCR_SIZE_0K	0
+#define	NCR_SIZE_4K	1
+#define	NCR_SIZE_8K	2
+#define	NCR_SIZE_16K	3
+#define	NCR_SIZE_32K	4
+#define	NCR_SIZE_64K	5
+#define	NCR_SIZE_128K	6
+#define	NCR_SIZE_256K	7
+#define	NCR_SIZE_512K	8
+#define	NCR_SIZE_1M	9
+#define	NCR_SIZE_2M	10
+#define	NCR_SIZE_4M	11
+#define	NCR_SIZE_8M	12
+#define	NCR_SIZE_16M	13
+#define	NCR_SIZE_32M	14
+#define	NCR_SIZE_4G	15
+
+/*
+ * The address region registers are used to specify the location and
+ * size for the eight address regions.
+ *
+ * ARRx + 0: A31-A24 of start address
+ * ARRx + 1: A23-A16 of start address
+ * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
+ */
+#define	ARR0	0xc4
+#define	ARR1	0xc7
+#define	ARR2	0xca
+#define	ARR3	0xcd
+#define	ARR4	0xd0
+#define	ARR5	0xd3
+#define	ARR6	0xd6
+#define	ARR7	0xd9
+
+#define	ARR_SIZE_0K		0
+#define	ARR_SIZE_4K		1
+#define	ARR_SIZE_8K		2
+#define	ARR_SIZE_16K	3
+#define	ARR_SIZE_32K	4
+#define	ARR_SIZE_64K	5
+#define	ARR_SIZE_128K	6
+#define	ARR_SIZE_256K	7
+#define	ARR_SIZE_512K	8
+#define	ARR_SIZE_1M		9
+#define	ARR_SIZE_2M		10
+#define	ARR_SIZE_4M		11
+#define	ARR_SIZE_8M		12
+#define	ARR_SIZE_16M	13
+#define	ARR_SIZE_32M	14
+#define	ARR_SIZE_4G		15
+
+/*
+ * The region control registers specify the attributes associated with
+ * the ARRx address regions.
+ */
+#define	RCR0	0xdc
+#define	RCR1	0xdd
+#define	RCR2	0xde
+#define	RCR3	0xdf
+#define	RCR4	0xe0
+#define	RCR5	0xe1
+#define	RCR6	0xe2
+#define	RCR7	0xe3
+
+#define	RCR_RCD	0x01	/* Disables caching for ARRx (x = 0-6). */
+#define	RCR_RCE	0x01	/* Enables caching for ARR7. */
+#define	RCR_WWO	0x02	/* Weak write ordering. */
+#define	RCR_WL	0x04	/* Weak locking. */
+#define	RCR_WG	0x08	/* Write gathering. */
+#define	RCR_WT	0x10	/* Write-through. */
+#define	RCR_NLB	0x20	/* LBA# pin is not asserted. */
+
+/* AMD Write Allocate Top-Of-Memory and Control Register */
+#define	AMD_WT_ALLOC_TME	0x40000	/* top-of-memory enable */
+#define	AMD_WT_ALLOC_PRE	0x20000	/* programmable range enable */
+#define	AMD_WT_ALLOC_FRE	0x10000	/* fixed (A0000-FFFFF) range enable */
+
+/* AMD64 MSR's */
+#define	MSR_EFER	0xc0000080	/* extended features */
+#define	MSR_STAR	0xc0000081	/* legacy mode SYSCALL target/cs/ss */
+#define	MSR_LSTAR	0xc0000082	/* long mode SYSCALL target rip */
+#define	MSR_CSTAR	0xc0000083	/* compat mode SYSCALL target rip */
+#define	MSR_SF_MASK	0xc0000084	/* syscall flags mask */
+#define	MSR_FSBASE	0xc0000100	/* base address of the %fs "segment" */
+#define	MSR_GSBASE	0xc0000101	/* base address of the %gs "segment" */
+#define	MSR_KGSBASE	0xc0000102	/* base address of the kernel %gs */
+#define	MSR_PERFEVSEL0	0xc0010000
+#define	MSR_PERFEVSEL1	0xc0010001
+#define	MSR_PERFEVSEL2	0xc0010002
+#define	MSR_PERFEVSEL3	0xc0010003
+#define	MSR_K7_PERFCTR0	0xc0010004
+#define	MSR_K7_PERFCTR1	0xc0010005
+#define	MSR_K7_PERFCTR2	0xc0010006
+#define	MSR_K7_PERFCTR3	0xc0010007
+#define	MSR_SYSCFG	0xc0010010
+#define	MSR_HWCR	0xc0010015
+#define	MSR_IORRBASE0	0xc0010016
+#define	MSR_IORRMASK0	0xc0010017
+#define	MSR_IORRBASE1	0xc0010018
+#define	MSR_IORRMASK1	0xc0010019
+#define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
+#define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
+#define	MSR_NB_CFG1	0xc001001f	/* NB configuration 1 */
+#define	MSR_P_STATE_LIMIT 0xc0010061	/* P-state Current Limit Register */
+#define	MSR_P_STATE_CONTROL 0xc0010062	/* P-state Control Register */
+#define	MSR_P_STATE_STATUS 0xc0010063	/* P-state Status Register */
+#define	MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */
+#define	MSR_SMM_ADDR	0xc0010112	/* SMM TSEG base address */
+#define	MSR_SMM_MASK	0xc0010113	/* SMM TSEG address mask */
+#define	MSR_IC_CFG	0xc0011021	/* Instruction Cache Configuration */
+#define	MSR_K8_UCODE_UPDATE	0xc0010020	/* update microcode */
+#define	MSR_MC0_CTL_MASK	0xc0010044
+#define	MSR_VM_CR		0xc0010114 /* SVM: feature control */
+#define	MSR_VM_HSAVE_PA		0xc0010117 /* SVM: host save area address */
+
+/* MSR_VM_CR related */
+#define	VM_CR_SVMDIS		0x10	/* SVM: disabled by BIOS */
+
+/* VIA ACE crypto featureset: for via_feature_rng */
+#define	VIA_HAS_RNG		1	/* cpu has RNG */
+
+/* VIA ACE crypto featureset: for via_feature_xcrypt */
+#define	VIA_HAS_AES		1	/* cpu has AES */
+#define	VIA_HAS_SHA		2	/* cpu has SHA1 & SHA256 */
+#define	VIA_HAS_MM		4	/* cpu has RSA instructions */
+#define	VIA_HAS_AESCTR		8	/* cpu has AES-CTR instructions */
+
+/* Centaur Extended Feature flags */
+#define	VIA_CPUID_HAS_RNG	0x000004
+#define	VIA_CPUID_DO_RNG	0x000008
+#define	VIA_CPUID_HAS_ACE	0x000040
+#define	VIA_CPUID_DO_ACE	0x000080
+#define	VIA_CPUID_HAS_ACE2	0x000100
+#define	VIA_CPUID_DO_ACE2	0x000200
+#define	VIA_CPUID_HAS_PHE	0x000400
+#define	VIA_CPUID_DO_PHE	0x000800
+#define	VIA_CPUID_HAS_PMM	0x001000
+#define	VIA_CPUID_DO_PMM	0x002000
+
+/* VIA ACE xcrypt-* instruction context control options */
+#define	VIA_CRYPT_CWLO_ROUND_M		0x0000000f
+#define	VIA_CRYPT_CWLO_ALG_M		0x00000070
+#define	VIA_CRYPT_CWLO_ALG_AES		0x00000000
+#define	VIA_CRYPT_CWLO_KEYGEN_M		0x00000080
+#define	VIA_CRYPT_CWLO_KEYGEN_HW	0x00000000
+#define	VIA_CRYPT_CWLO_KEYGEN_SW	0x00000080
+#define	VIA_CRYPT_CWLO_NORMAL		0x00000000
+#define	VIA_CRYPT_CWLO_INTERMEDIATE	0x00000100
+#define	VIA_CRYPT_CWLO_ENCRYPT		0x00000000
+#define	VIA_CRYPT_CWLO_DECRYPT		0x00000200
+#define	VIA_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
+#define	VIA_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
+#define	VIA_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
+
+#endif /* !_MACHINE_SPECIALREG_H_ */
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
new file mode 100644
index 0000000000..f47daead31
--- /dev/null
+++ b/usr/src/cmd/bhyve/Makefile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Pluribus Networks Inc.
+#
+
+PROG =		bhyve
+
+include ../Makefile.cmd
+
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all	:=	TARGET = all
+install	:=	TARGET = install
+clean	:=	TARGET = clean
+clobber	:=	TARGET = clobber
+lint	:=	TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint:	$(SUBDIRS)
+
+install: $(SUBDIRS)
+	-$(RM) $(ROOTUSRSBINPROG)
+	-$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+
+$(SUBDIRS):	FRC
+	@cd $@; pwd; $(MAKE) CW_NO_SHADOW=true __GNUC= $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/bhyve/Makefile.com b/usr/src/cmd/bhyve/Makefile.com
new file mode 100644
index 0000000000..4a92b622ab
--- /dev/null
+++ b/usr/src/cmd/bhyve/Makefile.com
@@ -0,0 +1,94 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Pluribus Networks Inc.
+#
+
+PROG= bhyve
+
+SRCS =	atkbdc.c		\
+	bhyvegc.c		\
+	bhyverun.c		\
+	block_if.c		\
+	console.c		\
+	consport.c		\
+	inout.c			\
+	ioapic.c		\
+	mem.c			\
+	mptbl.c			\
+	pci_ahci.c		\
+	pci_emul.c		\
+	pci_hostbridge.c	\
+	pci_irq.c		\
+	pci_lpc.c		\
+	pci_virtio_block.c	\
+	pci_virtio_net.c	\
+	pci_virtio_viona.c	\
+	pm.c			\
+	pmtmr.c			\
+	post.c			\
+	ps2kbd.c		\
+	ps2mouse.c		\
+	rfb.c			\
+	rtc.c			\
+	smbiostbl.c		\
+	uart_emul.c		\
+	vga.c			\
+	virtio.c		\
+	vmm_instruction_emul.c	\
+	xmsr.c			\
+	spinup_ap.c		\
+	bhyve_sol_glue.c
+
+OBJS = $(SRCS:.c=.o)
+
+include ../../Makefile.cmd
+
+.KEEP_STATE:
+
+CFLAGS +=	$(CCVERBOSE) -_gcc=-Wimplicit-function-declaration
+CFLAGS64 +=	$(CCVERBOSE) -_gcc=-Wimplicit-function-declaration
+CPPFLAGS =	-I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd $(CPPFLAGS.master) \
+		-I$(ROOT)/usr/platform/i86pc/include \
+		-I$(SRC)/uts/i86pc/io/vmm \
+		-I$(SRC)/uts/common \
+		-I$(SRC)/uts/i86pc \
+		-I$(SRC)/lib/libdladm/common
+LDLIBS +=	-lsocket -lnsl -ldlpi -ldladm -lkstat -lmd -luuid -lvmmapi
+
+POST_PROCESS += ; $(GENSETDEFS) $@
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS)
+	$(POST_PROCESS)
+
+install: all $(ROOTUSRSBINPROG)
+
+clean:
+	$(RM) $(OBJS)
+
+lint:	lint_SRCS
+
+include ../../Makefile.targ
+
+%.o: ../%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
+%.o: $(SRC)/uts/i86pc/io/vmm/%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
+%.o: ../%.s
+	$(COMPILE.s) $<
diff --git a/usr/src/cmd/bhyve/acpi.h b/usr/src/cmd/bhyve/acpi.h
new file mode 100644
index 0000000000..477f827286
--- /dev/null
+++ b/usr/src/cmd/bhyve/acpi.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/acpi.h 266125 2014-05-15 14:16:55Z jhb $
+ */
+
+#ifndef _ACPI_H_
+#define _ACPI_H_
+
+#define	SCI_INT			9
+
+#define	SMI_CMD			0xb2
+#define	BHYVE_ACPI_ENABLE	0xa0
+#define	BHYVE_ACPI_DISABLE	0xa1
+
+#define	PM1A_EVT_ADDR		0x400
+#define	PM1A_CNT_ADDR		0x404
+
+#define	IO_PMTMR		0x408	/* 4-byte i/o port for the timer */
+
+struct vmctx;
+
+int	acpi_build(struct vmctx *ctx, int ncpu);
+void	dsdt_line(const char *fmt, ...);
+void	dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
+void	dsdt_fixed_irq(uint8_t irq);
+void	dsdt_fixed_mem32(uint32_t base, uint32_t length);
+void	dsdt_indent(int levels);
+void	dsdt_unindent(int levels);
+void	sci_init(struct vmctx *ctx);
+
+#endif /* _ACPI_H_ */
diff --git a/usr/src/cmd/bhyve/ahci.h b/usr/src/cmd/bhyve/ahci.h
new file mode 100644
index 0000000000..1cf09adcbf
--- /dev/null
+++ b/usr/src/cmd/bhyve/ahci.h
@@ -0,0 +1,304 @@
+/*-
+ * Copyright (c) 1998 - 2008 Søren Schmidt <sos@FreeBSD.org>
+ * Copyright (c) 2009-2012 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer,
+ *    without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/ahci.h 256056 2013-10-04 18:31:38Z grehan $
+ */
+
+#ifndef _AHCI_H_
+#define	_AHCI_H_
+
+/* ATA register defines */
+#define ATA_DATA                        0       /* (RW) data */
+
+#define ATA_FEATURE                     1       /* (W) feature */
+#define         ATA_F_DMA               0x01    /* enable DMA */
+#define         ATA_F_OVL               0x02    /* enable overlap */
+
+#define ATA_COUNT                       2       /* (W) sector count */
+
+#define ATA_SECTOR                      3       /* (RW) sector # */
+#define ATA_CYL_LSB                     4       /* (RW) cylinder# LSB */
+#define ATA_CYL_MSB                     5       /* (RW) cylinder# MSB */
+#define ATA_DRIVE                       6       /* (W) Sector/Drive/Head */
+#define         ATA_D_LBA               0x40    /* use LBA addressing */
+#define         ATA_D_IBM               0xa0    /* 512 byte sectors, ECC */
+
+#define ATA_COMMAND                     7       /* (W) command */
+
+#define ATA_ERROR                       8       /* (R) error */
+#define         ATA_E_ILI               0x01    /* illegal length */
+#define         ATA_E_NM                0x02    /* no media */
+#define         ATA_E_ABORT             0x04    /* command aborted */
+#define         ATA_E_MCR               0x08    /* media change request */
+#define         ATA_E_IDNF              0x10    /* ID not found */
+#define         ATA_E_MC                0x20    /* media changed */
+#define         ATA_E_UNC               0x40    /* uncorrectable data */
+#define         ATA_E_ICRC              0x80    /* UDMA crc error */
+#define		ATA_E_ATAPI_SENSE_MASK	0xf0	/* ATAPI sense key mask */
+
+#define ATA_IREASON                     9       /* (R) interrupt reason */
+#define         ATA_I_CMD               0x01    /* cmd (1) | data (0) */
+#define         ATA_I_IN                0x02    /* read (1) | write (0) */
+#define         ATA_I_RELEASE           0x04    /* released bus (1) */
+#define         ATA_I_TAGMASK           0xf8    /* tag mask */
+
+#define ATA_STATUS                      10      /* (R) status */
+#define ATA_ALTSTAT                     11      /* (R) alternate status */
+#define         ATA_S_ERROR             0x01    /* error */
+#define         ATA_S_INDEX             0x02    /* index */
+#define         ATA_S_CORR              0x04    /* data corrected */
+#define         ATA_S_DRQ               0x08    /* data request */
+#define         ATA_S_DSC               0x10    /* drive seek completed */
+#define         ATA_S_SERVICE           0x10    /* drive needs service */
+#define         ATA_S_DWF               0x20    /* drive write fault */
+#define         ATA_S_DMA               0x20    /* DMA ready */
+#define         ATA_S_READY             0x40    /* drive ready */
+#define         ATA_S_BUSY              0x80    /* busy */
+
+#define ATA_CONTROL                     12      /* (W) control */
+#define         ATA_A_IDS               0x02    /* disable interrupts */
+#define         ATA_A_RESET             0x04    /* RESET controller */
+#define         ATA_A_4BIT              0x08    /* 4 head bits */
+#define         ATA_A_HOB               0x80    /* High Order Byte enable */
+
+/* SATA register defines */
+#define ATA_SSTATUS                     13
+#define         ATA_SS_DET_MASK         0x0000000f
+#define         ATA_SS_DET_NO_DEVICE    0x00000000
+#define         ATA_SS_DET_DEV_PRESENT  0x00000001
+#define         ATA_SS_DET_PHY_ONLINE   0x00000003
+#define         ATA_SS_DET_PHY_OFFLINE  0x00000004
+
+#define         ATA_SS_SPD_MASK         0x000000f0
+#define         ATA_SS_SPD_NO_SPEED     0x00000000
+#define         ATA_SS_SPD_GEN1         0x00000010
+#define         ATA_SS_SPD_GEN2         0x00000020
+#define         ATA_SS_SPD_GEN3         0x00000040
+
+#define         ATA_SS_IPM_MASK         0x00000f00
+#define         ATA_SS_IPM_NO_DEVICE    0x00000000
+#define         ATA_SS_IPM_ACTIVE       0x00000100
+#define         ATA_SS_IPM_PARTIAL      0x00000200
+#define         ATA_SS_IPM_SLUMBER      0x00000600
+
+#define ATA_SERROR                      14
+#define         ATA_SE_DATA_CORRECTED   0x00000001
+#define         ATA_SE_COMM_CORRECTED   0x00000002
+#define         ATA_SE_DATA_ERR         0x00000100
+#define         ATA_SE_COMM_ERR         0x00000200
+#define         ATA_SE_PROT_ERR         0x00000400
+#define         ATA_SE_HOST_ERR         0x00000800
+#define         ATA_SE_PHY_CHANGED      0x00010000
+#define         ATA_SE_PHY_IERROR       0x00020000
+#define         ATA_SE_COMM_WAKE        0x00040000
+#define         ATA_SE_DECODE_ERR       0x00080000
+#define         ATA_SE_PARITY_ERR       0x00100000
+#define         ATA_SE_CRC_ERR          0x00200000
+#define         ATA_SE_HANDSHAKE_ERR    0x00400000
+#define         ATA_SE_LINKSEQ_ERR      0x00800000
+#define         ATA_SE_TRANSPORT_ERR    0x01000000
+#define         ATA_SE_UNKNOWN_FIS      0x02000000
+#define         ATA_SE_EXCHANGED        0x04000000
+
+#define ATA_SCONTROL                    15
+#define         ATA_SC_DET_MASK         0x0000000f
+#define         ATA_SC_DET_IDLE         0x00000000
+#define         ATA_SC_DET_RESET        0x00000001
+#define         ATA_SC_DET_DISABLE      0x00000004
+
+#define         ATA_SC_SPD_MASK         0x000000f0
+#define         ATA_SC_SPD_NO_SPEED     0x00000000
+#define         ATA_SC_SPD_SPEED_GEN1   0x00000010
+#define         ATA_SC_SPD_SPEED_GEN2   0x00000020
+#define         ATA_SC_SPD_SPEED_GEN3   0x00000040
+
+#define         ATA_SC_IPM_MASK         0x00000f00
+#define         ATA_SC_IPM_NONE         0x00000000
+#define         ATA_SC_IPM_DIS_PARTIAL  0x00000100
+#define         ATA_SC_IPM_DIS_SLUMBER  0x00000200
+
+#define ATA_SACTIVE                     16
+
+#define AHCI_MAX_PORTS			32
+#define AHCI_MAX_SLOTS			32
+
+/* SATA AHCI v1.0 register defines */
+#define AHCI_CAP                    0x00
+#define		AHCI_CAP_NPMASK	0x0000001f
+#define		AHCI_CAP_SXS	0x00000020
+#define		AHCI_CAP_EMS	0x00000040
+#define		AHCI_CAP_CCCS	0x00000080
+#define		AHCI_CAP_NCS	0x00001F00
+#define		AHCI_CAP_NCS_SHIFT	8
+#define		AHCI_CAP_PSC	0x00002000
+#define		AHCI_CAP_SSC	0x00004000
+#define		AHCI_CAP_PMD	0x00008000
+#define		AHCI_CAP_FBSS	0x00010000
+#define		AHCI_CAP_SPM	0x00020000
+#define		AHCI_CAP_SAM	0x00080000
+#define		AHCI_CAP_ISS	0x00F00000
+#define		AHCI_CAP_ISS_SHIFT	20
+#define		AHCI_CAP_SCLO	0x01000000
+#define		AHCI_CAP_SAL	0x02000000
+#define		AHCI_CAP_SALP	0x04000000
+#define		AHCI_CAP_SSS	0x08000000
+#define		AHCI_CAP_SMPS	0x10000000
+#define		AHCI_CAP_SSNTF	0x20000000
+#define		AHCI_CAP_SNCQ	0x40000000
+#define		AHCI_CAP_64BIT	0x80000000
+
+#define AHCI_GHC                    0x04
+#define         AHCI_GHC_AE         0x80000000
+#define         AHCI_GHC_MRSM       0x00000004
+#define         AHCI_GHC_IE         0x00000002
+#define         AHCI_GHC_HR         0x00000001
+
+#define AHCI_IS                     0x08
+#define AHCI_PI                     0x0c
+#define AHCI_VS                     0x10
+
+#define AHCI_CCCC                   0x14
+#define		AHCI_CCCC_TV_MASK	0xffff0000
+#define		AHCI_CCCC_TV_SHIFT	16
+#define		AHCI_CCCC_CC_MASK	0x0000ff00
+#define		AHCI_CCCC_CC_SHIFT	8
+#define		AHCI_CCCC_INT_MASK	0x000000f8
+#define		AHCI_CCCC_INT_SHIFT	3
+#define		AHCI_CCCC_EN		0x00000001
+#define AHCI_CCCP                   0x18
+
+#define AHCI_EM_LOC                 0x1C
+#define AHCI_EM_CTL                 0x20
+#define 	AHCI_EM_MR              0x00000001
+#define 	AHCI_EM_TM              0x00000100
+#define 	AHCI_EM_RST             0x00000200
+#define 	AHCI_EM_LED             0x00010000
+#define 	AHCI_EM_SAFTE           0x00020000
+#define 	AHCI_EM_SES2            0x00040000
+#define 	AHCI_EM_SGPIO           0x00080000
+#define 	AHCI_EM_SMB             0x01000000
+#define 	AHCI_EM_XMT             0x02000000
+#define 	AHCI_EM_ALHD            0x04000000
+#define 	AHCI_EM_PM              0x08000000
+
+#define AHCI_CAP2                   0x24
+#define		AHCI_CAP2_BOH	0x00000001
+#define		AHCI_CAP2_NVMP	0x00000002
+#define		AHCI_CAP2_APST	0x00000004
+
+#define AHCI_OFFSET                 0x100
+#define AHCI_STEP                   0x80
+
+#define AHCI_P_CLB                  0x00
+#define AHCI_P_CLBU                 0x04
+#define AHCI_P_FB                   0x08
+#define AHCI_P_FBU                  0x0c
+#define AHCI_P_IS                   0x10
+#define AHCI_P_IE                   0x14
+#define         AHCI_P_IX_DHR       0x00000001
+#define         AHCI_P_IX_PS        0x00000002
+#define         AHCI_P_IX_DS        0x00000004
+#define         AHCI_P_IX_SDB       0x00000008
+#define         AHCI_P_IX_UF        0x00000010
+#define         AHCI_P_IX_DP        0x00000020
+#define         AHCI_P_IX_PC        0x00000040
+#define         AHCI_P_IX_MP        0x00000080
+
+#define         AHCI_P_IX_PRC       0x00400000
+#define         AHCI_P_IX_IPM       0x00800000
+#define         AHCI_P_IX_OF        0x01000000
+#define         AHCI_P_IX_INF       0x04000000
+#define         AHCI_P_IX_IF        0x08000000
+#define         AHCI_P_IX_HBD       0x10000000
+#define         AHCI_P_IX_HBF       0x20000000
+#define         AHCI_P_IX_TFE       0x40000000
+#define         AHCI_P_IX_CPD       0x80000000
+
+#define AHCI_P_CMD                  0x18
+#define         AHCI_P_CMD_ST       0x00000001
+#define         AHCI_P_CMD_SUD      0x00000002
+#define         AHCI_P_CMD_POD      0x00000004
+#define         AHCI_P_CMD_CLO      0x00000008
+#define         AHCI_P_CMD_FRE      0x00000010
+#define         AHCI_P_CMD_CCS_MASK 0x00001f00
+#define         AHCI_P_CMD_CCS_SHIFT 8
+#define         AHCI_P_CMD_ISS      0x00002000
+#define         AHCI_P_CMD_FR       0x00004000
+#define         AHCI_P_CMD_CR       0x00008000
+#define         AHCI_P_CMD_CPS      0x00010000
+#define         AHCI_P_CMD_PMA      0x00020000
+#define         AHCI_P_CMD_HPCP     0x00040000
+#define         AHCI_P_CMD_MPSP     0x00080000
+#define         AHCI_P_CMD_CPD      0x00100000
+#define         AHCI_P_CMD_ESP      0x00200000
+#define         AHCI_P_CMD_FBSCP    0x00400000
+#define         AHCI_P_CMD_APSTE    0x00800000
+#define         AHCI_P_CMD_ATAPI    0x01000000
+#define         AHCI_P_CMD_DLAE     0x02000000
+#define         AHCI_P_CMD_ALPE     0x04000000
+#define         AHCI_P_CMD_ASP      0x08000000
+#define         AHCI_P_CMD_ICC_MASK 0xf0000000
+#define         AHCI_P_CMD_NOOP     0x00000000
+#define         AHCI_P_CMD_ACTIVE   0x10000000
+#define         AHCI_P_CMD_PARTIAL  0x20000000
+#define         AHCI_P_CMD_SLUMBER  0x60000000
+
+#define AHCI_P_TFD                  0x20
+#define AHCI_P_SIG                  0x24
+#define AHCI_P_SSTS                 0x28
+#define AHCI_P_SCTL                 0x2c
+#define AHCI_P_SERR                 0x30
+#define AHCI_P_SACT                 0x34
+#define AHCI_P_CI                   0x38
+#define AHCI_P_SNTF                 0x3C
+#define AHCI_P_FBS                  0x40
+#define 	AHCI_P_FBS_EN       0x00000001
+#define 	AHCI_P_FBS_DEC      0x00000002
+#define 	AHCI_P_FBS_SDE      0x00000004
+#define 	AHCI_P_FBS_DEV      0x00000f00
+#define 	AHCI_P_FBS_DEV_SHIFT 8
+#define 	AHCI_P_FBS_ADO      0x0000f000
+#define 	AHCI_P_FBS_ADO_SHIFT 12
+#define 	AHCI_P_FBS_DWE      0x000f0000
+#define 	AHCI_P_FBS_DWE_SHIFT 16
+
+/* Just to be sure, if building as module: force MAXPHYS to at least 512K. */
+#if MAXPHYS < 512 * 1024
+#undef MAXPHYS
+#define MAXPHYS				(512 * 1024)	/* parenthesized for safe expansion */
+#endif
+/* Pessimistic prognosis on number of required S/G entries */
+#define AHCI_SG_ENTRIES	(roundup(btoc(MAXPHYS) + 1, 8))
+/* Command list. 32 commands. First, 1Kbyte aligned. */
+#define AHCI_CL_OFFSET              0
+#define AHCI_CL_SIZE                32
+/* Command tables. Up to 32 commands, Each, 128byte aligned. */
+#define AHCI_CT_OFFSET              (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS)
+#define AHCI_CT_SIZE                (128 + AHCI_SG_ENTRIES * 16)
+/* Total main work area. */
+#define AHCI_WORK_SIZE              (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots)
+
+#endif /* _AHCI_H_ */
diff --git a/usr/src/cmd/bhyve/amd64/Makefile b/usr/src/cmd/bhyve/amd64/Makefile
new file mode 100644
index 0000000000..13cdae6663
--- /dev/null
+++ b/usr/src/cmd/bhyve/amd64/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Pluribus Networks Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+
+install: all $(ROOTUSRSBINPROG64)
diff --git a/usr/src/cmd/bhyve/atkbdc.c b/usr/src/cmd/bhyve/atkbdc.c
new file mode 100644
index 0000000000..4d09d88266
--- /dev/null
+++ b/usr/src/cmd/bhyve/atkbdc.c
@@ -0,0 +1,576 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2015 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/atkbdc.c 267611 2014-06-18 17:20:02Z neel $");
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+
+#include <vmmapi.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "ps2kbd.h"
+#include "ps2mouse.h"
+
+#define	KBD_DATA_PORT		0x60
+
+#define	KBD_STS_CTL_PORT	0x64
+
+#define	KBDC_RESET		0xfe
+
+#define	KBD_DEV_IRQ		1
+#define	AUX_DEV_IRQ		12
+
+/* controller commands */
+#define	KBDC_SET_COMMAND_BYTE	0x60
+#define	KBDC_GET_COMMAND_BYTE	0x20
+#define	KBDC_DISABLE_AUX_PORT	0xa7
+#define	KBDC_ENABLE_AUX_PORT	0xa8
+#define	KBDC_TEST_AUX_PORT	0xa9
+#define	KBDC_TEST_CTRL		0xaa
+#define	KBDC_TEST_KBD_PORT	0xab
+#define	KBDC_DISABLE_KBD_PORT	0xad
+#define	KBDC_ENABLE_KBD_PORT	0xae
+#define	KBDC_READ_INPORT	0xc0
+#define	KBDC_READ_OUTPORT	0xd0
+#define	KBDC_WRITE_OUTPORT	0xd1
+#define	KBDC_WRITE_KBD_OUTBUF	0xd2
+#define	KBDC_WRITE_AUX_OUTBUF	0xd3
+#define	KBDC_WRITE_TO_AUX	0xd4
+
+/* controller command byte (set by KBDC_SET_COMMAND_BYTE) */
+#define	KBD_TRANSLATION		0x40
+#define	KBD_SYS_FLAG_BIT	0x04
+#define	KBD_DISABLE_KBD_PORT	0x10
+#define	KBD_DISABLE_AUX_PORT	0x20
+#define	KBD_ENABLE_AUX_INT	0x02
+#define	KBD_ENABLE_KBD_INT	0x01
+#define	KBD_KBD_CONTROL_BITS	(KBD_DISABLE_KBD_PORT | KBD_ENABLE_KBD_INT)
+#define	KBD_AUX_CONTROL_BITS	(KBD_DISABLE_AUX_PORT | KBD_ENABLE_AUX_INT)
+
+/* controller status bits */
+#define	KBDS_KBD_BUFFER_FULL	0x01
+#define KBDS_SYS_FLAG		0x04
+#define KBDS_CTRL_FLAG		0x08
+#define	KBDS_AUX_BUFFER_FULL	0x20
+
+/* controller output port */
+#define	KBDO_KBD_OUTFULL	0x10
+#define	KBDO_AUX_OUTFULL	0x20
+
+#define	RAMSZ			32
+
+struct kbd_dev {
+	bool	irq_active;	/* true while the IRQ is held asserted */
+	int	irq;		/* IRQ number passed to vm_isa_*_irq() */
+
+	uint8_t	buffer;		/* last byte queued for the guest */
+};
+
+struct aux_dev {
+	bool	irq_active;	/* true while the IRQ is held asserted */
+	int	irq;		/* IRQ number passed to vm_isa_*_irq() */
+
+	uint8_t	buffer;		/* last byte queued for the guest */
+};
+
+struct atkbdc_softc {
+	struct vmctx *ctx;
+	pthread_mutex_t mtx;	/* taken by both port handlers and atkbdc_event() */
+
+	struct ps2kbd_softc	*ps2kbd_sc;
+	struct ps2mouse_softc	*ps2mouse_sc;
+
+	uint8_t	status;		/* status register */
+	uint8_t	outport;	/* controller output port */
+	uint8_t	ram[RAMSZ];	/* byte0 = controller config */
+
+	uint32_t curcmd;	/* current command for next byte */
+
+	struct kbd_dev kbd;	/* keyboard output buffer and IRQ state */
+	struct aux_dev aux;	/* aux (mouse) output buffer and IRQ state */
+};
+
+/* Assert the keyboard IRQ if it is enabled and not already asserted. */
+static void
+atkbdc_assert_kbd_intr(struct atkbdc_softc *sc)
+{
+	if (sc->kbd.irq_active || (sc->ram[0] & KBD_ENABLE_KBD_INT) == 0)
+		return;
+	sc->kbd.irq_active = true;
+	vm_isa_assert_irq(sc->ctx, sc->kbd.irq, sc->kbd.irq);
+}
+
+static void
+atkbdc_deassert_kbd_intr(struct atkbdc_softc *sc)
+{
+	if (!sc->kbd.irq_active)	/* not asserted: nothing to undo */
+		return;
+	vm_isa_deassert_irq(sc->ctx, sc->kbd.irq, sc->kbd.irq);
+	sc->kbd.irq_active = false;
+}
+
+/* Assert the aux (mouse) IRQ if it is enabled and not already asserted. */
+static void
+atkbdc_assert_aux_intr(struct atkbdc_softc *sc)
+{
+	if (sc->aux.irq_active || (sc->ram[0] & KBD_ENABLE_AUX_INT) == 0)
+		return;
+	sc->aux.irq_active = true;
+	vm_isa_assert_irq(sc->ctx, sc->aux.irq, sc->aux.irq);
+}
+
+static void
+atkbdc_deassert_aux_intr(struct atkbdc_softc *sc)
+{
+	if (!sc->aux.irq_active)	/* not asserted: nothing to undo */
+		return;
+	vm_isa_deassert_irq(sc->ctx, sc->aux.irq, sc->aux.irq);
+	sc->aux.irq_active = false;
+}
+
+static void
+atkbdc_aux_queue_data(struct atkbdc_softc *sc, uint8_t val)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+	/* Buffer val as aux data, mark it pending and request the aux IRQ. */
+	sc->aux.buffer = val;
+	sc->status |= (KBDS_AUX_BUFFER_FULL | KBDS_KBD_BUFFER_FULL);
+	sc->outport |= KBDO_AUX_OUTFULL;
+	atkbdc_assert_aux_intr(sc);
+}
+
+static void
+atkbdc_kbd_queue_data(struct atkbdc_softc *sc, uint8_t val)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+	/* Buffer val as keyboard data, mark it pending and request the kbd IRQ. */
+	sc->kbd.buffer = val;
+	sc->status |= KBDS_KBD_BUFFER_FULL;
+	sc->outport |= KBDO_KBD_OUTFULL;
+	atkbdc_assert_kbd_intr(sc);
+}
+
+static void
+atkbdc_aux_read(struct atkbdc_softc *sc)
+{
+	uint8_t byte;
+
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+	/* Queue one mouse byte unless ps2mouse_read() reports -1. */
+	if (ps2mouse_read(sc->ps2mouse_sc, &byte) != -1)
+		atkbdc_aux_queue_data(sc, byte);
+}
+
+/* Queue one keyboard byte, translated when KBD_TRANSLATION is set. */
+static void
+atkbdc_kbd_read(struct atkbdc_softc *sc)
+{
+	static const uint8_t translation[256] = {
+		0xff, 0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x3c, 0x58,
+		0x64, 0x44, 0x42, 0x40, 0x3e, 0x0f, 0x29, 0x59,
+		0x65, 0x38, 0x2a, 0x70, 0x1d, 0x10, 0x02, 0x5a,
+		0x66, 0x71, 0x2c, 0x1f, 0x1e, 0x11, 0x03, 0x5b,
+		0x67, 0x2e, 0x2d, 0x20, 0x12, 0x05, 0x04, 0x5c,
+		0x68, 0x39, 0x2f, 0x21, 0x14, 0x13, 0x06, 0x5d,
+		0x69, 0x31, 0x30, 0x23, 0x22, 0x15, 0x07, 0x5e,
+		0x6a, 0x72, 0x32, 0x24, 0x16, 0x08, 0x09, 0x5f,
+		0x6b, 0x33, 0x25, 0x17, 0x18, 0x0b, 0x0a, 0x60,
+		0x6c, 0x34, 0x35, 0x26, 0x27, 0x19, 0x0c, 0x61,
+		0x6d, 0x73, 0x28, 0x74, 0x1a, 0x0d, 0x62, 0x6e,
+		0x3a, 0x36, 0x1c, 0x1b, 0x75, 0x2b, 0x63, 0x76,
+		0x55, 0x56, 0x77, 0x78, 0x79, 0x7a, 0x0e, 0x7b,
+		0x7c, 0x4f, 0x7d, 0x4b, 0x47, 0x7e, 0x7f, 0x6f,
+		0x52, 0x53, 0x50, 0x4c, 0x4d, 0x48, 0x01, 0x45,
+		0x57, 0x4e, 0x51, 0x4a, 0x37, 0x49, 0x46, 0x54,
+		0x80, 0x81, 0x82, 0x41, 0x54, 0x85, 0x86, 0x87,
+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+		0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+		0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+		0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+		0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+		0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+		0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+		0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+		0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+		0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+		0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+		0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+		0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+	};
+	uint8_t val;
+	uint8_t release = 0;
+
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	if (sc->ram[0] & KBD_TRANSLATION) {
+		while (ps2kbd_read(sc->ps2kbd_sc, &val) != -1) {
+			if (val == 0xf0) {
+				/* 0xf0 prefix: set bit 7 on the next code */
+				release = 0x80;
+				continue;
+			}
+			val = translation[val] | release;
+			atkbdc_kbd_queue_data(sc, val);
+			break;
+		}
+	} else {
+		if (ps2kbd_read(sc->ps2kbd_sc, &val) != -1)
+			atkbdc_kbd_queue_data(sc, val);
+	}
+}
+
+static void
+atkbdc_aux_poll(struct atkbdc_softc *sc)
+{
+	if (!(sc->outport & KBDO_AUX_OUTFULL))	/* aux output slot is empty */
+		atkbdc_aux_read(sc);
+}
+
+static void
+atkbdc_kbd_poll(struct atkbdc_softc *sc)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+	/* Fetch from the keyboard only while its output slot is empty. */
+	if (!(sc->outport & KBDO_KBD_OUTFULL))
+		atkbdc_kbd_read(sc);
+}
+
+static void
+atkbdc_poll(struct atkbdc_softc *sc)
+{
+	atkbdc_aux_poll(sc);	/* aux (mouse) is serviced first */
+	atkbdc_kbd_poll(sc);	/* then the keyboard */
+}
+
+/* Hand the guest one buffered byte (aux data first), then poll for more. */
+static void
+atkbdc_dequeue_data(struct atkbdc_softc *sc, uint8_t *buf)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	if ((sc->outport & KBDO_AUX_OUTFULL) != 0) {
+		*buf = sc->aux.buffer;
+		sc->status &= ~(KBDS_AUX_BUFFER_FULL | KBDS_KBD_BUFFER_FULL);
+		sc->outport &= ~KBDO_AUX_OUTFULL;
+		atkbdc_deassert_aux_intr(sc);
+	} else {
+		/* No aux byte pending: return the keyboard buffer instead. */
+		*buf = sc->kbd.buffer;
+		sc->status &= ~KBDS_KBD_BUFFER_FULL;
+		sc->outport &= ~KBDO_KBD_OUTFULL;
+		atkbdc_deassert_kbd_intr(sc);
+	}
+
+	/* An output slot was just released; try to refill from the devices. */
+	atkbdc_poll(sc);
+}
+
+/*
+ * I/O handler for KBD_DATA_PORT.  A read returns the next buffered byte.
+ * A write is consumed by the controller while KBDS_CTRL_FLAG is set (a
+ * command was just written), otherwise it is passed to the keyboard.
+ * Returns -1 for accesses that are not one byte wide, 0 otherwise.
+ */
+static int
+atkbdc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	struct atkbdc_softc *sc;
+	uint8_t buf;
+	int retval;
+
+	if (bytes != 1)
+		return (-1);
+
+	sc = arg;
+	retval = 0;
+
+	pthread_mutex_lock(&sc->mtx);
+	if (in) {
+		sc->curcmd = 0;
+		sc->status &= ~KBDS_CTRL_FLAG;
+
+		/* read device buffer; includes kbd cmd responses */
+		atkbdc_dequeue_data(sc, &buf);
+		*eax = buf;
+
+		pthread_mutex_unlock(&sc->mtx);
+		return (retval);
+	}
+
+	if (sc->status & KBDS_CTRL_FLAG) {
+		/* Parameter byte for the controller command held in curcmd. */
+		switch (sc->curcmd) {
+		case KBDC_SET_COMMAND_BYTE:
+			sc->ram[0] = *eax;
+			/* mirror the system flag; leave other status bits alone */
+			if (sc->ram[0] & KBD_SYS_FLAG_BIT)
+				sc->status |= KBDS_SYS_FLAG;
+			else
+				sc->status &= ~KBDS_SYS_FLAG;
+			if (sc->outport & KBDO_AUX_OUTFULL)
+				atkbdc_assert_aux_intr(sc);
+			else if (sc->outport & KBDO_KBD_OUTFULL)
+				atkbdc_assert_kbd_intr(sc);
+			break;
+		case KBDC_WRITE_OUTPORT:
+			sc->outport = *eax;
+			if (sc->outport & KBDO_AUX_OUTFULL)
+				sc->status |= (KBDS_AUX_BUFFER_FULL |
+					       KBDS_KBD_BUFFER_FULL);
+			if (sc->outport & KBDO_KBD_OUTFULL)
+				sc->status |= KBDS_KBD_BUFFER_FULL;
+			break;
+		case KBDC_WRITE_TO_AUX:
+			ps2mouse_write(sc->ps2mouse_sc, *eax);
+			atkbdc_poll(sc);
+			break;
+		case KBDC_WRITE_KBD_OUTBUF:
+			atkbdc_kbd_queue_data(sc, *eax);
+			break;
+		case KBDC_WRITE_AUX_OUTBUF:
+			atkbdc_aux_queue_data(sc, *eax);
+			break;
+		default:
+			/* write to particular RAM byte */
+			if (sc->curcmd >= 0x61 && sc->curcmd <= 0x7f) {
+				int byten;
+
+				byten = (sc->curcmd - 0x60) & 0x1f;
+				sc->ram[byten] = *eax & 0xff;
+			}
+			break;
+		}
+
+		sc->curcmd = 0;
+		sc->status &= ~KBDS_CTRL_FLAG;
+		pthread_mutex_unlock(&sc->mtx);
+		return (retval);
+	}
+
+	/* Data byte for the device. */
+	ps2kbd_write(sc->ps2kbd_sc, *eax);
+	atkbdc_poll(sc);
+
+	pthread_mutex_unlock(&sc->mtx);
+	return (retval);
+}
+
+
+/*
+ * KBD_STS_CTL_PORT handler: reads return the status register, writes run a
+ * command or latch it in curcmd for its parameter.  -1 unless bytes == 1.
+ */
+static int
+atkbdc_sts_ctl_handler(struct vmctx *ctx, int vcpu, int in, int port,
+    int bytes, uint32_t *eax, void *arg)
+{
+	struct atkbdc_softc *sc;
+	int	error, retval;
+
+	if (bytes != 1)
+		return (-1);
+
+	sc = arg;
+	retval = 0;
+
+	pthread_mutex_lock(&sc->mtx);
+	if (in) {
+		/* read status register */
+		*eax = sc->status;
+		pthread_mutex_unlock(&sc->mtx);
+		return (retval);
+	}
+
+	sc->curcmd = 0;
+	sc->status |= KBDS_CTRL_FLAG;
+
+	switch (*eax) {
+	case KBDC_GET_COMMAND_BYTE:
+		atkbdc_kbd_queue_data(sc, sc->ram[0]);
+		break;
+	case KBDC_TEST_CTRL:
+		atkbdc_kbd_queue_data(sc, 0x55);
+		break;
+	case KBDC_TEST_AUX_PORT:
+	case KBDC_TEST_KBD_PORT:
+	case KBDC_READ_INPORT:
+		atkbdc_kbd_queue_data(sc, 0);
+		break;
+	case KBDC_READ_OUTPORT:
+		atkbdc_kbd_queue_data(sc, sc->outport);
+		break;
+	case KBDC_SET_COMMAND_BYTE:
+	case KBDC_WRITE_OUTPORT:
+	case KBDC_WRITE_KBD_OUTBUF:
+	case KBDC_WRITE_AUX_OUTBUF:
+	case KBDC_WRITE_TO_AUX:
+		sc->curcmd = *eax;
+		break;
+	case KBDC_DISABLE_KBD_PORT:
+		sc->ram[0] |= KBD_DISABLE_KBD_PORT;
+		break;
+	case KBDC_ENABLE_KBD_PORT:
+		sc->ram[0] &= ~KBD_DISABLE_KBD_PORT;
+		atkbdc_poll(sc);
+		break;
+	case KBDC_DISABLE_AUX_PORT:
+		sc->ram[0] |= KBD_DISABLE_AUX_PORT;
+		break;
+	case KBDC_ENABLE_AUX_PORT:
+		sc->ram[0] &= ~KBD_DISABLE_AUX_PORT;
+		break;
+	case KBDC_RESET:		/* Pulse "reset" line */
+#ifdef	__FreeBSD__
+		error = vm_suspend(ctx, VM_SUSPEND_RESET);
+		assert(error == 0 || errno == EALREADY);
+#else
+		exit(0);
+#endif
+		break;
+	default:
+		if (*eax >= 0x21 && *eax <= 0x3f) {
+			/* read "byte N" from RAM */
+			int	byten = (*eax - 0x20) & 0x1f;
+			atkbdc_kbd_queue_data(sc, sc->ram[byten]);
+		} else if (*eax >= 0x61 && *eax <= 0x7f) {
+			/* write "byte N": latch so the data port stores it */
+			sc->curcmd = *eax;
+		}
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (retval);
+}
+
+void
+atkbdc_event(struct atkbdc_softc *sc)
+{
+	pthread_mutex_lock(&sc->mtx);	/* atkbdc_poll() callees assert ownership */
+	atkbdc_poll(sc);
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/* Create the controller: register both I/O ports, reserve IRQs, init PS/2. */
+void
+atkbdc_init(struct vmctx *ctx)
+{
+	struct inout_port iop;
+	struct atkbdc_softc *sc;
+	int error;
+
+	sc = calloc(1, sizeof(struct atkbdc_softc));
+	assert(sc != NULL);	/* same assert-on-failure policy as below */
+	sc->ctx = ctx;
+	pthread_mutex_init(&sc->mtx, NULL);
+
+	bzero(&iop, sizeof(struct inout_port));
+	iop.name = "atkdbc";
+	iop.port = KBD_STS_CTL_PORT;
+	iop.size = 1;
+	iop.flags = IOPORT_F_INOUT;
+	iop.handler = atkbdc_sts_ctl_handler;
+	iop.arg = sc;
+
+	error = register_inout(&iop);
+	assert(error == 0);
+
+	bzero(&iop, sizeof(struct inout_port));
+	iop.name = "atkdbc";
+	iop.port = KBD_DATA_PORT;
+	iop.size = 1;
+	iop.flags = IOPORT_F_INOUT;
+	iop.handler = atkbdc_data_handler;
+	iop.arg = sc;
+
+	error = register_inout(&iop);
+	assert(error == 0);
+
+	pci_irq_reserve(KBD_DEV_IRQ);
+	sc->kbd.irq = KBD_DEV_IRQ;
+	pci_irq_reserve(AUX_DEV_IRQ);
+	sc->aux.irq = AUX_DEV_IRQ;
+
+	sc->ps2kbd_sc = ps2kbd_init(sc);
+	sc->ps2mouse_sc = ps2mouse_init(sc);
+}
+
+#ifdef	__FreeBSD__
+static void
+atkbdc_dsdt(void)
+{
+	/* Emit DSDT text for two devices: KBD (IRQ 1) and MOU (IRQ 12). */
+	dsdt_line("");
+	dsdt_line("Device (KBD)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0303\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(KBD_DATA_PORT, 1);
+	dsdt_fixed_ioport(KBD_STS_CTL_PORT, 1);
+	dsdt_fixed_irq(1);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+
+	dsdt_line("");
+	dsdt_line("Device (MOU)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0F13\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(KBD_DATA_PORT, 1);
+	dsdt_fixed_ioport(KBD_STS_CTL_PORT, 1);
+	dsdt_fixed_irq(12);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+}
+LPC_DSDT(atkbdc_dsdt);
+#endif
diff --git a/usr/src/cmd/bhyve/atkbdc.h b/usr/src/cmd/bhyve/atkbdc.h
new file mode 100644
index 0000000000..48b3a8b00c
--- /dev/null
+++ b/usr/src/cmd/bhyve/atkbdc.h
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ATKBDC_H_
+#define _ATKBDC_H_
+
+/* Opaque to users of this header; only pointers are passed around. */
+struct atkbdc_softc;
+struct vmctx;
+
+/* Create the emulated AT keyboard controller for the given VM context. */
+void atkbdc_init(struct vmctx *ctx);
+/*
+ * Notify the controller that one of its devices has pending work.
+ * NOTE(review): intended callers are presumably the PS/2 keyboard and
+ * mouse emulations -- confirm.
+ */
+void atkbdc_event(struct atkbdc_softc *sc);
+
+#endif /* _ATKBDC_H_ */
diff --git a/usr/src/cmd/bhyve/bhyve_sol_glue.c b/usr/src/cmd/bhyve/bhyve_sol_glue.c
new file mode 100644
index 0000000000..633faacc5f
--- /dev/null
+++ b/usr/src/cmd/bhyve/bhyve_sol_glue.c
@@ -0,0 +1,86 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/uio.h>
+
+#include <errno.h>
+#include <termios.h>
+#include <unistd.h>
+
+/*
+ * Convert an already-populated termios structure to "raw" mode: input is
+ * delivered one character at a time, nothing is interpreted or echoed,
+ * and the data path is a full 8 bits wide.
+ */
+void
+cfmakeraw(struct termios *t)
+{
+	/* Input: no break/parity handling, no CR/NL mapping, no flow ctl. */
+	t->c_iflag &= ~(IMAXBEL | IXOFF | INPCK | BRKINT | PARMRK | ISTRIP |
+	    INLCR | IGNCR | ICRNL | IXON | IGNPAR);
+	t->c_iflag |= IGNBRK;
+
+	/* Output: no post-processing. */
+	t->c_oflag &= ~OPOST;
+
+	/* Local: no echo, no canonical mode, no signal characters. */
+	t->c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL | ICANON | ISIG |
+	    IEXTEN | NOFLSH | TOSTOP | PENDIN);
+
+	/* Control: 8 data bits, no parity, receiver enabled. */
+	t->c_cflag &= ~(CSIZE | PARENB);
+	t->c_cflag |= CS8 | CREAD;
+
+	/* read() returns as soon as a single byte is available. */
+	t->c_cc[VTIME] = 0;
+	t->c_cc[VMIN] = 1;
+}
+
+/*
+ * Emulation of preadv(): scatter-read from 'd' at absolute 'offset'
+ * without changing the descriptor's file position as seen by the caller.
+ *
+ * This is built from lseek() + readv() + lseek(), so the descriptor's
+ * offset really does move for the duration of the call; other users of
+ * the same open file can observe or disturb it.
+ *
+ * Returns the number of bytes read, or -1 with errno set.  The original
+ * position is put back even when readv() fails, and in that case errno
+ * reports the readv() failure rather than anything the restoring lseek()
+ * may have set.
+ */
+ssize_t
+preadv(int d, const struct iovec *iov, int iovcnt, off_t offset)
+{
+	off_t		old_offset;
+	ssize_t		n;
+	int		saved_errno;
+
+	old_offset = lseek(d, (off_t)0, SEEK_CUR);
+	if (old_offset == -1)
+		return (-1);
+
+	/* Nothing has moved yet if this fails, so no restore is needed. */
+	if (lseek(d, offset, SEEK_SET) == -1)
+		return (-1);
+
+	n = readv(d, iov, iovcnt);
+	saved_errno = errno;
+
+	/* Always try to put the position back, success or failure. */
+	if (lseek(d, old_offset, SEEK_SET) == -1 && n != -1)
+		return (-1);
+
+	if (n == -1)
+		errno = saved_errno;
+
+	return (n);
+}
+
+/*
+ * Emulation of pwritev(): gather-write to 'd' at absolute 'offset'
+ * without changing the descriptor's file position as seen by the caller.
+ *
+ * This is built from lseek() + writev() + lseek(), so the descriptor's
+ * offset really does move for the duration of the call; other users of
+ * the same open file can observe or disturb it.
+ *
+ * Returns the number of bytes written, or -1 with errno set.  The
+ * original position is put back even when writev() fails, and in that
+ * case errno reports the writev() failure rather than anything the
+ * restoring lseek() may have set.
+ */
+ssize_t
+pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset)
+{
+	off_t		old_offset;
+	ssize_t		n;
+	int		saved_errno;
+
+	old_offset = lseek(d, (off_t)0, SEEK_CUR);
+	if (old_offset == -1)
+		return (-1);
+
+	/* Nothing has moved yet if this fails, so no restore is needed. */
+	if (lseek(d, offset, SEEK_SET) == -1)
+		return (-1);
+
+	n = writev(d, iov, iovcnt);
+	saved_errno = errno;
+
+	/* Always try to put the position back, success or failure. */
+	if (lseek(d, old_offset, SEEK_SET) == -1 && n != -1)
+		return (-1);
+
+	if (n == -1)
+		errno = saved_errno;
+
+	return (n);
+}
diff --git a/usr/src/cmd/bhyve/bhyvegc.c b/usr/src/cmd/bhyve/bhyvegc.c
new file mode 100644
index 0000000000..7a13c4c83f
--- /dev/null
+++ b/usr/src/cmd/bhyve/bhyvegc.c
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "bhyvegc.h"
+
+/*
+ * Graphics console handle.  Private to this file; callers only ever hold
+ * a pointer and reach the pixels through bhyvegc_get_image().
+ */
+struct bhyvegc {
+	struct bhyvegc_image	*gc_image;	/* current image and its size */
+};
+
+/*
+ * Allocate a graphics console holding a zero-filled image of
+ * width x height 32-bit pixels.
+ *
+ * Returns the new console, or NULL if a dimension is negative or memory
+ * cannot be allocated.  Nothing is leaked on failure.
+ */
+struct bhyvegc *
+bhyvegc_init(int width, int height)
+{
+	struct bhyvegc *gc;
+	struct bhyvegc_image *gc_image;
+	size_t npixels;
+
+	if (width < 0 || height < 0)
+		return (NULL);
+
+	/* Multiply as size_t so large dimensions cannot overflow an int. */
+	npixels = (size_t)width * (size_t)height;
+
+	gc = calloc(1, sizeof (struct bhyvegc));
+	gc_image = calloc(1, sizeof(struct bhyvegc_image));
+	if (gc == NULL || gc_image == NULL) {
+		free(gc_image);
+		free(gc);
+		return (NULL);
+	}
+
+	gc_image->width = width;
+	gc_image->height = height;
+	gc_image->data = calloc(npixels, sizeof (uint32_t));
+	/* A zero-sized request may legitimately come back as NULL. */
+	if (gc_image->data == NULL && npixels != 0) {
+		free(gc_image);
+		free(gc);
+		return (NULL);
+	}
+
+	gc->gc_image = gc_image;
+
+	return (gc);
+}
+
+/*
+ * Resize the console image to width x height pixels and clear it to
+ * zero.
+ *
+ * If a dimension is negative, the byte count would overflow, or the
+ * reallocation fails, the console is left exactly as it was: the old
+ * buffer and the old dimensions stay valid.
+ */
+void
+bhyvegc_resize(struct bhyvegc *gc, int width, int height)
+{
+	struct bhyvegc_image *gc_image;
+	uint32_t *data;
+	size_t npixels, nbytes;
+
+	gc_image = gc->gc_image;
+
+	if (width < 0 || height < 0)
+		return;
+
+	npixels = (size_t)width * (size_t)height;
+	if (npixels > (size_t)-1 / sizeof (uint32_t))
+		return;
+	nbytes = npixels * sizeof (uint32_t);
+
+	/*
+	 * Reallocate into a temporary so that a failure neither leaks the
+	 * old buffer nor leaves the image advertising a size it does not
+	 * have.
+	 */
+	data = realloc(gc_image->data, nbytes);
+	if (data == NULL && nbytes != 0)
+		return;
+
+	gc_image->width = width;
+	gc_image->height = height;
+	gc_image->data = data;
+	if (nbytes != 0)
+		memset(data, 0, nbytes);
+}
+
+/*
+ * Hand back the console's image descriptor.  The descriptor is the one
+ * owned by 'gc', not a copy.
+ */
+struct bhyvegc_image *
+bhyvegc_get_image(struct bhyvegc *gc)
+{
+	struct bhyvegc_image *image = gc->gc_image;
+
+	return (image);
+}
diff --git a/usr/src/cmd/bhyve/bhyvegc.h b/usr/src/cmd/bhyve/bhyvegc.h
new file mode 100644
index 0000000000..19648f98af
--- /dev/null
+++ b/usr/src/cmd/bhyve/bhyvegc.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BHYVEGC_H_
+#define	_BHYVEGC_H_
+
+/* Opaque graphics console handle; defined in bhyvegc.c. */
+struct bhyvegc;
+
+/*
+ * Image backing a graphics console.  bhyvegc.c allocates 'data' as
+ * width * height uint32_t elements.
+ * NOTE(review): this header uses uint32_t without including a header
+ * that defines it; includers must provide one first.
+ */
+struct bhyvegc_image {
+	int		width;
+	int		height;
+	uint32_t	*data;
+};
+
+/* Create a console with a zeroed width x height image. */
+struct bhyvegc *bhyvegc_init(int width, int height);
+/* Change the image dimensions; the pixel contents are cleared. */
+void bhyvegc_resize(struct bhyvegc *gc, int width, int height);
+/* Return the console's image descriptor (not a copy). */
+struct bhyvegc_image *bhyvegc_get_image(struct bhyvegc *gc);
+
+#endif /* _BHYVEGC_H_ */
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
new file mode 100644
index 0000000000..b985a2286e
--- /dev/null
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -0,0 +1,820 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/bhyverun.c 281611 2015-04-16 20:11:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/bhyverun.c 281611 2015-04-16 20:11:49Z neel $");
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+#include <machine/segments.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "atkbdc.h"
+#include "console.h"
+#include "inout.h"
+#include "dbgport.h"
+#include "ioapic.h"
+#include "mem.h"
+#ifdef	__FreeBSD__
+#include "mevent.h"
+#endif
+#include "mptbl.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "smbiostbl.h"
+#include "xmsr.h"
+#include "spinup_ap.h"
+#include "rfb.h"
+#include "rtc.h"
+#include "vga.h"
+
+#define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */
+
+/* Byte-count helpers; main() uses MB for the default guest memory size. */
+#define MB		(1024UL * 1024)
+#define GB		(1024UL * MB)
+
+/*
+ * Signature shared by all VM-exit handlers.  The return value is one of
+ * the VMEXIT_* codes, which vm_loop() acts on.
+ */
+typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
+
+/* Name of the VM being run; taken from the command line by main(). */
+char *vmname;
+
+/* Number of guest vcpus (-c) and the optional guest UUID string (-U). */
+int guest_ncpus;
+char *guest_uuid_str;
+
+/* Set by -H / -P; read back through fbsdrun_vmexit_on_{hlt,pause}(). */
+static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
+/* Cleared by -W; read back through fbsdrun_virtio_msix(). */
+static int virtio_msix = 1;
+static int x2apic_mode = 0;	/* default is xAPIC */
+
+/* Set by -e; passed to emulate_inout() by the in/out exit handler. */
+static int strictio;
+/* When non-zero, the MSR exit handlers call vm_inject_gp() on failure. */
+static int strictmsr = 1;
+
+#ifdef	__FreeBSD__
+static int acpi;
+#endif
+
+/* basename of argv[0], used in the usage message. */
+static char *progname;
+/* vcpu id of the bootstrap processor. */
+static const int BSP = 0;
+
+#ifndef	__FreeBSD__
+/*
+ * State main() uses to block until a bhyveconsole client connects:
+ * when bcons_wait is set it waits on bcons_wait_done, under
+ * bcons_wait_lock, until bcons_connected becomes non-zero.
+ */
+int bcons_wait = 0;
+int bcons_connected = 0;
+pthread_mutex_t bcons_wait_lock = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t bcons_wait_done = PTHREAD_COND_INITIALIZER;
+#endif
+
+/* Set of vcpus added so far; updated by fbsdrun_addcpu(). */
+static cpuset_t cpumask;
+
+static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
+
+/* Per-vcpu exit information, filled in by vm_run() in vm_loop(). */
+static struct vm_exit vmexit[VM_MAXCPU];
+
+/*
+ * Event counters.  The vmexit_bogus, vmexit_hlt, vmexit_pause,
+ * vmexit_mtrap and vmexit_inst_emul fields are bumped by the exit
+ * handlers of the same name in this file.
+ * NOTE(review): no updates to the remaining fields appear in this file;
+ * they may be unused leftovers -- confirm before relying on them.
+ */
+struct bhyvestats {
+        uint64_t        vmexit_bogus;
+        uint64_t        vmexit_bogus_switch;
+        uint64_t        vmexit_hlt;
+        uint64_t        vmexit_pause;
+        uint64_t        vmexit_mtrap;
+        uint64_t        vmexit_inst_emul;
+        uint64_t        cpu_switch_rotate;
+        uint64_t        cpu_switch_direct;
+        int             io_reset;
+} stats;
+
+/*
+ * Per-vcpu thread bookkeeping, indexed by vcpu id.  fbsdrun_addcpu()
+ * fills in an entry and passes its address to the new vcpu thread.
+ */
+struct mt_vmm_info {
+	pthread_t	mt_thr;		/* thread running this vcpu */
+	struct vmctx	*mt_ctx;	/* VM context handed to vm_loop() */
+	int		mt_vcpu;	
+} mt_vmm_info[VM_MAXCPU];
+
+/*
+ * Print the command-line synopsis to stderr and terminate the process
+ * with exit status 'code'.  Never returns.
+ *
+ * The FreeBSD and non-FreeBSD builds print different option lists.
+ * NOTE(review): the non-FreeBSD text does not mention every option that
+ * main()'s getopt string accepts (e.g. -b, -r, -x) -- confirm whether
+ * that is intentional.
+ */
+static void
+usage(int code)
+{
+
+#ifdef	__FreeBSD__
+        fprintf(stderr,
+                "Usage: %s [-aehwAHIPW] [-g <gdb port>] [-s <pci>] [-c vcpus]\n"
+		"       %*s [-p vcpu:hostcpu] [-m mem] [-l <lpc>] <vm>\n"
+		"       -a: local apic is in xAPIC mode (deprecated)\n"
+		"       -A: create an ACPI table\n"
+		"       -g: gdb port\n"
+		"       -c: # cpus (default 1)\n"
+		"       -C: include guest memory in core file\n"
+		"       -p: pin 'vcpu' to 'hostcpu'\n"
+		"       -H: vmexit from the guest on hlt\n"
+		"       -P: vmexit from the guest on pause\n"
+		"       -W: force virtio to use single-vector MSI\n"
+		"       -e: exit on unhandled I/O access\n"
+		"       -h: help\n"
+		"       -s: <slot,driver,configinfo> PCI slot config\n"
+		"       -l: LPC device configuration\n"
+		"       -m: memory size in MB\n"
+		"       -w: ignore unimplemented MSRs\n"
+		"       -x: local apic is in x2APIC mode\n"
+		"       -Y: disable MPtable generation\n"
+		"       -U: uuid\n",
+		/* "%*s" with "" pads line two to the width of progname. */
+		progname, (int)strlen(progname), "");
+#else
+        fprintf(stderr,
+                "Usage: %s [-ehwHPW] [-s <pci>] [-c vcpus]\n"
+		"       %*s [-p vcpu:hostcpu] [-m mem] [-l <lpc>] <vm>\n"
+		"       -c: # cpus (default 1)\n"
+		"       -H: vmexit from the guest on hlt\n"
+		"       -P: vmexit from the guest on pause\n"
+		"       -W: force virtio to use single-vector MSI\n"
+		"       -e: exit on unhandled I/O access\n"
+		"       -h: help\n"
+		"       -s: <slot,driver,configinfo> PCI slot config\n"
+		"       -l: LPC device configuration\n"
+		"       -m: memory size in MB\n"
+		"       -w: ignore unimplemented MSRs\n"
+		"       -Y: disable MPtable generation\n"
+		"       -U: uuid\n",
+		/* "%*s" with "" pads line two to the width of progname. */
+		progname, (int)strlen(progname), "");
+#endif
+
+	exit(code);
+}
+
+/*
+ * Inject exception 'vector' into 'vcpu', with the restart_instruction
+ * argument of vm_inject_exception() set.  'arg' is the guest's
+ * struct vmctx.  A failed injection trips an assert.
+ */
+void
+vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
+    int errcode)
+{
+	struct vmctx *vm = arg;
+	const int restart = 1;
+	int rc;
+
+	rc = vm_inject_exception(vm, vcpu, vector, errcode_valid, errcode,
+	    restart);
+	assert(rc == 0);
+}
+
+/*
+ * Translate a guest physical range into a host pointer by way of
+ * vm_map_gpa(); whatever that call returns is passed straight back.
+ */
+void *
+paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
+{
+	void *hostaddr;
+
+	hostaddr = vm_map_gpa(ctx, gaddr, len);
+	return (hostaddr);
+}
+
+/* Non-zero when exiting on guest PAUSE was requested (-P). */
+int
+fbsdrun_vmexit_on_pause(void)
+{
+	const int enabled = guest_vmexit_on_pause;
+
+	return (enabled);
+}
+
+/* Non-zero when exiting on guest HLT was requested (-H). */
+int
+fbsdrun_vmexit_on_hlt(void)
+{
+	const int enabled = guest_vmexit_on_hlt;
+
+	return (enabled);
+}
+
+/* Non-zero unless -W was given on the command line. */
+int
+fbsdrun_virtio_msix(void)
+{
+	const int enabled = virtio_msix;
+
+	return (enabled);
+}
+
+/*
+ * pthread entry point for a vcpu.  'param' is the vcpu's mt_vmm_info
+ * entry.  The thread is named "vcpu N" and then enters vm_loop(); should
+ * the loop ever come back, the whole process exits with status 1.
+ */
+static void *
+fbsdrun_start_thread(void *param)
+{
+	struct mt_vmm_info *info = param;
+	const int cpuid = info->mt_vcpu;
+	char label[MAXCOMLEN + 1];
+
+	snprintf(label, sizeof(label), "vcpu %d", cpuid);
+	pthread_set_name_np(info->mt_thr, label);
+
+	vm_loop(info->mt_ctx, cpuid, vmexit[cpuid].rip);
+
+	/* vm_loop() only returns after vm_run() has failed. */
+	exit(1);
+	return (NULL);
+}
+
+/*
+ * Bring 'newcpu' online: activate it, record it in cpumask, prime its
+ * vm_exit slot so execution begins at 'rip', and spawn its thread.
+ * Only the BSP may add cpus.  Every failure trips an assert.
+ */
+void
+fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
+{
+	struct mt_vmm_info *slot = &mt_vmm_info[newcpu];
+	struct vm_exit *vme = &vmexit[newcpu];
+	int rc;
+
+	assert(fromcpu == BSP);
+
+	/*
+	 * Activation has to happen here, on the requesting cpu's thread.
+	 * Deferring vm_activate_cpu() until the new pthread runs would
+	 * leave vmm.ko out of sync with bhyve and open a race against
+	 * vm_suspend().
+	 */
+	rc = vm_activate_cpu(ctx, newcpu);
+	assert(rc == 0);
+
+	CPU_SET_ATOMIC(newcpu, &cpumask);
+
+	/* The new vcpu starts at 'rip' with no instruction to skip. */
+	vme->rip = rip;
+	vme->inst_length = 0;
+
+	slot->mt_ctx = ctx;
+	slot->mt_vcpu = newcpu;
+
+	rc = pthread_create(&slot->mt_thr, NULL, fbsdrun_start_thread, slot);
+	assert(rc == 0);
+}
+
+/*
+ * Hook for guest-to-host notifications written to GUEST_NIO_PORT.
+ * Currently a stub: it ignores its arguments and resumes the guest.
+ */
+static int
+vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
+    uint32_t eax)
+{
+#if BHYVE_DEBUG
+	/*
+	 * Guest-driven debugging code belongs here.
+	 */
+#endif
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * VM_EXITCODE_INOUT handler.
+ *
+ * Writes to GUEST_NIO_PORT are routed to vmexit_handle_notify(); every
+ * other access goes through emulate_inout().  An access that cannot be
+ * emulated is reported on stderr and aborts the VM.
+ *
+ * Returns VMEXIT_CONTINUE on success, VMEXIT_ABORT on an unhandled
+ * access, or whatever vmexit_handle_notify() returns.
+ */
+static int
+vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	int error;
+	int bytes, port, in, out;
+	int vcpu;
+
+	vcpu = *pvcpu;
+
+	port = vme->u.inout.port;
+	bytes = vme->u.inout.bytes;
+	in = vme->u.inout.in;
+	out = !in;
+
+	/* Extra-special case of host notifications */
+	if (out && port == GUEST_NIO_PORT) {
+		error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
+		return (error);
+	}
+
+	error = emulate_inout(ctx, vcpu, vme, strictio);
+	if (error) {
+		/*
+		 * Report the RIP of this exit.  The bare name 'vmexit' is
+		 * the global per-vcpu array here, so 'vmexit->rip' would
+		 * always print vcpu 0's RIP.
+		 */
+		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
+		    in ? "in" : "out",
+		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
+		    port, vme->rip);
+		return (VMEXIT_ABORT);
+	} else {
+		return (VMEXIT_CONTINUE);
+	}
+}
+
+/*
+ * VM_EXITCODE_RDMSR handler.  Emulates the read and loads the 64-bit
+ * result into the guest's RAX (low half) and RDX (high half).
+ *
+ * If emulation fails the access is logged; with strictmsr set the guest
+ * then gets vm_inject_gp(), otherwise the read yields zero.
+ * Always resumes the guest.
+ */
+static int
+vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	uint64_t msrval = 0;
+	uint32_t lo, hi;
+	int rc;
+
+	rc = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &msrval);
+	if (rc != 0) {
+		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
+		    vme->u.msr.code, *pvcpu);
+		if (strictmsr) {
+			vm_inject_gp(ctx, *pvcpu);
+			return (VMEXIT_CONTINUE);
+		}
+	}
+
+	lo = msrval;
+	hi = msrval >> 32;
+
+	rc = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, lo);
+	assert(rc == 0);
+
+	rc = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, hi);
+	assert(rc == 0);
+
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * VM_EXITCODE_WRMSR handler.  Emulates the write; on failure the access
+ * is logged and, with strictmsr set, the guest gets vm_inject_gp().
+ * Always resumes the guest.
+ */
+static int
+vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	int rc;
+
+	rc = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
+	if (rc == 0)
+		return (VMEXIT_CONTINUE);
+
+	fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
+	    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
+	if (strictmsr)
+		vm_inject_gp(ctx, *pvcpu);
+
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * VM_EXITCODE_SPINUP_AP handler.  Passes the requested vcpu and start
+ * RIP on to spinup_ap() and resumes the guest; spinup_ap()'s return
+ * value is not used.
+ */
+static int
+vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	(void) spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu,
+	    vme->u.spinup_ap.rip);
+
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * VM_EXITCODE_VMX handler.  Dumps the VMX exit details to stderr and
+ * aborts the VM.
+ */
+static int
+vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	FILE *out = stderr;
+
+	fprintf(out, "vm exit[%d]\n", *pvcpu);
+	fprintf(out, "\treason\t\tVMX\n");
+	fprintf(out, "\trip\t\t0x%016lx\n", vmexit->rip);
+	fprintf(out, "\tinst_length\t%d\n", vmexit->inst_length);
+	fprintf(out, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
+	fprintf(out, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
+	fprintf(out, "\tqualification\t0x%016lx\n",
+	    vmexit->u.vmx.exit_qualification);
+	fprintf(out, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
+	fprintf(out, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
+
+	return (VMEXIT_ABORT);
+}
+
+/* VM_EXITCODE_BOGUS handler: count the exit and resume the guest. */
+static int
+vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	stats.vmexit_bogus += 1;
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * HLT exit handler: count the exit and resume the guest at the next
+ * instruction.  The exit itself is the point -- it is how the guest
+ * stays friendly with the host scheduler.
+ */
+static int
+vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	stats.vmexit_hlt += 1;
+	return (VMEXIT_CONTINUE);
+}
+
+/* PAUSE exit handler: count the exit and resume the guest. */
+static int
+vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	stats.vmexit_pause += 1;
+	return (VMEXIT_CONTINUE);
+}
+
+/* VM_EXITCODE_MTRAP handler: count the exit and resume the guest. */
+static int
+vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	stats.vmexit_mtrap += 1;
+	return (VMEXIT_CONTINUE);
+}
+
+/*
+ * VM_EXITCODE_INST_EMUL handler.  Hands the decoded instruction to
+ * emulate_mem().  On failure the instruction bytes and RIP are printed
+ * (preceded by the guest physical address when the error is ESRCH) and
+ * the VM is aborted.
+ */
+static int
+vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	struct vie *vie = &vmexit->u.inst_emul.vie;
+	const char *sep;
+	int rc, idx;
+
+	stats.vmexit_inst_emul++;
+
+	rc = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie,
+	    &vmexit->u.inst_emul.paging);
+	if (rc == 0)
+		return (VMEXIT_CONTINUE);
+
+	if (rc == ESRCH) {
+		fprintf(stderr, "Unhandled memory access to 0x%lx\n",
+		    vmexit->u.inst_emul.gpa);
+	}
+
+	fprintf(stderr, "Failed to emulate instruction [");
+	for (idx = 0; idx < vie->num_valid; idx++) {
+		/* Bytes are space separated; no trailing space. */
+		sep = (idx == vie->num_valid - 1) ? "" : " ";
+		fprintf(stderr, "0x%02x%s", vie->inst[idx], sep);
+	}
+	fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
+
+	return (VMEXIT_ABORT);
+}
+
+/*
+ * Exit-code dispatch table consulted by vm_loop().  Entries left NULL
+ * are treated there as unexpected exits.  The HLT and PAUSE entries are
+ * filled in at run time by fbsdrun_set_capabilities() when those exits
+ * are requested.
+ */
+static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
+	[VM_EXITCODE_INOUT]  = vmexit_inout,
+	[VM_EXITCODE_VMX]    = vmexit_vmx,
+	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
+	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
+	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
+	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
+	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+};
+
+/*
+ * Run 'vcpu' starting at 'startrip'.
+ *
+ * Repeatedly enters the guest with vm_run() and dispatches each exit
+ * through the handler table.  A handler returning VMEXIT_ABORT abort()s
+ * the process; any other non-continue result, or an exit code with no
+ * handler, exits with status 1.  The function returns only after
+ * vm_run() itself fails, having logged the error.
+ */
+static void
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
+{
+#ifdef	__FreeBSD__
+	cpuset_t mask;
+#endif
+	enum vm_exitcode code;
+	vmexit_handler_t fn;
+	int error, rc;
+
+#ifdef	__FreeBSD__
+	if (pincpu >= 0) {
+		CPU_ZERO(&mask);
+		CPU_SET(pincpu + vcpu, &mask);
+		error = pthread_setaffinity_np(pthread_self(),
+					       sizeof(mask), &mask);
+		assert(error == 0);
+	}
+#endif
+
+	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
+	assert(error == 0);
+
+	for (;;) {
+		error = vm_run(ctx, vcpu, &vmexit[vcpu]);
+		if (error != 0)
+			break;
+
+		/* Look the handler up, rejecting out-of-range codes. */
+		code = vmexit[vcpu].exitcode;
+		fn = (code < VM_EXITCODE_MAX) ? handler[code] : NULL;
+		if (fn == NULL) {
+			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
+			    code);
+			exit(1);
+		}
+
+		rc = fn(ctx, &vmexit[vcpu], &vcpu);
+		if (rc == VMEXIT_ABORT)
+			abort();
+		if (rc != VMEXIT_CONTINUE)
+			exit(1);
+	}
+	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
+}
+
+/*
+ * Upper bound on guest vcpus.  A guest may start more than one
+ * processor only when the UNRESTRICTED_GUEST capability can be queried
+ * successfully; otherwise it is limited to one.
+ */
+static int
+num_vcpus_allowed(struct vmctx *ctx)
+{
+	int capval, rc;
+
+	rc = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &capval);
+
+	return (rc == 0 ? VM_MAXCPU : 1);
+}
+
+/*
+ * Apply the command-line selected capabilities to 'cpu'.
+ *
+ * Enables HLT and/or PAUSE exiting when requested (installing the
+ * matching exit handler when 'cpu' is the BSP) and sets the local APIC
+ * to x2APIC or xAPIC mode.  A capability that cannot be queried, or a
+ * failure to set the APIC mode, terminates the process with status 1.
+ */
+void
+fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
+{
+	int rc, capval;
+
+	if (fbsdrun_vmexit_on_hlt()) {
+		rc = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &capval);
+		if (rc < 0) {
+			fprintf(stderr, "VM exit on HLT not supported\n");
+			exit(1);
+		}
+		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
+		if (cpu == BSP)
+			handler[VM_EXITCODE_HLT] = vmexit_hlt;
+	}
+
+	if (fbsdrun_vmexit_on_pause()) {
+		/* This mode cannot work without pause-exit support. */
+		rc = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &capval);
+		if (rc < 0) {
+			fprintf(stderr,
+			    "SMP mux requested, no pause support\n");
+			exit(1);
+		}
+		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
+		if (cpu == BSP)
+			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
+	}
+
+	rc = vm_set_x2apic_state(ctx, cpu,
+	    x2apic_mode ? X2APIC_ENABLED : X2APIC_DISABLED);
+	if (rc != 0) {
+		fprintf(stderr, "Unable to set x2apic state (%d)\n", rc);
+		exit(1);
+	}
+
+#ifdef	__FreeBSD__
+	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
+#endif
+}
+
+/*
+ * bhyve entry point.
+ *
+ * Parses the command line, opens the named VM, configures the BSP's
+ * capabilities and the guest memory, initializes the emulated devices
+ * and guest tables, and finally starts vcpu 0 on its own thread.
+ *
+ * Usage errors exit through usage(); setup failures print a diagnostic
+ * and exit(1), or trip an assert.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c, error, gdb_port, rfb_port, err, bvmcons;
+	int max_vcpus, mptgen;
+	struct vmctx *ctx;
+	uint64_t rip;
+	size_t memsize;
+
+	bvmcons = 0;
+	progname = basename(argv[0]);
+	gdb_port = 0;
+	rfb_port = -1;
+	guest_ncpus = 1;
+	memsize = 256 * MB;
+	mptgen = 1;
+
+#ifdef	__FreeBSD__
+	while ((c = getopt(argc, argv, "abehwxACHIPWYp:r:g:c:s:m:l:U:")) != -1) {
+#else
+	while ((c = getopt(argc, argv, "abehwxHIPWYr:c:s:m:l:U:")) != -1) {
+#endif
+		switch (c) {
+		case 'a':
+			x2apic_mode = 0;
+			break;
+#ifdef	__FreeBSD__
+		case 'A':
+			acpi = 1;
+			break;
+#endif
+		case 'b':
+			bvmcons = 1;
+			break;
+#ifdef	__FreeBSD__
+		case 'p':
+			pincpu = atoi(optarg);
+			break;
+#endif
+		case 'r':
+			/* ":N" means display N, i.e. RFB_PORT + N. */
+			if (optarg[0] == ':')
+				rfb_port = atoi(optarg + 1) + RFB_PORT;
+			else
+				rfb_port = atoi(optarg);
+			break;
+		case 'c':
+			guest_ncpus = atoi(optarg);
+			break;
+#ifdef	__FreeBSD__
+		case 'g':
+			gdb_port = atoi(optarg);
+			break;
+#endif
+		case 'l':
+			if (lpc_device_parse(optarg) != 0) {
+				errx(EX_USAGE, "invalid lpc device "
+				    "configuration '%s'", optarg);
+			}
+			break;
+		case 's':
+			if (pci_parse_slot(optarg) != 0)
+				exit(1);
+			else
+				break;
+		case 'm':
+			error = vm_parse_memsize(optarg, &memsize);
+			if (error)
+				errx(EX_USAGE, "invalid memsize '%s'", optarg);
+			break;
+		case 'H':
+			guest_vmexit_on_hlt = 1;
+			break;
+		case 'I':
+			/*
+			 * The "-I" option was used to add an ioapic to the
+			 * virtual machine.
+			 *
+			 * An ioapic is now provided unconditionally for each
+			 * virtual machine and this option is now deprecated.
+			 */
+			break;
+		case 'P':
+			guest_vmexit_on_pause = 1;
+			break;
+		case 'e':
+			strictio = 1;
+			break;
+		case 'U':
+			guest_uuid_str = optarg;
+			break;
+		case 'w':
+			/*
+			 * Accepted by getopt and documented in usage(), but
+			 * previously unhandled, so "-w" was rejected as a
+			 * usage error.
+			 */
+			strictmsr = 0;
+			break;
+		case 'W':
+			virtio_msix = 0;
+			break;
+		case 'x':
+			x2apic_mode = 1;
+			break;
+		case 'Y':
+			/* As with -w: advertised but previously unhandled. */
+			mptgen = 0;
+			break;
+		case 'h':
+			usage(0);
+			/* usage() does not return */
+		default:
+			usage(1);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1)
+		usage(1);
+
+	vmname = argv[0];
+
+	ctx = vm_open(vmname);
+	if (ctx == NULL) {
+		perror("vm_open");
+		exit(1);
+	}
+
+	max_vcpus = num_vcpus_allowed(ctx);
+	if (guest_ncpus > max_vcpus) {
+		fprintf(stderr, "%d vCPUs requested but only %d available\n",
+			guest_ncpus, max_vcpus);
+		exit(1);
+	}
+
+	fbsdrun_set_capabilities(ctx, BSP);
+
+	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
+	if (err) {
+		fprintf(stderr, "Unable to setup memory (%d)\n", err);
+		exit(1);
+	}
+
+	error = init_msr();
+	if (error) {
+		fprintf(stderr, "init_msr error %d\n", error);
+		exit(1);
+	}
+
+	init_mem();
+	init_inout();
+	atkbdc_init(ctx);
+	pci_irq_init(ctx);
+	ioapic_init(ctx);
+
+	rtc_init(ctx);
+
+	/*
+	 * Exit if a device emulation finds an error in its initialization
+	 */
+	if (init_pci(ctx) != 0)
+		exit(1);
+
+#ifdef	__FreeBSD__
+	if (gdb_port != 0)
+		init_dbgport(gdb_port);
+#endif
+
+	if (bvmcons)
+		init_bvmcons();
+
+	console_init();
+	vga_init();
+	if (rfb_port != -1)
+		rfb_init(rfb_port);
+
+	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
+	assert(error == 0);
+
+	/*
+	 * build the guest tables, MP etc.
+	 */
+	if (mptgen)
+		mptable_build(ctx, guest_ncpus);
+
+	error = smbios_build(ctx);
+	assert(error == 0);
+
+#ifdef	__FreeBSD__
+	if (acpi) {
+		error = acpi_build(ctx, guest_ncpus);
+		assert(error == 0);
+	}
+
+	/*
+	 * Change the proc title to include the VM name.
+	 */
+	setproctitle("%s", vmname);
+#else
+	/*
+	 * If applicable, wait for bhyveconsole
+	 */
+	if (bcons_wait) {
+		printf("Waiting for bhyveconsole connection...\n");
+		(void) pthread_mutex_lock(&bcons_wait_lock);
+		while (!bcons_connected) {
+			(void) pthread_cond_wait(&bcons_wait_done,
+			    &bcons_wait_lock);
+		}
+		(void) pthread_mutex_unlock(&bcons_wait_lock);
+	}
+#endif
+
+	/*
+	 * Add CPU 0
+	 */
+	fbsdrun_addcpu(ctx, BSP, BSP, rip);
+
+	/*
+	 * Head off to the main event dispatch loop
+	 */
+#ifdef	__FreeBSD__
+	mevent_dispatch();
+#else
+	/* Let the initial thread end while the vcpu threads keep running. */
+	pthread_exit(NULL);
+#endif
+
+	exit(1);
+}
diff --git a/usr/src/cmd/bhyve/bhyverun.h b/usr/src/cmd/bhyve/bhyverun.h
new file mode 100644
index 0000000000..be89314c09
--- /dev/null
+++ b/usr/src/cmd/bhyve/bhyverun.h
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/bhyverun.h 277310 2015-01-18 03:08:30Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef	_FBSDRUN_H_
+#define	_FBSDRUN_H_
+
+/*
+ * Compile-time assertion: CTASSERT(x) declares a typedef of a char
+ * array whose size is -1 when 'x' is false, which fails to compile.
+ * The two extra macro levels let __LINE__ expand before token pasting.
+ */
+#ifndef CTASSERT		/* Allow lint to override */
+#define	CTASSERT(x)		_CTASSERT(x, __LINE__)
+#define	_CTASSERT(x, y)		__CTASSERT(x, y)
+#define	__CTASSERT(x, y)	typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+
+/* Return codes for VM-exit handlers, acted on by the vcpu run loop. */
+#define	VMEXIT_CONTINUE		(0)
+#define	VMEXIT_ABORT		(-1)
+
+struct vmctx;
+/* Globals defined in bhyverun.c. */
+extern int guest_ncpus;
+extern char *guest_uuid_str;
+extern char *vmname;
+#ifndef	__FreeBSD__
+/*
+ * bhyveconsole handshake state; see main() in bhyverun.c.
+ * NOTE(review): these use pthread types, so includers need <pthread.h>
+ * before this header.
+ */
+extern int bcons_wait;
+extern int bcons_connected;
+extern pthread_mutex_t bcons_wait_lock;
+extern pthread_cond_t bcons_wait_done;
+#endif
+
+/* Map a guest physical range to a host pointer (wraps vm_map_gpa()). */
+void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+
+void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
+void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
+/*
+ * NOTE(review): fbsdrun_muxed() and fbsdrun_disable_x2apic() have no
+ * visible definition in the accompanying bhyverun.c -- confirm whether
+ * these prototypes are stale.
+ */
+int  fbsdrun_muxed(void);
+int  fbsdrun_vmexit_on_hlt(void);
+int  fbsdrun_vmexit_on_pause(void);
+int  fbsdrun_disable_x2apic(void);
+int  fbsdrun_virtio_msix(void);
+#endif
diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c
new file mode 100644
index 0000000000..2da946d420
--- /dev/null
+++ b/usr/src/cmd/bhyve/block_if.c
@@ -0,0 +1,625 @@
+/*-
+ * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $");
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <machine/atomic.h>
+
+#include "bhyverun.h"
+#ifdef	__FreeBSD__
+#include "mevent.h"
+#endif
+#include "block_if.h"
+
+#define BLOCKIF_SIG	0xb109b109	/* bc_magic value of an open context */
+
+#define BLOCKIF_MAXREQ	33		/* number of elements in bc_reqs[] */
+
+enum blockop {
+	BOP_READ,
+	BOP_WRITE,
+	BOP_FLUSH
+};
+
+enum blockstat {
+	BST_FREE,		/* on bc_freeq, available to blockif_enqueue() */
+	BST_PEND,		/* on bc_pendq, waiting for the i/o thread */
+	BST_BUSY,		/* on bc_busyq, being processed */
+	BST_DONE		/* processed; set by blockif_proc() */
+};
+
+struct blockif_elem {
+	TAILQ_ENTRY(blockif_elem) be_link;	/* free/pending/busy linkage */
+	struct blockif_req  *be_req;		/* request held by this slot */
+	enum blockop	     be_op;
+	enum blockstat	     be_status;
+	pthread_t            be_tid;		/* thread that dequeued it */
+};
+
+struct blockif_ctxt {
+	int			bc_magic;	/* BLOCKIF_SIG while open */
+	int			bc_fd;		/* backing file descriptor */
+	int			bc_rdonly;
+	off_t			bc_size;	/* backing store size, bytes */
+	int			bc_sectsz;	/* sector size, bytes */
+	pthread_t		bc_btid;	/* block i/o thread */
+        pthread_mutex_t		bc_mtx;		/* held around queue updates */
+        pthread_cond_t		bc_cond;	/* wakes the block i/o thread */
+	int			bc_closing;	/* set by blockif_close() */
+
+	/* Request elements and free/pending/busy queues */
+	TAILQ_HEAD(, blockif_elem) bc_freeq;       
+	TAILQ_HEAD(, blockif_elem) bc_pendq;
+	TAILQ_HEAD(, blockif_elem) bc_busyq;
+	u_int			bc_req_count;	/* pending + busy elements */
+	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
+};
+
+static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
+
+struct blockif_sig_elem {
+	pthread_mutex_t			bse_mtx;
+	pthread_cond_t			bse_cond;
+	int				bse_pending;	/* 0 once handled */
+	struct blockif_sig_elem		*bse_next;
+};
+
+static struct blockif_sig_elem *blockif_bse_head;
+
+static int
+blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
+		enum blockop op)
+{
+	struct blockif_elem *elem;
+
+	/* Take the first free element; one must exist, always returns 0. */
+	assert(bc->bc_req_count < BLOCKIF_MAXREQ);
+	elem = TAILQ_FIRST(&bc->bc_freeq);
+	assert(elem != NULL);
+	assert(elem->be_status == BST_FREE);
+	TAILQ_REMOVE(&bc->bc_freeq, elem, be_link);
+
+	/* Describe the request, then append it to the pending queue. */
+	elem->be_req = breq;
+	elem->be_op = op;
+	elem->be_status = BST_PEND;
+	TAILQ_INSERT_TAIL(&bc->bc_pendq, elem, be_link);
+	bc->bc_req_count++;
+
+	return (0);
+}
+
+static int
+blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
+{
+	struct blockif_elem *be;
+
+	/*
+	 * Hand the oldest pending element to the caller, marked busy; ENOENT
+	 * if none.  bc_req_count also counts busy elements, so test the queue.
+	 */
+	be = TAILQ_FIRST(&bc->bc_pendq);
+	if (be == NULL)
+		return (ENOENT);
+	assert(be->be_status == BST_PEND);
+	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
+	be->be_status = BST_BUSY;
+	be->be_tid = bc->bc_btid;
+	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
+	*bep = be;
+	return (0);
+}
+
+static void
+blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
+{
+	/* Retire a finished element: busy queue -> free queue. */
+	assert(be->be_status == BST_DONE);
+	TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
+
+	be->be_req = NULL;
+	be->be_status = BST_FREE;
+	be->be_tid = 0;
+	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
+	bc->bc_req_count--;
+}
+
+static void
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
+{
+	struct blockif_req *br;
+	int err;
+	/* Run be's operation on bc_fd; the callback gets 0 or an errno value. */
+	br = be->be_req;
+	err = 0;
+
+	switch (be->be_op) {
+	case BOP_READ:
+		if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
+			   br->br_offset) < 0)
+			err = errno;
+		break;
+	case BOP_WRITE:
+		if (bc->bc_rdonly)
+			err = EROFS;
+		else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
+			     br->br_offset) < 0)
+			err = errno;
+		break;
+	case BOP_FLUSH:
+		break;	/* NOTE(review): nothing is flushed; reports success */
+	default:
+		err = EINVAL;
+		break;
+	}
+	/* Mark done before the callback; blockif_cancel() polls be_status. */
+	be->be_status = BST_DONE;
+
+	(*br->br_callback)(br, err);
+}
+
+static void *
+blockif_thr(void *arg)
+{
+	struct blockif_ctxt *bc;
+	struct blockif_elem *be;
+
+	bc = arg;
+
+	/*
+	 * Block i/o thread: drain the pending queue, then sleep until more
+	 * work arrives or blockif_close() asks for exit.  bc_closing is
+	 * tested with bc_mtx held and before sleeping, so an exit request
+	 * raised while a request was being processed is not slept through.
+	 */
+	pthread_mutex_lock(&bc->bc_mtx);
+	for (;;) {
+		while (!blockif_dequeue(bc, &be)) {
+			pthread_mutex_unlock(&bc->bc_mtx);
+			blockif_proc(bc, be);
+			pthread_mutex_lock(&bc->bc_mtx);
+			blockif_complete(bc, be);
+		}
+		if (bc->bc_closing)
+			break;
+		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
+	}
+	pthread_mutex_unlock(&bc->bc_mtx);
+	return (NULL);
+}
+
+#ifdef	__FreeBSD__
+static void
+blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
+#else
+static void
+blockif_sigcont_handler(int signal)
+#endif
+{
+	struct blockif_sig_elem *bse;
+
+	for (;;) {
+		/*
+		 * Pop the head of blockif_bse_head with a compare-and-set.
+		 * The entire list is processed, whoever each entry is for.
+		 */
+		do {
+			bse = blockif_bse_head;
+			if (bse == NULL)
+				return;
+		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+					    (uintptr_t)bse,
+					    (uintptr_t)bse->bse_next));
+		/* bse is now unlinked; wake the blockif_cancel() waiter. */
+		pthread_mutex_lock(&bse->bse_mtx);
+		bse->bse_pending = 0;
+		pthread_cond_signal(&bse->bse_cond);
+		pthread_mutex_unlock(&bse->bse_mtx);
+	}
+}
+
+static void
+blockif_init(void)
+{
+#ifdef	__FreeBSD__
+	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
+	(void) signal(SIGCONT, SIG_IGN);
+#else	/* !__FreeBSD__: install the handler directly */
+	(void) sigset(SIGCONT, blockif_sigcont_handler);
+#endif	/* __FreeBSD__ */
+}
+
+struct blockif_ctxt *
+blockif_open(const char *optstr, const char *ident)
+{
+	char tname[MAXCOMLEN + 1];
+	char *nopt, *xopts;
+	struct blockif_ctxt *bc;
+	struct stat sbuf;
+	off_t size;
+	int extra, fd, i, sectsz;
+	int nocache, sync, ro;
+
+	/*
+	 * Open the backing store named by optstr and start its i/o thread
+	 * ("blk-<ident>"); returns the new context, or NULL on failure.
+	 */
+	pthread_once(&blockif_once, blockif_init);
+	nocache = sync = ro = 0;
+
+	/*
+	 * The first element in the optstring is always a pathname; optional
+	 * elements follow.  strtok() cuts nopt there, so nopt is the path.
+	 */
+	nopt = strdup(optstr);
+	if (nopt == NULL) {
+		perror("Could not copy backing file options");
+		return (NULL);
+	}
+	for (xopts = strtok(nopt, ","); xopts != NULL;
+	     xopts = strtok(NULL, ",")) {
+		if (!strcmp(xopts, "nocache"))
+			nocache = 1;
+		else if (!strcmp(xopts, "sync"))
+			sync = 1;
+		else if (!strcmp(xopts, "ro"))
+			ro = 1;
+	}
+
+	extra = 0;
+	if (nocache)
+		extra |= O_DIRECT;
+	if (sync)
+		extra |= O_SYNC;
+
+	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
+	if (fd < 0 && !ro) {
+		/* Attempt a r/w fail with a r/o open */
+		fd = open(nopt, O_RDONLY | extra);
+		ro = 1;
+	}
+	if (fd < 0) {
+		perror("Could not open backing file");
+		free(nopt);
+		return (NULL);
+	}
+	free(nopt);		/* only needed for the open() calls above */
+	if (fstat(fd, &sbuf) < 0) {
+		perror("Could not stat backing file");
+		close(fd);
+		return (NULL);
+	}
+
+	/* Deal with raw devices */
+	size = sbuf.st_size;
+	sectsz = DEV_BSIZE;
+#ifdef	__FreeBSD__
+	if (S_ISCHR(sbuf.st_mode)) {
+		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
+		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
+			perror("Could not fetch dev blk/sector size");
+			close(fd);
+			return (NULL);
+		}
+		assert(size != 0);
+		assert(sectsz != 0);
+	}
+#endif
+
+	bc = calloc(1, sizeof(struct blockif_ctxt));
+	if (bc == NULL) {
+		close(fd);
+		return (NULL);
+	}
+
+	bc->bc_magic = BLOCKIF_SIG;
+	bc->bc_fd = fd;
+	bc->bc_rdonly = ro;
+	bc->bc_size = size;
+	bc->bc_sectsz = sectsz;
+	pthread_mutex_init(&bc->bc_mtx, NULL);
+	pthread_cond_init(&bc->bc_cond, NULL);
+	TAILQ_INIT(&bc->bc_freeq);
+	TAILQ_INIT(&bc->bc_pendq);
+	TAILQ_INIT(&bc->bc_busyq);
+	bc->bc_req_count = 0;
+	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
+		bc->bc_reqs[i].be_status = BST_FREE;
+		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
+	}
+
+	pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
+	snprintf(tname, sizeof(tname), "blk-%s", ident);
+	pthread_set_name_np(bc->bc_btid, tname);
+	return (bc);
+}
+
+static int
+blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
+		enum blockop op)
+{
+	/*
+	 * Queue breq for the block i/o thread as an operation of type op.
+	 * Returns 0 once queued, or E2BIG when the queue is already full.
+	 */
+	pthread_mutex_lock(&bc->bc_mtx);
+
+	if (bc->bc_req_count >= BLOCKIF_MAXREQ) {
+		/*
+		 * The blockif queue limit has been reached; callers are
+		 * not allowed to enqueue beyond it, so refuse the request.
+		 */
+		pthread_mutex_unlock(&bc->bc_mtx);
+		return (E2BIG);
+	}
+
+	/*
+	 * There is room: enqueue the request and wake the block i/o
+	 * thread so that it notices the new work.
+	 */
+	blockif_enqueue(bc, breq, op);
+	pthread_cond_signal(&bc->bc_cond);
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	return (0);
+}
+
+int
+blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	/* Queue breq as a read; returns blockif_request()'s result. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_READ));
+}
+
+int
+blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	/* Queue breq as a write; returns blockif_request()'s result. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_WRITE));
+}
+
+int
+blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	/* Queue breq as a flush; returns blockif_request()'s result. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (blockif_request(bc, breq, BOP_FLUSH));
+}
+
+int
+blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
+{
+	struct blockif_elem *be;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	/* Returns 0, EINVAL or EBUSY; each case is explained where it arises. */
+	pthread_mutex_lock(&bc->bc_mtx);
+	/*
+	 * Check pending requests.
+	 */
+	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
+		if (be->be_req == breq)
+			break;
+	}
+	if (be != NULL) {
+		/*
+		 * Found it: recycle the element before any i/o was started.
+		 */
+		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
+		be->be_status = BST_FREE;
+		be->be_req = NULL;
+		TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
+		bc->bc_req_count--;
+		pthread_mutex_unlock(&bc->bc_mtx);
+
+		return (0);
+	}
+
+	/*
+	 * Check in-flight requests.
+	 */
+	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
+		if (be->be_req == breq)
+			break;
+	}
+	if (be == NULL) {
+		/*
+		 * Didn't find it: breq is neither pending nor in flight.
+		 */
+		pthread_mutex_unlock(&bc->bc_mtx);
+		return (EINVAL);
+	}
+
+	/*
+	 * Interrupt the processing thread to force it to return
+	 * prematurely via its normal callback path.
+	 */
+	while (be->be_status == BST_BUSY) {
+		struct blockif_sig_elem bse, *old_head;
+
+		pthread_mutex_init(&bse.bse_mtx, NULL);
+		pthread_cond_init(&bse.bse_cond, NULL);
+
+		bse.bse_pending = 1;
+		/* Publish bse at the head of the blockif_bse_head list. */
+		do {
+			old_head = blockif_bse_head;
+			bse.bse_next = old_head;
+		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+					    (uintptr_t)old_head,
+					    (uintptr_t)&bse));
+
+		pthread_kill(be->be_tid, SIGCONT);
+		/* Sleep until the SIGCONT handler clears bse_pending. */
+		pthread_mutex_lock(&bse.bse_mtx);
+		while (bse.bse_pending)
+			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
+		pthread_mutex_unlock(&bse.bse_mtx);
+	}
+
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	/*
+	 * The processing thread has been interrupted.  Since it's not
+	 * clear if the callback has been invoked yet, return EBUSY.
+	 */
+	return (EBUSY);
+}
+
+int
+blockif_close(struct blockif_ctxt *bc)
+{
+	void *jval;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	/*
+	 * Stop the block i/o thread.  The flag is raised and the thread woken
+	 * with bc_mtx held -- the mutex the thread sleeps with -- so that the
+	 * update is serialised against the thread's queue handling.
+	 */
+	pthread_mutex_lock(&bc->bc_mtx);
+	bc->bc_closing = 1;
+	pthread_cond_signal(&bc->bc_cond);
+	pthread_mutex_unlock(&bc->bc_mtx);
+	pthread_join(bc->bc_btid, &jval);
+
+	/* XXX Cancel queued i/o's ??? */
+
+	/* Release resources; bc is invalid afterwards.  Always returns 0. */
+	bc->bc_magic = 0;
+	pthread_cond_destroy(&bc->bc_cond);
+	pthread_mutex_destroy(&bc->bc_mtx);
+	close(bc->bc_fd);
+	free(bc);
+	return (0);
+}
+
+/*
+ * Store virtual cylinder/head/sector values for bc in *c, *h and *s, using
+ * the algorithm outlined in the VHD specification.
+ */
+void
+blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
+{
+	off_t sectors;		/* total sectors of the block dev */
+	off_t hcyl;		/* cylinders times heads */
+	uint16_t secpt;		/* sectors per track */
+	uint8_t heads;
+
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	sectors = bc->bc_size / bc->bc_sectsz;
+
+	/* Clamp the size to the largest possible with CHS */
+	if (sectors > 65535UL*16*255)
+		sectors = 65535UL*16*255;
+	/* Large disks get 255 sectors/track; others try 17, 31, then 63. */
+	if (sectors >= 65536UL*16*63) {
+		secpt = 255;
+		heads = 16;
+		hcyl = sectors / secpt;
+	} else {
+		secpt = 17;
+		hcyl = sectors / secpt;
+		heads = (hcyl + 1023) / 1024;
+
+		if (heads < 4)
+			heads = 4;
+
+		if (hcyl >= (heads * 1024) || heads > 16) {
+			secpt = 31;
+			heads = 16;
+			hcyl = sectors / secpt;
+		}
+		if (hcyl >= (heads * 1024)) {
+			secpt = 63;
+			heads = 16;
+			hcyl = sectors / secpt;
+		}
+	}
+	/* hcyl is cylinders times heads; divide to get the cylinder count. */
+	*c = hcyl / heads;
+	*h = heads;
+	*s = secpt;
+}
+
+/*
+ * Accessors
+ */
+off_t
+blockif_size(struct blockif_ctxt *bc)
+{
+	/* Size of the backing store as recorded in bc_size. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_size);
+}
+
+int
+blockif_sectsz(struct blockif_ctxt *bc)
+{
+	/* Sector size as recorded in bc_sectsz. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_sectsz);
+}
+
+int
+blockif_queuesz(struct blockif_ctxt *bc)
+{
+	/* One less than the number of request elements in bc_reqs[]. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (BLOCKIF_MAXREQ - 1);
+}
+
+int
+blockif_is_ro(struct blockif_ctxt *bc)
+{
+	/* Non-zero when the backing store is open read-only. */
+	assert(bc->bc_magic == BLOCKIF_SIG);
+	return (bc->bc_rdonly);
+}
diff --git a/usr/src/cmd/bhyve/block_if.h b/usr/src/cmd/bhyve/block_if.h
new file mode 100644
index 0000000000..5ef120933c
--- /dev/null
+++ b/usr/src/cmd/bhyve/block_if.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/block_if.h 268638 2014-07-15 00:25:54Z grehan $
+ */
+
+/*
+ * The block API to be used by bhyve block-device emulations. The routines
+ * are thread safe, with no assumptions about the context of the completion
+ * callback - it may occur in the caller's context, or asynchronously in
+ * another thread.
+ */
+
+#ifndef _BLOCK_IF_H_
+#define _BLOCK_IF_H_
+
+#include <sys/uio.h>
+#include <sys/unistd.h>
+
+#ifdef	__FreeBSD__
+#define BLOCKIF_IOV_MAX		32	/* not practical to be IOV_MAX */
+#else
+#define BLOCKIF_IOV_MAX		16	/* not practical to be IOV_MAX */
+#endif	/* __FreeBSD__ */
+/* One i/o request; br_callback is called with 0 or an errno value. */
+struct blockif_req {
+	struct iovec	br_iov[BLOCKIF_IOV_MAX];
+	int		br_iovcnt;	/* entries of br_iov in use */
+	off_t		br_offset;	/* offset into the backing store */
+	void		(*br_callback)(struct blockif_req *req, int err);
+	void		*br_param;
+};
+
+struct blockif_ctxt;
+struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);
+off_t	blockif_size(struct blockif_ctxt *bc);
+void	blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
+    uint8_t *s);
+int	blockif_sectsz(struct blockif_ctxt *bc);
+int	blockif_queuesz(struct blockif_ctxt *bc);
+int	blockif_is_ro(struct blockif_ctxt *bc);
+int	blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
+int	blockif_close(struct blockif_ctxt *bc);
+
+#endif /* _BLOCK_IF_H_ */
diff --git a/usr/src/cmd/bhyve/console.c b/usr/src/cmd/bhyve/console.c
new file mode 100644
index 0000000000..a8d07709be
--- /dev/null
+++ b/usr/src/cmd/bhyve/console.c
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include "bhyvegc.h"
+#include "console.h"
+
+static struct {
+	struct bhyvegc		*gc;
+
+	fb_render_func_t	fb_render_cb;
+	void			*fb_arg;
+
+	kbd_event_func_t	kbd_event_cb;
+	void			*kbd_arg;
+
+	ptr_event_func_t	ptr_event_cb;
+	void			*ptr_arg;
+} console;
+
+void
+console_init(void)
+{
+	console.gc = bhyvegc_init(640, 400);
+}
+
+struct bhyvegc_image *
+console_get_image(void)
+{
+	return (bhyvegc_get_image(console.gc));
+}
+
+void
+console_fb_register(fb_render_func_t render_cb, void *arg)
+{
+	console.fb_render_cb = render_cb;
+	console.fb_arg = arg;
+}
+
+/* Like the event entry points below, a no-op until a callback is registered. */
+void
+console_refresh(void)
+{
+	if (console.fb_render_cb != NULL)
+		(*console.fb_render_cb)(console.gc, console.fb_arg);
+}
+
+void
+console_kbd_register(kbd_event_func_t event_cb, void *arg)
+{
+	console.kbd_event_cb = event_cb;
+	console.kbd_arg = arg;
+}
+
+void
+console_ptr_register(ptr_event_func_t event_cb, void *arg)
+{
+	console.ptr_event_cb = event_cb;
+	console.ptr_arg = arg;
+}
+
+void
+console_key_event(int down, uint32_t keysym)
+{
+	if (console.kbd_event_cb != NULL)
+		(*console.kbd_event_cb)(down, keysym, console.kbd_arg);
+}
+
+void
+console_ptr_event(uint8_t button, int x, int y)
+{
+	if (console.ptr_event_cb != NULL)
+		(*console.ptr_event_cb)(button, x, y, console.ptr_arg);
+}
diff --git a/usr/src/cmd/bhyve/console.h b/usr/src/cmd/bhyve/console.h
new file mode 100644
index 0000000000..bffb7c2456
--- /dev/null
+++ b/usr/src/cmd/bhyve/console.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _CONSOLE_H_
+#define	_CONSOLE_H_
+
+struct bhyvegc;
+/* Callback types accepted by the console_*_register() functions below. */
+typedef void (*fb_render_func_t)(struct bhyvegc *gc, void *arg);
+typedef void (*kbd_event_func_t)(int down, uint32_t keysym, void *arg);
+typedef void (*ptr_event_func_t)(uint8_t mask, int x, int y, void *arg);
+
+void	console_init(void);
+struct bhyvegc_image *console_get_image(void);
+
+void	console_fb_register(fb_render_func_t render_cb, void *arg);
+void	console_refresh(void);
+
+void	console_kbd_register(kbd_event_func_t event_cb, void *arg);
+void	console_key_event(int down, uint32_t keysym);
+
+void	console_ptr_register(ptr_event_func_t event_cb, void *arg);
+void	console_ptr_event(uint8_t button, int x, int y);
+
+#endif /* _CONSOLE_H_ */
diff --git a/usr/src/cmd/bhyve/consport.c b/usr/src/cmd/bhyve/consport.c
new file mode 100644
index 0000000000..69b6dfddf1
--- /dev/null
+++ b/usr/src/cmd/bhyve/consport.c
@@ -0,0 +1,155 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/consport.c 264277 2014-04-08 21:02:03Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/consport.c 264277 2014-04-08 21:02:03Z jhb $");
+
+#include <sys/types.h>
+#include <sys/select.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+
+#define	BVM_CONSOLE_PORT	0x220
+#define	BVM_CONS_SIG		('b' << 8 | 'v')
+
+static struct termios tio_orig, tio_new;
+
+static void
+ttyclose(void)
+{
+	tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+static void
+ttyopen(void)
+{
+#ifdef	__FreeBSD__
+	tcgetattr(STDIN_FILENO, &tio_orig);
+	/* cfmakeraw() adjusts an existing termios: seed it from the saved one. */
+	tio_new = tio_orig;
+	cfmakeraw(&tio_new);
+	tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+	atexit(ttyclose);
+#endif
+}
+
+static bool
+tty_char_available(void)
+{
+	struct timeval timeout;
+	fd_set readable;
+
+	/* A zero timeout makes select() poll stdin without blocking. */
+	timeout.tv_sec = 0;
+	timeout.tv_usec = 0;
+	FD_ZERO(&readable);
+	FD_SET(STDIN_FILENO, &readable);
+
+	/* Errors (-1) and "nothing ready" (0) both count as no input. */
+	return (select(STDIN_FILENO + 1, &readable, NULL, NULL,
+	    &timeout) > 0);
+}
+
+static int
+ttyread(void)
+{
+	unsigned char rb;
+
+	/*
+	 * Next input byte (0-255), or -1 if none: no data, EOF or read error.
+	 */
+	if (!tty_char_available() || read(STDIN_FILENO, &rb, 1) != 1)
+		return (-1);
+	return (rb);
+}
+
+static void
+ttywrite(unsigned char wb)
+{
+	(void) write(STDOUT_FILENO, &wb, 1);	/* result deliberately ignored */
+}
+
+static int
+console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		uint32_t *eax, void *arg)
+{
+	static int opened;
+	/* A 2-byte read returns the console signature. */
+	if (bytes == 2 && in) {
+		*eax = BVM_CONS_SIG;
+		return (0);
+	}
+
+	/*
+	 * Guests might probe this port to look for old ISA devices
+	 * using single-byte reads.  Return 0xff for those.
+	 */
+	if (bytes == 1 && in) {
+		*eax = 0xff;
+		return (0);
+	}
+	/* Every remaining access must be 4 bytes wide, or it fails with -1. */
+	if (bytes != 4)
+		return (-1);
+
+	if (!opened) {
+		ttyopen();
+		opened = 1;
+	}
+	/* One character per access: "in" reads it, "out" writes it. */
+	if (in)
+		*eax = ttyread();
+	else
+		ttywrite(*eax);
+
+	return (0);
+}
+
+SYSRES_IO(BVM_CONSOLE_PORT, 4);
+
+static struct inout_port consport = {
+	"bvmcons",
+	BVM_CONSOLE_PORT,
+	1,
+	IOPORT_F_INOUT,
+	console_handler
+};
+
+void
+init_bvmcons(void)
+{
+	/* Register console_handler() through the consport description. */
+	register_inout(&consport);
+}
diff --git a/usr/src/cmd/bhyve/dbgport.h b/usr/src/cmd/bhyve/dbgport.h
new file mode 100644
index 0000000000..b95df0bd31
--- /dev/null
+++ b/usr/src/cmd/bhyve/dbgport.h
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/dbgport.h 256156 2013-10-08 16:36:17Z neel $
+ */
+
+#ifndef _DBGPORT_H_
+#define	_DBGPORT_H_
+
+void	init_dbgport(int port);
+
+#endif	/* _DBGPORT_H_ */
diff --git a/usr/src/cmd/bhyve/inout.c b/usr/src/cmd/bhyve/inout.c
new file mode 100644
index 0000000000..510649893a
--- /dev/null
+++ b/usr/src/cmd/bhyve/inout.c
@@ -0,0 +1,297 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/inout.c 277310 2015-01-18 03:08:30Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/inout.c 277310 2015-01-18 03:08:30Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/_iovec.h>
+#include <sys/mman.h>
+
+#include <x86/psl.h>
+#include <x86/segments.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bhyverun.h"
+#include "inout.h"
+
+SET_DECLARE(inout_port_set, struct inout_port);
+
+#define	MAX_IOPORTS	(1 << 16)	/* entries in inout_handlers[] */
+
+#define	VERIFY_IOPORT(port, size) \
+	assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
+/* Handler table, indexed by i/o port number. */
+static struct {
+	const char	*name;
+	int		flags;
+	inout_func_t	handler;
+	void		*arg;		/* passed to handler on each access */
+} inout_handlers[MAX_IOPORTS];
+
+static int
+default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/*
+	 * Fallback port handler: writes are discarded and reads of 1, 2
+	 * or 4 bytes return all ones; *eax is otherwise left untouched.
+	 */
+	if (!in)
+		return (0);
+
+	if (bytes == 4)
+		*eax = 0xffffffff;
+	else if (bytes == 2)
+		*eax = 0xffff;
+	else if (bytes == 1)
+		*eax = 0xff;
+
+	return (0);
+}
+
+static void
+register_default_iohandler(int start, int size)
+{
+	struct inout_port dflt;
+
+	/* Point ports [start, start + size) back at default_inout(). */
+	VERIFY_IOPORT(start, size);
+
+	(void) memset(&dflt, 0, sizeof (dflt));
+	dflt.handler = default_inout;
+	dflt.flags = IOPORT_F_INOUT | IOPORT_F_DEFAULT;
+	dflt.size = size;
+	dflt.port = start;
+	dflt.name = "default";
+	register_inout(&dflt);
+}
+
+int
+emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
+{
+	int addrsize, bytes, flags, in, port, prot, rep;
+	uint32_t eax, val;
+	inout_func_t handler;
+	void *arg;
+	int error, retval;
+	enum vm_reg_name idxreg;
+	uint64_t gla, index, iterations, count;
+	struct vm_inout_str *vis;
+	struct iovec iov[2];
+	/* Emulate one in/out exit; returns 0, or -1/the handler's error. */
+	bytes = vmexit->u.inout.bytes;
+	in = vmexit->u.inout.in;
+	port = vmexit->u.inout.port;
+
+	assert(port < MAX_IOPORTS);
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
+	handler = inout_handlers[port].handler;
+	/* With strict set, a port left on the default handler is an error. */
+	if (strict && handler == default_inout)
+		return (-1);
+
+	flags = inout_handlers[port].flags;
+	arg = inout_handlers[port].arg;
+	/* The registration must permit the direction of this access. */
+	if (in) {
+		if (!(flags & IOPORT_F_IN))
+			return (-1);
+	} else {
+		if (!(flags & IOPORT_F_OUT))
+			return (-1);
+	}
+
+	retval = 0;
+	if (vmexit->u.inout.string) {
+		vis = &vmexit->u.inout_str;
+		rep = vis->inout.rep;
+		addrsize = vis->addrsize;
+		prot = in ? PROT_WRITE : PROT_READ;
+		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
+
+		/* Index register */
+		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+		index = vis->index & vie_size2mask(addrsize);
+
+		/* Count register */
+		count = vis->count & vie_size2mask(addrsize);
+
+		/* Limit number of back-to-back in/out emulations to 16 */
+		iterations = MIN(count, 16);
+		while (iterations > 0) {
+			assert(retval == 0);
+			if (vie_calculate_gla(vis->paging.cpu_mode,
+			    vis->seg_name, &vis->seg_desc, index, bytes,
+			    addrsize, prot, &gla)) {
+				vm_inject_gp(ctx, vcpu);
+				break;
+			}
+
+			error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
+			    bytes, prot, iov, nitems(iov));
+			if (error == -1) {
+				retval = -1;  /* Unrecoverable error */
+				break;
+			} else if (error == 1) {
+				retval = 0;  /* Resume guest to handle fault */
+				break;
+			}
+
+			if (vie_alignment_check(vis->paging.cpl, bytes,
+			    vis->cr0, vis->rflags, gla)) {
+				vm_inject_ac(ctx, vcpu, 0);
+				break;
+			}
+			/* Output takes val from guest memory; input stores it. */
+			val = 0;
+			if (!in)
+				vm_copyin(ctx, vcpu, iov, &val, bytes);
+
+			retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+			if (retval != 0)
+				break;
+
+			if (in)
+				vm_copyout(ctx, vcpu, &val, iov, bytes);
+
+			/* Update index */
+			if (vis->rflags & PSL_D)
+				index -= bytes;
+			else
+				index += bytes;
+
+			count--;
+			iterations--;
+		}
+
+		/* Update index register */
+		error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
+		assert(error == 0);
+
+		/*
+		 * Update count register only if the instruction had a repeat
+		 * prefix.
+		 */
+		if (rep) {
+			error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
+			    count, addrsize);
+			assert(error == 0);
+		}
+
+		/* Restart the instruction if more iterations remain */
+		if (retval == 0 && count != 0) {
+			error = vm_restart_instruction(ctx, vcpu);
+			assert(error == 0);
+		}
+	} else {
+		eax = vmexit->u.inout.eax;
+		val = eax & vie_size2mask(bytes);
+		retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
+		if (retval == 0 && in) {
+			eax &= ~vie_size2mask(bytes);
+			eax |= val & vie_size2mask(bytes);
+			error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
+			    eax);
+			assert(error == 0);
+		}
+	}
+	return (retval);
+}
+
+void
+init_inout(void)
+{
+	struct inout_port **iopp, *iop;
+
+	/*
+	 * Set up the default handler for all ports
+	 */
+	register_default_iohandler(0, MAX_IOPORTS);
+
+	/*
+	 * Overwrite with the inout_port_set handlers (first port, NULL arg)
+	 */
+	SET_FOREACH(iopp, inout_port_set) {
+		iop = *iopp;
+		assert(iop->port < MAX_IOPORTS);
+		inout_handlers[iop->port].name = iop->name;
+		inout_handlers[iop->port].flags = iop->flags;
+		inout_handlers[iop->port].handler = iop->handler;
+		inout_handlers[iop->port].arg = NULL;
+	}
+}
+
+int
+register_inout(struct inout_port *iop)
+{
+	const int is_default = (iop->flags & IOPORT_F_DEFAULT) != 0;
+	int p;
+
+	VERIFY_IOPORT(iop->port, iop->size);
+
+	/*
+	 * A non-default registration must not overwrite an already
+	 * allocated i/o range; that conflict is reported as -1.
+	 */
+	for (p = iop->port; !is_default && p < iop->port + iop->size; p++) {
+		if ((inout_handlers[p].flags & IOPORT_F_DEFAULT) == 0)
+			return (-1);
+	}
+
+	/* Claim every port in the range for iop; returns 0 when done. */
+	for (p = iop->port; p < iop->port + iop->size; p++) {
+		inout_handlers[p].name = iop->name;
+		inout_handlers[p].flags = iop->flags;
+		inout_handlers[p].handler = iop->handler;
+		inout_handlers[p].arg = iop->arg;
+	}
+
+	return (0);
+}
+
+int
+unregister_inout(struct inout_port *iop)
+{
+	/* Hand iop's ports back to the default handler; always returns 0. */
+	VERIFY_IOPORT(iop->port, iop->size);
+	assert(inout_handlers[iop->port].name == iop->name);
+	/* Only the first port's name pointer was compared above. */
+	register_default_iohandler(iop->port, iop->size);
+
+	return (0);
+}
diff --git a/usr/src/cmd/bhyve/inout.h b/usr/src/cmd/bhyve/inout.h
new file mode 100644
index 0000000000..0d4046bd61
--- /dev/null
+++ b/usr/src/cmd/bhyve/inout.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/inout.h 269094 2014-07-25 20:18:35Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _INOUT_H_
+#define	_INOUT_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+struct vm_exit;
+
+/*
+ * inout emulation handlers return 0 on success and -1 on failure.
+ */
+typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
+			    int bytes, uint32_t *eax, void *arg);
+
+struct inout_port {
+	const char 	*name;		/* owner's label for the range */
+	int		port;		/* first port of the range */
+	int		size;		/* number of consecutive ports */
+	int		flags;		/* IOPORT_F_* bits */
+	inout_func_t	handler;	/* called to emulate an access */
+	void		*arg;		/* stored alongside the handler */
+};
+#define	IOPORT_F_IN		0x1
+#define	IOPORT_F_OUT		0x2
+#define	IOPORT_F_INOUT		(IOPORT_F_IN | IOPORT_F_OUT)
+
+/*
+ * The following flags are used internally and must not be used by
+ * device models.
+ */
+#define	IOPORT_F_DEFAULT	0x80000000	/* claimed by default handler */
+
+#define	INOUT_PORT(name, port, flags, handler)				\
+	static struct inout_port __CONCAT(__inout_port, __LINE__) = {	\
+		#name,			/* name, stringified */	\
+		(port),			/* port */			\
+		1,			/* size: one port */		\
+		(flags),		/* flags */			\
+		(handler),		/* handler */			\
+		0			/* arg: none */			\
+	};								\
+	DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
+	
+void	init_inout(void);
+int	emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
+		      int strict);
+int	register_inout(struct inout_port *iop);
+int	unregister_inout(struct inout_port *iop);
+void	init_bvmcons(void);
+
+#endif	/* _INOUT_H_ */
diff --git a/usr/src/cmd/bhyve/ioapic.c b/usr/src/cmd/bhyve/ioapic.c
new file mode 100644
index 0000000000..86ff5c6580
--- /dev/null
+++ b/usr/src/cmd/bhyve/ioapic.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/ioapic.c 261268 2014-01-29 14:56:48Z jhb $");
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "ioapic.h"
+
+/*
+ * Assign PCI INTx interrupts to I/O APIC pins in a round-robin
+ * fashion.  Note that we have no idea what the HPET is using, but the
+ * HPET is also programmable whereas this is intended for hardwired
+ * PCI interrupts.
+ *
+ * This assumes a single I/O APIC where pins >= 16 are permitted for
+ * PCI devices.
+ */
+static int pci_pins;
+
+/*
+ * Work out how many I/O APIC pins can be handed out for PCI interrupts.
+ * The first 16 pins are ignored; 'pci_pins' is left at zero if the pin
+ * count cannot be queried or there are no pins beyond those 16.
+ */
+void
+ioapic_init(struct vmctx *ctx)
+{
+	int rc;
+
+	rc = vm_ioapic_pincount(ctx, &pci_pins);
+	if (rc < 0 || pci_pins <= 16)
+		pci_pins = 0;
+	else
+		pci_pins -= 16;
+}
+
+/* Return the next PCI pin (16 and up) in round-robin order, or -1 if none. */
+int
+ioapic_pci_alloc_irq(void)
+{
+	static int cursor;	/* allocations made so far */
+
+	/* pci_pins == 0 means no pin is available for PCI. */
+	return (pci_pins != 0 ? 16 + (cursor++ % pci_pins) : -1);
+}
diff --git a/usr/src/cmd/bhyve/ioapic.h b/usr/src/cmd/bhyve/ioapic.h
new file mode 100644
index 0000000000..789f90fea9
--- /dev/null
+++ b/usr/src/cmd/bhyve/ioapic.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/ioapic.h 261268 2014-01-29 14:56:48Z jhb $
+ */
+
+#ifndef _IOAPIC_H_
+#define	_IOAPIC_H_
+
+/*
+ * Set up, then allocate from, the I/O APIC pins available for PCI IRQs.
+ */
+void	ioapic_init(struct vmctx *ctx);
+int	ioapic_pci_alloc_irq(void);
+
+#endif
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
new file mode 100644
index 0000000000..a153a8e960
--- /dev/null
+++ b/usr/src/cmd/bhyve/mem.c
@@ -0,0 +1,291 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/mem.c 269700 2014-08-08 03:49:01Z neel $
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the key has the same base and limit
+ * so it can be searched within the range.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/mem.c 269700 2014-08-08 03:49:01Z neel $");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "mem.h"
+
+struct mmio_rb_range {
+	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
+	struct mem_range	mr_param;	/* copy of the registration */
+	uint64_t                mr_base;	/* first address in the range */
+	uint64_t                mr_end;		/* last address (inclusive) */
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];
+
+static pthread_rwlock_t mmio_rwlock;
+
+/* Order ranges by address; any two ranges that overlap compare equal. */
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+	const int a_below = (a->mr_end < b->mr_base);
+	const int a_above = (a->mr_base > b->mr_end);
+
+	return (a_below ? -1 : (a_above ? 1 : 0));
+}
+
+/*
+ * Look up the range in 'rbt' that contains 'addr'.  Returns 0 and stores the
+ * range in '*entry' on a hit; returns ENOENT, leaving '*entry' alone, if not.
+ */
+static int
+mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
+    struct mmio_rb_range **entry)
+{
+	struct mmio_rb_range key, *match;
+
+	key.mr_base = key.mr_end = addr;	/* a one-address key range */
+	match = RB_FIND(mmio_rb_tree, rbt, &key);
+	if (match == NULL)
+		return (ENOENT);
+	*entry = match;
+	return (0);
+}
+
+/*
+ * Insert 'new' into 'rbt'.  Returns 0 on success, or EEXIST when RB_INSERT()
+ * reports a range already in the tree that 'new' collides with.
+ */
+static int
+mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
+{
+	struct mmio_rb_range *clash;
+
+	clash = RB_INSERT(mmio_rb_tree, rbt, new);
+	if (clash == NULL)
+		return (0);
+
+#ifdef RB_DEBUG
+	printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+	    new->mr_base, new->mr_end, clash->mr_base, clash->mr_end);
+#endif
+	return (EEXIST);
+}
+
+#if 0	/* debugging aid, compiled out: print every range in a tree */
+static void
+mmio_rb_dump(struct mmio_rb_tree *rbt)
+{
+	struct mmio_rb_range *np;
+
+	pthread_rwlock_rdlock(&mmio_rwlock);
+	RB_FOREACH(np, mmio_rb_tree, rbt) {
+		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+		       np->mr_param.name);
+	}
+	pthread_rwlock_unlock(&mmio_rwlock);
+}
+#endif	/* 0 */
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+/* Emulator read callback: forward the access to the range's handler. */
+static int
+mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
+{
+	struct mem_range *range = arg;	/* the registration that was hit */
+
+	/* 'rval' is handed to the handler as the value pointer. */
+	return (range->handler(ctx, vcpu, MEM_F_READ, gpa, size, rval,
+	    range->arg1, range->arg2));
+}
+
+/* Emulator write callback: forward the access to the range's handler. */
+static int
+mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
+{
+	struct mem_range *range = arg;	/* the registration that was hit */
+
+	/* The handler takes the value by pointer, so pass wval's address. */
+	return (range->handler(ctx, vcpu, MEM_F_WRITE, gpa, size, &wval,
+	    range->arg1, range->arg2));
+}
+
+/*
+ * Emulate an instruction ('vie') accessing guest physical address 'paddr'
+ * through the handler of the registered range that covers 'paddr'.  Returns
+ * ESRCH if no range does, else whatever vmm_emulate_instruction() returns.
+ */
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+    struct vm_guest_paging *paging)
+{
+	struct mmio_rb_range *entry = NULL, *cached;
+	int err, keep_lock;
+
+	pthread_rwlock_rdlock(&mmio_rwlock);
+
+	/*
+	 * Try the per-vCPU cache first, then the main tree (refreshing the
+	 * cache on a hit) and finally the fallback tree, whose hits are not
+	 * cached.
+	 */
+	cached = mmio_hint[vcpu];
+	if (cached != NULL && paddr >= cached->mr_base &&
+	    paddr <= cached->mr_end) {
+		entry = cached;
+	} else if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
+		mmio_hint[vcpu] = entry;
+	} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry) != 0) {
+		pthread_rwlock_unlock(&mmio_rwlock);
+		return (ESRCH);
+	}
+
+	assert(entry != NULL);
+
+	/*
+	 * An 'immutable' memory range is guaranteed to be never removed
+	 * so there is no need to hold 'mmio_rwlock' while calling the
+	 * handler.
+	 *
+	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
+	 * to be called. If the guest is using PCI extended config space
+	 * to modify the PCIR_COMMAND register then register_mem() can
+	 * deadlock on 'mmio_rwlock'. However by registering the extended
+	 * config space window as 'immutable' the deadlock can be avoided.
+	 */
+	keep_lock = ((entry->mr_param.flags & MEM_F_IMMUTABLE) == 0);
+	if (!keep_lock)
+		pthread_rwlock_unlock(&mmio_rwlock);
+
+	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
+	    mem_read, mem_write, &entry->mr_param);
+	if (keep_lock)
+		pthread_rwlock_unlock(&mmio_rwlock);
+
+	return (err);
+}
+
+/*
+ * Insert a copy of 'memp' into 'rbt'.  Returns 0, ENOMEM or EEXIST; also
+ * returns 0, adding nothing, if 'memp->base' lies in a registered range.
+ */
+static int
+register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
+{
+	struct mmio_rb_range *entry, *mrp;
+	int err = 0, found;
+
+	mrp = malloc(sizeof(struct mmio_rb_range));
+	if (mrp == NULL)
+		return (ENOMEM);
+	mrp->mr_param = *memp;
+	mrp->mr_base = memp->base;
+	mrp->mr_end = memp->base + memp->size - 1;
+	pthread_rwlock_wrlock(&mmio_rwlock);
+	found = (mmio_rb_lookup(rbt, memp->base, &entry) == 0);
+	if (!found)
+		err = mmio_rb_add(rbt, mrp);
+	pthread_rwlock_unlock(&mmio_rwlock);
+	if (found || err != 0)
+		free(mrp);	/* never inserted: free it rather than leak it */
+	return (err);
+}
+
+/* Register 'memp' in the main tree; returns register_mem_int()'s result. */
+int
+register_mem(struct mem_range *memp)
+{
+	return (register_mem_int(&mmio_rb_root, memp));
+}
+
+/* Register 'memp' in the fallback tree, searched after the main tree. */
+int
+register_mem_fallback(struct mem_range *memp)
+{
+	return (register_mem_int(&mmio_rb_fallback, memp));
+}
+
+/*
+ * Remove the range containing 'memp->base' from the main tree and free it.
+ * Returns 0 on success, or ENOENT if no registered range holds that address.
+ */
+int
+unregister_mem(struct mem_range *memp)
+{
+	struct mmio_rb_range *entry = NULL;
+	struct mem_range *mr;
+	int cpu, err;
+
+	pthread_rwlock_wrlock(&mmio_rwlock);
+	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+	if (err == 0) {
+		mr = &entry->mr_param;
+		assert(mr->name == memp->name);
+		assert(mr->base == memp->base && mr->size == memp->size);
+		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
+		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+		/* No per-vCPU cache slot may keep pointing at the range. */
+		for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
+			if (mmio_hint[cpu] == entry)
+				mmio_hint[cpu] = NULL;
+		}
+	}
+	pthread_rwlock_unlock(&mmio_rwlock);
+	free(entry);	/* still NULL, so a no-op, if the lookup failed */
+	return (err);
+}
+
+/* Start with empty main and fallback trees and initialize 'mmio_rwlock'. */
+void
+init_mem(void)
+{
+	RB_INIT(&mmio_rb_root);
+	RB_INIT(&mmio_rb_fallback);
+	pthread_rwlock_init(&mmio_rwlock, NULL);
+}
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
new file mode 100644
index 0000000000..09cf56b72e
--- /dev/null
+++ b/usr/src/cmd/bhyve/mem.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/mem.h 269700 2014-08-08 03:49:01Z neel $
+ */
+
+#ifndef _MEM_H_
+#define	_MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+			  int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+	const char 	*name;		/* label checked by unregister_mem() */
+	int		flags;		/* MEM_F_* bits */
+	mem_func_t	handler;	/* called for each emulated access */
+	void		*arg1;		/* passed through to 'handler' */
+	long		arg2;		/* passed through to 'handler' */
+	uint64_t  	base;		/* first address of the range */
+	uint64_t  	size;		/* length of the range */
+};
+#define	MEM_F_READ		0x1
+#define	MEM_F_WRITE		0x2
+#define	MEM_F_RW		0x3
+#define	MEM_F_IMMUTABLE		0x4	/* mem_range cannot be unregistered */
+
+void	init_mem(void);
+int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
+		    struct vm_guest_paging *paging);
+		    
+int	register_mem(struct mem_range *memp);
+int	register_mem_fallback(struct mem_range *memp);
+int	unregister_mem(struct mem_range *memp);
+
+#endif	/* _MEM_H_ */
diff --git a/usr/src/cmd/bhyve/mptbl.c b/usr/src/cmd/bhyve/mptbl.c
new file mode 100644
index 0000000000..9d03765c7a
--- /dev/null
+++ b/usr/src/cmd/bhyve/mptbl.c
@@ -0,0 +1,377 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/mptbl.c 266125 2014-05-15 14:16:55Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/mptbl.c 266125 2014-05-15 14:16:55Z jhb $");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <x86/mptable.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include "acpi.h"
+#include "bhyverun.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+
+#define MPTABLE_BASE		0xE0000
+
+/* floating pointer length + maximum length of configuration table */
+#define	MPTABLE_MAX_LENGTH	(65536 + 16)
+
+#define LAPIC_PADDR		0xFEE00000
+#define LAPIC_VERSION 		16
+
+#define IOAPIC_PADDR		0xFEC00000
+#define IOAPIC_VERSION		0x11
+
+#define MP_SPECREV		4
+#define MPFP_SIG		"_MP_"
+
+/* Configuration header defines */
+#define MPCH_SIG		"PCMP"
+#define MPCH_OEMID		"BHyVe   "
+#define MPCH_OEMID_LEN          8
+#define MPCH_PRODID             "Hypervisor  "
+#define MPCH_PRODID_LEN         12
+
+/* Processor entry defines */
+#define MPEP_SIG_FAMILY		6	/* XXX bhyve should supply this */
+#define MPEP_SIG_MODEL		26
+#define MPEP_SIG_STEPPING	5
+#define MPEP_SIG		\
+	((MPEP_SIG_FAMILY << 8) | \
+	 (MPEP_SIG_MODEL << 4)	| \
+	 (MPEP_SIG_STEPPING))
+
+#define MPEP_FEATURES           (0xBFEBFBFF) /* XXX Intel i7 */
+
+/* Number of local intr entries */
+#define	MPEII_NUM_LOCAL_IRQ	2
+
+/* Bus entry defines */
+#define MPE_NUM_BUSES		2
+#define MPE_BUSNAME_LEN		6
+#define MPE_BUSNAME_ISA		"ISA   "
+#define MPE_BUSNAME_PCI		"PCI   "
+
+static void *oem_tbl_start;
+static int oem_tbl_size;
+
+/* Return the byte that makes the 'len' bytes at 'base' sum to 0 (mod 256). */
+static uint8_t
+mpt_compute_checksum(void *base, size_t len)
+{
+	const uint8_t *cursor = base;
+	uint8_t total = 0;
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		total += cursor[i];
+	return (256 - total);
+}
+
+/* Fill in the MP floating pointer structure that will live at 'gpa'. */
+static void
+mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa)
+{
+	memset(mpfp, 0, sizeof(*mpfp));
+	memcpy(mpfp->signature, MPFP_SIG, 4);
+	mpfp->pap = gpa + sizeof(*mpfp);	/* address just past this struct */
+	mpfp->length = 1;
+	mpfp->spec_rev = MP_SPECREV;
+	mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp));
+}
+
+/* Zero the MP configuration table header and fill in its constant fields. */
+static void
+mpt_build_mpch(mpcth_t mpch)
+{
+	memset(mpch, 0, sizeof(*mpch));
+	memcpy(mpch->signature, MPCH_SIG, 4);
+	mpch->spec_rev = MP_SPECREV;
+	memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN);
+	memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN);
+	mpch->apic_address = LAPIC_PADDR;
+}
+
+/* Write 'ncpu' processor entries at 'mpep'; entry 0 also gets the BP flag. */
+static void
+mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
+{
+	proc_entry_ptr pe;
+	int cpu;
+
+	for (cpu = 0, pe = mpep; cpu < ncpu; cpu++, pe++) {
+		memset(pe, 0, sizeof(*pe));
+		pe->type = MPCT_ENTRY_PROCESSOR;
+		pe->apic_id = cpu;	/* XXX the entry index is the APIC ID */
+		pe->apic_version = LAPIC_VERSION;
+		pe->cpu_signature = MPEP_SIG;
+		pe->feature_flags = MPEP_FEATURES;
+		pe->cpu_flags = PROCENTRY_FLAG_EN |
+		    (cpu == 0 ? PROCENTRY_FLAG_BP : 0);
+	}
+}
+
+/*
+ * Write the MPEII_NUM_LOCAL_IRQ (two) local interrupt entries.  Both are
+ * hardcoded for all CPUs (destination APIC ID 0xff): LINT0 as ExtINT and
+ * LINT1 as NMI.
+ */
+static void
+mpt_build_localint_entries(int_entry_ptr mpie)
+{
+	int lint;
+
+	for (lint = 0; lint < MPEII_NUM_LOCAL_IRQ; lint++, mpie++) {
+		memset(mpie, 0, sizeof(*mpie));
+		mpie->type = MPCT_ENTRY_LOCAL_INT;
+		mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+		    INTENTRY_FLAGS_TRIGGER_CONFORM;
+		mpie->dst_apic_id = 0xff;
+
+		/* The entry's index doubles as its LINT pin number. */
+		mpie->dst_apic_int = lint;
+		mpie->int_type = (lint == 0) ?
+		    INTENTRY_TYPE_EXTINT : INTENTRY_TYPE_NMI;
+	}
+}
+
+/* Write the two bus entries at 'mpeb': bus 0 is PCI and bus 1 is ISA. */
+static void
+mpt_build_bus_entries(bus_entry_ptr mpeb)
+{
+	bus_entry_ptr pci = &mpeb[0], isa = &mpeb[1];
+
+	memset(mpeb, 0, sizeof(*mpeb) * MPE_NUM_BUSES);
+	pci->type = MPCT_ENTRY_BUS;
+	pci->bus_id = 0;
+	memcpy(pci->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
+
+	isa->type = MPCT_ENTRY_BUS;
+	isa->bus_id = 1;
+	memcpy(isa->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
+}
+
+/* Fill in one enabled I/O APIC entry at 'mpei' with APIC ID 'id'. */
+static void
+mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id)
+{
+	memset(mpei, 0, sizeof(*mpei));
+	mpei->type = MPCT_ENTRY_IOAPIC;
+	mpei->apic_id = id;
+	mpei->apic_version = IOAPIC_VERSION;
+	mpei->apic_flags = IOAPICENTRY_FLAG_EN;
+	mpei->apic_address = IOAPIC_PADDR;
+}
+
+/*
+ * Count the I/O interrupt entries: one for each of the first 16 pins, which
+ * are always included, plus one for each active PCI INTx pin on any bus.
+ */
+static int
+mpt_count_ioint_entries(void)
+{
+	int bus, total;
+
+	total = 16;
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		total += pci_count_lintr(bus);
+
+	return (total);
+}
+
+/*
+ * pci_walk_lintr() callback: emit one I/O interrupt entry for a PCI INTx pin
+ * at the cursor that 'arg' points to, then advance the cursor past it.
+ */
+static void
+mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+	int_entry_ptr *cursor = arg;
+	int_entry_ptr ent = *cursor;
+
+	memset(ent, 0, sizeof(*ent));
+	ent->type = MPCT_ENTRY_INT;
+	ent->int_type = INTENTRY_TYPE_INT;
+	ent->src_bus_id = bus;
+	ent->src_bus_irq = (slot << 2) | (pin - 1);
+	ent->dst_apic_int = ioapic_irq;
+
+	/* Never the first entry, so borrow the previous one's I/O APIC ID. */
+	ent->dst_apic_id = ent[-1].dst_apic_id;
+
+	*cursor = ent + 1;
+}
+
+static void
+mpt_build_ioint_entries(int_entry_ptr mpie, int id)
+{
+	int pin, bus;
+
+	/*
+	 * Emit the I/O interrupt entries, starting at 'mpie'.  The config
+	 * for the first 16 pins is taken from the kernel's mptable.c,
+	 * mptable_parse_default_config_ints(); tweak later if needed.
+	 */
+
+	/* First, generate the first 16 pins. */
+	for (pin = 0; pin < 16; pin++) {
+		memset(mpie, 0, sizeof(*mpie));
+		mpie->type = MPCT_ENTRY_INT;
+		mpie->src_bus_id = 1;
+		mpie->dst_apic_id = id;
+
+		/*
+		 * Route bus 1 (ISA) IRQs to the first 16 pins of I/O APIC
+		 * 'id'.  NOTE(review): this used to say bus 0, APIC ID 2.
+		 */
+		mpie->dst_apic_int = pin;
+		switch (pin) {
+		case 0:
+			/* Pin 0 is an ExtINT pin. */
+			mpie->int_type = INTENTRY_TYPE_EXTINT;
+			break;
+		case 2:
+			/* IRQ 0 is routed to pin 2. */
+			mpie->int_type = INTENTRY_TYPE_INT;
+			mpie->src_bus_irq = 0;
+			break;
+		case SCI_INT:
+			/* ACPI SCI is level triggered and active-lo. */
+			mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
+			    INTENTRY_FLAGS_TRIGGER_LEVEL;
+			mpie->int_type = INTENTRY_TYPE_INT;
+			mpie->src_bus_irq = SCI_INT;
+			break;
+		default:
+			/* All other pins are identity mapped. */
+			mpie->int_type = INTENTRY_TYPE_INT;
+			mpie->src_bus_irq = pin;
+			break;
+		}
+		mpie++;
+	}
+
+	/* Next, generate entries for any PCI INTx interrupts. */
+	for (bus = 0; bus <= PCI_BUSMAX; bus++)
+		pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); 
+}
+
+/* Remember an OEM table for mptable_build() to copy; 'tbl' is not copied. */
+void
+mptable_add_oemtbl(void *tbl, int tblsz)
+{
+	oem_tbl_start = tbl;
+	oem_tbl_size = tblsz;
+}
+
+/* Build the MP table in guest memory at MPTABLE_BASE; 0 or an errno. */
+int
+mptable_build(struct vmctx *ctx, int ncpu)
+{
+	mpcth_t			mpch;
+	bus_entry_ptr		mpeb;
+	io_apic_entry_ptr	mpei;
+	proc_entry_ptr		mpep;
+	mpfps_t			mpfp;
+	int_entry_ptr		mpie;
+	int			ioints, bus;
+	char 			*curraddr;
+	char 			*startaddr;
+
+	startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH);
+	if (startaddr == NULL) {
+		fprintf(stderr, "mptable requires mapped mem\n");
+		return (ENOMEM);
+	}
+
+	/*
+	 * There is no way to advertise multiple PCI hierarchies via MPtable
+	 * so require that there is no PCI hierarchy with a non-zero bus
+	 * number.
+	 */
+	for (bus = 1; bus <= PCI_BUSMAX; bus++) {
+		if (pci_bus_configured(bus)) {
+			fprintf(stderr, "MPtable is incompatible with "
+			    "multiple PCI hierarchies.\r\n");
+			fprintf(stderr, "MPtable generation can be disabled "
+			    "by passing the -Y option to bhyve(8).\r\n");
+			return (EINVAL);
+		}
+	}
+
+	curraddr = startaddr;
+	mpfp = (mpfps_t)curraddr;
+	mpt_build_mpfp(mpfp, MPTABLE_BASE);
+	curraddr += sizeof(*mpfp);
+	/* The configuration table header follows the floating pointer. */
+	mpch = (mpcth_t)curraddr;
+	mpt_build_mpch(mpch);
+	curraddr += sizeof(*mpch);
+	/* 'ncpu' processor entries. */
+	mpep = (proc_entry_ptr)curraddr;
+	mpt_build_proc_entries(mpep, ncpu);
+	curraddr += sizeof(*mpep) * ncpu;
+	mpch->entry_count += ncpu;
+	/* MPE_NUM_BUSES bus entries. */
+	mpeb = (bus_entry_ptr) curraddr;
+	mpt_build_bus_entries(mpeb);
+	curraddr += sizeof(*mpeb) * MPE_NUM_BUSES;
+	mpch->entry_count += MPE_NUM_BUSES;
+	/* A single I/O APIC entry, with APIC ID 0. */
+	mpei = (io_apic_entry_ptr)curraddr;
+	mpt_build_ioapic_entries(mpei, 0);
+	curraddr += sizeof(*mpei);
+	mpch->entry_count++;
+	/* I/O interrupt entries, all targeting I/O APIC ID 0. */
+	mpie = (int_entry_ptr) curraddr;
+	ioints = mpt_count_ioint_entries();
+	mpt_build_ioint_entries(mpie, 0);
+	curraddr += sizeof(*mpie) * ioints;
+	mpch->entry_count += ioints;
+	/* MPEII_NUM_LOCAL_IRQ local interrupt entries. */
+	mpie = (int_entry_ptr)curraddr;
+	mpt_build_localint_entries(mpie);
+	curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
+	mpch->entry_count += MPEII_NUM_LOCAL_IRQ;
+	/* NOTE(review): OEM copy is not bounded by MPTABLE_MAX_LENGTH. */
+	if (oem_tbl_start) {
+		mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
+		mpch->oem_table_size = oem_tbl_size;
+		memcpy(curraddr, oem_tbl_start, oem_tbl_size);
+	}
+	/* 'curraddr' was not advanced, so these exclude the OEM table. */
+	mpch->base_table_length = curraddr - (char *)mpch;
+	mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length);
+	return (0);
+}
diff --git a/usr/src/cmd/bhyve/mptbl.h b/usr/src/cmd/bhyve/mptbl.h
new file mode 100644
index 0000000000..d78ea6da09
--- /dev/null
+++ b/usr/src/cmd/bhyve/mptbl.h
@@ -0,0 +1,35 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/mptbl.h 257423 2013-10-31 05:44:45Z neel $
+ */
+
+#ifndef _MPTBL_H_
+#define _MPTBL_H_
+
+int	mptable_build(struct vmctx *ctx, int ncpu);
+void	mptable_add_oemtbl(void *tbl, int tblsz);
+
+#endif /* _MPTBL_H_ */
diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c
new file mode 100644
index 0000000000..b68c977c1f
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_ahci.c
@@ -0,0 +1,2009 @@
+/*-
+ * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_ahci.c 274045 2014-11-03 12:55:31Z tychon $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_ahci.c 274045 2014-11-03 12:55:31Z tychon $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+#include <sys/ata.h>
+#include <sys/endian.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <inttypes.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "ahci.h"
+#include "block_if.h"
+
+#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
+
+#define	PxSIG_ATA	0x00000101 /* ATA drive */
+#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
+
+/*
+ * SATA FIS type codes.  This file stores them in byte 0 of the FISes it
+ * builds and compares byte 0 of a guest command FIS against
+ * FIS_TYPE_REGH2D before acting on it.
+ */
+enum sata_fis_type {
+	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
+	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
+	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
+	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
+	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
+	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
+	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
+	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
+};
+
+/*
+ * SCSI opcodes
+ */
+#define	TEST_UNIT_READY		0x00
+#define	REQUEST_SENSE		0x03
+#define	INQUIRY			0x12
+#define	START_STOP_UNIT		0x1B
+#define	PREVENT_ALLOW		0x1E
+#define	READ_CAPACITY		0x25
+#define	READ_10			0x28
+#define	POSITION_TO_ELEMENT	0x2B
+#define	READ_TOC		0x43
+#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
+#define	MODE_SENSE_10		0x5A
+#define	READ_12			0xA8
+#define	READ_CD			0xBE
+
+/*
+ * SCSI mode page codes
+ */
+#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
+#define	MODEPAGE_CD_CAPABILITIES	0x2A
+
+/*
+ * ATA commands
+ */
+#define	ATA_SF_ENAB_SATA_SF		0x10
+#define		ATA_SATA_SF_AN		0x05
+#define	ATA_SF_DIS_SATA_SF		0x90
+
+/*
+ * Debug printf
+ */
+#ifdef AHCI_DEBUG
+static FILE *dbg;
+#define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
+#else
+#define DPRINTF(format, arg...)
+#endif
+#define WPRINTF(format, arg...) printf(format, ##arg)
+
+/*
+ * Per-command bookkeeping wrapped around a blockif request.  An entry sits
+ * on the owning port's free list (io_flist) until a DMA, flush or ATAPI
+ * read takes it, at which point it is put on the busy list (io_blist).
+ */
+struct ahci_ioreq {
+	struct blockif_req io_req;	/* request handed to blockif */
+	struct ahci_port *io_pr;	/* port the request belongs to */
+	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
+	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
+	uint8_t *cfis;		/* command FIS of the originating slot */
+	uint32_t len;		/* total transfer length, in bytes */
+	uint32_t done;		/* bytes covered by iovecs queued so far */
+	int slot;		/* command slot being serviced */
+	int prdtl;		/* PRDT entries beyond BLOCKIF_IOV_MAX */
+};
+
+/*
+ * State of one emulated AHCI port.
+ *
+ * NOTE(review): the uint32_t members from clb through fbs presumably
+ * shadow the guest-visible port registers in register order; the
+ * register read/write handlers are outside this part of the file, so
+ * confirm there.
+ */
+struct ahci_port {
+	struct blockif_ctxt *bctx;	/* backing store; NULL = no device */
+	struct pci_ahci_softc *pr_sc;	/* owning controller */
+	uint8_t *cmd_lst;		/* command list (array of headers) */
+	uint8_t *rfis;			/* received-FIS area, or NULL */
+	int atapi;			/* non-zero for an ATAPI device */
+	int reset;			/* reset requested, not yet done */
+	int mult_sectors;		/* value set by ATA_SET_MULTI */
+	uint8_t xfermode;		/* current transfer mode */
+	uint8_t sense_key;		/* last ATAPI sense key */
+	uint8_t asc;			/* last ATAPI additional sense code */
+	uint32_t pending;		/* bitmap of slots with I/O in flight */
+
+	uint32_t clb;
+	uint32_t clbu;
+	uint32_t fb;
+	uint32_t fbu;
+	uint32_t is;
+	uint32_t ie;
+	uint32_t cmd;
+	uint32_t unused0;
+	uint32_t tfd;
+	uint32_t sig;
+	uint32_t ssts;
+	uint32_t sctl;
+	uint32_t serr;
+	uint32_t sact;
+	uint32_t ci;
+	uint32_t sntf;
+	uint32_t fbs;
+
+	/*
+	 * i/o request info
+	 */
+	struct ahci_ioreq *ioreq;
+	int ioqsz;
+	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
+	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* busy requests */
+};
+
+/*
+ * One entry of a port's command list, as laid out in guest memory.  The
+ * entry for a slot is found at cmd_lst + slot * AHCI_CL_SIZE.
+ */
+struct ahci_cmd_hdr {
+	uint16_t flags;		/* low 5 bits: command FIS length in dwords */
+	uint16_t prdtl;		/* number of PRDT entries in the table */
+	uint32_t prdbc;		/* byte count written back by write_prdt() */
+	uint64_t ctba;		/* guest-physical address of the cmd table */
+	uint32_t reserved[4];
+};
+
+/*
+ * One physical region descriptor.  The table of these starts 0x80 bytes
+ * into the command table that holds the command FIS.
+ */
+struct ahci_prdt_entry {
+	uint64_t dba;		/* guest-physical address of the data buffer */
+	uint32_t reserved;
+#define	DBCMASK		0x3fffff
+	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the buffer size */
+};
+
+/*
+ * Per-controller state.
+ *
+ * NOTE(review): the members from cap through bohc presumably shadow the
+ * guest-visible HBA registers; the register handlers are outside this
+ * part of the file, so confirm there.
+ */
+struct pci_ahci_softc {
+	struct pci_devinst *asc_pi;	/* PCI device instance */
+	pthread_mutex_t	mtx;		/* held while touching port state */
+	int ports;			/* number of entries used in port[] */
+	uint32_t cap;
+	uint32_t ghc;
+	uint32_t is;			/* one bit per port with an intr */
+	uint32_t pi;
+	uint32_t vs;
+	uint32_t ccc_ctl;
+	uint32_t ccc_pts;
+	uint32_t em_loc;
+	uint32_t em_ctl;
+	uint32_t cap2;
+	uint32_t bohc;
+	uint32_t lintr;			/* non-zero while the pin is asserted */
+	struct ahci_port port[MAX_PORTS];
+};
+/* VM context used for guest-physical to host address translation. */
+#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
+
+/*
+ * Convert a logical block address into a minute/second/frame triple:
+ * buf[0] = minute, buf[1] = second, buf[2] = frame.  The address is
+ * biased by 150 frames first, and there are 75 frames per second.
+ */
+static inline void
+lba_to_msf(uint8_t *buf, int lba)
+{
+	int frames = lba + 150;
+	int seconds = frames / 75;
+
+	buf[0] = seconds / 60;
+	buf[1] = seconds % 60;
+	buf[2] = frames % 75;
+}
+
+/*
+ * Fold the per-port interrupt state into the controller's interrupt
+ * status and then raise, or stop raising, the controller interrupt to
+ * match.
+ */
+static void
+ahci_generate_intr(struct pci_ahci_softc *sc)
+{
+	struct pci_devinst *pi = sc->asc_pi;
+	struct ahci_port *pr;
+	int i;
+
+	/* A port contributes its bit when an enabled interrupt is pending. */
+	for (i = 0; i < sc->ports; i++) {
+		pr = &sc->port[i];
+		if ((pr->is & pr->ie) != 0)
+			sc->is |= (1 << i);
+	}
+
+	DPRINTF("%s %x\n", __func__, sc->is);
+
+	if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) {
+		/*
+		 * Nothing to deliver: release the pin-based signal if we
+		 * were the ones holding it.
+		 */
+		if (sc->lintr) {
+			pci_lintr_deassert(pi);
+			sc->lintr = 0;
+		}
+		return;
+	}
+
+	if (pci_msi_enabled(pi)) {
+		/* MSI: fire on every edge. */
+		pci_generate_msi(pi, 0);
+		return;
+	}
+
+	/* Pin-based: only assert when not already asserted. */
+	if (!sc->lintr) {
+		sc->lintr = 1;
+		pci_lintr_assert(pi);
+	}
+}
+
+/*
+ * Copy a device-to-host FIS into the port's received-FIS area and, for
+ * the FIS types that have one, latch the matching port interrupt status
+ * bit.  Nothing happens when no receive area is set up or FIS receive
+ * (AHCI_P_CMD_FRE) is off.
+ */
+static void
+ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
+{
+	int rfis_off, fis_len, intr_bit;
+
+	if (p->rfis == NULL)
+		return;
+	if ((p->cmd & AHCI_P_CMD_FRE) == 0)
+		return;
+
+	if (ft == FIS_TYPE_REGD2H) {
+		rfis_off = 0x40;
+		fis_len = 20;
+		intr_bit = AHCI_P_IX_DHR;
+	} else if (ft == FIS_TYPE_SETDEVBITS) {
+		rfis_off = 0x58;
+		fis_len = 8;
+		intr_bit = AHCI_P_IX_SDB;
+	} else if (ft == FIS_TYPE_PIOSETUP) {
+		rfis_off = 0x20;
+		fis_len = 20;
+		intr_bit = 0;	/* no interrupt bit for this type */
+	} else {
+		WPRINTF("unsupported fis type %d\n", ft);
+		return;
+	}
+
+	memcpy(p->rfis + rfis_off, fis, fis_len);
+	if (intr_bit != 0) {
+		p->is |= intr_bit;
+		ahci_generate_intr(p->pr_sc);
+	}
+}
+
+/*
+ * Post a PIO Setup FIS whose only non-zero byte is the FIS type.
+ */
+static void
+ahci_write_fis_piosetup(struct ahci_port *p)
+{
+	/* Remaining bytes are zero-filled by the initializer. */
+	uint8_t fis[20] = { FIS_TYPE_PIOSETUP };
+
+	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
+}
+
+/*
+ * Build and post a Set Device Bits FIS for the command in `slot`.
+ *
+ * `tfd` carries the status in bits 0-7 and the error in bits 8-15.  The
+ * FIS is laid out as: byte 0 FIS type, byte 1 flags, byte 2 status
+ * (masked with 0x77), byte 3 error, bytes 4-7 a bitmap with only the
+ * slot's bit set.  The port's task file data is updated to `tfd`, and
+ * the task-file-error interrupt is latched when the status has
+ * ATA_S_ERROR set.
+ */
+static void
+ahci_write_fis_sdb(struct ahci_port *p, int slot, uint32_t tfd)
+{
+	uint8_t fis[8];
+	uint8_t error;
+	uint32_t slotmask;
+
+	error = (tfd >> 8) & 0xff;
+	memset(fis, 0, sizeof(fis));
+	fis[0] = FIS_TYPE_SETDEVBITS;
+	fis[1] = (1 << 6);	/* interrupt bit, as in the D2H FIS */
+	fis[2] = tfd & 0x77;
+	fis[3] = error;
+
+	/*
+	 * Copy rather than store through a cast pointer: fis[] has no
+	 * alignment guarantee, and the unsigned constant keeps the shift
+	 * defined for slot 31.
+	 */
+	slotmask = 1U << slot;
+	memcpy(fis + 4, &slotmask, sizeof(slotmask));
+
+	if (fis[2] & ATA_S_ERROR)
+		p->is |= AHCI_P_IX_TFE;
+	p->tfd = tfd;
+	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
+}
+
+/*
+ * Build and post a device-to-host Register FIS that completes the command
+ * in `slot`.  Status and error come from `tfd` (bits 0-7 and 8-15);
+ * bytes 4 through 13 are echoed from the command FIS.  On success the
+ * slot's command-issue bit is cleared; on error the task-file-error
+ * interrupt is latched instead.
+ */
+static void
+ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
+{
+	uint8_t fis[20] = { 0 };
+
+	fis[0] = FIS_TYPE_REGD2H;
+	fis[1] = (1 << 6);
+	fis[2] = tfd & 0xff;
+	fis[3] = (tfd >> 8) & 0xff;
+	/* bytes 4..13 mirror the command FIS */
+	memcpy(&fis[4], &cfis[4], 10);
+
+	if ((fis[2] & ATA_S_ERROR) == 0)
+		p->ci &= ~(1 << slot);
+	else
+		p->is |= AHCI_P_IX_TFE;
+
+	p->tfd = tfd;
+	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+/*
+ * Post the device-to-host Register FIS sent after a port reset.  Bytes
+ * 3, 4 and 12 are set to 1; an ATAPI device additionally reports
+ * 0x14/0xeb in bytes 5 and 6.
+ */
+static void
+ahci_write_reset_fis_d2h(struct ahci_port *p)
+{
+	uint8_t fis[20] = { 0 };
+
+	fis[0] = FIS_TYPE_REGD2H;
+	fis[3] = 1;
+	fis[4] = 1;
+	fis[12] = 1;
+	if (p->atapi) {
+		fis[5] = 0x14;
+		fis[6] = 0xeb;
+	}
+
+	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+/*
+ * Finish stopping a port: once command-list processing is off
+ * (AHCI_P_CMD_ST clear) and nothing is in flight, clear the running
+ * bit, the current command slot, and the command issue and active
+ * bitmaps.
+ */
+static void
+ahci_check_stopped(struct ahci_port *p)
+{
+	if ((p->cmd & AHCI_P_CMD_ST) != 0)
+		return;		/* still processing the command list */
+	if (p->pending != 0)
+		return;		/* requests still in flight */
+
+	p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
+	p->ci = 0;
+	p->sact = 0;
+}
+
+/*
+ * Stop a port: try to cancel every request on the busy list.  Each
+ * request that blockif agrees to cancel has its slot cleared from the
+ * active (NCQ) or command-issue bitmap and from the pending bitmap, and
+ * is returned to the free list.  Requests that cannot be cancelled are
+ * left on the busy list.  The controller mutex must be held.
+ */
+static void
+ahci_port_stop(struct ahci_port *p)
+{
+	struct ahci_ioreq *aior;
+	uint8_t *cfis;
+	int slot;
+	int ncq;
+	int error;
+
+	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
+
+	/*
+	 * NOTE(review): entries are unlinked while iterating, which
+	 * depends on TAILQ_REMOVE leaving the removed element's
+	 * io_blist next pointer intact -- confirm for the queue.h in use.
+	 */
+	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
+		/*
+		 * Try to cancel the outstanding blockif request.
+		 */
+		error = blockif_cancel(p->bctx, &aior->io_req);
+		if (error != 0)
+			continue;
+
+		slot = aior->slot;
+		cfis = aior->cfis;
+
+		/*
+		 * Decide afresh for every request whether it is an NCQ
+		 * command; the flag must not be inherited from an earlier
+		 * iteration or read before it has been set.
+		 */
+		ncq = (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+		    cfis[2] == ATA_READ_FPDMA_QUEUED);
+
+		if (ncq)
+			p->sact &= ~(1 << slot);
+		else
+			p->ci &= ~(1 << slot);
+
+		/*
+		 * This command is now done.
+		 */
+		p->pending &= ~(1 << slot);
+
+		/*
+		 * Delete the blockif request from the busy list
+		 */
+		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+		/*
+		 * Move the blockif request back to the free list
+		 */
+		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+	}
+
+	ahci_check_stopped(p);
+}
+
+/*
+ * Put a port back into its post-reset state.  A port without a backing
+ * store reports "no device"; otherwise the link is reported up, the
+ * signature is chosen by device type, and a reset D2H FIS is posted.
+ */
+static void
+ahci_port_reset(struct ahci_port *pr)
+{
+	pr->sctl = 0;
+	pr->serr = 0;
+	pr->sact = 0;
+	pr->xfermode = ATA_UDMA6;
+	pr->mult_sectors = 128;
+
+	if (pr->bctx == NULL) {
+		/* nothing attached to this port */
+		pr->ssts = ATA_SS_DET_NO_DEVICE;
+		pr->sig = 0xFFFFFFFF;
+		pr->tfd = 0x7F;
+		return;
+	}
+
+	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_SPD_GEN2 |
+		ATA_SS_IPM_ACTIVE;
+	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
+	if (pr->atapi) {
+		pr->sig = PxSIG_ATAPI;
+	} else {
+		pr->sig = PxSIG_ATA;
+		pr->tfd |= ATA_S_READY;
+	}
+
+	ahci_write_reset_fis_d2h(pr);
+}
+
+/*
+ * Reset the whole controller: restore the global control and interrupt
+ * status values, drop the pin-based interrupt if it is asserted, and
+ * reset every port after clearing its interrupt state.
+ */
+static void
+ahci_reset(struct pci_ahci_softc *sc)
+{
+	struct ahci_port *pr;
+	int n;
+
+	sc->ghc = AHCI_GHC_AE;
+	sc->is = 0;
+
+	if (sc->lintr) {
+		pci_lintr_deassert(sc->asc_pi);
+		sc->lintr = 0;
+	}
+
+	for (n = 0; n < sc->ports; n++) {
+		pr = &sc->port[n];
+		pr->ie = 0;
+		pr->is = 0;
+		ahci_port_reset(pr);
+	}
+}
+
+/*
+ * Store `src` into a `len`-byte field with each pair of bytes swapped
+ * (character i goes to position i ^ 1), padding with spaces once the
+ * source string is exhausted.
+ */
+static void
+ata_string(uint8_t *dest, const char *src, int len)
+{
+	int pos;
+
+	for (pos = 0; pos < len; pos++) {
+		uint8_t ch = ' ';
+
+		if (*src != '\0')
+			ch = *src++;
+		dest[pos ^ 1] = ch;
+	}
+}
+
+/*
+ * Store `src` into a `len`-byte field in natural byte order, padding
+ * with spaces once the source string is exhausted.
+ */
+static void
+atapi_string(uint8_t *dest, const char *src, int len)
+{
+	int pos;
+
+	for (pos = 0; pos < len; pos++) {
+		uint8_t ch = ' ';
+
+		if (*src != '\0')
+			ch = *src++;
+		dest[pos] = ch;
+	}
+}
+
+/*
+ * Queue the block I/O for a DMA read or write command.
+ *
+ * p	 port the command was issued on
+ * slot	 command slot
+ * cfis	 command FIS; the PRDT starts 0x80 bytes after it
+ * done	 byte offset into the transfer at which to start
+ * seek	 number of PRDT entries to skip
+ *
+ * At most BLOCKIF_IOV_MAX PRDT entries are queued per call; the count
+ * of entries left over is saved in aior->prdtl.
+ * NOTE(review): presumably the completion callback calls back in with
+ * updated done/seek values to queue the remainder -- that code is
+ * outside this part of the file.
+ */
+static void
+ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
+    int seek)
+{
+	struct ahci_ioreq *aior;
+	struct blockif_req *breq;
+	struct pci_ahci_softc *sc;
+	struct ahci_prdt_entry *prdt;
+	struct ahci_cmd_hdr *hdr;
+	uint64_t lba;
+	uint32_t len;
+	int i, err, iovcnt, ncq, readop;
+
+	sc = p->pr_sc;
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	ncq = 0;
+	readop = 1;
+
+	prdt += seek;
+	if (cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
+			cfis[2] == ATA_WRITE_FPDMA_QUEUED)
+		readop = 0;
+
+	/*
+	 * Decode the starting LBA and sector count.  The three command
+	 * families keep them in different FIS bytes, and a count of zero
+	 * stands for the largest count (65536 or 256 sectors).
+	 */
+	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+			cfis[2] == ATA_READ_FPDMA_QUEUED) {
+		lba = ((uint64_t)cfis[10] << 40) |
+			((uint64_t)cfis[9] << 32) |
+			((uint64_t)cfis[8] << 24) |
+			((uint64_t)cfis[6] << 16) |
+			((uint64_t)cfis[5] << 8) |
+			cfis[4];
+		len = cfis[11] << 8 | cfis[3];
+		if (!len)
+			len = 65536;
+		ncq = 1;
+	} else if (cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
+		lba = ((uint64_t)cfis[10] << 40) |
+			((uint64_t)cfis[9] << 32) |
+			((uint64_t)cfis[8] << 24) |
+			((uint64_t)cfis[6] << 16) |
+			((uint64_t)cfis[5] << 8) |
+			cfis[4];
+		len = cfis[13] << 8 | cfis[12];
+		if (!len)
+			len = 65536;
+	} else {
+		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
+			(cfis[5] << 8) | cfis[4];
+		len = cfis[12];
+		if (!len)
+			len = 256;
+	}
+	/* From here on lba and len are byte quantities, not sectors. */
+	lba *= blockif_sectsz(p->bctx);
+	len *= blockif_sectsz(p->bctx);
+
+	/*
+	 * Pull request off free list
+	 */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	breq = &aior->io_req;
+	breq->br_offset = lba + done;
+	iovcnt = hdr->prdtl - seek;
+	if (iovcnt > BLOCKIF_IOV_MAX) {
+		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
+		iovcnt = BLOCKIF_IOV_MAX;
+	} else
+		aior->prdtl = 0;
+	breq->br_iovcnt = iovcnt;
+
+	/*
+	 * Mark this command in-flight.
+	 */
+	p->pending |= 1 << slot;
+
+	/*
+	 * Stuff request onto busy list
+	 */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	/*
+	 * Build up the iovec based on the prdt
+	 */
+	for (i = 0; i < iovcnt; i++) {
+		uint32_t dbcsz;
+
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
+		    prdt->dba, dbcsz);
+		breq->br_iov[i].iov_len = dbcsz;
+		aior->done += dbcsz;
+		prdt++;
+	}
+	if (readop)
+		err = blockif_read(p->bctx, breq);
+	else
+		err = blockif_write(p->bctx, breq);
+	assert(err == 0);
+
+	/* NCQ commands release their command-issue bit once queued. */
+	if (ncq)
+		p->ci &= ~(1 << slot);
+}
+
+/*
+ * Queue a cache flush for the command in `slot`.  The request carries
+ * no data, so its length, progress and leftover-PRDT count are zero.
+ */
+static void
+ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	struct ahci_ioreq *req;
+	int rc;
+
+	/* Take the first free request; one must be available. */
+	req = STAILQ_FIRST(&p->iofhd);
+	assert(req != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+
+	req->cfis = cfis;
+	req->slot = slot;
+	req->len = 0;
+	req->done = 0;
+	req->prdtl = 0;
+
+	/* The command is in flight from here on. */
+	p->pending |= 1 << slot;
+	TAILQ_INSERT_HEAD(&p->iobhd, req, io_blist);
+
+	rc = blockif_flush(p->bctx, &req->io_req);
+	assert(rc == 0);
+}
+
+/*
+ * Copy up to `size` bytes from `buf` into the guest buffers described
+ * by the command's PRDT (which starts 0x80 bytes after `cfis`), filling
+ * the entries in order until the data or the table runs out.  The
+ * number of bytes actually copied is stored in the command header's
+ * prdbc field.  A non-positive `size` copies nothing.
+ */
+static inline void
+write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
+		void *buf, int size)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	const uint8_t *from;
+	uint32_t total, remaining;
+	int i;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	/*
+	 * Work in unsigned byte counts so the comparison against the
+	 * (unsigned) PRDT entry size cannot be fooled by a negative size.
+	 */
+	total = size > 0 ? (uint32_t)size : 0;
+	remaining = total;
+	/* A byte pointer keeps the pointer arithmetic below standard C. */
+	from = buf;
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+	for (i = 0; i < hdr->prdtl && remaining != 0; i++) {
+		uint8_t *ptr;
+		uint32_t dbcsz, sublen;
+
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
+		sublen = remaining < dbcsz ? remaining : dbcsz;
+		memcpy(ptr, from, sublen);
+		remaining -= sublen;
+		from += sublen;
+		prdt++;
+	}
+	hdr->prdbc = total - remaining;
+}
+
+/*
+ * Handle the ATA identify command.  An ATAPI device, or a command with
+ * no PRDT entries, gets an abort error.  Otherwise a 256-word identify
+ * block is built from the backing store's geometry and size and copied
+ * to the guest through the PRDT.  In both cases the controller
+ * interrupt state is re-evaluated before returning.
+ */
+static void
+handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	struct ahci_cmd_hdr *hdr;
+
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	if (p->atapi || hdr->prdtl == 0) {
+		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+		p->is |= AHCI_P_IX_TFE;
+	} else {
+		uint16_t buf[256];
+		uint64_t sectors;
+		uint16_t cyl;
+		uint8_t sech, heads;
+
+		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+		blockif_chs(p->bctx, &cyl, &heads, &sech);
+		memset(buf, 0, sizeof(buf));
+		buf[0] = 0x0040;
+		buf[1] = cyl;
+		buf[3] = heads;
+		buf[6] = sech;
+		/* TODO emulate different serial? */
+		ata_string((uint8_t *)(buf+10), "123456", 20);
+		ata_string((uint8_t *)(buf+23), "001", 8);
+		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
+		buf[47] = (0x8000 | 128);
+		buf[48] = 0x1;
+		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
+		buf[50] = (1 << 14);
+		buf[53] = (1 << 1 | 1 << 2);
+		if (p->mult_sectors)
+			buf[59] = (0x100 | p->mult_sectors);
+		/*
+		 * Words 60-61 hold the capacity reachable with 28-bit
+		 * addressing and may not exceed 0x0fffffff sectors.
+		 * Larger disks report that maximum here rather than the
+		 * truncated low bits of the real size; the full size is
+		 * in words 100-103.
+		 */
+		if (sectors <= 0x0fffffff) {
+			buf[60] = sectors;
+			buf[61] = (sectors >> 16);
+		} else {
+			buf[60] = 0xffff;
+			buf[61] = 0x0fff;
+		}
+		buf[63] = 0x7;
+		if (p->xfermode & ATA_WDMA0)
+			buf[63] |= (1 << ((p->xfermode & 7) + 8));
+		buf[64] = 0x3;
+		buf[65] = 100;
+		buf[66] = 100;
+		buf[67] = 100;
+		buf[68] = 100;
+		buf[75] = 31;
+		buf[76] = (1 << 8 | 1 << 2);
+		buf[80] = 0x1f0;
+		buf[81] = 0x28;
+		buf[82] = (1 << 5 | 1 << 14);
+		buf[83] = (1 << 10 | 1 << 12 | 1 << 13 | 1 << 14);
+		buf[84] = (1 << 14);
+		buf[85] = (1 << 5 | 1 << 14);
+		buf[86] = (1 << 10 | 1 << 12 | 1 << 13);
+		buf[87] = (1 << 14);
+		buf[88] = 0x7f;
+		if (p->xfermode & ATA_UDMA0)
+			buf[88] |= (1 << ((p->xfermode & 7) + 8));
+		buf[93] = (1 | 1 <<14);
+		/* full 48-bit sector count */
+		buf[100] = sectors;
+		buf[101] = (sectors >> 16);
+		buf[102] = (sectors >> 32);
+		buf[103] = (sectors >> 48);
+		ahci_write_fis_piosetup(p);
+		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
+		p->tfd = ATA_S_DSC | ATA_S_READY;
+		p->is |= AHCI_P_IX_DP;
+		p->ci &= ~(1 << slot);
+	}
+	ahci_generate_intr(p->pr_sc);
+}
+
+/*
+ * Handle the ATAPI identify command.  A non-ATAPI device answers with
+ * an abort error; an ATAPI device returns a 256-word identify block
+ * through the PRDT.  The controller interrupt state is re-evaluated
+ * either way.
+ */
+static void
+handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint16_t buf[256];
+
+	if (!p->atapi) {
+		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+		p->is |= AHCI_P_IX_TFE;
+		ahci_generate_intr(p->pr_sc);
+		return;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
+
+	/* TODO emulate different serial? */
+	ata_string((uint8_t *)&buf[10], "123456", 20);
+	ata_string((uint8_t *)&buf[23], "001", 8);
+	ata_string((uint8_t *)&buf[27], "BHYVE SATA DVD ROM", 40);
+
+	buf[49] = (1 << 9 | 1 << 8);
+	buf[50] = (1 << 14 | 1);
+	buf[53] = (1 << 2 | 1 << 1);
+	buf[62] = 0x3f;
+	buf[63] = 7;
+	buf[64] = 3;
+	buf[65] = 100;
+	buf[66] = 100;
+	buf[67] = 100;
+	buf[68] = 100;
+	buf[76] = (1 << 2 | 1 << 1);
+	buf[78] = (1 << 5);
+	buf[80] = (0x1f << 4);
+	buf[82] = (1 << 4);
+	buf[83] = (1 << 14);
+	buf[84] = (1 << 14);
+	buf[85] = (1 << 4);
+	buf[87] = (1 << 14);
+	buf[88] = (1 << 14 | 0x7f);
+
+	ahci_write_fis_piosetup(p);
+	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
+	p->tfd = ATA_S_DSC | ATA_S_READY;
+	p->is |= AHCI_P_IX_DHR;
+	p->ci &= ~(1 << slot);
+
+	ahci_generate_intr(p->pr_sc);
+}
+
+/*
+ * Answer a SCSI INQUIRY with a fixed 36-byte response, truncated to the
+ * allocation length given in byte 4 of the packet.
+ */
+static void
+atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	static const uint8_t header[8] = {
+		0x05, 0x80, 0x00, 0x21, 31, 0, 0, 0
+	};
+	uint8_t buf[36];
+	uint8_t *acmd = cfis + 0x40;
+	int len;
+
+	memcpy(buf, header, sizeof(header));
+	atapi_string(&buf[8], "BHYVE", 8);
+	atapi_string(&buf[16], "BHYVE DVD-ROM", 16);
+	atapi_string(&buf[32], "001", 4);
+
+	/* never return more than the guest asked for */
+	len = acmd[4] < sizeof(buf) ? acmd[4] : sizeof(buf);
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	write_prdt(p, slot, cfis, buf, len);
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Answer READ CAPACITY: the last block number followed by the block
+ * size (2048), both as big-endian 32-bit values.
+ */
+static void
+atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t buf[8];
+	uint64_t nblocks = blockif_size(p->bctx) / 2048;
+
+	be32enc(&buf[0], nblocks - 1);
+	be32enc(&buf[4], 2048);
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	write_prdt(p, slot, cfis, buf, sizeof(buf));
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Handle the READ TOC packet command.
+ *
+ * The allocation length is taken from bytes 7-8 of the packet and the
+ * format from the top two bits of byte 9.  Formats 0, 1 and 2 build a
+ * response on the stack, write at most the allocation length of it
+ * through the PRDT and complete the command; any other format fails
+ * with ILLEGAL REQUEST / asc 0x24.  In every response built here the
+ * first two bytes are a big-endian length that excludes the length
+ * field itself.
+ */
+static void
+atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd;
+	uint8_t format;
+	int len;
+
+	acmd = cfis + 0x40;
+
+	len = be16dec(acmd + 7);
+	format = acmd[9] >> 6;
+	switch (format) {
+	case 0:
+	{
+		int msf, size;
+		uint64_t sectors;
+		uint8_t start_track, buf[20], *bp;
+
+		/* Bit 1 of byte 1 selects MSF rather than LBA addresses. */
+		msf = (acmd[1] >> 1) & 1;
+		start_track = acmd[6];
+		/* Only track 0/1 and the 0xaa entry can be asked for. */
+		if (start_track > 1 && start_track != 0xaa) {
+			uint32_t tfd;
+			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+			p->asc = 0x24;
+			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+			ahci_write_fis_d2h(p, slot, cfis, tfd);
+			return;
+		}
+		/* Skip the length field; it is filled in at the end. */
+		bp = buf + 2;
+		*bp++ = 1;
+		*bp++ = 1;
+		/* Descriptor for track 1, starting at address 0. */
+		if (start_track <= 1) {
+			*bp++ = 0;
+			*bp++ = 0x14;
+			*bp++ = 1;
+			*bp++ = 0;
+			if (msf) {
+				*bp++ = 0;
+				lba_to_msf(bp, 0);
+				bp += 3;
+			} else {
+				*bp++ = 0;
+				*bp++ = 0;
+				*bp++ = 0;
+				*bp++ = 0;
+			}
+		}
+		/* Descriptor 0xaa, whose address is the end of the media. */
+		*bp++ = 0;
+		*bp++ = 0x14;
+		*bp++ = 0xaa;
+		*bp++ = 0;
+		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+		sectors >>= 2;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, sectors);
+			bp += 3;
+		} else {
+			be32enc(bp, sectors);
+			bp += 4;
+		}
+		size = bp - buf;
+		be16enc(buf, size - 2);
+		if (len > size)
+			len = size;
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	case 1:
+	{
+		/* Fixed 12-byte response. */
+		uint8_t buf[12];
+
+		memset(buf, 0, sizeof(buf));
+		buf[1] = 0xa;
+		buf[2] = 0x1;
+		buf[3] = 0x1;
+		if (len > sizeof(buf))
+			len = sizeof(buf);
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	case 2:
+	{
+		int msf, size;
+		uint64_t sectors;
+		uint8_t start_track, *bp, buf[50];
+
+		msf = (acmd[1] >> 1) & 1;
+		/* start_track is read but not used by this format. */
+		start_track = acmd[6];
+		bp = buf + 2;
+		*bp++ = 1;
+		*bp++ = 1;
+
+		/* Entry 0xa0 */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+
+		/* Entry 0xa1 */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa1;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+
+		/* Entry 0xa2, whose address is the end of the media. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 0xa2;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+		sectors >>= 2;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, sectors);
+			bp += 3;
+		} else {
+			be32enc(bp, sectors);
+			bp += 4;
+		}
+
+		/* Entry for track 1, starting at address 0. */
+		*bp++ = 1;
+		*bp++ = 0x14;
+		*bp++ = 0;
+		*bp++ = 1;
+		*bp++ = 0;
+		*bp++ = 0;
+		*bp++ = 0;
+		if (msf) {
+			*bp++ = 0;
+			lba_to_msf(bp, 0);
+			bp += 3;
+		} else {
+			*bp++ = 0;
+			*bp++ = 0;
+			*bp++ = 0;
+			*bp++ = 0;
+		}
+
+		size = bp - buf;
+		be16enc(buf, size - 2);
+		if (len > size)
+			len = size;
+		write_prdt(p, slot, cfis, buf, len);
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	}
+	default:
+	{
+		uint32_t tfd;
+
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, tfd);
+		break;
+	}
+	}
+}
+
+/*
+ * Queue the block I/O for an ATAPI READ(10) or READ(12).
+ *
+ * p	 port the command was issued on
+ * slot	 command slot
+ * cfis	 command FIS; the packet starts at offset 0x40 and the PRDT at
+ *	 offset 0x80
+ * done	 byte offset into the transfer at which to start
+ * seek	 number of PRDT entries to skip
+ *
+ * Blocks are 2048 bytes.  A transfer length of zero completes the
+ * command successfully without moving any data.  At most
+ * BLOCKIF_IOV_MAX PRDT entries are queued per call; the count of
+ * entries left over is saved in aior->prdtl.
+ */
+static void
+atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
+		uint32_t done, int seek)
+{
+	struct ahci_ioreq *aior;
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	struct blockif_req *breq;
+	struct pci_ahci_softc *sc;
+	uint8_t *acmd;
+	uint64_t lba;
+	uint32_t len;
+	int i, err, iovcnt;
+
+	sc = p->pr_sc;
+	acmd = cfis + 0x40;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+
+	prdt += seek;
+	lba = be32dec(acmd + 2);
+	if (acmd[0] == READ_10)
+		len = be16dec(acmd + 7);
+	else
+		len = be32dec(acmd + 6);
+	if (len == 0) {
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		/*
+		 * The command has just been completed; queueing block
+		 * I/O for it as well would complete it a second time.
+		 */
+		return;
+	}
+	/* From here on lba and len are byte quantities, not blocks. */
+	lba *= 2048;
+	len *= 2048;
+
+	/*
+	 * Pull request off free list
+	 */
+	aior = STAILQ_FIRST(&p->iofhd);
+	assert(aior != NULL);
+	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+	aior->cfis = cfis;
+	aior->slot = slot;
+	aior->len = len;
+	aior->done = done;
+	breq = &aior->io_req;
+	breq->br_offset = lba + done;
+	iovcnt = hdr->prdtl - seek;
+	if (iovcnt > BLOCKIF_IOV_MAX) {
+		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
+		iovcnt = BLOCKIF_IOV_MAX;
+	} else
+		aior->prdtl = 0;
+	breq->br_iovcnt = iovcnt;
+
+	/*
+	 * Mark this command in-flight.
+	 */
+	p->pending |= 1 << slot;
+
+	/*
+	 * Stuff request onto busy list
+	 */
+	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+	/*
+	 * Build up the iovec based on the prdt
+	 */
+	for (i = 0; i < iovcnt; i++) {
+		uint32_t dbcsz;
+
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
+		    prdt->dba, dbcsz);
+		breq->br_iov[i].iov_len = dbcsz;
+		aior->done += dbcsz;
+		prdt++;
+	}
+	err = blockif_read(p->bctx, breq);
+	assert(err == 0);
+}
+
+/*
+ * Answer REQUEST SENSE with the sense key and additional sense code
+ * saved on the port, returning at most the allocation length given in
+ * byte 4 of the packet (and never more than 64 bytes).
+ */
+static void
+atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t buf[64];
+	uint8_t *acmd = cfis + 0x40;
+	int len = acmd[4];
+
+	if (len > sizeof(buf))
+		len = sizeof(buf);
+
+	/* Only the part that will be returned needs clearing. */
+	memset(buf, 0, len);
+	buf[0] = 0x70 | (1 << 7);
+	buf[2] = p->sense_key;
+	buf[7] = 10;
+	buf[12] = p->asc;
+
+	write_prdt(p, slot, cfis, buf, len);
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+}
+
+/*
+ * Handle START STOP UNIT.  The low two bits of packet byte 4 select
+ * the operation: value 2 is rejected with ILLEGAL REQUEST / asc 0x53,
+ * every other value succeeds without doing anything.
+ */
+static void
+atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd = cfis + 0x40;
+	uint32_t tfd;
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+
+	if ((acmd[4] & 3) == 2) {
+		/* TODO eject media */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x53;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+	} else {
+		tfd = ATA_S_READY | ATA_S_DSC;
+	}
+
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Handle MODE SENSE(10).
+ *
+ * The allocation length comes from packet bytes 7-8, the page control
+ * field from the top two bits of byte 2 and the page code from its low
+ * six bits.  Page control 0 with one of the two supported page codes
+ * returns a fixed page image, truncated to the allocation length.
+ * Page control 3 fails with asc 0x39; page control 1 or 2, or an
+ * unsupported page code, fails with asc 0x24.
+ */
+static void
+atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd;
+	uint32_t tfd;
+	uint8_t pc, code;
+	int len;
+
+	acmd = cfis + 0x40;
+	len = be16dec(acmd + 7);
+	pc = acmd[2] >> 6;
+	code = acmd[2] & 0x3f;
+
+	switch (pc) {
+	case 0:
+		switch (code) {
+		case MODEPAGE_RW_ERROR_RECOVERY:
+		{
+			uint8_t buf[16];
+
+			if (len > sizeof(buf))
+				len = sizeof(buf);
+
+			memset(buf, 0, sizeof(buf));
+			be16enc(buf, 16 - 2);
+			buf[2] = 0x70;
+			buf[8] = 0x01;
+			buf[9] = 16 - 10;
+			buf[11] = 0x05;
+			write_prdt(p, slot, cfis, buf, len);
+			tfd = ATA_S_READY | ATA_S_DSC;
+			break;
+		}
+		case MODEPAGE_CD_CAPABILITIES:
+		{
+			uint8_t buf[30];
+
+			if (len > sizeof(buf))
+				len = sizeof(buf);
+
+			memset(buf, 0, sizeof(buf));
+			be16enc(buf, 30 - 2);
+			buf[2] = 0x70;
+			buf[8] = 0x2A;
+			buf[9] = 30 - 10;
+			buf[10] = 0x08;
+			buf[12] = 0x71;
+			be16enc(&buf[18], 2);
+			be16enc(&buf[20], 512);
+			write_prdt(p, slot, cfis, buf, len);
+			tfd = ATA_S_READY | ATA_S_DSC;
+			break;
+		}
+		default:
+			/* unsupported page: share the asc 0x24 path below */
+			goto error;
+			break;
+		}
+		break;
+	case 3:
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x39;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		break;
+/*
+ * This label sits inside the outer switch so that the jump above lands
+ * on the same statements as page control 1 and 2.
+ */
+error:
+	case 1:
+	case 2:
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		break;
+	}
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Handle GET EVENT STATUS NOTIFICATION.  Only the polled form (bit 0
+ * of packet byte 1 set) is supported; it returns a fixed 8-byte
+ * response truncated to the allocation length in bytes 7-8.  The
+ * asynchronous form fails with ILLEGAL REQUEST / asc 0x24.
+ */
+static void
+atapi_get_event_status_notification(struct ahci_port *p, int slot,
+    uint8_t *cfis)
+{
+	uint8_t *acmd = cfis + 0x40;
+	uint32_t tfd;
+
+	if ((acmd[1] & 1) != 0) {
+		uint8_t buf[8];
+		int len = be16dec(acmd + 7);
+
+		if (len > sizeof(buf))
+			len = sizeof(buf);
+
+		memset(buf, 0, sizeof(buf));
+		be16enc(buf, 8 - 2);
+		buf[2] = 0x04;
+		buf[3] = 0x10;
+		buf[5] = 0x02;
+		write_prdt(p, slot, cfis, buf, len);
+		tfd = ATA_S_READY | ATA_S_DSC;
+	} else {
+		/* we don't support asynchronous operation */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x24;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+	}
+
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+}
+
+/*
+ * Dispatch an ATAPI packet command on its opcode (the first byte of
+ * the packet, which sits 0x40 bytes into the command table).  Opcodes
+ * without a handler fail with ILLEGAL REQUEST / asc 0x20.
+ */
+static void
+handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+	uint8_t *acmd = cfis + 0x40;
+
+#ifdef AHCI_DEBUG
+	{
+		int n;
+
+		DPRINTF("ACMD:");
+		for (n = 0; n < 16; n++)
+			DPRINTF("%02x ", acmd[n]);
+		DPRINTF("\n");
+	}
+#endif
+
+	switch (acmd[0]) {
+	case TEST_UNIT_READY:
+	case PREVENT_ALLOW:
+		/* PREVENT_ALLOW is a TODO; both just report success. */
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	case REQUEST_SENSE:
+		atapi_request_sense(p, slot, cfis);
+		break;
+	case INQUIRY:
+		atapi_inquiry(p, slot, cfis);
+		break;
+	case START_STOP_UNIT:
+		atapi_start_stop_unit(p, slot, cfis);
+		break;
+	case READ_CAPACITY:
+		atapi_read_capacity(p, slot, cfis);
+		break;
+	case READ_10:
+	case READ_12:
+		atapi_read(p, slot, cfis, 0, 0);
+		break;
+	case READ_TOC:
+		atapi_read_toc(p, slot, cfis);
+		break;
+	case GET_EVENT_STATUS_NOTIFICATION:
+		atapi_get_event_status_notification(p, slot, cfis);
+		break;
+	case MODE_SENSE_10:
+		atapi_mode_sense(p, slot, cfis);
+		break;
+	default:
+		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x20;
+		ahci_write_fis_d2h(p, slot, cfis,
+		    (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR);
+		break;
+	}
+}
+
+/*
+ * Execute the ATA command held in a host-to-device Register FIS.  The
+ * command code is byte 2 of the FIS.  Data commands are queued to
+ * blockif and finish asynchronously; everything else is completed (or
+ * failed) before returning.  Unknown commands are logged and aborted.
+ */
+static void
+ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
+{
+
+	switch (cfis[2]) {
+	case ATA_ATA_IDENTIFY:
+		handle_identify(p, slot, cfis);
+		break;
+	case ATA_SETFEATURES:
+	{
+		/* The sub-command is in byte 3, its argument in byte 12. */
+		switch (cfis[3]) {
+		case ATA_SF_ENAB_SATA_SF:
+			switch (cfis[12]) {
+			case ATA_SATA_SF_AN:
+				p->tfd = ATA_S_DSC | ATA_S_READY;
+				break;
+			default:
+				p->tfd = ATA_S_ERROR | ATA_S_READY;
+				p->tfd |= (ATA_ERROR_ABORT << 8);
+				break;
+			}
+			break;
+		case ATA_SF_ENAB_WCACHE:
+		case ATA_SF_DIS_WCACHE:
+		case ATA_SF_ENAB_RCACHE:
+		case ATA_SF_DIS_RCACHE:
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+			break;
+		case ATA_SF_SETXFER:
+		{
+			switch (cfis[12] & 0xf8) {
+			case ATA_PIO:
+			case ATA_PIO0:
+				break;
+			case ATA_WDMA0:
+			case ATA_UDMA0:
+				p->xfermode = (cfis[12] & 0x7);
+				break;
+			}
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+			break;
+		}
+		default:
+			p->tfd = ATA_S_ERROR | ATA_S_READY;
+			p->tfd |= (ATA_ERROR_ABORT << 8);
+			break;
+		}
+		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
+		break;
+	}
+	case ATA_SET_MULTI:
+		/* Accept 0 or a power of two no larger than 128. */
+		if (cfis[12] != 0 &&
+			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
+			p->tfd = ATA_S_ERROR | ATA_S_READY;
+			p->tfd |= (ATA_ERROR_ABORT << 8);
+		} else {
+			p->mult_sectors = cfis[12];
+			p->tfd = ATA_S_DSC | ATA_S_READY;
+		}
+		p->is |= AHCI_P_IX_DP;
+		p->ci &= ~(1 << slot);
+		ahci_generate_intr(p->pr_sc);
+		break;
+	case ATA_READ_DMA:
+	case ATA_WRITE_DMA:
+	case ATA_READ_DMA48:
+	case ATA_WRITE_DMA48:
+	case ATA_READ_FPDMA_QUEUED:
+	case ATA_WRITE_FPDMA_QUEUED:
+		ahci_handle_dma(p, slot, cfis, 0, 0);
+		break;
+	case ATA_FLUSHCACHE:
+	case ATA_FLUSHCACHE48:
+		ahci_handle_flush(p, slot, cfis);
+		break;
+	/*
+	 * Nothing to do for these beyond reporting success.  STANDBY is
+	 * completed like the others: leaving it uncompleted would keep
+	 * its command-issue bit set, so the guest would never see the
+	 * command finish.
+	 */
+	case ATA_STANDBY_CMD:
+	case ATA_NOP:
+	case ATA_STANDBY_IMMEDIATE:
+	case ATA_IDLE_IMMEDIATE:
+	case ATA_SLEEP:
+		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+		break;
+	case ATA_ATAPI_IDENTIFY:
+		handle_atapi_identify(p, slot, cfis);
+		break;
+	case ATA_PACKET_CMD:
+		if (!p->atapi) {
+			p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+			p->is |= AHCI_P_IX_TFE;
+			ahci_generate_intr(p->pr_sc);
+		} else
+			handle_packet_cmd(p, slot, cfis);
+		break;
+	default:
+		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
+		p->tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+		p->is |= AHCI_P_IX_TFE;
+		ahci_generate_intr(p->pr_sc);
+		break;
+	}
+}
+
+/*
+ * Process the command in one slot of the port's command list.  The
+ * command table is mapped from guest memory using the address in the
+ * command header.  A table whose FIS is not a host-to-device Register
+ * FIS is logged and ignored.  A FIS with bit 7 of byte 1 set carries a
+ * command and is executed; otherwise it is a control update, handled
+ * here, after which the slot's command-issue bit is cleared.
+ */
+static void
+ahci_handle_slot(struct ahci_port *p, int slot)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_prdt_entry *prdt;
+	struct pci_ahci_softc *sc;
+	uint8_t *cfis;
+	int cfl;
+#ifdef AHCI_DEBUG
+	int i;		/* index for the debug dumps below */
+#endif
+
+	sc = p->pr_sc;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+	cfl = (hdr->flags & 0x1f) * 4;
+	/* Map the command FIS area (0x80 bytes) plus the whole PRDT. */
+	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
+			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
+	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
+
+#ifdef AHCI_DEBUG
+	DPRINTF("\ncfis:");
+	for (i = 0; i < cfl; i++) {
+		if (i % 10 == 0)
+			DPRINTF("\n");
+		DPRINTF("%02x ", cfis[i]);
+	}
+	DPRINTF("\n");
+
+	for (i = 0; i < hdr->prdtl; i++) {
+		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
+		prdt++;
+	}
+#endif
+
+	if (cfis[0] != FIS_TYPE_REGH2D) {
+		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
+		return;
+	}
+
+	if (cfis[1] & 0x80) {
+		ahci_handle_cmd(p, slot, cfis);
+	} else {
+		/*
+		 * Bit 2 of byte 15 arms a reset; the port is actually
+		 * reset by the next control update that has it clear.
+		 */
+		if (cfis[15] & (1 << 2))
+			p->reset = 1;
+		else if (p->reset) {
+			p->reset = 0;
+			ahci_port_reset(p);
+		}
+		p->ci &= ~(1 << slot);
+	}
+}
+
+/*
+ * Issue every command the guest has queued on port 'p' that is not already
+ * being processed.  Does nothing unless the port has been started
+ * (AHCI_P_CMD_ST set in p->cmd).
+ *
+ * For each slot whose bit is set in p->ci but clear in p->pending, the
+ * current-command-slot field of p->cmd is updated to the slot number and
+ * the slot is dispatched through ahci_handle_slot().
+ */
+static void
+ahci_handle_port(struct ahci_port *p)
+{
+	uint32_t bit;
+	int i;
+
+	if (!(p->cmd & AHCI_P_CMD_ST))
+		return;
+
+	/*
+	 * Search for any new commands to issue ignoring those that
+	 * are already in-flight.
+	 */
+	for (i = 0; (i < 32) && p->ci; i++) {
+		/* Unsigned shift: 1 << 31 would overflow a signed int. */
+		bit = 1U << i;
+		if ((p->ci & bit) && !(p->pending & bit)) {
+			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
+			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
+			ahci_handle_slot(p, i);
+		}
+	}
+}
+
+/*
+ * blockif callback routine - this runs in the context of the blockif
+ * i/o thread, so the mutex needs to be acquired.
+ *
+ * Completion handler for ATA (non-ATAPI) block requests.
+ *
+ *  br:  the completed blockif request; br->br_param is the owning
+ *       struct ahci_ioreq.
+ *  err: non-zero when the backend reported a failure.
+ *
+ * The request is moved from the port's busy list back to its free list.
+ * If aior->prdtl is non-zero and no error occurred, the transfer is
+ * continued through ahci_handle_dma() and the command stays pending.
+ * Otherwise a completion FIS is written (Set Device Bits for NCQ commands,
+ * D2H register FIS for everything else), the slot is removed from
+ * p->pending and ahci_check_stopped() is called.
+ */
+static void
+ata_ioreq_cb(struct blockif_req *br, int err)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_ioreq *aior;
+	struct ahci_port *p;
+	struct pci_ahci_softc *sc;
+	uint32_t tfd;
+	uint8_t *cfis;
+	int pending, slot, ncq;
+
+	DPRINTF("%s %d\n", __func__, err);
+
+	ncq = 0;
+	aior = br->br_param;
+	p = aior->io_pr;
+	cfis = aior->cfis;
+	slot = aior->slot;
+	/* presumably the number of PRDT entries still to transfer - confirm */
+	pending = aior->prdtl;
+	sc = p->pr_sc;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
+
+	/* cfis[2] holds the ATA command; FPDMA QUEUED commands are NCQ. */
+	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+			cfis[2] == ATA_READ_FPDMA_QUEUED)
+		ncq = 1;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/*
+	 * Delete the blockif request from the busy list
+	 */
+	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+	/*
+	 * Move the blockif request back to the free list
+	 */
+	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+
+	/*
+	 * More of the command remains: resume from the bytes already done
+	 * and from PRDT index (hdr->prdtl - pending).  The command is not
+	 * complete yet, so skip the completion handling below.
+	 */
+	if (pending && !err) {
+		ahci_handle_dma(p, slot, cfis, aior->done,
+		    hdr->prdtl - pending);
+		goto out;
+	}
+
+	if (!err && aior->done == aior->len) {
+		/* Success: NCQ commands report a zero byte count. */
+		tfd = ATA_S_READY | ATA_S_DSC;
+		if (ncq)
+			hdr->prdbc = 0;
+		else
+			hdr->prdbc = aior->len;
+	} else {
+		/* Failure or short transfer: abort, and flag the slot in serr for NCQ. */
+		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
+		hdr->prdbc = 0;
+		if (ncq)
+			p->serr |= (1 << slot);
+	}
+
+	if (ncq) {
+		p->sact &= ~(1 << slot);
+		ahci_write_fis_sdb(p, slot, tfd);
+	} else
+		ahci_write_fis_d2h(p, slot, cfis, tfd);
+
+	/*
+	 * This command is now complete.
+	 */
+	p->pending &= ~(1 << slot);
+
+	ahci_check_stopped(p);
+out:
+	pthread_mutex_unlock(&sc->mtx);
+	DPRINTF("%s exit\n", __func__);
+}
+
+/*
+ * Completion handler for ATAPI block requests; the ATAPI counterpart of
+ * ata_ioreq_cb().  It takes the softc mutex itself.
+ *
+ *  br:  the completed blockif request; br->br_param is the owning
+ *       struct ahci_ioreq.
+ *  err: non-zero when the backend reported a failure.
+ *
+ * The request is returned to the port's free list.  If aior->prdtl is
+ * non-zero and no error occurred, the read is continued through
+ * atapi_read().  Otherwise a D2H register FIS is written with either a
+ * success status or an ILLEGAL REQUEST sense key, the slot is removed from
+ * p->pending and ahci_check_stopped() is called.
+ */
+static void
+atapi_ioreq_cb(struct blockif_req *br, int err)
+{
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_ioreq *aior;
+	struct ahci_port *p;
+	struct pci_ahci_softc *sc;
+	uint8_t *cfis;
+	uint32_t tfd;
+	int pending, slot;
+
+	DPRINTF("%s %d\n", __func__, err);
+
+	aior = br->br_param;
+	p = aior->io_pr;
+	cfis = aior->cfis;
+	slot = aior->slot;
+	/* presumably the number of PRDT entries still to transfer - confirm */
+	pending = aior->prdtl;
+	sc = p->pr_sc;
+	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/*
+	 * Delete the blockif request from the busy list
+	 */
+	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+	/*
+	 * Move the blockif request back to the free list
+	 */
+	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+
+	/* More data remains: continue the read; the command is not done yet. */
+	if (pending && !err) {
+		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
+		goto out;
+	}
+
+	if (!err && aior->done == aior->len) {
+		tfd = ATA_S_READY | ATA_S_DSC;
+		hdr->prdbc = aior->len;
+	} else {
+		/* Record sense data on the port and place the sense key in tfd. */
+		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
+		p->asc = 0x21;
+		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
+		hdr->prdbc = 0;
+	}
+
+	/* Replace the low three bits of cfis[4] with ATA_I_CMD | ATA_I_IN. */
+	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
+	ahci_write_fis_d2h(p, slot, cfis, tfd);
+
+	/*
+	 * This command is now complete.
+	 */
+	p->pending &= ~(1 << slot);
+
+	ahci_check_stopped(p);
+out:
+	pthread_mutex_unlock(&sc->mtx);
+	DPRINTF("%s exit\n", __func__);
+}
+
+/*
+ * Set up the pool of i/o request descriptors for port 'pr'.
+ *
+ * The pool holds as many entries as the block backend's queue
+ * (blockif_queuesz()).  Every entry points back at the port, carries
+ * itself as the callback parameter and is wired to the ATA or ATAPI
+ * completion callback depending on pr->atapi.  All entries start on the
+ * free list; the busy list starts empty.
+ */
+static void
+pci_ahci_ioreq_init(struct ahci_port *pr)
+{
+	struct ahci_ioreq *aior;
+	int n;
+
+	pr->ioqsz = blockif_queuesz(pr->bctx);
+	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
+
+	STAILQ_INIT(&pr->iofhd);
+	TAILQ_INIT(&pr->iobhd);
+
+	/* Populate the free queue with every descriptor in the pool. */
+	for (n = 0; n < pr->ioqsz; n++) {
+		aior = pr->ioreq + n;
+		aior->io_pr = pr;
+		aior->io_req.br_callback =
+		    pr->atapi ? atapi_ioreq_cb : ata_ioreq_cb;
+		aior->io_req.br_param = aior;
+		STAILQ_INSERT_TAIL(&pr->iofhd, aior, io_flist);
+	}
+}
+
+/*
+ * Handle a guest write to a per-port register.
+ *
+ *  sc:     controller state.
+ *  offset: offset within the AHCI BAR; it is decomposed into a port number
+ *          and a register offset within that port's AHCI_STEP-sized window.
+ *  value:  the value written by the guest.
+ *
+ * The caller (pci_ahci_write) has already checked that the offset falls
+ * inside the implemented port range and holds sc->mtx.
+ */
+static void
+pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
+{
+	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
+	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
+	struct ahci_port *p = &sc->port[port];
+
+	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
+		port, offset, value);
+
+	switch (offset) {
+	case AHCI_P_CLB:
+		p->clb = value;
+		break;
+	case AHCI_P_CLBU:
+		p->clbu = value;
+		break;
+	case AHCI_P_FB:
+		p->fb = value;
+		break;
+	case AHCI_P_FBU:
+		p->fbu = value;
+		break;
+	case AHCI_P_IS:
+		/* Write-one-to-clear: every bit set in value is cleared. */
+		p->is &= ~value;
+		break;
+	case AHCI_P_IE:
+		/* Only the bits in the 0xFDC000FF mask are kept. */
+		p->ie = value & 0xFDC000FF;
+		ahci_generate_intr(sc);
+		break;
+	case AHCI_P_CMD:
+	{
+		p->cmd = value;
+		
+		if (!(value & AHCI_P_CMD_ST)) {
+			ahci_port_stop(p);
+		} else {
+			uint64_t clb;
+
+			/*
+			 * Port started: report the command list as running
+			 * and map the guest command list (CLBU:CLB) into
+			 * host memory.
+			 */
+			p->cmd |= AHCI_P_CMD_CR;
+			clb = (uint64_t)p->clbu << 32 | p->clb;
+			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
+					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
+		}
+
+		if (value & AHCI_P_CMD_FRE) {
+			uint64_t fb;
+
+			/* FIS receive enabled: map the received-FIS area (FBU:FB). */
+			p->cmd |= AHCI_P_CMD_FR;
+			fb = (uint64_t)p->fbu << 32 | p->fb;
+			/* we don't support FBSCP, so rfis size is 256Bytes */
+			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
+		} else {
+			p->cmd &= ~AHCI_P_CMD_FR;
+		}
+
+		/* CLO request: zero the task file and clear the request bit. */
+		if (value & AHCI_P_CMD_CLO) {
+			p->tfd = 0;
+			p->cmd &= ~AHCI_P_CMD_CLO;
+		}
+
+		/* Issue anything already queued in p->ci. */
+		ahci_handle_port(p);
+		break;
+	}
+	case AHCI_P_TFD:
+	case AHCI_P_SIG:
+	case AHCI_P_SSTS:
+		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
+		break;
+	case AHCI_P_SCTL:
+		/* SCTL writes are ignored while the port is started. */
+		if (!(p->cmd & AHCI_P_CMD_ST)) {
+			if (value & ATA_SC_DET_RESET)
+				ahci_port_reset(p);
+			p->sctl = value;
+		}
+		break;
+	case AHCI_P_SERR:
+		/* Write-one-to-clear. */
+		p->serr &= ~value;
+		break;
+	case AHCI_P_SACT:
+		/* Bits can only be set by the guest here. */
+		p->sact |= value;
+		break;
+	case AHCI_P_CI:
+		/* Queue the newly issued slots and process them right away. */
+		p->ci |= value;
+		ahci_handle_port(p);
+		break;
+	case AHCI_P_SNTF:
+	case AHCI_P_FBS:
+	default:
+		/* Writes to these and to unknown registers are dropped. */
+		break;
+	}
+}
+
+/*
+ * Handle a guest write to one of the controller-wide (generic host
+ * control) registers.
+ *
+ *  sc:     controller state; the caller holds sc->mtx.
+ *  offset: register offset within the AHCI BAR (below AHCI_OFFSET).
+ *  value:  the value written by the guest.
+ *
+ * Writes to read-only and unknown registers are dropped.
+ */
+static void
+pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
+{
+	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
+		offset, value);
+
+	switch (offset) {
+	case AHCI_CAP:
+	case AHCI_PI:
+	case AHCI_VS:
+	case AHCI_CAP2:
+		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
+		break;
+	case AHCI_GHC:
+		if (value & AHCI_GHC_HR) {
+			ahci_reset(sc);
+			break;
+		}
+		/*
+		 * Track the interrupt-enable bit in both directions.  Only
+		 * ever setting it would leave the guest unable to turn
+		 * controller interrupts off again once they were enabled.
+		 */
+		if (value & AHCI_GHC_IE)
+			sc->ghc |= AHCI_GHC_IE;
+		else
+			sc->ghc &= ~AHCI_GHC_IE;
+		/* Re-evaluate the interrupt state with the new enable bit. */
+		ahci_generate_intr(sc);
+		break;
+	case AHCI_IS:
+		/* Write-one-to-clear, then re-evaluate the interrupt state. */
+		sc->is &= ~value;
+		ahci_generate_intr(sc);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * BAR write entry point for the AHCI device model.
+ *
+ * Only 4-byte accesses to BAR 5 are expected (asserted).  With the softc
+ * mutex held, the write is routed to the host-register handler when the
+ * offset lies below AHCI_OFFSET, to the port-register handler when it lies
+ * inside the implemented port windows, and is otherwise logged and dropped.
+ */
+static void
+pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct pci_ahci_softc *sc;
+
+	assert(baridx == 5);
+	assert(size == 4);
+
+	sc = pi->pi_arg;
+	pthread_mutex_lock(&sc->mtx);
+
+	if (offset < AHCI_OFFSET) {
+		pci_ahci_host_write(sc, offset, value);
+	} else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) {
+		pci_ahci_port_write(sc, offset, value);
+	} else {
+		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Read one of the controller-wide registers.
+ *
+ * The listed registers are fetched by treating the softc fields that start
+ * at 'cap' as an array of 32-bit words indexed by the register offset, so
+ * this relies on those fields being laid out in register order.  Any other
+ * offset reads as zero.
+ */
+static uint64_t
+pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
+{
+	uint32_t value = 0;
+
+	switch (offset) {
+	case AHCI_CAP:
+	case AHCI_GHC:
+	case AHCI_IS:
+	case AHCI_PI:
+	case AHCI_VS:
+	case AHCI_CCCC:
+	case AHCI_CCCP:
+	case AHCI_EM_LOC:
+	case AHCI_EM_CTL:
+	case AHCI_CAP2:
+		value = (&sc->cap)[(offset - AHCI_CAP) / sizeof(uint32_t)];
+		break;
+	default:
+		break;
+	}
+
+	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
+		offset, value);
+
+	return (value);
+}
+
+/*
+ * Read a per-port register.
+ *
+ * The BAR offset is split into a port number and a register offset inside
+ * that port's window.  The listed registers are fetched by treating the
+ * port fields that start at 'clb' as an array of 32-bit words indexed by
+ * the register offset, so this relies on those fields being laid out in
+ * register order.  Any other offset reads as zero.
+ */
+static uint64_t
+pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
+{
+	uint32_t value = 0;
+	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
+
+	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
+
+	switch (offset) {
+	case AHCI_P_CLB:
+	case AHCI_P_CLBU:
+	case AHCI_P_FB:
+	case AHCI_P_FBU:
+	case AHCI_P_IS:
+	case AHCI_P_IE:
+	case AHCI_P_CMD:
+	case AHCI_P_TFD:
+	case AHCI_P_SIG:
+	case AHCI_P_SSTS:
+	case AHCI_P_SCTL:
+	case AHCI_P_SERR:
+	case AHCI_P_SACT:
+	case AHCI_P_CI:
+	case AHCI_P_SNTF:
+	case AHCI_P_FBS:
+		value = (&sc->port[port].clb)[(offset - AHCI_P_CLB) /
+		    sizeof(uint32_t)];
+		break;
+	default:
+		break;
+	}
+
+	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
+		port, offset, value);
+
+	return (value);
+}
+
+/*
+ * BAR read entry point for the AHCI device model.
+ *
+ * Only 4-byte accesses to BAR 5 are expected (asserted).  With the softc
+ * mutex held, the read is served by the host-register handler when the
+ * offset lies below AHCI_OFFSET and by the port-register handler when it
+ * lies inside the implemented port windows; anything else is logged and
+ * reads as zero.
+ */
+static uint64_t
+pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+    uint64_t offset, int size)
+{
+	struct pci_ahci_softc *sc;
+	uint32_t value;
+
+	assert(baridx == 5);
+	assert(size == 4);
+
+	sc = pi->pi_arg;
+	value = 0;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	if (offset < AHCI_OFFSET) {
+		value = pci_ahci_host_read(sc, offset);
+	} else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) {
+		value = pci_ahci_port_read(sc, offset);
+	} else {
+		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (value);
+}
+
+/*
+ * Common initialisation for the "ahci-hd" and "ahci-cd" device models.
+ *
+ *  ctx:   VM context (unused here).
+ *  pi:    the PCI device instance being initialised; pi->pi_arg is set to
+ *         the newly allocated controller state on success.
+ *  opts:  backing device specification passed to blockif_open(); required.
+ *  atapi: non-zero to emulate an ATAPI device on port 0, zero for a disk.
+ *
+ * Returns 0 on success and 1 on failure (missing options, allocation
+ * failure, or the backing device could not be opened).  On failure nothing
+ * stays allocated and pi->pi_arg is left NULL.
+ */
+static int
+pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
+{
+	char bident[sizeof("XX:X:X")];
+	struct blockif_ctxt *bctxt;
+	struct pci_ahci_softc *sc;
+	int slots;
+
+	if (opts == NULL) {
+		fprintf(stderr, "pci_ahci: backing device required\n");
+		return (1);
+	}
+
+#ifdef AHCI_DEBUG
+	dbg = fopen("/tmp/log", "w+");
+#endif
+
+	sc = calloc(1, sizeof(struct pci_ahci_softc));
+	if (sc == NULL) {
+		fprintf(stderr, "pci_ahci: out of memory\n");
+		return (1);
+	}
+	pi->pi_arg = sc;
+	sc->asc_pi = pi;
+	sc->ports = MAX_PORTS;
+
+	/*
+	 * Only use port 0 for a backing device. All other ports will be
+	 * marked as unused
+	 */
+	sc->port[0].atapi = atapi;
+
+	/*
+	 * Attempt to open the backing image. Use the PCI
+	 * slot/func for the identifier string.
+	 */
+	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
+	bctxt = blockif_open(opts, bident);
+	if (bctxt == NULL) {
+		/*
+		 * Nothing was opened, so there is no block context to
+		 * close; just release the softc and make sure the device
+		 * instance does not keep a pointer to freed memory.
+		 */
+		pi->pi_arg = NULL;
+		free(sc);
+		return (1);
+	}
+	sc->port[0].bctx = bctxt;
+	sc->port[0].pr_sc = sc;
+
+	/*
+	 * Allocate blockif request structures and add them
+	 * to the free list
+	 */
+	pci_ahci_ioreq_init(&sc->port[0]);
+
+	pthread_mutex_init(&sc->mtx, NULL);
+
+	/* Intel ICH8 AHCI */
+	/*
+	 * Advertise as many command slots as the backend queue holds,
+	 * capped at 32; the capability field stores the count minus one.
+	 */
+	slots = sc->port[0].ioqsz;
+	if (slots > 32)
+		slots = 32;
+	--slots;
+	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
+	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
+	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
+	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
+	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
+
+	/* Only port 0 implemented */
+	sc->pi = 1;
+	sc->vs = 0x10300;
+	sc->cap2 = AHCI_CAP2_APST;
+	ahci_reset(sc);
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
+	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
+	pci_emul_add_msicap(pi, 1);
+	/* BAR 5 covers the host registers plus one window per port. */
+	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
+	    AHCI_OFFSET + sc->ports * AHCI_STEP);
+
+	pci_lintr_request(pi);
+
+	return (0);
+}
+
+/*
+ * pe_init hook for the "ahci-hd" emulation: initialise the controller with
+ * the atapi flag cleared.  Returns whatever pci_ahci_init() returns.
+ */
+static int
+pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+
+	return (pci_ahci_init(ctx, pi, opts, 0));
+}
+
+/*
+ * pe_init hook for the "ahci-cd" emulation: initialise the controller with
+ * the atapi flag set.  Returns whatever pci_ahci_init() returns.
+ */
+static int
+pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+
+	return (pci_ahci_init(ctx, pi, opts, 1));
+}
+
+/*
+ * Use separate emulation names to distinguish drive and atapi devices
+ *
+ * Both descriptors share the same BAR read/write handlers and differ only
+ * in the emulation name and in the init hook, which selects disk or ATAPI
+ * behaviour.  PCI_EMUL_SET() registers each descriptor with the PCI
+ * emulation device set.
+ */
+struct pci_devemu pci_de_ahci_hd = {
+	.pe_emu =	"ahci-hd",
+	.pe_init =	pci_ahci_hd_init,
+	.pe_barwrite =	pci_ahci_write,
+	.pe_barread =	pci_ahci_read
+};
+PCI_EMUL_SET(pci_de_ahci_hd);
+
+struct pci_devemu pci_de_ahci_cd = {
+	.pe_emu =	"ahci-cd",
+	.pe_init =	pci_ahci_atapi_init,
+	.pe_barwrite =	pci_ahci_write,
+	.pe_barread =	pci_ahci_read
+};
+PCI_EMUL_SET(pci_de_ahci_cd);
diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c
new file mode 100644
index 0000000000..3b4ca805cc
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_emul.c
@@ -0,0 +1,2103 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_emul.c 269700 2014-08-08 03:49:01Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_emul.c 269700 2014-08-08 03:49:01Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/errno.h>
+
+#include <ctype.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "bhyverun.h"
+#include "inout.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+#define CONF1_ADDR_PORT    0x0cf8
+#define CONF1_DATA_PORT    0x0cfc
+
+#define CONF1_ENABLE	   0x80000000ul
+
+/*
+ * Store 'val' at config-space offset 'off' of device instance 'pi' using
+ * an access width of 'b' bytes: 1 and 2 select the 8- and 16-bit setters,
+ * any other width falls through to the 32-bit setter.
+ */
+#define	CFGWRITE(pi,off,val,b)						\
+do {									\
+	if ((b) == 1) {							\
+		pci_set_cfgdata8((pi),(off),(val));			\
+	} else if ((b) == 2) {						\
+		pci_set_cfgdata16((pi),(off),(val));			\
+	} else {							\
+		pci_set_cfgdata32((pi),(off),(val));			\
+	}								\
+} while (0)
+
+#define	MAXBUSES	(PCI_BUSMAX + 1)
+#define MAXSLOTS	(PCI_SLOTMAX + 1)
+#define	MAXFUNCS	(PCI_FUNCMAX + 1)
+
+/*
+ * Per-function configuration collected by pci_parse_slot(): the emulation
+ * name, the optional device-specific configuration string (NULL when
+ * absent) and, once pci_emul_init() has succeeded, the device instance.
+ */
+struct funcinfo {
+	char	*fi_name;
+	char	*fi_param;
+	struct pci_devinst *fi_devi;
+};
+
+/*
+ * Per-interrupt-pin bookkeeping for a slot.
+ * NOTE(review): presumably a usage count plus the PIRQ pin and I/O APIC
+ * irq the pin is routed to - confirm against the lintr routing code.
+ */
+struct intxinfo {
+	int	ii_count;
+	int	ii_pirq_pin;
+	int	ii_ioapic_irq;
+};
+
+/* One PCI slot: four interrupt-pin records and MAXFUNCS functions. */
+struct slotinfo {
+	struct intxinfo si_intpins[4];
+	struct funcinfo si_funcs[MAXFUNCS];
+};
+
+/* One PCI bus: its resource windows and MAXSLOTS slots. */
+struct businfo {
+	uint16_t iobase, iolimit;		/* I/O window */
+	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
+	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
+	struct slotinfo slotinfo[MAXSLOTS];
+};
+
+/* Indexed by bus number; entries are allocated on first use by pci_parse_slot(). */
+static struct businfo *pci_businfo[MAXBUSES];
+
+/* The set of registered device emulations, searched by pci_emul_finddev(). */
+SET_DECLARE(pci_devemu_set, struct pci_devemu);
+
+/*
+ * Allocation cursors for BAR resources: pci_emul_alloc_pbar() hands out
+ * addresses from these and advances them past each allocation.
+ */
+static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_membase64;
+
+/* I/O port BAR allocations must end at or below PCI_EMUL_IOLIMIT. */
+#define	PCI_EMUL_IOBASE		0x2000
+#define	PCI_EMUL_IOLIMIT	0x10000
+
+#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
+#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
+SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
+
+/* 32-bit MMIO BAR allocations stop where the ECFG region starts. */
+#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE
+
+/* Window used for 64-bit MMIO BAR allocations. */
+#define	PCI_EMUL_MEMBASE64	0xD000000000UL
+#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL
+
+/* Forward declarations for helpers defined later in this file. */
+static struct pci_devemu *pci_emul_finddev(char *name);
+static void pci_lintr_route(struct pci_devinst *pi);
+static void pci_lintr_update(struct pci_devinst *pi);
+static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
+    int func, int coff, int bytes, uint32_t *val);
+
+/*
+ * I/O access
+ */
+
+/*
+ * Slot options are in the form:
+ *
+ *  <bus>:<slot>:<func>,<emul>[,<config>]
+ *  <slot>[:<func>],<emul>[,<config>]
+ *
+ *  slot is 0..31
+ *  func is 0..7
+ *  emul is a string describing the type of PCI device e.g. virtio-net
+ *  config is an optional string, depending on the device, that can be
+ *  used for configuration.
+ *   Examples are:
+ *     1,virtio-net,tap0
+ *     3:0,dummy
+ */
+/*
+ * Report a malformed slot specification 'aopt' on stderr.
+ */
+static void
+pci_parse_slot_usage(char *aopt)
+{
+
+	fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt);
+}
+
+/*
+ * Parse one slot specification of the form
+ *
+ *	[<bus>:]<slot>[:<func>],<emul>[,<config>]
+ *
+ * and record the emulation name and optional configuration string for that
+ * bus/slot/function.  Missing bus and function numbers default to 0.
+ *
+ * Returns 0 on success and -1 on failure: the specification is malformed
+ * or out of range, the function is already occupied, the emulation name is
+ * unknown, or memory could not be allocated.
+ *
+ * On success the recorded strings point into a private copy of 'opt' that
+ * is intentionally kept allocated; on failure the copy is freed.
+ */
+int
+pci_parse_slot(char *opt)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	char *emul, *config, *str, *cp;
+	int error, bnum, snum, fnum;
+
+	error = -1;
+	str = strdup(opt);
+	if (str == NULL) {
+		fprintf(stderr, "pci_parse_slot: out of memory\n");
+		return (error);
+	}
+
+	/* Split "<location>,<emul>[,<config>]" in place. */
+	emul = config = NULL;
+	if ((cp = strchr(str, ',')) != NULL) {
+		*cp = '\0';
+		emul = cp + 1;
+		if ((cp = strchr(emul, ',')) != NULL) {
+			*cp = '\0';
+			config = cp + 1;
+		}
+	} else {
+		pci_parse_slot_usage(opt);
+		goto done;
+	}
+
+	/* <bus>:<slot>:<func> */
+	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
+		bnum = 0;
+		/* <slot>:<func> */
+		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
+			fnum = 0;
+			/* <slot> */
+			if (sscanf(str, "%d", &snum) != 1) {
+				/* Nothing parsed: force the range check to fail. */
+				snum = -1;
+			}
+		}
+	}
+
+	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
+	    fnum < 0 || fnum >= MAXFUNCS) {
+		pci_parse_slot_usage(opt);
+		goto done;
+	}
+
+	/* Bus state is created lazily the first time a bus is named. */
+	if (pci_businfo[bnum] == NULL) {
+		pci_businfo[bnum] = calloc(1, sizeof(struct businfo));
+		if (pci_businfo[bnum] == NULL) {
+			fprintf(stderr, "pci slot %d:%d: out of memory\n",
+				snum, fnum);
+			goto done;
+		}
+	}
+
+	bi = pci_businfo[bnum];
+	si = &bi->slotinfo[snum];
+
+	if (si->si_funcs[fnum].fi_name != NULL) {
+		fprintf(stderr, "pci slot %d:%d already occupied!\n",
+			snum, fnum);
+		goto done;
+	}
+
+	if (pci_emul_finddev(emul) == NULL) {
+		fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n",
+			snum, fnum, emul);
+		goto done;
+	}
+
+	error = 0;
+	si->si_funcs[fnum].fi_name = emul;
+	si->si_funcs[fnum].fi_param = config;
+
+done:
+	if (error)
+		free(str);
+
+	return (error);
+}
+
+/*
+ * Return 1 when 'offset' falls inside the device's MSI-X pending bit array
+ * ([pba_offset, pba_offset + pba_size)), and 0 otherwise.
+ */
+static int
+pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
+{
+
+	return (offset >= pi->pi_msix.pba_offset &&
+	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size);
+}
+
+/*
+ * Write 'value' into the device's MSI-X table.
+ *
+ *  pi:     device instance owning the table.
+ *  offset: byte offset from the start of the table.
+ *  size:   access width; only 4 and 8 are accepted.
+ *  value:  data to store.
+ *
+ * Returns 0 on success, or -1 when the width is unsupported, the access
+ * lies beyond the last table entry, or it is not aligned to its width.
+ */
+int
+pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
+		     uint64_t value)
+{
+	int msix_entry_offset;
+	uint64_t tab_index;
+	char *dest;
+
+	/* support only 4 or 8 byte writes */
+	if (size != 4 && size != 8)
+		return (-1);
+
+	/*
+	 * Return if table index is beyond what device supports.  The index
+	 * is kept as wide as 'offset' so that a very large offset cannot be
+	 * truncated into a small or negative value that passes this check.
+	 */
+	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
+	if (tab_index >= pi->pi_msix.table_count)
+		return (-1);
+
+	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* support only aligned writes */
+	if ((msix_entry_offset % size) != 0)
+		return (-1);
+
+	dest = (char *)(pi->pi_msix.table + tab_index);
+	dest += msix_entry_offset;
+
+	if (size == 4)
+		*((uint32_t *)dest) = value;
+	else
+		*((uint64_t *)dest) = value;
+
+	return (0);
+}
+
+/*
+ * Read from the device's MSI-X table or pending bit array.
+ *
+ *  pi:     device instance owning the table.
+ *  offset: byte offset from the start of the table.
+ *  size:   access width; 1, 4 and 8 are accepted.
+ *
+ * Returns the table contents for a valid table access, 0 for an access
+ * that lands in the PBA, and all ones for an unsupported width, a
+ * misaligned access, or an offset in neither region.
+ */
+uint64_t
+pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
+{
+	char *dest;
+	int msix_entry_offset;
+	uint64_t tab_index;
+	uint64_t retval = ~0;
+
+	/*
+	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
+	 * table but we also allow 1 byte access to accommodate reads from
+	 * ddb.
+	 */
+	if (size != 1 && size != 4 && size != 8)
+		return (retval);
+
+	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+
+	/* support only aligned reads */
+	if ((msix_entry_offset % size) != 0) {
+		return (retval);
+	}
+
+	/*
+	 * Keep the index as wide as 'offset' so that a very large offset
+	 * cannot be truncated into a value that passes the bounds check.
+	 */
+	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
+
+	if (tab_index < pi->pi_msix.table_count) {
+		/* valid MSI-X Table access */
+		dest = (char *)(pi->pi_msix.table + tab_index);
+		dest += msix_entry_offset;
+
+		if (size == 1)
+			retval = *((uint8_t *)dest);
+		else if (size == 4)
+			retval = *((uint32_t *)dest);
+		else
+			retval = *((uint64_t *)dest);
+	} else if (pci_valid_pba_offset(pi, offset)) {
+		/* return 0 for PBA access */
+		retval = 0;
+	}
+
+	return (retval);
+}
+
+/*
+ * Return the index of the BAR holding the MSI-X table, or -1 when the
+ * device has no MSI-X table allocated.
+ */
+int
+pci_msix_table_bar(struct pci_devinst *pi)
+{
+
+	return ((pi->pi_msix.table == NULL) ? -1 : pi->pi_msix.table_bar);
+}
+
+/*
+ * Return the index of the BAR holding the MSI-X pending bit array, or -1
+ * when the device has no MSI-X table allocated.
+ */
+int
+pci_msix_pba_bar(struct pci_devinst *pi)
+{
+
+	return ((pi->pi_msix.table == NULL) ? -1 : pi->pi_msix.pba_bar);
+}
+
+/*
+ * I/O port access handler registered for a device's I/O BARs.
+ *
+ * Finds the I/O BAR of the device ('arg') that fully contains the access
+ * [port, port + bytes) and forwards it to the emulation's BAR read or
+ * write hook with a BAR-relative offset.  For reads the result is stored
+ * in *eax; for writes *eax supplies the data.
+ *
+ * Returns 0 when a BAR handled the access and -1 when none matched.
+ */
+static int
+pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		    uint32_t *eax, void *arg)
+{
+	struct pci_devinst *pdi = arg;
+	struct pci_devemu *pe = pdi->pi_d;
+	uint64_t offset;
+	int bar;
+
+	for (bar = 0; bar <= PCI_BARMAX; bar++) {
+		/* Skip BARs that are not I/O or do not cover the access. */
+		if (pdi->pi_bar[bar].type != PCIBAR_IO)
+			continue;
+		if (port < pdi->pi_bar[bar].addr)
+			continue;
+		if (port + bytes >
+		    pdi->pi_bar[bar].addr + pdi->pi_bar[bar].size)
+			continue;
+
+		offset = port - pdi->pi_bar[bar].addr;
+		if (in) {
+			*eax = (*pe->pe_barread)(ctx, vcpu, pdi, bar,
+			    offset, bytes);
+		} else {
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bar, offset,
+			    bytes, *eax);
+		}
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * MMIO access handler registered for a device's memory BARs.
+ *
+ *  dir:  MEM_F_WRITE for a write, anything else is treated as a read.
+ *  addr: guest physical address of the access.
+ *  size: access width in bytes.
+ *  val:  data to write, or where to store the data read.
+ *  arg1: the struct pci_devinst that owns the BAR.
+ *  arg2: index of the BAR.
+ *
+ * The access is forwarded to the emulation's BAR read/write hook with a
+ * BAR-relative offset.  8-byte accesses are split into two 4-byte accesses,
+ * low half first.  Always returns 0.
+ */
+static int
+pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+		     int size, uint64_t *val, void *arg1, long arg2)
+{
+	struct pci_devinst *pdi = arg1;
+	struct pci_devemu *pe = pdi->pi_d;
+	uint64_t offset;
+	int bidx = (int) arg2;
+
+	/* The access must fall entirely inside a memory BAR of this device. */
+	assert(bidx <= PCI_BARMAX);
+	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
+	       pdi->pi_bar[bidx].type == PCIBAR_MEM64);
+	assert(addr >= pdi->pi_bar[bidx].addr &&
+	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
+
+	offset = addr - pdi->pi_bar[bidx].addr;
+
+	if (dir == MEM_F_WRITE) {
+		if (size == 8) {
+			/* Low dword at 'offset', high dword at 'offset + 4'. */
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
+					   4, *val & 0xffffffff);
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
+					   4, *val >> 32);
+		} else {
+			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
+					   size, *val);
+		}
+	} else {
+		if (size == 8) {
+			/* Combine two dword reads into one 64-bit value. */
+			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						 offset, 4);
+			*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						  offset + 4, 4) << 32;
+		} else {
+			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
+						 offset, size);
+		}
+	}
+
+	return (0);
+}
+
+
+/*
+ * Carve 'size' bytes out of the resource window tracked by '*baseptr'.
+ *
+ * 'size' must be a power of two (asserted).  The allocation starts at
+ * '*baseptr' rounded up to a multiple of 'size'.  When it fits below
+ * 'limit' the start address is stored in '*addr', '*baseptr' is advanced
+ * past the allocation and 0 is returned; otherwise nothing is modified and
+ * -1 is returned.
+ */
+static int
+pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
+			uint64_t *addr)
+{
+	uint64_t start;
+
+	assert((size & (size - 1)) == 0);	/* must be a power of 2 */
+
+	start = roundup2(*baseptr, size);
+	if (start + size > limit)
+		return (-1);
+
+	*addr = start;
+	*baseptr = start + size;
+	return (0);
+}
+
+/*
+ * Allocate BAR 'idx' of type 'type' and size 'size' for device 'pdi'.
+ * Convenience wrapper around pci_emul_alloc_pbar() with a host base of 0;
+ * returns that function's result.
+ */
+int
+pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
+		   uint64_t size)
+{
+
+	return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
+}
+
+/*
+ * Register (or unregister) the MMIO or I/O region associated with the BAR
+ * register 'idx' of an emulated pci device.
+ *
+ * 'registration' is non-zero to register the region and zero to
+ * unregister it.  The region is described by the BAR's current address and
+ * size.  Any failure, including a BAR that is neither I/O nor memory,
+ * trips the assertion at the end.
+ */
+static void
+modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
+{
+	int error;
+	struct inout_port iop;
+	struct mem_range mr;
+
+	switch (pi->pi_bar[idx].type) {
+	case PCIBAR_IO:
+		bzero(&iop, sizeof(struct inout_port));
+		iop.name = pi->pi_name;
+		iop.port = pi->pi_bar[idx].addr;
+		iop.size = pi->pi_bar[idx].size;
+		if (registration) {
+			/* The handler receives the device instance as its argument. */
+			iop.flags = IOPORT_F_INOUT;
+			iop.handler = pci_emul_io_handler;
+			iop.arg = pi;
+			error = register_inout(&iop);
+		} else 
+			error = unregister_inout(&iop);
+		break;
+	case PCIBAR_MEM32:
+	case PCIBAR_MEM64:
+		bzero(&mr, sizeof(struct mem_range));
+		mr.name = pi->pi_name;
+		mr.base = pi->pi_bar[idx].addr;
+		mr.size = pi->pi_bar[idx].size;
+		if (registration) {
+			/* The handler receives the device instance and the BAR index. */
+			mr.flags = MEM_F_RW;
+			mr.handler = pci_emul_mem_handler;
+			mr.arg1 = pi;
+			mr.arg2 = idx;
+			error = register_mem(&mr);
+		} else
+			error = unregister_mem(&mr);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	assert(error == 0);
+}
+
+/* Stop intercepting accesses to the region decoded by BAR 'idx' of 'pi'. */
+static void
+unregister_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 0);
+}
+
+/* Start intercepting accesses to the region decoded by BAR 'idx' of 'pi'. */
+static void
+register_bar(struct pci_devinst *pi, int idx)
+{
+
+	modify_bar_registration(pi, idx, 1);
+}
+
+/*
+ * Are we decoding i/o port accesses for the emulated pci device?
+ *
+ * Returns non-zero when PCIM_CMD_PORTEN is set in the device's command
+ * register, and zero otherwise.
+ */
+static int
+porten(struct pci_devinst *pi)
+{
+	uint16_t cmd;
+
+	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	return (cmd & PCIM_CMD_PORTEN);
+}
+
+/*
+ * Are we decoding memory accesses for the emulated pci device?
+ *
+ * Returns non-zero when PCIM_CMD_MEMEN is set in the device's command
+ * register, and zero otherwise.
+ */
+static int
+memen(struct pci_devinst *pi)
+{
+	uint16_t cmd;
+
+	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	return (cmd & PCIM_CMD_MEMEN);
+}
+
+/*
+ * Update the MMIO or I/O address that is decoded by the BAR register.
+ *
+ * If the pci device has enabled the address space decoding then intercept
+ * the address range decoded by the BAR register.
+ *
+ *  pi:   device instance.
+ *  addr: new address bits.  For PCIBAR_MEM64 they replace the low 32 bits
+ *        of the stored address and for PCIBAR_MEMHI64 the high 32 bits.
+ *        NOTE(review): the MEMHI64 case ORs 'addr' in unshifted, so the
+ *        caller presumably passes it already shifted into the upper half -
+ *        confirm against the config-write path.
+ *  idx:  index of the BAR whose address is updated.
+ *  type: which part of the address is being written (see above).
+ *
+ * When decoding is enabled the old range is unregistered before the
+ * address changes and the new range is registered afterwards.
+ */
+static void
+update_bar_address(struct  pci_devinst *pi, uint64_t addr, int idx, int type)
+{
+	int decode;
+
+	if (pi->pi_bar[idx].type == PCIBAR_IO)
+		decode = porten(pi);
+	else
+		decode = memen(pi);
+
+	if (decode)
+		unregister_bar(pi, idx);
+
+	switch (type) {
+	case PCIBAR_IO:
+	case PCIBAR_MEM32:
+		pi->pi_bar[idx].addr = addr;
+		break;
+	case PCIBAR_MEM64:
+		/* Keep the high half, replace the low half. */
+		pi->pi_bar[idx].addr &= ~0xffffffffUL;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	case PCIBAR_MEMHI64:
+		/* Keep the low half, replace the high half. */
+		pi->pi_bar[idx].addr &= 0xffffffff;
+		pi->pi_bar[idx].addr |= addr;
+		break;
+	default:
+		assert(0);
+	}
+
+	if (decode)
+		register_bar(pi, idx);
+}
+
+/*
+ * Allocate and program BAR 'idx' of device 'pdi'.
+ *
+ *  hostbase: starting address used only for the special 4GB 64-bit BAR
+ *            case below; otherwise unused.
+ *  type:     kind of BAR to create.
+ *  size:     requested size; rounded up to a power of two and to the
+ *            minimum BAR size (4 bytes for I/O, 16 bytes otherwise).
+ *
+ * An address is taken from the resource window matching 'type', the BAR is
+ * recorded in pdi->pi_bar[], its config-space register(s) are initialised
+ * and the region is registered for interception.
+ *
+ * Returns 0 on success, or the error from pci_emul_alloc_resource() when
+ * the window is exhausted.
+ */
+int
+pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
+		    enum pcibar_type type, uint64_t size)
+{
+	int error;
+	uint64_t *baseptr, limit, addr, mask, lobits, bar;
+
+	assert(idx >= 0 && idx <= PCI_BARMAX);
+
+	if ((size & (size - 1)) != 0)
+		size = 1UL << flsl(size);	/* round up to a power of 2 */
+
+	/* Enforce minimum BAR sizes required by the PCI standard */
+	if (type == PCIBAR_IO) {
+		if (size < 4)
+			size = 4;
+	} else {
+		if (size < 16)
+			size = 16;
+	}
+
+	/*
+	 * Pick the allocation window (baseptr/limit) and the register
+	 * encoding (address mask and low type bits) for this BAR type.
+	 */
+	switch (type) {
+	case PCIBAR_NONE:
+		/* Nothing to allocate; 'limit' is not used on this path. */
+		baseptr = NULL;
+		addr = mask = lobits = 0;
+		break;
+	case PCIBAR_IO:
+		baseptr = &pci_emul_iobase;
+		limit = PCI_EMUL_IOLIMIT;
+		mask = PCIM_BAR_IO_BASE;
+		lobits = PCIM_BAR_IO_SPACE;
+		break;
+	case PCIBAR_MEM64:
+		/*
+		 * XXX
+		 * Some drivers do not work well if the 64-bit BAR is allocated
+		 * above 4GB. Allow for this by allocating small requests under
+		 * 4GB unless the allocation size is larger than some arbitrary
+		 * number (32MB currently).
+		 */
+		if (size > 32 * 1024 * 1024) {
+			/*
+			 * XXX special case for device requiring peer-peer DMA
+			 */
+			if (size == 0x100000000UL)
+				baseptr = &hostbase;
+			else
+				baseptr = &pci_emul_membase64;
+			limit = PCI_EMUL_MEMLIMIT64;
+			mask = PCIM_BAR_MEM_BASE;
+			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
+				 PCIM_BAR_MEM_PREFETCH;
+			break;
+		} else {
+			baseptr = &pci_emul_membase32;
+			limit = PCI_EMUL_MEMLIMIT32;
+			mask = PCIM_BAR_MEM_BASE;
+			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+		}
+		break;
+	case PCIBAR_MEM32:
+		baseptr = &pci_emul_membase32;
+		limit = PCI_EMUL_MEMLIMIT32;
+		mask = PCIM_BAR_MEM_BASE;
+		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+		break;
+	default:
+		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
+		assert(0);
+	}
+
+	if (baseptr != NULL) {
+		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
+		if (error != 0)
+			return (error);
+	}
+
+	pdi->pi_bar[idx].type = type;
+	pdi->pi_bar[idx].addr = addr;
+	pdi->pi_bar[idx].size = size;
+
+	/* Initialize the BAR register in config space */
+	bar = (addr & mask) | lobits;
+	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);
+
+	/* A 64-bit BAR also consumes the next BAR for its upper 32 bits. */
+	if (type == PCIBAR_MEM64) {
+		assert(idx + 1 <= PCI_BARMAX);
+		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
+		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
+	}
+	
+	register_bar(pdi, idx);
+
+	return (0);
+}
+
+/* Config-space offset at which the first capability is placed. */
+#define	CAP_START_OFFSET	0x40
+/*
+ * Append a capability structure to the device's capability list.
+ *
+ *  pi:      device instance.
+ *  capdata: raw bytes of the capability.
+ *  caplen:  number of bytes in 'capdata' (must be positive).
+ *
+ * The first capability goes at CAP_START_OFFSET and sets the
+ * capabilities-present status bit; later ones follow the previous
+ * capability and are linked from its next pointer.  Space is reserved in
+ * dword multiples.  Returns 0 on success and -1 when config space has no
+ * room left.
+ */
+static int
+pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
+{
+	int first, n, capoff, reallen;
+	uint16_t sts;
+
+	assert(caplen > 0);
+
+	reallen = roundup2(caplen, 4);		/* dword aligned */
+
+	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
+	first = ((sts & PCIM_STATUS_CAPPRESENT) == 0);
+	capoff = first ? CAP_START_OFFSET : pi->pi_capend + 1;
+
+	/* Bail out when the capability would run past config space. */
+	if (capoff + reallen > PCI_REGMAX + 1)
+		return (-1);
+
+	if (first) {
+		/* Start the list and advertise it in the status register. */
+		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
+		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
+	} else {
+		/* Link the previous capability to this one. */
+		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);
+	}
+
+	/* Copy the capability body into config space. */
+	for (n = 0; n < caplen; n++)
+		pci_set_cfgdata8(pi, capoff + n, capdata[n]);
+
+	/* This capability is now the tail of the list. */
+	pci_set_cfgdata8(pi, capoff + 1, 0);
+
+	pi->pi_prevcap = capoff;
+	pi->pi_capend = capoff + reallen - 1;
+	return (0);
+}
+
+/*
+ * Look up a registered device emulation by its name.  Returns the matching
+ * descriptor, or NULL when no emulation is called 'name'.
+ */
+static struct pci_devemu *
+pci_emul_finddev(char *name)
+{
+	struct pci_devemu **cursor;
+
+	SET_FOREACH(cursor, pci_devemu_set) {
+		if (strcmp((*cursor)->pe_emu, name) == 0)
+			return (*cursor);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Create and initialise the device instance for one configured function.
+ *
+ *  ctx:  VM context stored in the instance.
+ *  pde:  emulation descriptor whose pe_init hook is invoked.
+ *  bus, slot, func: PCI location of the function.
+ *  fi:   function configuration; fi->fi_param is passed to pe_init and
+ *        fi->fi_devi receives the instance on success.
+ *
+ * Returns 0 on success, ENOMEM when the instance cannot be allocated, or
+ * the non-zero value returned by the emulation's pe_init hook (in which
+ * case the instance is freed).
+ */
+static int
+pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
+    int func, struct funcinfo *fi)
+{
+	struct pci_devinst *pdi;
+	int err;
+
+	pdi = calloc(1, sizeof(struct pci_devinst));
+	if (pdi == NULL)
+		return (ENOMEM);
+
+	pdi->pi_vmctx = ctx;
+	pdi->pi_bus = bus;
+	pdi->pi_slot = slot;
+	pdi->pi_func = func;
+	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
+	pdi->pi_lintr.pin = 0;
+	pdi->pi_lintr.state = IDLE;
+	pdi->pi_lintr.pirq_pin = 0;
+	pdi->pi_lintr.ioapic_irq = 0;
+	pdi->pi_d = pde;
+	snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);
+
+	/* Disable legacy interrupts */
+	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
+	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
+
+	/* Start with I/O, memory decoding and bus mastering enabled. */
+	pci_set_cfgdata8(pdi, PCIR_COMMAND,
+		    PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
+
+	err = (*pde->pe_init)(ctx, pdi, fi->fi_param);
+	if (err == 0)
+		fi->fi_devi = pdi;
+	else
+		free(pdi);
+
+	return (err);
+}
+
+/*
+ * Fill in an MSI capability structure.
+ *
+ *  msicap:  structure to initialise; it is zeroed first.
+ *  msgnum:  number of messages; must be a power of two between 1 and 32
+ *           (asserted).
+ *  nextptr: value stored in the capability's next pointer.
+ *
+ * The message control field advertises 64-bit addressing and encodes
+ * log2(msgnum) starting at bit 1.
+ */
+void
+pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
+{
+	int mmc;
+
+	CTASSERT(sizeof(struct msicap) == 14);
+
+	/* Number of msi messages must be a power of 2 between 1 and 32 */
+	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
+	/* msgnum is a power of two, so ffs() - 1 is its base-2 logarithm. */
+	mmc = ffs(msgnum) - 1;
+
+	bzero(msicap, sizeof(struct msicap));
+	msicap->capid = PCIY_MSI;
+	msicap->nextptr = nextptr;
+	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
+}
+
+/*
+ * Add an MSI capability advertising 'msgnum' messages to device 'pi'.
+ * Returns the result of pci_emul_add_capability(): 0 on success, -1 when
+ * config space has no room.
+ */
+int
+pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
+{
+	struct msicap msicap;
+
+	pci_populate_msicap(&msicap, msgnum, 0);
+
+	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
+}
+
+/*
+ * Fill in an MSI-X capability structure.
+ *
+ *  msixcap:       structure to initialise; it is zeroed first.
+ *  msgnum:        number of table entries.
+ *  barnum:        BAR that holds both the table and the PBA.
+ *  msix_tab_size: size of the table region; must be a multiple of 4096
+ *                 (asserted) and is used as the PBA offset.
+ */
+static void
+pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
+		     uint32_t msix_tab_size)
+{
+	CTASSERT(sizeof(struct msixcap) == 12);
+
+	assert(msix_tab_size % 4096 == 0);
+
+	bzero(msixcap, sizeof(struct msixcap));
+	msixcap->capid = PCIY_MSIX;
+
+	/*
+	 * Message Control Register, all fields set to
+	 * zero except for the Table Size.
+	 * Note: Table size N is encoded as N-1
+	 */
+	msixcap->msgctrl = msgnum - 1;
+
+	/*
+	 * MSI-X BAR setup:
+	 * - MSI-X table start at offset 0
+	 * - PBA table starts at a 4K aligned offset after the MSI-X table
+	 */
+	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
+	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
+}
+
+/*
+ * Allocate the MSI-X table for 'table_entries' vectors and mark every
+ * vector as masked.
+ *
+ * 'table_entries' must be in the range [1, MAX_MSIX_TABLE_ENTRIES].  The
+ * allocation is zero-filled, so all other entry fields start at 0.
+ */
+static void
+pci_msix_table_init(struct pci_devinst *pi, int table_entries)
+{
+	int i, table_size;
+
+	assert(table_entries > 0);
+	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);
+
+	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
+	pi->pi_msix.table = calloc(1, table_size);
+	/* The loop below dereferences the table; do not walk a NULL one. */
+	assert(pi->pi_msix.table != NULL);
+
+	/* set mask bit of vector control register */
+	for (i = 0; i < table_entries; i++)
+		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
+}
+
+/*
+ * Add an MSI-X capability with 'msgnum' vectors to the device, backed by
+ * a 32-bit memory BAR 'barnum' that holds both the MSI-X table (offset 0)
+ * and the PBA (at the 4K-rounded table size).
+ *
+ * Returns 0 on success.  If the BAR cannot be allocated the freshly
+ * allocated table is released and the allocation error is returned;
+ * otherwise the result of pci_emul_add_capability() is returned.
+ */
+int
+pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
+{
+	uint32_t tab_size;
+	struct msixcap msixcap;
+	int error;
+
+	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
+	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);
+
+	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;
+
+	/* Align table size to nearest 4K */
+	tab_size = roundup2(tab_size, 4096);
+
+	pi->pi_msix.table_bar = barnum;
+	pi->pi_msix.pba_bar   = barnum;
+	pi->pi_msix.table_offset = 0;
+	pi->pi_msix.table_count = msgnum;
+	pi->pi_msix.pba_offset = tab_size;
+	pi->pi_msix.pba_size = PBA_SIZE(msgnum);
+
+	pci_msix_table_init(pi, msgnum);
+
+	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);
+
+	/* allocate memory for MSI-X Table and PBA */
+	error = pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
+				tab_size + pi->pi_msix.pba_size);
+	if (error != 0) {
+		/* Do not advertise MSI-X in a BAR that does not exist. */
+		free(pi->pi_msix.table);
+		pi->pi_msix.table = NULL;
+		return (error);
+	}
+
+	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
+					sizeof(msixcap)));
+}
+
+/*
+ * Config-space write handler for the MSI-X capability located at
+ * 'capoff'.
+ *
+ * A 2-byte write to the Message Control register (capability offset 2)
+ * may only change the MSI-X Enable and Function Mask bits; the remaining
+ * bits keep their current value.  The cached enabled/function_mask state
+ * is refreshed and the legacy interrupt state re-evaluated.  Every other
+ * write is stored unmodified.
+ */
+void
+msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		 int bytes, uint32_t val)
+{
+	uint16_t msgctrl, rwmask;
+	int off;
+
+	off = offset - capoff;
+	/* Message Control Register */
+	if (off == 2 && bytes == 2) {
+		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
+		msgctrl = pci_get_cfgdata16(pi, offset);
+		msgctrl &= ~rwmask;
+		msgctrl |= val & rwmask;
+		val = msgctrl;
+
+		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
+		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
+		pci_lintr_update(pi);
+	}
+
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+/*
+ * Config-space write handler for the MSI capability located at 'capoff'.
+ *
+ * For a 2-byte write to the Message Control register (capability offset
+ * 2) only the Multiple Message Enable field and the MSI Enable bit are
+ * taken from 'val'; the other bits keep their current value.  The cached
+ * MSI state in pi->pi_msi is then refreshed and the legacy interrupt state
+ * re-evaluated.  Every other write is stored unmodified.
+ */
+void
+msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		int bytes, uint32_t val)
+{
+	uint16_t ctrl, writable, data, mme;
+	uint32_t addr;
+
+	if (bytes == 2 && offset == capoff + 2) {
+		/* Merge the guest value into the writable bits only. */
+		writable = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
+		ctrl = pci_get_cfgdata16(pi, offset);
+		ctrl = (ctrl & ~writable) | (val & writable);
+		val = ctrl;
+
+		/* The message data register follows the 32/64-bit address. */
+		addr = pci_get_cfgdata32(pi, capoff + 4);
+		data = pci_get_cfgdata16(pi,
+		    (ctrl & PCIM_MSICTRL_64BIT) ? capoff + 12 : capoff + 8);
+
+		mme = ctrl & PCIM_MSICTRL_MME_MASK;
+		if (ctrl & PCIM_MSICTRL_MSI_ENABLE) {
+			pi->pi_msi.enabled = 1;
+			pi->pi_msi.addr = addr;
+			pi->pi_msi.msg_data = data;
+			pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
+		} else {
+			pi->pi_msi.enabled = 0;
+			pi->pi_msi.maxmsgnum = 0;
+		}
+		pci_lintr_update(pi);
+	}
+
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+/*
+ * Config-space write handler for the PCI Express capability.
+ *
+ * The write is passed straight through to CFGWRITE; 'capoff' is not used
+ * and read-only fields are not protected (see the XXX below).
+ */
+void
+pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+		 int bytes, uint32_t val)
+{
+
+	/* XXX don't write to the readonly parts */
+	CFGWRITE(pi, offset, val, bytes);
+}
+
+#define	PCIECAP_VERSION	0x2
+/*
+ * Add a PCI Express capability to the device.
+ *
+ * Only PCIEM_TYPE_ROOT_PORT is accepted; any other 'type' yields -1.
+ * Otherwise the result of pci_emul_add_capability() is returned.
+ */
+int
+pci_emul_add_pciecap(struct pci_devinst *pi, int type)
+{
+	struct pciecap cap;
+
+	CTASSERT(sizeof(struct pciecap) == 60);
+
+	if (type != PCIEM_TYPE_ROOT_PORT)
+		return (-1);
+
+	bzero(&cap, sizeof(cap));
+
+	cap.capid = PCIY_EXPRESS;
+	cap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT;
+	cap.link_capabilities = 0x411;	/* gen1, x1 */
+	cap.link_status = 0x11;		/* gen1, x1 */
+
+	return (pci_emul_add_capability(pi, (u_char *)&cap, sizeof(cap)));
+}
+
+/*
+ * Dispatch a config-space write that targets a capability structure.
+ *
+ * This function assumes that 'offset' is in the capabilities region of
+ * the config space.  The capability containing 'offset' is located by
+ * walking the capability list, and the write is handed to the handler for
+ * that capability's ID (MSI, MSI-X or PCI Express).  Writes to other
+ * capability IDs are dropped.
+ */
+static void
+pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
+{
+	int capid;
+	uint8_t capoff, nextoff;
+
+	/* Do not allow un-aligned writes */
+	if ((offset & (bytes - 1)) != 0)
+		return;
+
+	/*
+	 * Find the capability that we want to update: follow the next
+	 * pointers (stored at capoff + 1) until the list ends or 'offset'
+	 * falls between this capability and the next one.
+	 */
+	capoff = CAP_START_OFFSET;
+	while (1) {
+		nextoff = pci_get_cfgdata8(pi, capoff + 1);
+		if (nextoff == 0)
+			break;
+		if (offset >= capoff && offset < nextoff)
+			break;
+
+		capoff = nextoff;
+	}
+	assert(offset >= capoff);
+
+	/*
+	 * Capability ID and Next Capability Pointer are readonly.
+	 * However, some o/s's do 4-byte writes that include these.
+	 * For this case, trim the write back to 2 bytes and adjust
+	 * the data.
+	 */
+	if (offset == capoff || offset == capoff + 1) {
+		if (offset == capoff && bytes == 4) {
+			bytes = 2;
+			offset += 2;
+			val >>= 16;
+		} else
+			return;
+	}
+
+	/* Hand the (possibly trimmed) write to the per-capability handler. */
+	capid = pci_get_cfgdata8(pi, capoff);
+	switch (capid) {
+	case PCIY_MSI:
+		msicap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	case PCIY_MSIX:
+		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	case PCIY_EXPRESS:
+		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Return 1 if 'offset' lies inside the device's capability region, i.e.
+ * the status register reports a capability list and 'offset' is within
+ * [CAP_START_OFFSET, pi_capend].  Return 0 otherwise.
+ */
+static int
+pci_emul_iscap(struct pci_devinst *pi, int offset)
+{
+	uint16_t status;
+
+	status = pci_get_cfgdata16(pi, PCIR_STATUS);
+	if ((status & PCIM_STATUS_CAPPRESENT) == 0)
+		return (0);
+
+	if (offset < CAP_START_OFFSET || offset > pi->pi_capend)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Memory handler for addresses not claimed by anything else: writes are
+ * discarded and reads return all ones.  Always returns 0.
+ */
+static int
+pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+			  int size, uint64_t *val, void *arg1, long arg2)
+{
+	/* Writes are simply ignored. */
+	if (dir != MEM_F_READ)
+		return (0);
+
+	/*
+	 * Return 0xff's for reads. The mem read code will take care of
+	 * truncating to the correct size.
+	 */
+	*val = 0xffffffffffffffff;
+	return (0);
+}
+
+/*
+ * Memory handler for the PCI extended config window.
+ *
+ * The bus, slot, function and config offset are decoded from 'addr' and
+ * the access is forwarded to pci_cfgrw().  For reads '*val' is preset to
+ * all ones before the call.  Always returns 0.
+ */
+static int
+pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+    int bytes, uint64_t *val, void *arg1, long arg2)
+{
+	int bus, slot, func, coff, in;
+
+	in = (dir == MEM_F_READ);
+	if (in)
+		*val = ~0UL;
+
+	/* addr[27:20] = bus, [19:15] = slot, [14:12] = func, [11:0] = reg */
+	bus = (addr >> 20) & 0xff;
+	slot = (addr >> 15) & 0x1f;
+	func = (addr >> 12) & 0x7;
+	coff = addr & 0xfff;
+
+	pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val);
+	return (0);
+}
+
+/*
+ * Return the guest physical base address of the PCI extended config
+ * window (PCI_EMUL_ECFG_BASE).
+ */
+uint64_t
+pci_ecfg_base(void)
+{
+
+	return (PCI_EMUL_ECFG_BASE);
+}
+
+#define	BUSIO_ROUNDUP		32
+#define	BUSMEM_ROUNDUP		(1024 * 1024)
+
+/*
+ * Initialize PCI emulation for the virtual machine.
+ *
+ * For every configured bus this instantiates each configured
+ * slot/function, records the I/O and memory windows consumed by the bus,
+ * then routes the INTx interrupts of all created devices, and finally
+ * registers the memory handlers for the PCI hole and the extended config
+ * window.
+ *
+ * Returns 0 on success, or the non-zero error of the first
+ * pci_emul_init() call that fails.
+ */
+int
+init_pci(struct vmctx *ctx)
+{
+	struct mem_range mr;
+	struct pci_devemu *pde;
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct funcinfo *fi;
+	size_t lowmem;
+	int bus, slot, func;
+	int error;
+
+	/* Starting points for BAR allocation in each address space. */
+	pci_emul_iobase = PCI_EMUL_IOBASE;
+	pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+	pci_emul_membase64 = PCI_EMUL_MEMBASE64;
+
+	for (bus = 0; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+		/*
+		 * Keep track of the i/o and memory resources allocated to
+		 * this bus.
+		 */
+		bi->iobase = pci_emul_iobase;
+		bi->membase32 = pci_emul_membase32;
+		bi->membase64 = pci_emul_membase64;
+
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			for (func = 0; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				/* Nothing configured at this function. */
+				if (fi->fi_name == NULL)
+					continue;
+				pde = pci_emul_finddev(fi->fi_name);
+				assert(pde != NULL);
+				error = pci_emul_init(ctx, pde, bus, slot,
+				    func, fi);
+				if (error)
+					return (error);
+			}
+		}
+
+		/*
+		 * Add some slop to the I/O and memory resources decoded by
+		 * this bus to give a guest some flexibility if it wants to
+		 * reprogram the BARs.
+		 */
+		pci_emul_iobase += BUSIO_ROUNDUP;
+		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
+		bi->iolimit = pci_emul_iobase;
+
+		pci_emul_membase32 += BUSMEM_ROUNDUP;
+		pci_emul_membase32 = roundup2(pci_emul_membase32,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit32 = pci_emul_membase32;
+
+		pci_emul_membase64 += BUSMEM_ROUNDUP;
+		pci_emul_membase64 = roundup2(pci_emul_membase64,
+		    BUSMEM_ROUNDUP);
+		bi->memlimit64 = pci_emul_membase64;
+	}
+
+	/*
+	 * PCI backends are initialized before routing INTx interrupts
+	 * so that LPC devices are able to reserve ISA IRQs before
+	 * routing PIRQ pins.
+	 */
+	for (bus = 0; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+
+		for (slot = 0; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			for (func = 0; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				if (fi->fi_devi == NULL)
+					continue;
+				pci_lintr_route(fi->fi_devi);
+			}
+		}
+	}
+	lpc_pirq_routed();
+
+	/*
+	 * The guest physical memory map looks like the following:
+	 * [0,		    lowmem)		guest system memory
+	 * [lowmem,	    lowmem_limit)	memory hole (may be absent)
+	 * [lowmem_limit,   0xE0000000)		PCI hole (32-bit BAR allocation)
+	 * [0xE0000000,	    0xF0000000)		PCI extended config window
+	 * [0xF0000000,	    4GB)		LAPIC, IOAPIC, HPET, firmware
+	 * [4GB,	    4GB + highmem)
+	 */
+
+	/*
+	 * Accesses to memory addresses that are not allocated to system
+	 * memory or PCI devices return 0xff's.  The fallback range covers
+	 * everything from the end of low memory up to 4GB.
+	 */
+	lowmem = vm_get_lowmem_size(ctx);
+	bzero(&mr, sizeof(struct mem_range));
+	mr.name = "PCI hole";
+	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
+	mr.base = lowmem;
+	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
+	mr.handler = pci_emul_fallback_handler;
+	error = register_mem_fallback(&mr);
+	assert(error == 0);
+
+	/* PCI extended config space */
+	bzero(&mr, sizeof(struct mem_range));
+	mr.name = "PCI ECFG";
+	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
+	mr.base = PCI_EMUL_ECFG_BASE;
+	mr.size = PCI_EMUL_ECFG_SIZE;
+	mr.handler = pci_emul_ecfg_handler;
+	error = register_mem(&mr);
+	assert(error == 0);
+
+	return (0);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * pci_walk_lintr() callback that emits one DSDT package for an interrupt
+ * pin when interrupts are delivered through the I/O APIC: the slot
+ * address, the zero-based pin number, Zero, and the I/O APIC IRQ.
+ * 'bus', 'pirq_pin' and 'arg' are not used.
+ */
+static void
+pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+
+	dsdt_line("  Package ()");
+	dsdt_line("  {");
+	dsdt_line("    0x%X,", slot << 16 | 0xffff);
+	dsdt_line("    0x%02X,", pin - 1);
+	dsdt_line("    Zero,");
+	dsdt_line("    0x%X", ioapic_irq);
+	dsdt_line("  },");
+}
+
+/*
+ * pci_walk_lintr() callback that emits one DSDT package for an interrupt
+ * pin when interrupts are delivered through PIRQ links: the slot address,
+ * the zero-based pin number, the name returned by lpc_pirq_name() and
+ * 0x00.  Nothing is emitted when no name is available.  'bus',
+ * 'ioapic_irq' and 'arg' are not used.
+ */
+static void
+pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
+    void *arg)
+{
+	char *name;
+
+	name = lpc_pirq_name(pirq_pin);
+	if (name == NULL)
+		return;
+	dsdt_line("  Package ()");
+	dsdt_line("  {");
+	dsdt_line("    0x%X,", slot << 16 | 0xffff);
+	dsdt_line("    0x%02X,", pin - 1);
+	dsdt_line("    %s,", name);
+	dsdt_line("    0x00");
+	dsdt_line("  },");
+	/* The name string is released here once it has been emitted. */
+	free(name);
+}
+
+/*
+ * Emit the DSDT description of PCI bus 'bus'.
+ *
+ * A bhyve virtual machine has a flat PCI hierarchy with a root port
+ * corresponding to each PCI bus.  The emitted device carries the bus
+ * number, the I/O and memory windows recorded for the bus, the interrupt
+ * routing tables (when any INTx pin is in use) and whatever each device
+ * on the bus adds through its pe_write_dsdt hook.
+ */
+static void
+pci_bus_write_dsdt(int bus)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct pci_devinst *pi;
+	int count, func, slot;
+
+	/*
+	 * If there are no devices on this 'bus' then just return.
+	 */
+	if ((bi = pci_businfo[bus]) == NULL) {
+		/*
+		 * Bus 0 is special because it decodes the I/O ports used
+		 * for PCI config space access even if there are no devices
+		 * on it.
+		 */
+		if (bus != 0)
+			return;
+	}
+
+	dsdt_line("  Device (PC%02X)", bus);
+	dsdt_line("  {");
+	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");
+	dsdt_line("    Name (_ADR, Zero)");
+
+	dsdt_line("    Method (_BBN, 0, NotSerialized)");
+	dsdt_line("    {");
+	dsdt_line("        Return (0x%08X)", bus);
+	dsdt_line("    }");
+	dsdt_line("    Name (_CRS, ResourceTemplate ()");
+	dsdt_line("    {");
+	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
+	    "MaxFixed, PosDecode,");
+	dsdt_line("        0x0000,             // Granularity");
+	dsdt_line("        0x%04X,             // Range Minimum", bus);
+	dsdt_line("        0x%04X,             // Range Maximum", bus);
+	dsdt_line("        0x0000,             // Translation Offset");
+	dsdt_line("        0x0001,             // Length");
+	dsdt_line("        ,, )");
+
+	if (bus == 0) {
+		/* Config address/data ports, 8 bytes starting at 0xCF8. */
+		dsdt_indent(3);
+		dsdt_fixed_ioport(0xCF8, 8);
+		dsdt_unindent(3);
+
+		/* I/O ports below the config ports. */
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0000,             // Range Minimum");
+		dsdt_line("        0x0CF7,             // Range Maximum");
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x0CF8,             // Length");
+		dsdt_line("        ,, , TypeStatic)");
+
+		/* I/O ports from 0x0D00 up to the start of BAR allocation. */
+		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+		    "PosDecode, EntireRange,");
+		dsdt_line("        0x0000,             // Granularity");
+		dsdt_line("        0x0D00,             // Range Minimum");
+		dsdt_line("        0x%04X,             // Range Maximum",
+		    PCI_EMUL_IOBASE - 1);
+		dsdt_line("        0x0000,             // Translation Offset");
+		dsdt_line("        0x%04X,             // Length",
+		    PCI_EMUL_IOBASE - 0x0D00);
+		dsdt_line("        ,, , TypeStatic)");
+
+		/* Bus 0 without devices: close the template and finish. */
+		if (bi == NULL) {
+			dsdt_line("    })");
+			goto done;
+		}
+	}
+	assert(bi != NULL);
+
+	/* i/o window */
+	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
+	    "PosDecode, EntireRange,");
+	dsdt_line("        0x0000,             // Granularity");
+	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
+	dsdt_line("        0x%04X,             // Range Maximum",
+	    bi->iolimit - 1);
+	dsdt_line("        0x0000,             // Translation Offset");
+	dsdt_line("        0x%04X,             // Length",
+	    bi->iolimit - bi->iobase);
+	dsdt_line("        ,, , TypeStatic)");
+
+	/*
+	 * mmio window (32-bit)
+	 *
+	 * NOTE(review): the formats with arguments in the two mmio windows
+	 * end in "\n" while no other dsdt_line() format here does --
+	 * confirm whether dsdt_line() already terminates the line.
+	 */
+	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
+	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
+	dsdt_line("        0x00000000,         // Granularity");
+	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
+	dsdt_line("        0x%08X,         // Range Maximum\n",
+	    bi->memlimit32 - 1);
+	dsdt_line("        0x00000000,         // Translation Offset");
+	dsdt_line("        0x%08X,         // Length\n",
+	    bi->memlimit32 - bi->membase32);
+	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
+
+	/* mmio window (64-bit) */
+	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
+	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
+	dsdt_line("        0x0000000000000000, // Granularity");
+	dsdt_line("        0x%016lX, // Range Minimum\n", bi->membase64);
+	dsdt_line("        0x%016lX, // Range Maximum\n",
+	    bi->memlimit64 - 1);
+	dsdt_line("        0x0000000000000000, // Translation Offset");
+	dsdt_line("        0x%016lX, // Length\n",
+	    bi->memlimit64 - bi->membase64);
+	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
+	dsdt_line("    })");
+
+	/*
+	 * Interrupt routing: emit one table for PIRQ routing and one for
+	 * I/O APIC routing, and a _PRT method that picks between them
+	 * based on PICM.
+	 */
+	count = pci_count_lintr(bus);
+	if (count != 0) {
+		dsdt_indent(2);
+		dsdt_line("Name (PPRT, Package ()");
+		dsdt_line("{");
+		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
+ 		dsdt_line("})");
+		dsdt_line("Name (APRT, Package ()");
+		dsdt_line("{");
+		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
+ 		dsdt_line("})");
+		dsdt_line("Method (_PRT, 0, NotSerialized)");
+		dsdt_line("{");
+		dsdt_line("  If (PICM)");
+		dsdt_line("  {");
+		dsdt_line("    Return (APRT)");
+		dsdt_line("  }");
+		dsdt_line("  Else");
+		dsdt_line("  {");
+		dsdt_line("    Return (PPRT)");
+		dsdt_line("  }");
+		dsdt_line("}");
+		dsdt_unindent(2);
+	}
+
+	/* Let every device on the bus add its own DSDT content. */
+	dsdt_indent(2);
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		si = &bi->slotinfo[slot];
+		for (func = 0; func < MAXFUNCS; func++) {
+			pi = si->si_funcs[func].fi_devi;
+			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
+				pi->pi_d->pe_write_dsdt(pi);
+		}
+	}
+	dsdt_unindent(2);
+done:
+	dsdt_line("  }");
+}
+
+/*
+ * Emit the PCI portion of the DSDT: the PICM variable with the _PIC
+ * method that stores its argument into it, followed by a _SB scope
+ * containing the description of every bus number below MAXBUSES.
+ */
+void
+pci_write_dsdt(void)
+{
+	int bus;
+
+	dsdt_indent(1);
+	dsdt_line("Name (PICM, 0x00)");
+	dsdt_line("Method (_PIC, 1, NotSerialized)");
+	dsdt_line("{");
+	dsdt_line("  Store (Arg0, PICM)");
+	dsdt_line("}");
+	dsdt_line("");
+	dsdt_line("Scope (_SB)");
+	dsdt_line("{");
+	/* pci_bus_write_dsdt() itself skips buses with nothing to emit. */
+	for (bus = 0; bus < MAXBUSES; bus++)
+		pci_bus_write_dsdt(bus);
+	dsdt_line("}");
+	dsdt_unindent(1);
+}
+#endif
+
+/*
+ * Return non-zero if PCI bus 'bus' has a businfo entry, 0 otherwise.
+ * 'bus' must be in the range [0, MAXBUSES).
+ */
+int
+pci_bus_configured(int bus)
+{
+	assert(bus >= 0 && bus < MAXBUSES);
+	return (pci_businfo[bus] != NULL);
+}
+
+/*
+ * Return the cached MSI enable state of the device (non-zero when
+ * enabled).
+ */
+int
+pci_msi_enabled(struct pci_devinst *pi)
+{
+	return (pi->pi_msi.enabled);
+}
+
+/*
+ * Return the number of MSI messages available to the device, or 0 when
+ * MSI is not enabled.
+ */
+int
+pci_msi_maxmsgnum(struct pci_devinst *pi)
+{
+	return (pi->pi_msi.enabled ? pi->pi_msi.maxmsgnum : 0);
+}
+
+/*
+ * Return non-zero if MSI-X is enabled for the device.  MSI-X only counts
+ * as enabled while MSI is not enabled.
+ */
+int
+pci_msix_enabled(struct pci_devinst *pi)
+{
+
+	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
+}
+
+/*
+ * Deliver MSI-X vector 'index' of the device to the guest.
+ *
+ * Nothing is delivered when MSI-X is not enabled, when the function mask
+ * is set, when 'index' lies outside the MSI-X table, or when the vector
+ * itself is masked.
+ */
+void
+pci_generate_msix(struct pci_devinst *pi, int index)
+{
+	struct msix_table_entry *mte;
+
+	if (!pci_msix_enabled(pi))
+		return;
+
+	if (pi->pi_msix.function_mask)
+		return;
+
+	/* Reject negative indices too; they would index before the table. */
+	if (index < 0 || index >= pi->pi_msix.table_count)
+		return;
+
+	mte = &pi->pi_msix.table[index];
+	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+		/* XXX Set PBA bit if interrupt is disabled */
+		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
+	}
+}
+
+/*
+ * Deliver MSI message 'index' of the device to the guest.
+ *
+ * The message data is the programmed value plus 'index'.  Nothing is
+ * delivered when MSI is not enabled or when 'index' is outside
+ * [0, pci_msi_maxmsgnum(pi)).
+ */
+void
+pci_generate_msi(struct pci_devinst *pi, int index)
+{
+
+	/* A negative index would otherwise pass the upper-bound check. */
+	if (pci_msi_enabled(pi) && index >= 0 &&
+	    index < pci_msi_maxmsgnum(pi)) {
+		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+			     pi->pi_msi.msg_data + index);
+	}
+}
+
+/*
+ * Return true if the device may currently signal its legacy (INTx)
+ * interrupt: neither MSI nor MSI-X is enabled and the INTx disable bit in
+ * the command register is clear.
+ */
+static bool
+pci_lintr_permitted(struct pci_devinst *pi)
+{
+	uint16_t cmd;
+
+	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+	if (pi->pi_msi.enabled || pi->pi_msix.enabled)
+		return (false);
+
+	return ((cmd & PCIM_CMD_INTxDIS) == 0);
+}
+
+/*
+ * Assign a legacy interrupt pin to the device.
+ *
+ * The least-used of the slot's four pins is chosen (the lowest-numbered
+ * one on a tie), its use count is bumped, and the 1-based pin number is
+ * stored in both the device instance and the INTPIN config register.
+ */
+void
+pci_lintr_request(struct pci_devinst *pi)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	int best, pin;
+
+	bi = pci_businfo[pi->pi_bus];
+	assert(bi != NULL);
+
+	/*
+	 * Just allocate a pin from our slot.  The pin will be
+	 * assigned IRQs later when interrupts are routed.
+	 */
+	si = &bi->slotinfo[pi->pi_slot];
+	best = 0;
+	for (pin = 1; pin < 4; pin++) {
+		if (si->si_intpins[pin].ii_count <
+		    si->si_intpins[best].ii_count)
+			best = pin;
+	}
+
+	si->si_intpins[best].ii_count++;
+	pi->pi_lintr.pin = best + 1;
+	pci_set_cfgdata8(pi, PCIR_INTPIN, best + 1);
+}
+
+/*
+ * Route the device's legacy interrupt pin, if it has one.
+ *
+ * The I/O APIC IRQ and PIRQ pin are kept per slot interrupt pin, so they
+ * are allocated only when that pin has none yet.  The results are copied
+ * into the device instance and the INTLINE config register is set from
+ * the PIRQ pin.
+ */
+static void
+pci_lintr_route(struct pci_devinst *pi)
+{
+	struct businfo *bi;
+	struct intxinfo *ii;
+
+	/* Pin 0 means the device never requested a legacy interrupt. */
+	if (pi->pi_lintr.pin == 0)
+		return;
+
+	bi = pci_businfo[pi->pi_bus];
+	assert(bi != NULL);
+	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];
+
+	/*
+	 * Attempt to allocate an I/O APIC pin for this intpin if one
+	 * is not yet assigned.
+	 */
+	if (ii->ii_ioapic_irq == 0)
+		ii->ii_ioapic_irq = ioapic_pci_alloc_irq();
+	assert(ii->ii_ioapic_irq > 0);
+
+	/*
+	 * Attempt to allocate a PIRQ pin for this intpin if one is
+	 * not yet assigned.
+	 */
+	if (ii->ii_pirq_pin == 0)
+		ii->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx);
+	assert(ii->ii_pirq_pin > 0);
+
+	pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
+	pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
+	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
+}
+
+/*
+ * Assert the device's legacy interrupt.
+ *
+ * Only acts when the interrupt is IDLE: it is asserted right away if INTx
+ * delivery is currently permitted, otherwise it is remembered as PENDING.
+ * The device must have a legacy interrupt pin.
+ */
+void
+pci_lintr_assert(struct pci_devinst *pi)
+{
+
+	assert(pi->pi_lintr.pin > 0);
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	if (pi->pi_lintr.state == IDLE) {
+		if (!pci_lintr_permitted(pi)) {
+			pi->pi_lintr.state = PENDING;
+		} else {
+			pi->pi_lintr.state = ASSERTED;
+			pci_irq_assert(pi);
+		}
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Deassert the device's legacy interrupt.
+ *
+ * An ASSERTED interrupt is deasserted and a PENDING one is dropped; in
+ * both cases the state returns to IDLE.  The device must have a legacy
+ * interrupt pin.
+ */
+void
+pci_lintr_deassert(struct pci_devinst *pi)
+{
+
+	assert(pi->pi_lintr.pin > 0);
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	switch (pi->pi_lintr.state) {
+	case ASSERTED:
+		pi->pi_lintr.state = IDLE;
+		pci_irq_deassert(pi);
+		break;
+	case PENDING:
+		pi->pi_lintr.state = IDLE;
+		break;
+	default:
+		break;
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Re-evaluate the legacy interrupt after a change that may affect whether
+ * INTx delivery is permitted.
+ *
+ * An ASSERTED interrupt that is no longer permitted is deasserted and
+ * parked as PENDING; a PENDING interrupt that has become permitted is
+ * asserted.
+ */
+static void
+pci_lintr_update(struct pci_devinst *pi)
+{
+
+	pthread_mutex_lock(&pi->pi_lintr.lock);
+	switch (pi->pi_lintr.state) {
+	case ASSERTED:
+		if (!pci_lintr_permitted(pi)) {
+			pci_irq_deassert(pi);
+			pi->pi_lintr.state = PENDING;
+		}
+		break;
+	case PENDING:
+		if (pci_lintr_permitted(pi)) {
+			pi->pi_lintr.state = ASSERTED;
+			pci_irq_assert(pi);
+		}
+		break;
+	default:
+		break;
+	}
+	pthread_mutex_unlock(&pi->pi_lintr.lock);
+}
+
+/*
+ * Count the interrupt pins on 'bus' that have at least one user, across
+ * all slots.  Returns 0 for a bus without a businfo entry.
+ */
+int
+pci_count_lintr(int bus)
+{
+	struct slotinfo *si;
+	int slot, pin, inuse;
+
+	if (pci_businfo[bus] == NULL)
+		return (0);
+
+	inuse = 0;
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		si = &pci_businfo[bus]->slotinfo[slot];
+		for (pin = 0; pin < 4; pin++) {
+			if (si->si_intpins[pin].ii_count != 0)
+				inuse++;
+		}
+	}
+	return (inuse);
+}
+
+/*
+ * Invoke 'cb' for every interrupt pin on 'bus' that has at least one
+ * user, in slot order and then pin order.  The callback receives the bus,
+ * the slot, the 1-based pin number, the pin's PIRQ pin and I/O APIC IRQ,
+ * and 'arg'.  Does nothing for a bus without a businfo entry.
+ */
+void
+pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
+{
+	struct businfo *bi;
+	struct intxinfo *ii;
+	int slot, pin;
+
+	bi = pci_businfo[bus];
+	if (bi == NULL)
+		return;
+
+	for (slot = 0; slot < MAXSLOTS; slot++) {
+		for (pin = 0; pin < 4; pin++) {
+			ii = &bi->slotinfo[slot].si_intpins[pin];
+			if (ii->ii_count == 0)
+				continue;
+			cb(bus, slot, pin + 1, ii->ii_pirq_pin,
+			    ii->ii_ioapic_irq, arg);
+		}
+	}
+}
+/*
+ * Report whether the emulated device in 'slot' of 'bus' is a
+ * multi-function device, i.e. whether more than one of its functions has
+ * a device instance.  Returns 1 if so and 0 otherwise, including when the
+ * bus has no businfo entry.
+ */
+static int
+pci_emul_is_mfdev(int bus, int slot)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	int f, present;
+
+	bi = pci_businfo[bus];
+	if (bi == NULL)
+		return (0);
+
+	si = &bi->slotinfo[slot];
+	present = 0;
+	for (f = 0; f < MAXFUNCS; f++) {
+		if (si->si_funcs[f].fi_devi != NULL)
+			present++;
+	}
+	return (present > 1);
+}
+
+/*
+ * Ensure that the PCIM_MFDEV bit in a config read result is set (or
+ * cleared) according to whether a multi-function device is being emulated
+ * in the pci 'slot'.
+ *
+ * Only reads that cover the header type register are touched.  For 1- and
+ * 2-byte reads the bit sits in the low byte of '*rv'; for 4-byte reads it
+ * sits 16 bits higher.
+ */
+static void
+pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
+{
+	uint32_t mfbit;
+
+	/* The access does not include the header type register. */
+	if (off > PCIR_HDRTYPE || off + bytes <= PCIR_HDRTYPE)
+		return;
+
+	if (bytes == 1 || bytes == 2)
+		mfbit = PCIM_MFDEV;
+	else if (bytes == 4)
+		mfbit = PCIM_MFDEV << 16;
+	else
+		return;
+
+	if (pci_emul_is_mfdev(bus, slot))
+		*rv |= mfbit;
+	else
+		*rv &= ~mfbit;
+}
+
+/*
+ * Return the bits selected by 'mask' whose value differs between 'old'
+ * and 'new'; the result is 0 when none of them changed.
+ */
+static uint32_t
+bits_changed(uint32_t old, uint32_t new, uint32_t mask)
+{
+	uint32_t before, after;
+
+	before = old & mask;
+	after = new & mask;
+	return (before ^ after);
+}
+
+/*
+ * Apply a guest write to the command register.
+ *
+ * After the write is stored, every BAR whose address space decoding was
+ * switched on or off by it is registered or unregistered, and the legacy
+ * interrupt state is re-evaluated.
+ */
+static void
+pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
+{
+	uint16_t prev;
+	int bar, io_toggled, mem_toggled;
+
+	/*
+	 * The command register is at an offset of 4 bytes and thus the
+	 * guest could write 1, 2 or 4 bytes starting at this offset.
+	 * Remember the old value, store the write, then read back what
+	 * the register became.
+	 */
+	prev = pci_get_cfgdata16(pi, PCIR_COMMAND);
+	CFGWRITE(pi, PCIR_COMMAND, new, bytes);
+	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+	io_toggled = bits_changed(prev, new, PCIM_CMD_PORTEN) != 0;
+	mem_toggled = bits_changed(prev, new, PCIM_CMD_MEMEN) != 0;
+
+	/*
+	 * If the MMIO or I/O address space decoding has changed then
+	 * register/unregister all BARs that decode that address space.
+	 */
+	for (bar = 0; bar <= PCI_BARMAX; bar++) {
+		switch (pi->pi_bar[bar].type) {
+		case PCIBAR_NONE:
+		case PCIBAR_MEMHI64:
+			break;
+		case PCIBAR_IO:
+			if (!io_toggled)
+				break;
+			if (porten(pi))
+				register_bar(pi, bar);
+			else
+				unregister_bar(pi, bar);
+			break;
+		case PCIBAR_MEM32:
+		case PCIBAR_MEM64:
+			if (!mem_toggled)
+				break;
+			if (memen(pi))
+				register_bar(pi, bar);
+			else
+				unregister_bar(pi, bar);
+			break;
+		default:
+			assert(0);
+		}
+	}
+
+	/*
+	 * If INTx has been unmasked and is pending, assert the
+	 * interrupt.
+	 */
+	pci_lintr_update(pi);
+}
+
+/*
+ * Common handler for PCI config space accesses.
+ *
+ * 'in' is non-zero for a read, in which case the result is stored in
+ * '*eax'; otherwise '*eax' holds the value being written.  'coff' is the
+ * config space offset and 'bytes' the access width.
+ *
+ * Reads of absent devices and malformed accesses return all ones, and
+ * such writes are ignored.  Otherwise the device emulation's
+ * pe_cfgread/pe_cfgwrite hook gets the first chance to handle the access
+ * before the default handling for BARs, capabilities, the command
+ * register and plain config bytes is applied.
+ */
+static void
+pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
+    int coff, int bytes, uint32_t *eax)
+{
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct pci_devinst *pi;
+	struct pci_devemu *pe;
+	int idx, needcfg;
+	uint64_t addr, bar, mask;
+
+	if ((bi = pci_businfo[bus]) != NULL) {
+		si = &bi->slotinfo[slot];
+		pi = si->si_funcs[func].fi_devi;
+	} else
+		pi = NULL;
+
+	/*
+	 * Just return if there is no device at this slot:func or if the
+	 * guest is doing an un-aligned access.
+	 */
+	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
+	    (coff & (bytes - 1)) != 0) {
+		if (in)
+			*eax = 0xffffffff;
+		return;
+	}
+
+	/*
+	 * Ignore all writes beyond the standard config space and return all
+	 * ones on reads.
+	 */
+	if (coff >= PCI_REGMAX + 1) {
+		if (in) {
+			*eax = 0xffffffff;
+			/*
+			 * Extended capabilities begin at offset 256 in config
+			 * space. Absence of extended capabilities is signaled
+			 * with all 0s in the extended capability header at
+			 * offset 256.
+			 */
+			if (coff <= PCI_REGMAX + 4)
+				*eax = 0x00000000;
+		}
+		return;
+	}
+
+	pe = pi->pi_d;
+
+	/*
+	 * Config read
+	 */
+	if (in) {
+		/* Let the device emulation override the default handler */
+		if (pe->pe_cfgread != NULL) {
+			needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes,
+			    eax);
+		} else {
+			needcfg = 1;
+		}
+
+		/* A non-zero needcfg selects the stored config data. */
+		if (needcfg) {
+			if (bytes == 1)
+				*eax = pci_get_cfgdata8(pi, coff);
+			else if (bytes == 2)
+				*eax = pci_get_cfgdata16(pi, coff);
+			else
+				*eax = pci_get_cfgdata32(pi, coff);
+		}
+
+		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
+	} else {
+		/*
+		 * Let the device emulation override the default handler;
+		 * a return of 0 means the write needs no further handling.
+		 */
+		if (pe->pe_cfgwrite != NULL &&
+		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
+			return;
+
+		/*
+		 * Special handling for write to BAR registers
+		 */
+		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
+			/*
+			 * Ignore writes to BAR registers that are not
+			 * 4-byte aligned.
+			 */
+			if (bytes != 4 || (coff & 0x3) != 0)
+				return;
+			idx = (coff - PCIR_BAR(0)) / 4;
+			/* Address bits below the BAR size are not writable. */
+			mask = ~(pi->pi_bar[idx].size - 1);
+			switch (pi->pi_bar[idx].type) {
+			case PCIBAR_NONE:
+				pi->pi_bar[idx].addr = bar = 0;
+				break;
+			case PCIBAR_IO:
+				addr = *eax & mask;
+				addr &= 0xffff;
+				bar = addr | PCIM_BAR_IO_SPACE;
+				/*
+				 * Register the new BAR value for interception
+				 */
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_IO);
+				}
+				break;
+			case PCIBAR_MEM32:
+				addr = bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+				if (addr != pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM32);
+				}
+				break;
+			case PCIBAR_MEM64:
+				/* Low half: compare the low 32 bits only. */
+				addr = bar = *eax & mask;
+				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
+				       PCIM_BAR_MEM_PREFETCH;
+				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
+					update_bar_address(pi, addr, idx,
+							   PCIBAR_MEM64);
+				}
+				break;
+			case PCIBAR_MEMHI64:
+				/*
+				 * High half: size and address are taken
+				 * from the preceding BAR entry.
+				 */
+				mask = ~(pi->pi_bar[idx - 1].size - 1);
+				addr = ((uint64_t)*eax << 32) & mask;
+				bar = addr >> 32;
+				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
+					update_bar_address(pi, addr, idx - 1,
+							   PCIBAR_MEMHI64);
+				}
+				break;
+			default:
+				assert(0);
+			}
+			pci_set_cfgdata32(pi, coff, bar);
+
+		} else if (pci_emul_iscap(pi, coff)) {
+			pci_emul_capwrite(pi, coff, bytes, *eax);
+		} else if (coff == PCIR_COMMAND) {
+			pci_emul_cmdwrite(pi, *eax, bytes);
+		} else {
+			CFGWRITE(pi, coff, *eax, bytes);
+		}
+	}
+}
+
+static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
+
+/*
+ * I/O port handler for the config address port.
+ *
+ * A 4-byte write latches the enable bit, bus, slot, function and register
+ * offset into the cfg* variables; a 4-byte read returns them re-encoded.
+ * Accesses of any other width are ignored, with reads returning all ones
+ * for the access width.  Always returns 0.
+ */
+static int
+pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	uint32_t reg;
+
+	if (bytes != 4) {
+		if (in)
+			*eax = (bytes == 2) ? 0xffff : 0xff;
+		return (0);
+	}
+
+	if (!in) {
+		/* Write: split the register into its fields. */
+		reg = *eax;
+		cfgbus = (reg >> 16) & PCI_BUSMAX;
+		cfgslot = (reg >> 11) & PCI_SLOTMAX;
+		cfgfunc = (reg >> 8) & PCI_FUNCMAX;
+		cfgoff = reg & PCI_REGMAX;
+		cfgenable = (reg & CONF1_ENABLE) == CONF1_ENABLE;
+		return (0);
+	}
+
+	/* Read: rebuild the register from the latched fields. */
+	reg = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
+	if (cfgenable)
+		reg |= CONF1_ENABLE;
+	*eax = reg;
+
+	return (0);
+}
+INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);
+
+/*
+ * I/O port handler for the config data ports.
+ *
+ * When config access has been enabled through the address port, the
+ * access is forwarded to pci_cfgrw() for the latched bus/slot/function,
+ * at the latched offset plus the byte lane selected by 'port'.
+ * Otherwise writes are ignored and reads return all ones.  Always
+ * returns 0.
+ */
+static int
+pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	int coff;
+
+	assert(bytes == 1 || bytes == 2 || bytes == 4);
+
+	if (!cfgenable) {
+		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
+		if (in)
+			*eax = 0xffffffff;
+		return (0);
+	}
+
+	coff = cfgoff + (port - CONF1_DATA_PORT);
+	pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
+	return (0);
+}
+
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
+INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
+
+#define PCI_EMUL_TEST
+#ifdef PCI_EMUL_TEST
+/*
+ * Define a dummy test device
+ */
+#define DIOSZ	8
+#define DMEMSZ	4096
+/* Per-instance state of the dummy test device: plain backing storage. */
+struct pci_emul_dsoftc {
+	uint8_t   ioregs[DIOSZ];	/* backs BAR 0 (I/O) */
+	uint8_t	  memregs[DMEMSZ];	/* backs BAR 1 (32-bit memory) */
+};
+
+#define	PCI_EMUL_MSI_MSGS	 4
+#define	PCI_EMUL_MSIX_MSGS	16
+
+/*
+ * pe_init hook of the dummy test device.
+ *
+ * Allocates the backing storage, sets the device/vendor/class IDs, adds
+ * an MSI capability with PCI_EMUL_MSI_MSGS messages and allocates an I/O
+ * BAR (0) and a 32-bit memory BAR (1).  'ctx' and 'opts' are not used.
+ *
+ * Returns 0 on success or -1 if the backing storage cannot be allocated.
+ */
+static int
+pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	int error;
+	struct pci_emul_dsoftc *sc;
+
+	sc = calloc(1, sizeof(struct pci_emul_dsoftc));
+	/* The BAR handlers dereference pi_arg; fail instead of storing NULL. */
+	if (sc == NULL)
+		return (-1);
+
+	pi->pi_arg = sc;
+
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
+	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
+
+	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
+	assert(error == 0);
+
+	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
+	assert(error == 0);
+
+	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
+	assert(error == 0);
+
+	return (0);
+}
+
+/*
+ * pe_barwrite hook of the dummy test device.
+ *
+ * Writes to BAR 0 and BAR 1 are stored in the ioregs and memregs arrays
+ * respectively, after a bounds check against the array size.  Certain
+ * writes to BAR 0 additionally generate MSI interrupts (see below).
+ * Writes to any other BAR are only reported.
+ */
+static void
+pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	      uint64_t offset, int size, uint64_t value)
+{
+	int i;
+	struct pci_emul_dsoftc *sc = pi->pi_arg;
+
+	if (baridx == 0) {
+		if (offset + size > DIOSZ) {
+			printf("diow: iow too large, offset %ld size %d\n",
+			       offset, size);
+			return;
+		}
+
+		/*
+		 * NOTE(review): the 2- and 4-byte stores go through casted
+		 * pointers into a byte array, which presumably relies on
+		 * the host tolerating unaligned accesses -- confirm.
+		 */
+		if (size == 1) {
+			sc->ioregs[offset] = value & 0xff;
+		} else if (size == 2) {
+			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
+		} else if (size == 4) {
+			*(uint32_t *)&sc->ioregs[offset] = value;
+		} else {
+			printf("diow: iow unknown size %d\n", size);
+		}
+
+		/*
+		 * Special magic value to generate an interrupt: a 4-byte
+		 * write at offset 4 raises the MSI message selected by the
+		 * value written (modulo the number of enabled messages).
+		 */
+		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
+			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
+
+		/* Writing 0xabcdef raises every enabled MSI message. */
+		if (value == 0xabcdef) {
+			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
+				pci_generate_msi(pi, i);
+		}
+	}
+
+	if (baridx == 1) {
+		if (offset + size > DMEMSZ) {
+			printf("diow: memw too large, offset %ld size %d\n",
+			       offset, size);
+			return;
+		}
+
+		if (size == 1) {
+			sc->memregs[offset] = value;
+		} else if (size == 2) {
+			*(uint16_t *)&sc->memregs[offset] = value;
+		} else if (size == 4) {
+			*(uint32_t *)&sc->memregs[offset] = value;
+		} else if (size == 8) {
+			*(uint64_t *)&sc->memregs[offset] = value;
+		} else {
+			printf("diow: memw unknown size %d\n", size);
+		}
+		
+		/*
+		 * magic interrupt ??
+		 */
+	}
+
+	if (baridx > 1) {
+		printf("diow: unknown bar idx %d\n", baridx);
+	}
+}
+
+/*
+ * pe_barread hook of the dummy test device.
+ *
+ * Reads from BAR 0 and BAR 1 return the bytes stored in the ioregs and
+ * memregs arrays respectively.  Out-of-range accesses, unsupported access
+ * sizes and unknown BARs are reported and read as 0.
+ */
+static uint64_t
+pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
+	      uint64_t offset, int size)
+{
+	struct pci_emul_dsoftc *sc = pi->pi_arg;
+	uint64_t value;
+
+	/*
+	 * 64 bits wide so that 8-byte reads are not truncated, and zeroed
+	 * so that the unsupported-size paths return a defined value.
+	 */
+	value = 0;
+
+	if (baridx == 0) {
+		if (offset + size > DIOSZ) {
+			printf("dior: ior too large, offset %ld size %d\n",
+			       offset, size);
+			return (0);
+		}
+
+		if (size == 1) {
+			value = sc->ioregs[offset];
+		} else if (size == 2) {
+			value = *(uint16_t *) &sc->ioregs[offset];
+		} else if (size == 4) {
+			value = *(uint32_t *) &sc->ioregs[offset];
+		} else {
+			printf("dior: ior unknown size %d\n", size);
+		}
+	}
+
+	if (baridx == 1) {
+		if (offset + size > DMEMSZ) {
+			printf("dior: memr too large, offset %ld size %d\n",
+			       offset, size);
+			return (0);
+		}
+
+		if (size == 1) {
+			value = sc->memregs[offset];
+		} else if (size == 2) {
+			value = *(uint16_t *) &sc->memregs[offset];
+		} else if (size == 4) {
+			value = *(uint32_t *) &sc->memregs[offset];
+		} else if (size == 8) {
+			value = *(uint64_t *) &sc->memregs[offset];
+		} else {
+			printf("dior: memr unknown size %d\n", size);
+		}
+	}
+
+
+	if (baridx > 1) {
+		printf("dior: unknown bar idx %d\n", baridx);
+		return (0);
+	}
+
+	return (value);
+}
+
+/*
+ * Device emulation descriptor for the dummy test device, found by the
+ * name "dummy".  It only provides init and BAR read/write hooks.
+ */
+struct pci_devemu pci_dummy = {
+	.pe_emu = "dummy",
+	.pe_init = pci_emul_dinit,
+	.pe_barwrite = pci_emul_diow,
+	.pe_barread = pci_emul_dior
+};
+PCI_EMUL_SET(pci_dummy);
+
+#endif /* PCI_EMUL_TEST */
diff --git a/usr/src/cmd/bhyve/pci_emul.h b/usr/src/cmd/bhyve/pci_emul.h
new file mode 100644
index 0000000000..6af01c4c3c
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_emul.h
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_emul.h 269700 2014-08-08 03:49:01Z neel $
+ */
+
+#ifndef _PCI_EMUL_H_
+#define _PCI_EMUL_H_
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/_pthreadtypes.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <assert.h>
+
+#define	PCI_BARMAX	PCIR_MAX_BAR_0	/* BAR registers in a Type 0 header */
+
+struct vmctx;
+struct pci_devinst;
+struct memory_region;
+
+/*
+ * Descriptor for one kind of emulated PCI device: a name plus the callbacks
+ * that implement it.  Callbacks a device does not need may be left unset
+ * (several descriptors in this tree only provide pe_init).
+ */
+struct pci_devemu {
+	char      *pe_emu;		/* Name of device emulation */
+
+	/* instance creation */
+	int       (*pe_init)(struct vmctx *, struct pci_devinst *,
+			     char *opts);
+
+	/* ACPI DSDT enumeration */
+	void	(*pe_write_dsdt)(struct pci_devinst *);
+
+	/* config space read/write callbacks */
+	int	(*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
+			       struct pci_devinst *pi, int offset,
+			       int bytes, uint32_t val);
+	int	(*pe_cfgread)(struct vmctx *ctx, int vcpu,
+			      struct pci_devinst *pi, int offset,
+			      int bytes, uint32_t *retval);
+
+	/* BAR read/write callbacks */
+	void      (*pe_barwrite)(struct vmctx *ctx, int vcpu,
+				 struct pci_devinst *pi, int baridx,
+				 uint64_t offset, int size, uint64_t value);
+	uint64_t  (*pe_barread)(struct vmctx *ctx, int vcpu,
+				struct pci_devinst *pi, int baridx,
+				uint64_t offset, int size);
+};
+/*
+ * Register a struct pci_devemu in the pci_devemu_set linker set.  Note that
+ * the expansion already ends in ';', so "PCI_EMUL_SET(x);" yields an extra
+ * empty declaration.
+ */
+#define PCI_EMUL_SET(x)   DATA_SET(pci_devemu_set, x);
+
+enum pcibar_type {
+	PCIBAR_NONE,
+	PCIBAR_IO,
+	PCIBAR_MEM32,
+	PCIBAR_MEM64,
+	PCIBAR_MEMHI64
+};
+
+struct pcibar {
+	enum pcibar_type	type;		/* io or memory */
+	uint64_t		size;
+	uint64_t		addr;
+};
+
+#define PI_NAMESZ	40
+
+struct msix_table_entry {
+	uint64_t	addr;
+	uint32_t	msg_data;
+	uint32_t	vector_control;
+} __packed;
+
+/*
+ * Size in bytes of one emulated MSI-X table entry.  A separate define is
+ * used for the emulated size so that it stays fixed even if
+ * struct msix_table_entry is later extended to hold extra information.
+ */
+#define	MSIX_TABLE_ENTRY_SIZE	16
+#define MAX_MSIX_TABLE_ENTRIES	2048
+#define	PBA_SIZE(msgnum)	(roundup2((msgnum), 64) / 8)
+
+enum lintr_stat {
+	IDLE,
+	ASSERTED,
+	PENDING
+};
+
+/*
+ * Per-instance state of one emulated PCI function: its location, legacy
+ * interrupt, MSI and MSI-X state, BARs and a shadow copy of config space.
+ */
+struct pci_devinst {
+	struct pci_devemu *pi_d;
+	struct vmctx *pi_vmctx;
+	uint8_t	  pi_bus, pi_slot, pi_func;
+	char	  pi_name[PI_NAMESZ];
+	int	  pi_bar_getsize;
+	int	  pi_prevcap;
+	int	  pi_capend;
+
+	/* Legacy (INTx) interrupt state. */
+	struct {
+		int8_t    	pin;
+		enum lintr_stat	state;
+		/* 1-based PIRQ pin; pci_irq.c only routes via PIRQ when > 0 */
+		int		pirq_pin;
+		int	  	ioapic_irq;
+		pthread_mutex_t	lock;
+	} pi_lintr;
+
+	/* MSI state. */
+	struct {
+		int		enabled;
+		uint64_t	addr;
+		uint64_t	msg_data;
+		int		maxmsgnum;
+	} pi_msi;
+
+	/* MSI-X state. */
+	struct {
+		int	enabled;
+		int	table_bar;
+		int	pba_bar;
+		uint32_t table_offset;
+		int	table_count;
+		uint32_t pba_offset;
+		int	pba_size;
+		int	function_mask; 	
+		struct msix_table_entry *table;	/* allocated at runtime */
+	} pi_msix;
+
+	void      *pi_arg;		/* devemu-private data */
+
+	/* Backing store for the pci_{get,set}_cfgdata{8,16,32}() accessors. */
+	u_char	  pi_cfgdata[PCI_REGMAX + 1];
+	struct pcibar pi_bar[PCI_BARMAX + 1];
+};
+
+struct msicap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	msgctrl;
+	uint32_t	addrlo;
+	uint32_t	addrhi;
+	uint16_t	msgdata;
+} __packed;
+
+struct msixcap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	msgctrl;
+	uint32_t	table_info;	/* bar index and offset within it */
+	uint32_t	pba_info;	/* bar index and offset within it */
+} __packed;
+
+struct pciecap {
+	uint8_t		capid;
+	uint8_t		nextptr;
+	uint16_t	pcie_capabilities;
+
+	uint32_t	dev_capabilities;	/* all devices */
+	uint16_t	dev_control;
+	uint16_t	dev_status;
+
+	uint32_t	link_capabilities;	/* devices with links */
+	uint16_t	link_control;
+	uint16_t	link_status;
+
+	uint32_t	slot_capabilities;	/* ports with slots */
+	uint16_t	slot_control;
+	uint16_t	slot_status;
+
+	uint16_t	root_control;		/* root ports */
+	uint16_t	root_capabilities;
+	uint32_t	root_status;
+
+	uint32_t	dev_capabilities2;	/* all devices */
+	uint16_t	dev_control2;
+	uint16_t	dev_status2;
+
+	uint32_t	link_capabilities2;	/* devices with links */
+	uint16_t	link_control2;
+	uint16_t	link_status2;
+
+	uint32_t	slot_capabilities2;	/* ports with slots */
+	uint16_t	slot_control2;
+	uint16_t	slot_status2;
+} __packed;
+
+typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
+    int ioapic_irq, void *arg);
+
+int	init_pci(struct vmctx *ctx);
+void	msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+	    int bytes, uint32_t val);
+void	msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
+	    int bytes, uint32_t val);
+void	pci_callback(void);
+int	pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
+	    enum pcibar_type type, uint64_t size);
+int	pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx,
+	    uint64_t hostbase, enum pcibar_type type, uint64_t size);
+int	pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
+int	pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
+void	pci_generate_msi(struct pci_devinst *pi, int msgnum);
+void	pci_generate_msix(struct pci_devinst *pi, int msgnum);
+void	pci_lintr_assert(struct pci_devinst *pi);
+void	pci_lintr_deassert(struct pci_devinst *pi);
+void	pci_lintr_request(struct pci_devinst *pi);
+int	pci_msi_enabled(struct pci_devinst *pi);
+int	pci_msix_enabled(struct pci_devinst *pi);
+int	pci_msix_table_bar(struct pci_devinst *pi);
+int	pci_msix_pba_bar(struct pci_devinst *pi);
+int	pci_msi_msgnum(struct pci_devinst *pi);
+int	pci_parse_slot(char *opt);
+void	pci_populate_msicap(struct msicap *cap, int msgs, int nextptr);
+int	pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum);
+int	pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
+			     uint64_t value);
+uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size);
+int	pci_count_lintr(int bus);
+void	pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg);
+void	pci_write_dsdt(void);
+uint64_t pci_ecfg_base(void);
+int	pci_bus_configured(int bus);
+
+/*
+ * Store an 8-bit value at byte 'offset' of the device's shadow config space.
+ * 'offset' must lie within 0..PCI_REGMAX; because it is a signed int the
+ * lower bound is asserted as well as the upper one.
+ */
+static __inline void
+pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
+{
+	assert(offset >= 0 && offset <= PCI_REGMAX);
+	*(uint8_t *)(pi->pi_cfgdata + offset) = val;
+}
+
+/*
+ * Store a 16-bit value at byte 'offset' of the device's shadow config space.
+ * 'offset' must be 2-byte aligned and lie within 0..PCI_REGMAX - 1; because
+ * it is a signed int the lower bound is asserted as well as the upper one.
+ */
+static __inline void
+pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
+	*(uint16_t *)(pi->pi_cfgdata + offset) = val;
+}
+
+/*
+ * Store a 32-bit value at byte 'offset' of the device's shadow config space.
+ * 'offset' must be 4-byte aligned and lie within 0..PCI_REGMAX - 3; because
+ * it is a signed int the lower bound is asserted as well as the upper one.
+ */
+static __inline void
+pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
+	*(uint32_t *)(pi->pi_cfgdata + offset) = val;
+}
+
+/*
+ * Return the 8-bit value at byte 'offset' of the device's shadow config
+ * space.  'offset' must lie within 0..PCI_REGMAX; because it is a signed
+ * int the lower bound is asserted as well as the upper one.
+ */
+static __inline uint8_t
+pci_get_cfgdata8(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= PCI_REGMAX);
+	return (*(uint8_t *)(pi->pi_cfgdata + offset));
+}
+
+/*
+ * Return the 16-bit value at byte 'offset' of the device's shadow config
+ * space.  'offset' must be 2-byte aligned and lie within 0..PCI_REGMAX - 1;
+ * because it is a signed int the lower bound is asserted as well.
+ */
+static __inline uint16_t
+pci_get_cfgdata16(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 1) && (offset & 1) == 0);
+	return (*(uint16_t *)(pi->pi_cfgdata + offset));
+}
+
+/*
+ * Return the 32-bit value at byte 'offset' of the device's shadow config
+ * space.  'offset' must be 4-byte aligned and lie within 0..PCI_REGMAX - 3;
+ * because it is a signed int the lower bound is asserted as well.
+ */
+static __inline uint32_t
+pci_get_cfgdata32(struct pci_devinst *pi, int offset)
+{
+	assert(offset >= 0 && offset <= (PCI_REGMAX - 3) && (offset & 3) == 0);
+	return (*(uint32_t *)(pi->pi_cfgdata + offset));
+}
+
+#endif /* _PCI_EMUL_H_ */
diff --git a/usr/src/cmd/bhyve/pci_hostbridge.c b/usr/src/cmd/bhyve/pci_hostbridge.c
new file mode 100644
index 0000000000..08956d082e
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_hostbridge.c
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_hostbridge.c 283264 2015-05-21 20:11:52Z tychon $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_hostbridge.c 283264 2015-05-21 20:11:52Z tychon $");
+
+#include "pci_emul.h"
+
+/*
+ * Initialise config space for the generic host bridge: NetApp vendor and
+ * device IDs, a normal header type, bridge/host class codes and a PCIe
+ * capability of type root port.  Always returns 0.
+ */
+static int
+pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	/* Identification: both IDs use the NetApp value. */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275);
+
+	/* Classification. */
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
+	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
+
+	/* The result of adding the capability is not checked. */
+	pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_PORT);
+
+	return (0);
+}
+
+/*
+ * Same as pci_hostbridge_init(), but afterwards overrides the vendor ID
+ * with AMD's and the device ID with a made-up value.  Always returns 0.
+ */
+static int
+pci_amd_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	const uint16_t amd_vendor = 0x1022;	/* AMD */
+	const uint16_t amd_device = 0x7432;	/* made up */
+
+	(void) pci_hostbridge_init(ctx, pi, opts);
+
+	pci_set_cfgdata16(pi, PCIR_VENDOR, amd_vendor);
+	pci_set_cfgdata16(pi, PCIR_DEVICE, amd_device);
+
+	return (0);
+}
+
+struct pci_devemu pci_de_amd_hostbridge = {
+	.pe_emu = "amd_hostbridge",
+	.pe_init = pci_amd_hostbridge_init,
+};
+PCI_EMUL_SET(pci_de_amd_hostbridge);
+
+struct pci_devemu pci_de_hostbridge = {
+	.pe_emu = "hostbridge",
+	.pe_init = pci_hostbridge_init,
+};
+PCI_EMUL_SET(pci_de_hostbridge);
diff --git a/usr/src/cmd/bhyve/pci_irq.c b/usr/src/cmd/bhyve/pci_irq.c
new file mode 100644
index 0000000000..97ee330c65
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_irq.c
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_irq.c 266125 2014-05-15 14:16:55Z jhb $");
+
+#include <sys/param.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+/*
+ * Implement an 8 pin PCI interrupt router compatible with the router
+ * present on Intel's ICH10 chip.
+ */
+
+/* Fields in each PIRQ register. */
+#define	PIRQ_DIS	0x80
+#define	PIRQ_IRQ	0x0f
+
+/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */
+#define	PERMITTED_IRQS	0xdef8
+#define	IRQ_PERMITTED(irq)	(((1U << (irq)) & PERMITTED_IRQS) != 0)
+
+/* IRQ count to disable an IRQ. */
+#define	IRQ_DISABLED	0xff
+
+/*
+ * State of the 8 PIRQ pins.  Public functions take 1-based pin numbers and
+ * index this array with (pin - 1).
+ */
+static struct pirq {
+	uint8_t	reg;		/* PIRQ_DIS flag plus PIRQ_IRQ routing field */
+	int	use_count;	/* times pirq_alloc_pin() handed out this pin */
+	int	active_count;	/* pci_irq_assert() minus pci_irq_deassert() */
+	pthread_mutex_t lock;	/* taken by pirq_write(), pci_irq_{,de}assert() */
+} pirqs[8];
+
+/* Per-IRQ usage count; IRQ_DISABLED marks an IRQ that is never routed. */
+static u_char irq_counts[16];
+/* pci_irq_reserve() and pci_irq_use() assert that this is non-zero. */
+static int pirq_cold = 1;
+
+/*
+ * Decide whether a PIRQ register value routes to a usable IRQ.  The value
+ * is rejected when the disable bit is set or when its IRQ field names an
+ * IRQ outside PERMITTED_IRQS; in the latter case the pin behaves as if it
+ * were disabled.
+ */
+static bool
+pirq_valid_irq(int reg)
+{
+	const bool disabled = (reg & PIRQ_DIS) != 0;
+
+	return (!disabled && IRQ_PERMITTED(reg & PIRQ_IRQ));
+}
+
+/*
+ * Return the current register value of 1-based PIRQ pin 'pin'.
+ */
+uint8_t
+pirq_read(int pin)
+{
+	const struct pirq *pirq;
+
+	assert(pin > 0 && pin <= nitems(pirqs));
+	pirq = &pirqs[pin - 1];
+	return (pirq->reg);
+}
+
+/*
+ * Update the register of 1-based PIRQ pin 'pin'.  Only the PIRQ_DIS and
+ * PIRQ_IRQ bits of 'val' are kept.  If the value changes while the pin is
+ * active, the interrupt is moved: the old IRQ (if valid) is deasserted and
+ * the new IRQ (if valid) is asserted, all under the pin's lock.
+ */
+void
+pirq_write(struct vmctx *ctx, int pin, uint8_t val)
+{
+	struct pirq *pirq;
+	uint8_t newreg;
+	bool active;
+
+	assert(pin > 0 && pin <= nitems(pirqs));
+	pirq = &pirqs[pin - 1];
+	newreg = val & (PIRQ_DIS | PIRQ_IRQ);
+
+	pthread_mutex_lock(&pirq->lock);
+	if (pirq->reg != newreg) {
+		active = (pirq->active_count != 0);
+
+		if (active && pirq_valid_irq(pirq->reg))
+			vm_isa_deassert_irq(ctx, pirq->reg & PIRQ_IRQ, -1);
+		pirq->reg = newreg;
+		if (active && pirq_valid_irq(pirq->reg))
+			vm_isa_assert_irq(ctx, pirq->reg & PIRQ_IRQ, -1);
+	}
+	pthread_mutex_unlock(&pirq->lock);
+}
+
+/*
+ * Mark 'irq' as unavailable for PIRQ routing.  Asserts that pirq_cold is
+ * still set and that the IRQ has not already been counted as in use.
+ */
+void
+pci_irq_reserve(int irq)
+{
+	u_char *count;
+
+	assert(irq < nitems(irq_counts));
+	assert(pirq_cold);
+
+	count = &irq_counts[irq];
+	assert(*count == 0 || *count == IRQ_DISABLED);
+	*count = IRQ_DISABLED;
+}
+
+/*
+ * Record one more user of 'irq' unless the IRQ is disabled.  Asserts that
+ * pirq_cold is still set.
+ */
+void
+pci_irq_use(int irq)
+{
+	assert(irq < nitems(irq_counts));
+	assert(pirq_cold);
+
+	if (irq_counts[irq] == IRQ_DISABLED)
+		return;
+	irq_counts[irq]++;
+}
+
+/*
+ * Reset the interrupt router: every PIRQ pin becomes disabled with zeroed
+ * counters and a fresh mutex, and every IRQ is either given a zero use
+ * count (if permitted) or marked IRQ_DISABLED.  'ctx' is not used.
+ */
+void
+pci_irq_init(struct vmctx *ctx)
+{
+	struct pirq *pirq;
+	int irq;
+
+	for (pirq = &pirqs[0]; pirq < &pirqs[nitems(pirqs)]; pirq++) {
+		pirq->reg = PIRQ_DIS;
+		pirq->use_count = 0;
+		pirq->active_count = 0;
+		pthread_mutex_init(&pirq->lock, NULL);
+	}
+
+	for (irq = 0; irq < nitems(irq_counts); irq++)
+		irq_counts[irq] = IRQ_PERMITTED(irq) ? 0 : IRQ_DISABLED;
+}
+
+/*
+ * Assert the legacy interrupt of device 'pi'.
+ *
+ * If the device has a PIRQ pin, the pin's active count is bumped; on the
+ * 0 -> 1 transition with a validly routed pin the interrupt is raised via
+ * vm_isa_assert_irq().  In every other case it is raised via
+ * vm_ioapic_assert_irq() on the device's I/O APIC pin.
+ */
+void
+pci_irq_assert(struct pci_devinst *pi)
+{
+	struct pirq *pirq;
+	bool routed = false;
+
+	if (pi->pi_lintr.pirq_pin > 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
+
+		pthread_mutex_lock(&pirq->lock);
+		if (++pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) {
+			vm_isa_assert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+			routed = true;
+		}
+		pthread_mutex_unlock(&pirq->lock);
+	}
+
+	if (!routed)
+		vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+/*
+ * Deassert the legacy interrupt of device 'pi'.
+ *
+ * If the device has a PIRQ pin, the pin's active count is dropped; on the
+ * 1 -> 0 transition with a validly routed pin the interrupt is lowered via
+ * vm_isa_deassert_irq().  In every other case it is lowered via
+ * vm_ioapic_deassert_irq() on the device's I/O APIC pin.
+ */
+void
+pci_irq_deassert(struct pci_devinst *pi)
+{
+	struct pirq *pirq;
+	bool routed = false;
+
+	if (pi->pi_lintr.pirq_pin > 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
+
+		pthread_mutex_lock(&pirq->lock);
+		if (--pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) {
+			vm_isa_deassert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+			routed = true;
+		}
+		pthread_mutex_unlock(&pirq->lock);
+	}
+
+	if (!routed)
+		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+/*
+ * Allocate a PIRQ pin and return its 1-based pin number.
+ *
+ * The least-used pin is chosen.  If that pin is still in its reset state
+ * (PIRQ_DIS) it is routed to the least-used IRQ that is not disabled, and
+ * that IRQ is switched to level-triggered mode.
+ */
+int
+pirq_alloc_pin(struct vmctx *ctx)
+{
+	int best_count, best_irq, best_pin, irq, pin;
+
+	/*
+	 * NOTE(review): pirq_cold is initialised to 1 and never cleared in
+	 * this file, so this store is a no-op.  It presumably was meant to
+	 * clear the flag so that the assert(pirq_cold) checks in
+	 * pci_irq_reserve()/pci_irq_use() catch late callers -- confirm
+	 * against the callers before changing it.
+	 */
+	pirq_cold = 1;
+
+	/* First, find the least-used PIRQ pin. */
+	best_pin = 0;
+	best_count = pirqs[0].use_count;
+	for (pin = 1; pin < nitems(pirqs); pin++) {
+		if (pirqs[pin].use_count < best_count) {
+			best_pin = pin;
+			best_count = pirqs[pin].use_count;
+		}
+	}
+	pirqs[best_pin].use_count++;
+
+	/* Second, route this pin to an IRQ. */
+	if (pirqs[best_pin].reg == PIRQ_DIS) {
+		best_irq = -1;
+		best_count = 0;
+		for (irq = 0; irq < nitems(irq_counts); irq++) {
+			if (irq_counts[irq] == IRQ_DISABLED)
+				continue;
+			if (best_irq == -1 || irq_counts[irq] < best_count) {
+				best_irq = irq;
+				best_count = irq_counts[irq];
+			}
+		}
+		/*
+		 * Reject both IRQ 0 (as before) and the "nothing found"
+		 * sentinel -1, which would otherwise index irq_counts[]
+		 * out of bounds below.
+		 */
+		assert(best_irq > 0);
+		irq_counts[best_irq]++;
+		pirqs[best_pin].reg = best_irq;
+		vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER);
+	}
+
+	return (best_pin + 1);
+}
+
+/*
+ * Return the IRQ field of 1-based PIRQ pin 'pin', or 255 when 'pin' is -1.
+ */
+int
+pirq_irq(int pin)
+{
+	if (pin != -1) {
+		assert(pin > 0 && pin <= nitems(pirqs));
+		return (pirqs[pin - 1].reg & PIRQ_IRQ);
+	}
+	return (255);
+}
+
+/* XXX: Generate $PIR table. */
+
+#ifdef	__FreeBSD__
+static void
+pirq_dsdt(void)
+{
+	char *irq_prs, *old;
+	int irq, pin;
+
+	irq_prs = NULL;
+	for (irq = 0; irq < nitems(irq_counts); irq++) {
+		if (!IRQ_PERMITTED(irq))
+			continue;
+		if (irq_prs == NULL)
+			asprintf(&irq_prs, "%d", irq);
+		else {
+			old = irq_prs;
+			asprintf(&irq_prs, "%s,%d", old, irq);
+			free(old);
+		}
+	}
+
+	/*
+	 * A helper method to validate a link register's value.  This
+	 * duplicates pirq_valid_irq().
+	 */
+	dsdt_line("");
+	dsdt_line("Method (PIRV, 1, NotSerialized)");
+	dsdt_line("{");
+	dsdt_line("  If (And (Arg0, 0x%02X))", PIRQ_DIS);
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  And (Arg0, 0x%02X, Local0)", PIRQ_IRQ);
+	dsdt_line("  If (LLess (Local0, 0x03))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  If (LEqual (Local0, 0x08))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  If (LEqual (Local0, 0x0D))");
+	dsdt_line("  {");
+	dsdt_line("    Return (0x00)");
+	dsdt_line("  }");
+	dsdt_line("  Return (0x01)");
+	dsdt_line("}");
+
+	for (pin = 0; pin < nitems(pirqs); pin++) {
+		dsdt_line("");
+		dsdt_line("Device (LNK%c)", 'A' + pin);
+		dsdt_line("{");
+		dsdt_line("  Name (_HID, EisaId (\"PNP0C0F\"))");
+		dsdt_line("  Name (_UID, 0x%02X)", pin + 1);
+		dsdt_line("  Method (_STA, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    If (PIRV (PIR%c))", 'A' + pin);
+		dsdt_line("    {");
+		dsdt_line("       Return (0x0B)");
+		dsdt_line("    }");
+		dsdt_line("    Else");
+		dsdt_line("    {");
+		dsdt_line("       Return (0x09)");
+		dsdt_line("    }");
+		dsdt_line("  }");
+		dsdt_line("  Name (_PRS, ResourceTemplate ()");
+		dsdt_line("  {");
+		dsdt_line("    IRQ (Level, ActiveLow, Shared, )");
+		dsdt_line("      {%s}", irq_prs);
+		dsdt_line("  })");
+		dsdt_line("  Name (CB%02X, ResourceTemplate ()", pin + 1);
+		dsdt_line("  {");
+		dsdt_line("    IRQ (Level, ActiveLow, Shared, )");
+		dsdt_line("      {}");
+		dsdt_line("  })");
+		dsdt_line("  CreateWordField (CB%02X, 0x01, CIR%c)",
+		    pin + 1, 'A' + pin);
+		dsdt_line("  Method (_CRS, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    And (PIR%c, 0x%02X, Local0)", 'A' + pin,
+		    PIRQ_DIS | PIRQ_IRQ);
+		dsdt_line("    If (PIRV (Local0))");
+		dsdt_line("    {");
+		dsdt_line("      ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin);
+		dsdt_line("    }");
+		dsdt_line("    Else");
+		dsdt_line("    {");
+		dsdt_line("      Store (0x00, CIR%c)", 'A' + pin);
+		dsdt_line("    }");
+		dsdt_line("    Return (CB%02X)", pin + 1);
+		dsdt_line("  }");
+		dsdt_line("  Method (_DIS, 0, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    Store (0x80, PIR%c)", 'A' + pin);
+		dsdt_line("  }");
+		dsdt_line("  Method (_SRS, 1, NotSerialized)");
+		dsdt_line("  {");
+		dsdt_line("    CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin);
+		dsdt_line("    FindSetRightBit (SIR%c, Local0)", 'A' + pin);
+		dsdt_line("    Store (Decrement (Local0), PIR%c)", 'A' + pin);
+		dsdt_line("  }");
+		dsdt_line("}");
+	}
+	free(irq_prs);
+}
+LPC_DSDT(pirq_dsdt);
+#endif
diff --git a/usr/src/cmd/bhyve/pci_irq.h b/usr/src/cmd/bhyve/pci_irq.h
new file mode 100644
index 0000000000..483f12b61e
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_irq.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_irq.h 266125 2014-05-15 14:16:55Z jhb $
+ */
+
+#ifndef __PCI_IRQ_H__
+#define	__PCI_IRQ_H__
+
+/*
+ * Forward declarations.  struct vmctx is declared here as well so that the
+ * prototypes below refer to the file-scope type even when this header is
+ * included before any other declaration of it; otherwise the struct would
+ * only have prototype scope.
+ */
+struct pci_devinst;
+struct vmctx;
+
+/* Legacy interrupt assertion for a device. */
+void	pci_irq_assert(struct pci_devinst *pi);
+void	pci_irq_deassert(struct pci_devinst *pi);
+
+/* Router setup and IRQ bookkeeping. */
+void	pci_irq_init(struct vmctx *ctx);
+void	pci_irq_reserve(int irq);
+void	pci_irq_use(int irq);
+
+/* PIRQ pin allocation and register access; pins are 1-based. */
+int	pirq_alloc_pin(struct vmctx *ctx);
+int	pirq_irq(int pin);
+uint8_t	pirq_read(int pin);
+void	pirq_write(struct vmctx *ctx, int pin, uint8_t val);
+
+#endif
diff --git a/usr/src/cmd/bhyve/pci_lpc.c b/usr/src/cmd/bhyve/pci_lpc.c
new file mode 100644
index 0000000000..8c060150dc
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_lpc.c
@@ -0,0 +1,433 @@
+/*-
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_lpc.c 266933 2014-05-31 23:37:34Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_lpc.c 266933 2014-05-31 23:37:34Z neel $");
+
+#include <sys/types.h>
+#include <machine/vmm.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "uart_emul.h"
+
+#define	IO_ICU1		0x20
+#define	IO_ICU2		0xA0
+
+SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt);
+SET_DECLARE(lpc_sysres_set, struct lpc_sysres);
+
+#define	ELCR_PORT	0x4d0
+SYSRES_IO(ELCR_PORT, 2);
+
+#define	IO_TIMER1_PORT	0x40
+
+#define	NMISC_PORT	0x61
+SYSRES_IO(NMISC_PORT, 1);
+
+/* Set by pci_lpc_init(); NULL until an LPC bridge has been configured. */
+static struct pci_devinst *lpc_bridge;
+
+#define	LPC_UART_NUM	2
+/* Per-UART state for the COM ports behind the LPC bridge. */
+static struct lpc_uart_softc {
+	struct uart_softc *uart_softc;	/* handle returned by uart_init() */
+	const char *opts;	/* backend options from lpc_device_parse() */
+	int	iobase;		/* filled in by uart_legacy_alloc() */
+	int	irq;		/* filled in by uart_legacy_alloc() */
+	int	enabled;	/* set once lpc_init() registered the ports */
+} lpc_uart_softc[LPC_UART_NUM];
+
+/* Device names accepted by lpc_device_parse(), indexed by UART unit. */
+static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
+
+/*
+ * Parse an LPC device configuration string of the form:
+ *
+ *	<lpc_device_name>[,<options>]
+ *
+ * e.g. "com1,stdio".  The device name is matched case-insensitively
+ * against lpc_uart_names[]; on a match the text after the first comma (or
+ * NULL if there is none) is recorded as that UART's backend options.
+ *
+ * Returns 0 on success and -1 if the name is unknown or memory could not
+ * be allocated.
+ */
+int
+lpc_device_parse(const char *opts)
+{
+	int unit, error;
+	char *str, *cpy, *lpcdev;
+
+	error = -1;
+	str = cpy = strdup(opts);
+	if (cpy == NULL)
+		return (error);
+
+	lpcdev = strsep(&str, ",");
+	if (lpcdev != NULL) {
+		for (unit = 0; unit < LPC_UART_NUM; unit++) {
+			if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) {
+				lpc_uart_softc[unit].opts = str;
+				error = 0;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * On success the recorded options point into 'cpy', so the copy must
+	 * stay allocated.  It is released on failure and also when there
+	 * were no options (str == NULL), since nothing refers to it then.
+	 */
+	if (error || str == NULL)
+		free(cpy);
+
+	return (error);
+}
+
+/*
+ * Interrupt-assert callback handed to uart_init(): pulses the UART's IRQ
+ * through the LPC bridge's VM context.  'arg' is the struct lpc_uart_softc.
+ */
+static void
+lpc_uart_intr_assert(void *arg)
+{
+	struct lpc_uart_softc *sc;
+	int irq;
+
+	sc = arg;
+	irq = sc->irq;
+	assert(irq >= 0);
+
+	vm_isa_pulse_irq(lpc_bridge->pi_vmctx, irq, irq);
+}
+
+/*
+ * Interrupt-deassert callback handed to uart_init().  Intentionally empty.
+ */
+static void
+lpc_uart_intr_deassert(void *arg)
+{
+	/*
+	 * The COM devices on the LPC bus generate edge triggered interrupts
+	 * (see the pulse in lpc_uart_intr_assert()), so there is nothing
+	 * more to do here.
+	 */
+}
+
+/*
+ * I/O port handler for an LPC UART.  'arg' is the struct lpc_uart_softc.
+ *
+ * 1- and 2-byte accesses are supported; a 2-byte access is split into two
+ * consecutive register accesses, low byte first.  Any other access size
+ * returns -1; supported sizes return 0.
+ */
+static int
+lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		    uint32_t *eax, void *arg)
+{
+	struct lpc_uart_softc *sc = arg;
+	int offset = port - sc->iobase;
+	uint32_t val;
+
+	if (bytes != 1 && bytes != 2)
+		return (-1);
+
+	if (in) {
+		val = uart_read(sc->uart_softc, offset);
+		if (bytes == 2)
+			val |= uart_read(sc->uart_softc, offset + 1) << 8;
+		*eax = val;
+	} else {
+		uart_write(sc->uart_softc, offset, *eax);
+		if (bytes == 2)
+			uart_write(sc->uart_softc, offset + 1, *eax >> 8);
+	}
+
+	return (0);
+}
+
+/*
+ * Bring up the LPC UARTs.  For each unit this allocates the legacy I/O base
+ * and IRQ, reserves the IRQ so it is not used for PIRQ routing, creates the
+ * UART, attaches the configured backend and registers the I/O port handler.
+ *
+ * Returns 0 on success, or -1 as soon as resource allocation or backend
+ * setup fails for any unit.
+ */
+static int
+lpc_init(void)
+{
+	struct inout_port iop;
+	struct lpc_uart_softc *sc;
+	int unit, error;
+
+	/* COM1 and COM2 */
+	for (unit = 0; unit < LPC_UART_NUM; unit++) {
+		const char *name = lpc_uart_names[unit];
+
+		sc = &lpc_uart_softc[unit];
+
+		if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) {
+			fprintf(stderr, "Unable to allocate resources for "
+			    "LPC device %s\n", name);
+			return (-1);
+		}
+		pci_irq_reserve(sc->irq);
+
+		sc->uart_softc = uart_init(lpc_uart_intr_assert,
+		    lpc_uart_intr_deassert, sc);
+		if (uart_set_backend(sc->uart_softc, sc->opts) != 0) {
+			fprintf(stderr, "Unable to initialize backend '%s' "
+			    "for LPC device %s\n", sc->opts, name);
+			return (-1);
+		}
+
+		/* Describe the UART's port range and hook up the handler. */
+		memset(&iop, 0, sizeof (iop));
+		iop.name = name;
+		iop.port = sc->iobase;
+		iop.size = UART_IO_BAR_SIZE;
+		iop.flags = IOPORT_F_INOUT;
+		iop.handler = lpc_uart_io_handler;
+		iop.arg = sc;
+
+		error = register_inout(&iop);
+		assert(error == 0);
+
+		sc->enabled = 1;
+	}
+
+	return (0);
+}
+
+#ifdef	__FreeBSD__
+static void
+pci_lpc_write_dsdt(struct pci_devinst *pi)
+{
+	struct lpc_dsdt **ldpp, *ldp;
+
+	dsdt_line("");
+	dsdt_line("Device (ISA)");
+	dsdt_line("{");
+	dsdt_line("  Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func);
+	dsdt_line("  OperationRegion (LPCR, PCI_Config, 0x00, 0x100)");
+	dsdt_line("  Field (LPCR, AnyAcc, NoLock, Preserve)");
+	dsdt_line("  {");
+	dsdt_line("    Offset (0x60),");
+	dsdt_line("    PIRA,   8,");
+	dsdt_line("    PIRB,   8,");
+	dsdt_line("    PIRC,   8,");
+	dsdt_line("    PIRD,   8,");
+	dsdt_line("    Offset (0x68),");
+	dsdt_line("    PIRE,   8,");
+	dsdt_line("    PIRF,   8,");
+	dsdt_line("    PIRG,   8,");
+	dsdt_line("    PIRH,   8");
+	dsdt_line("  }");
+	dsdt_line("");
+
+	dsdt_indent(1);
+	SET_FOREACH(ldpp, lpc_dsdt_set) {
+		ldp = *ldpp;
+		ldp->handler();
+	}
+
+	dsdt_line("");
+	dsdt_line("Device (PIC)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0000\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_ICU1, 2);
+	dsdt_fixed_ioport(IO_ICU2, 2);
+	dsdt_fixed_irq(2);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+
+	dsdt_line("");
+	dsdt_line("Device (TIMR)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0100\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_TIMER1_PORT, 4);
+	dsdt_fixed_irq(0);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+	dsdt_unindent(1);
+
+	dsdt_line("}");
+}
+
+static void
+pci_lpc_sysres_dsdt(void)
+{
+	struct lpc_sysres **lspp, *lsp;
+
+	dsdt_line("");
+	dsdt_line("Device (SIO)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0C02\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+
+	dsdt_indent(2);
+	SET_FOREACH(lspp, lpc_sysres_set) {
+		lsp = *lspp;
+		switch (lsp->type) {
+		case LPC_SYSRES_IO:
+			dsdt_fixed_ioport(lsp->base, lsp->length);
+			break;
+		case LPC_SYSRES_MEM:
+			dsdt_fixed_mem32(lsp->base, lsp->length);
+			break;
+		}
+	}
+	dsdt_unindent(2);
+
+	dsdt_line("  })");
+	dsdt_line("}");
+}
+LPC_DSDT(pci_lpc_sysres_dsdt);
+
+static void
+pci_lpc_uart_dsdt(void)
+{
+	struct lpc_uart_softc *sc;
+	int unit;
+
+	for (unit = 0; unit < LPC_UART_NUM; unit++) {
+		sc = &lpc_uart_softc[unit];
+		if (!sc->enabled)
+			continue;
+		dsdt_line("");
+		dsdt_line("Device (%s)", lpc_uart_names[unit]);
+		dsdt_line("{");
+		dsdt_line("  Name (_HID, EisaId (\"PNP0501\"))");
+		dsdt_line("  Name (_UID, %d)", unit + 1);
+		dsdt_line("  Name (_CRS, ResourceTemplate ()");
+		dsdt_line("  {");
+		dsdt_indent(2);
+		dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE);
+		dsdt_fixed_irq(sc->irq);
+		dsdt_unindent(2);
+		dsdt_line("  })");
+		dsdt_line("}");
+	}
+}
+LPC_DSDT(pci_lpc_uart_dsdt);
+#endif
+
+/*
+ * Config-space write callback for the LPC bridge.
+ *
+ * Single-byte writes to offsets 0x60-0x63 and 0x68-0x6b update PIRQ pins
+ * 1-4 and 5-8 respectively; the value actually stored by pirq_write() is
+ * then mirrored into the bridge's config space.  Returns 0 when the write
+ * was consumed here and -1 for every other access.
+ */
+static int
+pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		  int coff, int bytes, uint32_t val)
+{
+	int pirq_pin = 0;
+
+	if (bytes != 1)
+		return (-1);
+
+	if (coff >= 0x60 && coff <= 0x63)
+		pirq_pin = coff - 0x60 + 1;
+	else if (coff >= 0x68 && coff <= 0x6b)
+		pirq_pin = coff - 0x68 + 5;
+
+	if (pirq_pin == 0)
+		return (-1);
+
+	pirq_write(ctx, pirq_pin, val);
+	pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin));
+	return (0);
+}
+
+/*
+ * BAR write callback for the LPC bridge.  Intentionally a no-op: writes
+ * are discarded.
+ */
+static void
+pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	       int baridx, uint64_t offset, int size, uint64_t value)
+{
+}
+
+/*
+ * BAR read callback for the LPC bridge.  Every read returns 0.
+ */
+static uint64_t
+pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	      int baridx, uint64_t offset, int size)
+{
+	return (0);
+}
+
+#define	LPC_DEV		0x7000
+#define	LPC_VENDOR	0x8086
+
+/*
+ * Instance-creation callback for the LPC bridge.
+ *
+ * Fails with -1 if a bridge already exists, if the device is not on bus 0,
+ * or if lpc_init() fails.  Otherwise fills in the vendor/device IDs and the
+ * bridge/ISA class codes, records the instance in lpc_bridge and returns 0.
+ */
+static int
+pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	/* Do not allow more than one LPC bridge to be configured. */
+	if (lpc_bridge != NULL) {
+		fprintf(stderr, "Only one LPC bridge is allowed.\n");
+		return (-1);
+	}
+
+	/*
+	 * The LPC bridge may only live on bus 0.  That keeps the ACPI DSDT
+	 * simple, because it can then provide a decode for all legacy i/o
+	 * ports behind bus 0.
+	 */
+	if (pi->pi_bus != 0) {
+		fprintf(stderr, "LPC bridge can be present only on bus 0.\n");
+		return (-1);
+	}
+
+	if (lpc_init() != 0)
+		return (-1);
+
+	/* Identification, then classification, of the bridge. */
+	pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR);
+	pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
+	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+
+	lpc_bridge = pi;
+	return (0);
+}
+
+/*
+ * Build the ACPI path of the link device for 1-based PIRQ pin 'pin'
+ * (pin 1 maps to LNKA), including a trailing comma.
+ *
+ * Returns a string allocated by asprintf() that the caller must free(), or
+ * NULL if no LPC bridge is configured or the allocation fails.
+ */
+char *
+lpc_pirq_name(int pin)
+{
+	char *name;
+
+	if (lpc_bridge == NULL)
+		return (NULL);
+	/* On failure asprintf() leaves 'name' undefined, so never return it. */
+	if (asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1) < 0)
+		return (NULL);
+	return (name);
+}
+
+/*
+ * Copy the current PIRQ register values into the LPC bridge's config
+ * space: pins 1-4 to offsets 0x60-0x63 and pins 5-8 to offsets 0x68-0x6b.
+ * Does nothing when no LPC bridge is configured.
+ */
+void
+lpc_pirq_routed(void)
+{
+	int pin, coff;
+
+	if (lpc_bridge == NULL)
+		return;
+
+	for (pin = 1; pin <= 8; pin++) {
+		coff = (pin <= 4) ? 0x60 + (pin - 1) : 0x68 + (pin - 5);
+		pci_set_cfgdata8(lpc_bridge, coff, pirq_read(pin));
+	}
+}
+
+struct pci_devemu pci_de_lpc = {
+	.pe_emu =	"lpc",
+	.pe_init =	pci_lpc_init,
+#ifdef	__FreeBSD__
+	.pe_write_dsdt = pci_lpc_write_dsdt,
+#endif
+	.pe_cfgwrite =	pci_lpc_cfgwrite,
+	.pe_barwrite =	pci_lpc_write,
+	.pe_barread =	pci_lpc_read
+};
+PCI_EMUL_SET(pci_de_lpc);
diff --git a/usr/src/cmd/bhyve/pci_lpc.h b/usr/src/cmd/bhyve/pci_lpc.h
new file mode 100644
index 0000000000..4f725b1dd3
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_lpc.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_lpc.h 266125 2014-05-15 14:16:55Z jhb $
+ */
+
+#ifndef _LPC_H_
+#define	_LPC_H_
+
+#include <sys/linker_set.h>
+
+/* Signature of a callback registered through LPC_DSDT(). */
+typedef void (*lpc_write_dsdt_t)(void);
+
+/* One entry of the lpc_dsdt_set linker set. */
+struct lpc_dsdt {
+	lpc_write_dsdt_t handler;
+};
+
+/*
+ * Define a file-local struct lpc_dsdt wrapping "handler" and add it to the
+ * lpc_dsdt_set linker set.  The object's name is derived from __LINE__, so
+ * at most one use per source line is possible.
+ */
+#define	LPC_DSDT(handler)						\
+	static struct lpc_dsdt __CONCAT(__lpc_dsdt, __LINE__) = {	\
+		(handler),						\
+	};								\
+	DATA_SET(lpc_dsdt_set, __CONCAT(__lpc_dsdt, __LINE__))
+
+/* Kind of resource described by a struct lpc_sysres. */
+enum lpc_sysres_type {
+	LPC_SYSRES_IO,
+	LPC_SYSRES_MEM
+};
+
+/* One entry of the lpc_sysres_set linker set: a typed base/length range. */
+struct lpc_sysres {
+	enum lpc_sysres_type type;
+	uint32_t base;
+	uint32_t length;
+};
+
+/*
+ * Define a file-local struct lpc_sysres and add it to the lpc_sysres_set
+ * linker set.  The object's name is derived from __LINE__, so at most one
+ * use per source line is possible.
+ */
+#define	LPC_SYSRES(type, base, length)					\
+	static struct lpc_sysres __CONCAT(__lpc_sysres, __LINE__) = {	\
+		(type),							\
+		(base),							\
+		(length)						\
+	};								\
+	DATA_SET(lpc_sysres_set, __CONCAT(__lpc_sysres, __LINE__))
+
+/* Convenience wrappers that fix the resource type. */
+#define	SYSRES_IO(base, length)		LPC_SYSRES(LPC_SYSRES_IO, base, length)
+#define	SYSRES_MEM(base, length)	LPC_SYSRES(LPC_SYSRES_MEM, base, length)
+
+int	lpc_device_parse(const char *opt);
+/* ACPI link name for PIRQ "pin"; NULL when no LPC bridge is configured. */
+char	*lpc_pirq_name(int pin);
+/* Copy the current PIRQ routing into the LPC bridge's config space. */
+void	lpc_pirq_routed(void);
+
+#endif
diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c
new file mode 100644
index 0000000000..65e2d9c57d
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_block.c
@@ -0,0 +1,392 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 266935 2014-06-01 02:47:09Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_block.c 266935 2014-06-01 02:47:09Z neel $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <md5.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+/* Size assigned to the single virtqueue (vq_qsize). */
+#define VTBLK_RINGSZ	64
+
+/*
+ * Maximum number of data segments per request.  Advertised to the guest
+ * in vbc_seg_max and used to size the per-request iovec array.
+ */
+#ifdef	__FreeBSD__
+#define VTBLK_MAXSEGS	32
+#else
+#define	VTBLK_MAXSEGS	16
+#endif
+
+/* Values written to the request's one-byte status descriptor. */
+#define VTBLK_S_OK	0
+#define VTBLK_S_IOERR	1
+#define	VTBLK_S_UNSUPP	2
+
+/* Size of the device identifier buffer (vbsc_ident). */
+#define	VTBLK_BLK_ID_BYTES	20
+
+/* Capability bits */
+#define	VTBLK_F_SEG_MAX		(1 << 2)	/* Maximum request segments */
+#define	VTBLK_F_BLK_SIZE       	(1 << 6)	/* cfg block size valid */
+
+/*
+ * Host capabilities
+ */
+#define VTBLK_S_HOSTCAPS      \
+  ( VTBLK_F_SEG_MAX  |						    \
+    VTBLK_F_BLK_SIZE |						    \
+    VIRTIO_RING_F_INDIRECT_DESC )	/* indirect descriptors */
+
+/*
+ * Config space "registers"
+ *
+ * The structure is packed and is copied out byte-wise by
+ * pci_vtblk_cfgread(), so the field order and sizes must not change.
+ */
+struct vtblk_config {
+	uint64_t	vbc_capacity;	/* backing size in DEV_BSIZE units */
+	uint32_t	vbc_size_max;	/* set to 0 (not negotiated) */
+	uint32_t	vbc_seg_max;	/* set to VTBLK_MAXSEGS */
+	uint16_t	vbc_geom_c;	/* geometry: always 0 (none) */
+	uint8_t		vbc_geom_h;
+	uint8_t		vbc_geom_s;
+	uint32_t	vbc_blk_size;	/* sector size of the backing store */
+	uint32_t	vbc_sectors_max; /* set to 0 */
+} __packed;
+
+/*
+ * Fixed-size block header
+ *
+ * This is the first (read-only) descriptor of every request chain; see
+ * pci_vtblk_proc().  The structure is packed.
+ */
+struct virtio_blk_hdr {
+#define	VBH_OP_READ		0
+#define	VBH_OP_WRITE		1
+#define	VBH_OP_IDENT		8		
+#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
+	uint32_t       	vbh_type;	/* VBH_OP_*, possibly with a flag */
+	uint32_t	vbh_ioprio;	/* not consulted by pci_vtblk_proc() */
+	uint64_t	vbh_sector;	/* start offset in DEV_BSIZE units */
+} __packed;
+
+/*
+ * Debug printf
+ */
+static int pci_vtblk_debug;
+#define DPRINTF(params) if (pci_vtblk_debug) printf params
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtblk_softc {
+	struct virtio_softc vbsc_vs;	/* generic virtio state */
+	pthread_mutex_t vsc_mtx;	/* installed as vbsc_vs.vs_mtx */
+	struct vqueue_info vbsc_vq;	/* the single request queue */
+	int		vbsc_fd;	/* fd of the backing file/device */
+	struct vtblk_config vbsc_cfg;	/* guest-visible config space */
+	/*
+	 * Identifier string of the form "BHYVE-XXXX-XXXX-XXXX".  That is
+	 * exactly VTBLK_BLK_ID_BYTES characters, so one extra byte is
+	 * needed for the terminating NUL.
+	 */
+	char vbsc_ident[VTBLK_BLK_ID_BYTES + 1];
+};
+
+/* Forward declarations for the callbacks referenced by vtblk_vi_consts. */
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+/*
+ * Constants and callbacks handed to vi_softc_linkup().  The initializer is
+ * positional, so the entries must stay in this order.
+ */
+static struct virtio_consts vtblk_vi_consts = {
+	"vtblk",		/* our name */
+	1,			/* we support 1 virtqueue */
+	sizeof(struct vtblk_config), /* config reg size */
+	pci_vtblk_reset,	/* reset */
+	pci_vtblk_notify,	/* device-wide qnotify */
+	pci_vtblk_cfgread,	/* read PCI config */
+	pci_vtblk_cfgwrite,	/* write PCI config */
+	VTBLK_S_HOSTCAPS,	/* our capabilities */
+};
+
+/*
+ * Reset callback: log the request (debug only) and reset the generic
+ * virtio state of this device.
+ */
+static void
+pci_vtblk_reset(void *vsc)
+{
+	struct pci_vtblk_softc *blk;
+
+	blk = (struct pci_vtblk_softc *)vsc;
+	DPRINTF(("vtblk: device reset requested !\n"));
+	vi_reset_dev(&blk->vbsc_vs);
+}
+
+/*
+ * Process one request chain from the virtqueue.
+ *
+ * The chain is expected to consist of a read-only struct virtio_blk_hdr,
+ * zero or more data segments, and a final one-byte writable status
+ * descriptor.  The request is executed against sc->vbsc_fd, the status
+ * byte is filled in with a VTBLK_S_* value, and the chain is released.
+ * A malformed chain trips an assert().
+ */
+static void
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
+{
+	struct virtio_blk_hdr *vbh;
+	uint8_t *status;
+	int i, n;
+	int err;
+	int iolen;
+	int writeop, type;
+	off_t offset;
+	struct iovec iov[VTBLK_MAXSEGS + 2];
+	uint16_t flags[VTBLK_MAXSEGS + 2];
+
+	n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags);
+
+	/*
+	 * The first descriptor will be the read-only fixed header,
+	 * and the last is for status (hence +2 above and below).
+	 * The remaining iov's are the actual data I/O vectors.
+	 *
+	 * XXX - note - this fails on crash dump, which does a
+	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
+	 */
+	assert(n >= 2 && n <= VTBLK_MAXSEGS + 2);
+
+	assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+	/* The header type is struct virtio_blk_hdr (see its definition). */
+	vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
+
+	/* Peel the status descriptor off the end; n now counts hdr + data. */
+	status = iov[--n].iov_base;
+	assert(iov[n].iov_len == 1);
+	assert(flags[n] & VRING_DESC_F_WRITE);
+
+	/*
+	 * XXX
+	 * The guest should not be setting the BARRIER flag because
+	 * we don't advertise the capability.
+	 */
+	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
+	writeop = (type == VBH_OP_WRITE);
+
+	offset = vbh->vbh_sector * DEV_BSIZE;
+
+	iolen = 0;
+	for (i = 1; i < n; i++) {
+		/*
+		 * - write op implies read-only descriptor,
+		 * - read/ident op implies write-only descriptor,
+		 * therefore test the inverse of the descriptor bit
+		 * to the op.
+		 */
+		assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+		iolen += iov[i].iov_len;
+	}
+
+	DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
+		 writeop ? "write" : "read/ident", iolen, i - 1, offset));
+
+	/* After the loop i == n, so (i - 1) is the number of data segments. */
+	switch (type) {
+	case VBH_OP_WRITE:
+		err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset);
+		break;
+	case VBH_OP_READ:
+		err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset);
+		break;
+	case VBH_OP_IDENT:
+		/* Assume a single buffer */
+		strlcpy(iov[1].iov_base, sc->vbsc_ident,
+		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
+		err = 0;
+		break;
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	/* convert errno into a virtio block error return */
+	if (err < 0) {
+		if (err == -ENOSYS)
+			*status = VTBLK_S_UNSUPP;
+		else
+			*status = VTBLK_S_IOERR;
+	} else
+		*status = VTBLK_S_OK;
+
+	/*
+	 * Return the descriptor back to the host.
+	 * We wrote 1 byte (our status) to host.
+	 */
+	vq_relchain(vq, 1);
+}
+
+/*
+ * Queue-notify callback: drain every pending request chain from the
+ * queue, then finish the batch (raising an interrupt if appropriate).
+ */
+static void
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtblk_softc *blk = vsc;
+
+	vq_startchains(vq);
+	for (; vq_has_descs(vq); )
+		pci_vtblk_proc(blk, vq);
+	vq_endchains(vq, 1);	/* Generate interrupt if appropriate. */
+}
+
+/*
+ * pe_init handler for "virtio-blk".
+ *
+ * "opts" is the path of the backing file or device and is required.  The
+ * backing store is opened read/write, a softc is allocated and linked to
+ * the virtio framework, an identifier is derived from the MD5 of the path,
+ * and the virtio/PCI config spaces are populated.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int
+pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct stat sbuf;
+	MD5_CTX mdctx;
+	u_char digest[16];
+	struct pci_vtblk_softc *sc;
+	off_t size;
+	int fd;
+	int sectsz;
+
+	if (opts == NULL) {
+		printf("virtio-block: backing device required\n");
+		return (1);
+	}
+
+	/*
+	 * The supplied backing file has to exist
+	 */
+	fd = open(opts, O_RDWR);
+	if (fd < 0) {
+		perror("Could not open backing file");
+		return (1);
+	}
+
+	if (fstat(fd, &sbuf) < 0) {
+		perror("Could not stat backing file");
+		close(fd);
+		return (1);
+	}
+
+	/*
+	 * Deal with raw devices
+	 */
+	size = sbuf.st_size;
+	sectsz = DEV_BSIZE;
+#ifdef	__FreeBSD__
+	if (S_ISCHR(sbuf.st_mode)) {
+		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
+		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
+			perror("Could not fetch dev blk/sector size");
+			close(fd);
+			return (1);
+		}
+		assert(size != 0);
+		assert(sectsz != 0);
+	}
+#endif
+
+	sc = calloc(1, sizeof(struct pci_vtblk_softc));
+	if (sc == NULL) {
+		/* Don't dereference a failed allocation or leak the fd. */
+		perror("Could not allocate virtio-block softc");
+		close(fd);
+		return (1);
+	}
+
+	/* record fd of storage device/file */
+	sc->vbsc_fd = fd;
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	/* init virtio softc and virtqueues */
+	vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
+
+	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
+	/*
+	 * Create an identifier for the backing file. Use parts of the
+	 * md5 sum of the filename.  The formatted string is 20 characters
+	 * long, so bound the write by the size of the destination rather
+	 * than letting the terminating NUL run past the buffer.
+	 */
+	MD5Init(&mdctx);
+	MD5Update(&mdctx, opts, strlen(opts));
+	MD5Final(digest, &mdctx);
+	(void) snprintf(sc->vbsc_ident, sizeof(sc->vbsc_ident),
+	    "BHYVE-%02X%02X-%02X%02X-%02X%02X",
+	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
+
+	/* setup virtio block config space */
+	sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
+	sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
+	sc->vbsc_cfg.vbc_blk_size = sectsz;
+	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
+	sc->vbsc_cfg.vbc_geom_c = 0;	/* no geometry */
+	sc->vbsc_cfg.vbc_geom_h = 0;
+	sc->vbsc_cfg.vbc_geom_s = 0;
+	sc->vbsc_cfg.vbc_sectors_max = 0;
+
+	/*
+	 * Should we move some of this into virtio.c?  Could
+	 * have the device, class, and subdev_0 as fields in
+	 * the virtio constants structure.
+	 */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+
+	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vbsc_vs, 0);
+	return (0);
+}
+
+/*
+ * Config-space write callback.  The block device's config space is
+ * read-only, so every write is logged (debug only) and refused with 1.
+ */
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+	const int refused = 1;
+
+	DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
+	return (refused);
+}
+
+/*
+ * Config-space read callback: copy "size" bytes starting at "offset"
+ * within the device's struct vtblk_config into *retval.  Always returns 0.
+ */
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vtblk_softc *blk = vsc;
+	uint8_t *cfg = (uint8_t *)&blk->vbsc_cfg;
+
+	/* our caller has already verified offset and size */
+	memcpy(retval, &cfg[offset], size);
+	return (0);
+}
+
+/*
+ * Device-emulation descriptor for "virtio-blk", registered through
+ * PCI_EMUL_SET() below.  BAR accesses are routed to the generic virtio
+ * handlers vi_pci_write()/vi_pci_read().
+ */
+struct pci_devemu pci_de_vblk = {
+	.pe_emu =	"virtio-blk",
+	.pe_init =	pci_vtblk_init,
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
+};
+PCI_EMUL_SET(pci_de_vblk);
diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c
new file mode 100644
index 0000000000..e58bdd0115
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_net.c
@@ -0,0 +1,870 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 253440 2013-07-17 23:37:33Z grehan $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 253440 2013-07-17 23:37:33Z grehan $");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <net/ethernet.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <md5.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#ifndef	__FreeBSD__
+#include <poll.h>
+#include <libdlpi.h>
+#endif
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#ifdef	__FreeBSD__
+#include "mevent.h"
+#endif
+#include "virtio.h"
+
+/* Size assigned to both the RX and the TX virtqueue (vq_qsize). */
+#define VTNET_RINGSZ	1024
+
+/* Maximum number of iovec entries fetched per descriptor chain. */
+#define VTNET_MAXSEGS	32
+
+/*
+ * Host capabilities.  Note that we only offer a few of these.
+ */
+#define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
+#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
+#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
+#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
+#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
+#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
+#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
+#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
+#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
+#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
+#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
+#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
+#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
+#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
+#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
+#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
+#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
+#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
+				(1 << 21) /* guest can send gratuitous pkts */
+
+/* The subset of the bits above that this device actually offers. */
+#define VTNET_S_HOSTCAPS      \
+  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
+    VIRTIO_F_NOTIFY_ON_EMPTY)
+
+/*
+ * PCI config-space "registers"
+ *
+ * The structure is packed; its size is reported to the virtio framework
+ * as the config register size.
+ */
+struct virtio_net_config {
+	uint8_t  mac[6];
+	uint16_t status;
+} __packed;
+
+/*
+ * Queue definitions.
+ *
+ * These are indices into pci_vtnet_softc.vsc_queues[].  Only VTNET_MAXQ - 1
+ * queues are instantiated, so VTNET_CTLQ is not a valid index today.
+ */
+#define VTNET_RXQ	0
+#define VTNET_TXQ	1
+#define VTNET_CTLQ	2	/* NB: not yet supported */
+
+#define VTNET_MAXQ	3
+
+/*
+ * Fixed network header size
+ *
+ * Header placed at the start of each receive buffer.  pci_vtnet_tap_rx()
+ * zeroes the whole header and then sets vrh_bufs to 1; no other field is
+ * filled in.  The structure is packed.
+ */
+struct virtio_net_rxhdr {
+	uint8_t		vrh_flags;
+	uint8_t		vrh_gso_type;
+	uint16_t	vrh_hdr_len;
+	uint16_t	vrh_gso_size;
+	uint16_t	vrh_csum_start;
+	uint16_t	vrh_csum_offset;
+	uint16_t	vrh_bufs;
+} __packed;
+
+/*
+ * Debug printf
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) if (pci_vtnet_debug) printf params
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+	struct virtio_softc vsc_vs;	/* generic virtio state */
+	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];	/* RX and TX queues */
+	pthread_mutex_t vsc_mtx;
+	struct mevent	*vsc_mevp;
+
+	/*
+	 * Backend handle.  The guard must be __FreeBSD__ (with trailing
+	 * underscores) to match every other conditional in this file;
+	 * otherwise a FreeBSD build would select the DLPI members while
+	 * the rest of the code references vsc_tapfd.
+	 */
+#ifdef	__FreeBSD__
+	int		vsc_tapfd;
+#else
+	dlpi_handle_t	vsc_dhp;
+	int		vsc_dlpifd;
+#endif
+	int		vsc_rx_ready;	/* set on first RX queue notify */
+	volatile int	resetting;	/* set and checked outside lock */
+
+	uint32_t	vsc_features;
+	struct virtio_net_config vsc_config;
+
+	pthread_mutex_t	rx_mtx;
+	int		rx_in_progress;
+
+	pthread_t 	tx_tid;
+	pthread_mutex_t	tx_mtx;
+	pthread_cond_t	tx_cond;
+	int		tx_in_progress;
+};
+
+/* Forward declarations for the callbacks referenced by vtnet_vi_consts. */
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+
+/*
+ * Constants and callbacks handed to vi_softc_linkup().  The initializer is
+ * positional, so the entries must stay in this order.
+ */
+static struct virtio_consts vtnet_vi_consts = {
+	"vtnet",		/* our name */
+	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
+	sizeof(struct virtio_net_config), /* config reg size */
+	pci_vtnet_reset,	/* reset */
+	NULL,			/* device-wide qnotify -- not used */
+	pci_vtnet_cfgread,	/* read PCI config */
+	pci_vtnet_cfgwrite,	/* write PCI config */
+	VTNET_S_HOSTCAPS,	/* our capabilities */
+};
+
+/*
+ * Block the caller until the transmit thread is idle.  tx_in_progress is
+ * sampled under tx_mtx; while it is set we sleep 10ms and look again.
+ */
+static void
+pci_vtnet_txwait(struct pci_vtnet_softc *sc)
+{
+	int busy;
+
+	for (;;) {
+		pthread_mutex_lock(&sc->tx_mtx);
+		busy = sc->tx_in_progress;
+		pthread_mutex_unlock(&sc->tx_mtx);
+
+		if (!busy)
+			break;
+		usleep(10000);
+	}
+}
+
+/*
+ * Block the caller until the receive path is idle.  rx_in_progress is
+ * sampled under rx_mtx; while it is set we sleep 10ms and look again.
+ */
+static void
+pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
+{
+	int busy;
+
+	for (;;) {
+		pthread_mutex_lock(&sc->rx_mtx);
+		busy = sc->rx_in_progress;
+		pthread_mutex_unlock(&sc->rx_mtx);
+
+		if (!busy)
+			break;
+		usleep(10000);
+	}
+}
+
+/*
+ * Reset callback.  Raises the "resetting" flag so the RX and TX paths
+ * stop picking up new work, waits for both to go idle, clears the
+ * rx-ready state, resets the generic virtio state and finally drops the
+ * flag again.  The order of these steps matters.
+ */
+static void
+pci_vtnet_reset(void *vsc)
+{
+	struct pci_vtnet_softc *sc = vsc;
+
+	DPRINTF(("vtnet: device reset requested !\n"));
+
+	/* Checked (without a lock) by pci_vtnet_tap_rx() and the tx thread. */
+	sc->resetting = 1;
+
+	/*
+	 * Wait for the transmit and receive threads to finish their
+	 * processing.
+	 */
+	pci_vtnet_txwait(sc);
+	pci_vtnet_rxwait(sc);
+
+	/* RX stays disabled until the guest notifies the RX queue again. */
+	sc->vsc_rx_ready = 0;
+
+	/* now reset rings, MSI-X vectors, and negotiated capabilities */
+	vi_reset_dev(&sc->vsc_vs);
+
+	sc->resetting = 0;
+}
+
+/*
+ * Called to send a buffer chain out to the tap device
+ *
+ * "iov"/"iovcnt" describe the packet data (without the virtio header) and
+ * "len" is the total number of bytes they hold.
+ */
+#ifdef	__FreeBSD__
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		 int len)
+{
+	static char pad[60]; /* all zero bytes */
+
+	if (sc->vsc_tapfd == -1)
+		return;
+
+	/*
+	 * If the length is < 60, pad out to that and add the
+	 * extra zero'd segment to the iov. It is guaranteed that
+	 * there is always an extra iov available by the caller.
+	 */
+	if (len < 60) {
+		iov[iovcnt].iov_base = pad;
+		iov[iovcnt].iov_len = 60 - len;
+		iovcnt++;
+	}
+	(void) writev(sc->vsc_tapfd, iov, iovcnt);
+}
+#else
+static void
+pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		 int len)
+{
+	int i;
+
+	/*
+	 * No destination address is supplied, so the address length
+	 * argument (a size_t, not a pointer) is 0.
+	 *
+	 * NOTE(review): each segment is handed to dlpi_send() as its own
+	 * message; confirm that a packet spanning several segments is not
+	 * put on the wire as several frames.
+	 */
+	for (i = 0; i < iovcnt; i++) {
+		(void) dlpi_send(sc->vsc_dhp, NULL, 0,
+				 iov[i].iov_base, iov[i].iov_len, NULL);
+	}
+}
+#endif
+
+#ifdef	__FreeBSD__
+/*
+ *  Called when there is read activity on the tap file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ *  MP note: the dummybuf is only used for discarding frames, so there
+ * is no need for it to be per-vtnet or locked.
+ */
+static uint8_t dummybuf[2048];
+#endif
+
+/*
+ * Move received packets from the backend (tap fd on FreeBSD, DLPI handle
+ * otherwise) into guest receive buffers.
+ *
+ * Returns early, without touching the ring, while the guest has not yet
+ * kicked the RX queue or a reset is in progress.  Otherwise descriptor
+ * chains are filled until either the backend has no more data or the
+ * ring has no more chains, and the guest is interrupted as appropriate.
+ */
+static void
+pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
+{
+	struct vqueue_info *vq;
+	struct virtio_net_rxhdr *vrx;
+	uint8_t *buf;
+	struct iovec iov[VTNET_MAXSEGS];
+#ifdef	__FreeBSD__
+	int len;
+#else
+	size_t len;
+	int ret;
+#endif
+	int total_len = 0;
+
+	/*
+	 * Should never be called without a valid tap fd
+	 */
+#ifdef	__FreeBSD__
+	assert(sc->vsc_tapfd != -1);
+#else
+	assert(sc->vsc_dlpifd != -1);
+#endif
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up or the guest is resetting the device.
+	 */
+	if (!sc->vsc_rx_ready || sc->resetting) {
+#ifdef	__FreeBSD__
+		/*
+		 * Drop the packet and try later.
+		 */
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+#endif
+		return;
+	}
+
+	/*
+	 * Check for available rx buffers
+	 */
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	vq_startchains(vq);
+	if (!vq_has_descs(vq)) {
+		/*
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
+		 */
+#ifdef	__FreeBSD__
+		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
+#endif
+		vq_endchains(vq, 1);
+		return;
+	}
+
+	do {
+		/*
+		 * Get descriptor chain
+		 */
+		if (sc->vsc_vs.vs_negotiated_caps & VIRTIO_NET_F_MRG_RXBUF) {
+			int nsegs;
+
+			/*
+			 * Fetch the chain outside of assert() so that the
+			 * call is not compiled away when NDEBUG is defined.
+			 */
+			nsegs = vq_getchain(vq, iov, 1, NULL);
+			assert(nsegs == 1);
+			(void) nsegs;
+
+			/*
+			 * Get a pointer to the rx header, and use the
+			 * data immediately following it for the packet buffer.
+			 */
+			vrx = (struct virtio_net_rxhdr *)iov[0].iov_base;
+			buf = (uint8_t *)(vrx + 1);
+			/*
+			 * NOTE(review): the chain is later released with the
+			 * full buffer length rather than the number of bytes
+			 * actually received; confirm this is intended.
+			 */
+			total_len = iov[0].iov_len;
+#ifdef	__FreeBSD__
+			len = read(sc->vsc_tapfd, buf,
+			   iov[0].iov_len - sizeof(struct virtio_net_rxhdr));
+
+			if (len < 0 && errno == EWOULDBLOCK) {
+				/*
+				 * No more packets, but still some avail ring
+				 * entries.  Interrupt if needed/appropriate.
+				 */
+				vq_endchains(vq, 0);
+				return;
+			}
+#else
+			len = iov[0].iov_len - sizeof(struct virtio_net_rxhdr);
+			ret = dlpi_recv(sc->vsc_dhp, NULL, NULL, buf,
+			    &len, 0, NULL);
+			if (ret != DLPI_SUCCESS) {
+				/*
+				 * No more packets, but still some avail ring
+				 * entries.  Interrupt if needed/appropriate.
+				 */
+				vq_endchains(vq, 0);
+				return;
+			}
+#endif
+		} else {
+			int i;
+			int num_segs;
+			num_segs = vq_getchain(vq, iov,
+			    VTNET_MAXSEGS, NULL);
+			/* The rx header type is struct virtio_net_rxhdr. */
+			vrx = (struct virtio_net_rxhdr *)iov[0].iov_base;
+			total_len = iov[0].iov_len;
+			for (i = 1; i < num_segs; i++) {
+				buf = (uint8_t *)iov[i].iov_base;
+				total_len += iov[i].iov_len;
+#ifdef __FreeBSD__
+				len = read(sc->vsc_tapfd, buf, iov[i].iov_len);
+				if (len < 0 && errno == EWOULDBLOCK) {
+					/*
+					 * No more packets,
+					 * but still some avail ring entries.
+					 * Interrupt if needed/appropriate.
+					 */
+					break;
+				}
+#else
+				len = iov[i].iov_len;
+				ret = dlpi_recv(sc->vsc_dhp, NULL, NULL, buf,
+				    &len, 0, NULL);
+				if (ret != DLPI_SUCCESS) {
+					/*
+					 * No more packets,
+					 * but still some avail ring entries.
+					 * Interrupt if needed/appropriate.
+					 */
+					total_len = 0;
+					break;
+				}
+#endif
+			}
+			if (total_len == 0) {
+				vq_endchains(vq, 0);
+				return;
+			}
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers, which is always 1 without TSO
+		 * support.
+		 */
+		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
+		vrx->vrh_bufs = 1;
+
+		/*
+		 * Release this chain and handle more chains.
+		 */
+		vq_relchain(vq, total_len);
+	} while (vq_has_descs(vq));
+
+	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_endchains(vq, 1);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * mevent callback for read activity on the tap fd.  Runs the receive
+ * path with rx_mtx held and rx_in_progress set for its duration.
+ */
+static void
+pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+
+	pthread_mutex_lock(&sc->rx_mtx);
+	sc->rx_in_progress = 1;
+	pci_vtnet_tap_rx(sc);
+	sc->rx_in_progress = 0;
+	pthread_mutex_unlock(&sc->rx_mtx);
+
+}
+#else
+/*
+ * Receive thread for the DLPI backend.  Polls the DLPI fd forever and
+ * runs the receive path, with vsc_mtx held, whenever poll() returns
+ * without error.  Never returns.
+ *
+ * NOTE(review): unlike the FreeBSD callback this path takes vsc_mtx and
+ * does not touch rx_mtx/rx_in_progress, so pci_vtnet_rxwait() does not
+ * observe it; confirm the reset path is serialized by vsc_mtx instead.
+ */
+static void *
+pci_vtnet_poll_thread(void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+	pollfd_t pollset;
+
+	pollset.fd = sc->vsc_dlpifd;
+	pollset.events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND;
+
+	for (;;) {
+		if (poll(&pollset, 1, -1) < 0) {
+			/* EINTR is silent; other errors are logged, then retried. */
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, "pci_vtnet_poll_thread poll() error %d\n", errno);
+			continue;
+		}
+		pthread_mutex_lock(&sc->vsc_mtx);
+		pci_vtnet_tap_rx(sc);
+		pthread_mutex_unlock(&sc->vsc_mtx);
+	}
+}
+#endif
+
+/*
+ * RX queue notify callback.  The first notification from the guest marks
+ * the receive side as ready; later notifications change nothing.
+ */
+static void
+pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtnet_softc *net = vsc;
+
+	/* A qnotify means that the rx process can now begin */
+	if (!net->vsc_rx_ready)
+		net->vsc_rx_ready = 1;
+}
+
+/*
+ * Transmit one descriptor chain.  The first descriptor is the virtio
+ * header and is not sent; the remaining descriptors hold the packet.
+ * The chain is released with the total length of header plus packet.
+ */
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
+{
+	struct iovec iov[VTNET_MAXSEGS + 1];
+	int nsegs, seg;
+	int payload = 0;
+	int xfer;
+
+	nsegs = vq_getchain(vq, iov, VTNET_MAXSEGS, NULL);
+	assert(nsegs >= 1 && nsegs <= VTNET_MAXSEGS);
+
+	/* Packet length excludes the header; transfer length includes it. */
+	for (seg = 1; seg < nsegs; seg++)
+		payload += iov[seg].iov_len;
+	xfer = iov[0].iov_len + payload;
+
+	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", payload, nsegs));
+	pci_vtnet_tap_tx(sc, &iov[1], nsegs - 1, payload);
+
+	/* chain is processed, release it and set tlen */
+	vq_relchain(vq, xfer);
+}
+
+/*
+ * TX queue notify callback.  If the ring holds work and the transmit
+ * thread is currently idle, wake it up.
+ */
+static void
+pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
+{
+	struct pci_vtnet_softc *net = vsc;
+
+	/* Any ring entries to process? */
+	if (vq_has_descs(vq)) {
+		/* Signal the tx thread for processing */
+		pthread_mutex_lock(&net->tx_mtx);
+		if (!net->tx_in_progress)
+			pthread_cond_signal(&net->tx_cond);
+		pthread_mutex_unlock(&net->tx_mtx);
+	}
+}
+
+/*
+ * Thread which will handle processing of TX desc
+ *
+ * The thread sleeps on tx_cond until pci_vtnet_ping_txq() signals it,
+ * then drains the TX queue with tx_mtx dropped and tx_in_progress set.
+ * While a reset is in progress it treats the queue as empty.  Never
+ * returns.
+ */
+static void *
+pci_vtnet_tx_thread(void *param)
+{
+	struct pci_vtnet_softc *sc = param;
+	struct vqueue_info *vq;
+	int have_work, error;
+
+	vq = &sc->vsc_queues[VTNET_TXQ];
+
+	/*
+	 * Let us wait till the tx queue pointers get initialised &
+	 * first tx signaled
+	 */
+	pthread_mutex_lock(&sc->tx_mtx);
+	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+	assert(error == 0);
+
+	for (;;) {
+		/* note - tx mutex is locked here */
+		do {
+			if (sc->resetting)
+				have_work = 0;
+			else
+				have_work = vq_has_descs(vq);
+
+			if (!have_work) {
+				/* Go idle; pci_vtnet_txwait() polls this flag. */
+				sc->tx_in_progress = 0;
+				error = pthread_cond_wait(&sc->tx_cond,
+							  &sc->tx_mtx);
+				assert(error == 0);
+			}
+		} while (!have_work);
+		/* Mark busy before dropping the lock to process the ring. */
+		sc->tx_in_progress = 1;
+		pthread_mutex_unlock(&sc->tx_mtx);
+
+		vq_startchains(vq);
+		do {
+			/*
+			 * Run through entries, placing them into
+			 * iovecs and sending when an end-of-packet
+			 * is found
+			 */
+			pci_vtnet_proctx(sc, vq);
+		} while (vq_has_descs(vq));
+
+		/*
+		 * Generate an interrupt if needed.
+		 */
+		vq_endchains(vq, 1);
+
+		/* Re-take the lock so the loop invariant above holds. */
+		pthread_mutex_lock(&sc->tx_mtx);
+	}
+}
+
+#ifdef notyet
+/*
+ * Control queue notify callback.  Only compiled when "notyet" is defined;
+ * it does nothing beyond emitting a debug message.
+ */
+static void
+pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
+{
+
+	DPRINTF(("vtnet: control qnotify!\n\r"));
+}
+#endif
+
+#ifdef	__FreeBSD__
+/*
+ * Parse a "mac=<address>" option.
+ *
+ * "mac_str" is split in place at the first '='.  When the key is "mac"
+ * and a value follows, the value is parsed with ether_aton() and copied
+ * into "mac_addr" (ETHER_ADDR_LEN bytes).  An unparsable, multicast or
+ * all-zero address is rejected with EINVAL.  Any other input is ignored
+ * and 0 is returned without touching "mac_addr".
+ */
+static int
+pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+	struct ether_addr *ea;
+	char *key;
+
+	key = strsep(&mac_str, "=");
+
+	/* Not of the form "mac=<value>": nothing to do. */
+	if (mac_str == NULL || strcmp(key, "mac") != 0)
+		return (0);
+
+	ea = ether_aton(mac_str);
+	if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+	    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+		fprintf(stderr, "Invalid MAC %s\n", mac_str);
+		return (EINVAL);
+	}
+
+	memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+	return (0);
+}
+#endif
+
+
+static int
+pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+#ifdef	__FreeBSD__
+	MD5_CTX mdctx;
+	unsigned char digest[16];
+#else
+	uchar_t physaddr[DLPI_PHYSADDR_MAX];
+	size_t physaddrlen = DLPI_PHYSADDR_MAX;
+	int error;
+#endif
+	char nstr[80];
+	char tname[MAXCOMLEN + 1];
+	struct pci_vtnet_softc *sc;
+	const char *env_msi;
+	char *devname;
+	char *vtopts;
+	int mac_provided;
+	int use_msix;
+
+	sc = malloc(sizeof(struct pci_vtnet_softc));
+	memset(sc, 0, sizeof(struct pci_vtnet_softc));
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
+	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
+	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
+	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
+	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
+#ifdef notyet
+	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
+        sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
+#endif
+ 
+	/*
+	 * Use MSI if set by user
+	 */
+	use_msix = 1;
+	if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) {
+		if (strcasecmp(env_msi, "yes") == 0)
+			use_msix = 0;
+	}
+
+	/*
+	 * Attempt to open the tap device and read the MAC address
+	 * if specified
+	 */
+	mac_provided = 0;
+#ifdef	__FreeBSD__
+	sc->vsc_tapfd = -1;
+#endif
+	if (opts != NULL) {
+		char tbuf[80];
+		int err;
+
+		devname = vtopts = strdup(opts);
+		(void) strsep(&vtopts, ",");
+
+#ifdef	__FreBSD__
+		if (vtopts != NULL) {
+			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
+			if (err != 0) {
+				free(devname);
+				return (err);
+			}
+			mac_provided = 1;
+		}
+#endif
+
+		strcpy(tbuf, "/dev/");
+		strlcat(tbuf, devname, sizeof(tbuf));
+
+		free(devname);
+
+#ifdef	__FreeBSD__
+		sc->vsc_tapfd = open(tbuf, O_RDWR);
+		if (sc->vsc_tapfd == -1) {
+			WPRINTF(("open of tap device %s failed\n", tbuf));
+		} else {
+			/*
+			 * Set non-blocking and register for read
+			 * notifications with the event loop
+			 */
+			int opt = 1;
+			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+				WPRINTF(("tap device O_NONBLOCK failed\n"));
+				close(sc->vsc_tapfd);
+				sc->vsc_tapfd = -1;
+			}
+
+			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+						  EVF_READ,
+						  pci_vtnet_tap_callback,
+						  sc);
+			if (sc->vsc_mevp == NULL) {
+				WPRINTF(("Could not register event\n"));
+				close(sc->vsc_tapfd);
+				sc->vsc_tapfd = -1;
+			}
+		}		
+#else
+		if (dlpi_open(opts, &sc->vsc_dhp, DLPI_RAW) != DLPI_SUCCESS) {
+			 WPRINTF(("open of vnic device %s failed\n", opts));
+		}
+
+		if (dlpi_get_physaddr(sc->vsc_dhp, DL_CURR_PHYS_ADDR, physaddr, &physaddrlen) != DLPI_SUCCESS) {
+			 WPRINTF(("read MAC address of vnic device %s failed\n", opts));
+		}
+		if (physaddrlen != ETHERADDRL) {
+			WPRINTF(("bad MAC address len %d on vnic device %s\n", physaddrlen, opts));
+		}
+		memcpy(sc->vsc_config.mac, physaddr, ETHERADDRL);
+
+		if (dlpi_bind(sc->vsc_dhp, DLPI_ANY_SAP, NULL) != DLPI_SUCCESS) {
+			 WPRINTF(("bind of vnic device %s failed\n", opts));
+		}
+
+		if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_PHYS) != DLPI_SUCCESS) {
+			 WPRINTF(("enable promiscous mode(physical) of vnic device %s failed\n", opts));
+		}
+		if (dlpi_promiscon(sc->vsc_dhp, DL_PROMISC_SAP) != DLPI_SUCCESS) {
+			 WPRINTF(("enable promiscous mode(SAP) of vnic device %s failed\n", opts));
+		}
+
+		sc->vsc_dlpifd = dlpi_fd(sc->vsc_dhp);
+
+		if (fcntl(sc->vsc_dlpifd, F_SETFL, O_NONBLOCK) < 0) {
+			 WPRINTF(("enable O_NONBLOCK of vnic device %s failed\n", opts));
+			 dlpi_close(sc->vsc_dhp);
+			 sc->vsc_dlpifd = -1;
+		}
+
+		error = pthread_create(NULL, NULL, pci_vtnet_poll_thread, sc);
+		assert(error == 0);
+#endif
+	}
+
+#ifdef	__FreeBSD__
+	/*
+	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
+	 * followed by an MD5 of the PCI slot/func number and dev name
+	 */
+	if (!mac_provided) {
+		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+	            pi->pi_func, vmname);
+
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, nstr, strlen(nstr));
+		MD5Final(digest, &mdctx);
+
+		sc->vsc_config.mac[0] = 0x00;
+		sc->vsc_config.mac[1] = 0xa0;
+		sc->vsc_config.mac[2] = 0x98;
+		sc->vsc_config.mac[3] = digest[0];
+		sc->vsc_config.mac[4] = digest[1];
+		sc->vsc_config.mac[5] = digest[2];
+	}
+#endif
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+
+	/* link always up */
+	sc->vsc_config.status = 1;
+	
+	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
+	if (vi_intr_init(&sc->vsc_vs, 1, use_msix))
+		return (1);
+
+	/* use BAR 0 to map config regs in IO space */
+	vi_set_io_bar(&sc->vsc_vs, 0);
+
+	sc->resetting = 0;
+
+	sc->rx_in_progress = 0;
+	pthread_mutex_init(&sc->rx_mtx, NULL); 
+
+	/* 
+	 * Initialize tx semaphore & spawn TX processing thread.
+	 * As of now, only one thread for TX desc processing is
+	 * spawned. 
+	 */
+	sc->tx_in_progress = 0;
+	pthread_mutex_init(&sc->tx_mtx, NULL);
+	pthread_cond_init(&sc->tx_cond, NULL);
+	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
+        snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot);
+        pthread_set_name_np(sc->tx_tid, tname);
+
+	return (0);
+}
+
+/*
+ * Handle a guest write to the virtio-net device configuration.  Offsets 0-5
+ * hold the MAC address, which the guest driver may rewrite; a write that
+ * starts at any other offset is refused.
+ *
+ * Returns 0 if the write was applied and 1 if it was rejected.
+ */
+static int
+pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+	struct pci_vtnet_softc *softc = vsc;
+
+	if (offset >= 6) {
+		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
+		return (1);
+	}
+
+	/*
+	 * The driver is allowed to change the MAC address, but the access
+	 * must not run past its last byte.
+	 */
+	assert(offset + size <= 6);
+	memcpy(&softc->vsc_config.mac[offset], &value, size);
+	return (0);
+}
+
+/*
+ * Handle a guest read of the virtio-net device configuration by copying
+ * 'size' bytes starting 'offset' bytes into vsc_config out to *retval.
+ * Always returns 0.
+ *
+ * NOTE(review): offset/size are not validated here; presumably the caller
+ * bounds them to the config structure -- confirm.
+ */
+static int
+pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vtnet_softc *softc = vsc;
+	uint8_t *cfg_base = (uint8_t *)&softc->vsc_config;
+
+	memcpy(retval, cfg_base + offset, size);
+	return (0);
+}
+
+/*
+ * Device-emulation descriptor for the "virtio-net" model: pci_vtnet_init()
+ * sets the device up and BAR accesses are routed to vi_pci_read() /
+ * vi_pci_write().  PCI_EMUL_SET() presumably registers the descriptor with
+ * the PCI emulation core -- see its definition to confirm.
+ */
+struct pci_devemu pci_de_vnet = {
+	.pe_emu = 	"virtio-net",
+	.pe_init =	pci_vtnet_init,
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
+};
+PCI_EMUL_SET(pci_de_vnet);
diff --git a/usr/src/cmd/bhyve/pci_virtio_viona.c b/usr/src/cmd/bhyve/pci_virtio_viona.c
new file mode 100644
index 0000000000..f4d5d528be
--- /dev/null
+++ b/usr/src/cmd/bhyve/pci_virtio_viona.c
@@ -0,0 +1,706 @@
+/*
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/ioctl.h>
+#include <sys/viona_io.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <poll.h>
+#include <libdladm.h>
+#include <libdllink.h>
+#include <libdlvnic.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+#define	VIONA_RINGSZ	1024	/* ring size reported for the RX and TX queues */
+
+/*
+ * Device-specific register offsets within I/O BAR 0 (MSI-X layout).
+ * CFG0-CFG5 expose the MAC address; CFG6/CFG7 are read-only status bytes.
+ */
+#define	VIONA_R_CFG0	24
+#define	VIONA_R_CFG1	25
+#define	VIONA_R_CFG2	26
+#define	VIONA_R_CFG3	27
+#define	VIONA_R_CFG4	28
+#define	VIONA_R_CFG5	29
+#define	VIONA_R_CFG6	30
+#define	VIONA_R_CFG7	31
+#define	VIONA_R_MAX	31
+
+/* Parenthesized so the macro is safe inside larger expressions */
+#define	VIONA_REGSZ	(VIONA_R_MAX + 1)
+
+/*
+ * Host capabilities
+ *
+ * NOTE(review): VIONA_S_HOSTCAPS is not referenced by the register handlers
+ * in this file (host features are fetched with VNA_IOC_GET_FEATURES);
+ * confirm whether it is still needed.
+ */
+#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
+#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
+#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
+
+#define	VIONA_S_HOSTCAPS		\
+	(VIRTIO_NET_F_MAC |		\
+	VIRTIO_NET_F_MRG_RXBUF |	\
+	VIRTIO_NET_F_STATUS)
+
+/*
+ * Queue definitions.  These values index vsc_pfn[], vsc_msix_table_idx[]
+ * and the pci_viona_qnotify[] dispatch table.
+ */
+#define	VIONA_RXQ	0
+#define	VIONA_TXQ	1
+#define	VIONA_CTLQ	2
+
+#define	VIONA_MAXQ	3
+
+/*
+ * Debug printf
+ *
+ * Both macros take a fully parenthesized printf argument list, e.g.
+ * DPRINTF(("x = %d\n", x)).  DPRINTF only prints when pci_viona_debug is
+ * non-zero; WPRINTF always prints.  DPRINTF is wrapped in do/while (0) so
+ * it behaves as a single statement and cannot capture a following 'else'.
+ */
+static int pci_viona_debug;
+#define	DPRINTF(params) do { if (pci_viona_debug) printf params; } while (0)
+#define	WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_viona_softc {
+	struct pci_devinst *vsc_pi;	/* owning PCI device instance */
+	pthread_mutex_t vsc_mtx;	/* held across BAR 0 register access */
+
+	int		vsc_curq;	/* queue selected via VTCFG_R_QSEL */
+	int		vsc_status;	/* last VTCFG_R_STATUS value written */
+	int		vsc_isr;	/* returned, then cleared, by ISR reads */
+
+	datalink_id_t	vsc_linkid;	/* link id looked up from vsc_linkname */
+	char		vsc_linkname[MAXLINKNAMELEN];	/* vnic name from opts */
+	int		vsc_vnafd;	/* viona ctl device descriptor */
+
+	uint32_t	vsc_features;	/* value returned for GUESTCAP reads */
+	uint8_t		vsc_macaddr[6];	/* MAC exposed at VIONA_R_CFG0..CFG5 */
+
+	/* Per-queue ring address as programmed (pfn << VRING_PFN); 0 = reset */
+	uint64_t	vsc_pfn[VIONA_MAXQ];
+	/* Per-queue MSI-X table index; the CTLQ slot holds the config vector */
+	uint16_t	vsc_msix_table_idx[VIONA_MAXQ];
+	/*
+	 * Flag to see if host is already sending data out.
+	 * If it is, no need to wait for lock and send interrupt to host
+	 * for new data.
+	 */
+	boolean_t	vsc_tx_kick_lock_held;
+
+	pthread_t	tx_tid;		/* TX kick worker thread */
+	pthread_mutex_t	tx_mtx;		/* paired with tx_cond */
+	pthread_cond_t	tx_cond;	/* signalled to request a TX kick */
+};
+/* VM context of the device instance that owns the softc */
+#define	viona_ctx(sc)	((sc)->vsc_pi->pi_vmctx)
+
+/*
+ * Size of the I/O BAR covering the virtio header plus the device specific
+ * region.  With MSI-X enabled the full register file is visible; otherwise
+ * the layout is shorter by the span between VTCFG_R_MSIX and VTCFG_R_CFG1.
+ */
+static uint64_t
+pci_viona_iosize(struct pci_devinst *pi)
+{
+	uint64_t iosz = VIONA_REGSZ;
+
+	if (!pci_msix_enabled(pi))
+		iosz -= (VTCFG_R_CFG1 - VTCFG_R_MSIX);
+
+	return (iosz);
+}
+
+/*
+ * Number of descriptors advertised for queue 'qnum'.
+ *
+ * XXX no ctl queue currently, so it reports 0.
+ * XXX fixed currently. Maybe different for tx/rx/ctl
+ */
+static uint16_t
+pci_viona_qsize(int qnum)
+{
+	return ((qnum == VIONA_CTLQ) ? 0 : VIONA_RINGSZ);
+}
+
+/*
+ * Ask the viona driver to reset one ring.  When the ioctl succeeds the
+ * cached ring address for that queue is cleared; when it fails the cached
+ * value is kept and a warning is printed.  The control queue, and any other
+ * index, is a no-op.
+ */
+static void
+pci_viona_ring_reset(struct pci_viona_softc *sc, int ring)
+{
+	int	rv;
+
+	assert(ring < VIONA_MAXQ);
+
+	if (ring == VIONA_RXQ) {
+		rv = ioctl(sc->vsc_vnafd, VNA_IOC_RX_RING_RESET);
+		if (rv == 0) {
+			sc->vsc_pfn[VIONA_RXQ] = 0;
+		} else {
+			WPRINTF(("ioctl viona rx ring reset failed %d\n",
+			    rv));
+		}
+		return;
+	}
+
+	if (ring == VIONA_TXQ) {
+		rv = ioctl(sc->vsc_vnafd, VNA_IOC_TX_RING_RESET);
+		if (rv == 0) {
+			sc->vsc_pfn[VIONA_TXQ] = 0;
+		} else {
+			WPRINTF(("ioctl viona tx ring reset failed %d\n",
+			    rv));
+		}
+		return;
+	}
+
+	/* VIONA_CTLQ and anything else: nothing to reset */
+}
+
+/*
+ * Record a guest write to the device status register.  A value of zero is
+ * a device reset request: both data rings are reset before the new status
+ * is stored.
+ */
+static void
+pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value)
+{
+	const int reset_requested = (value == 0);
+
+	if (reset_requested) {
+		DPRINTF(("viona: device reset requested !\n"));
+		pci_viona_ring_reset(sc, VIONA_RXQ);
+		pci_viona_ring_reset(sc, VIONA_TXQ);
+	}
+
+	sc->vsc_status = value;
+}
+
+/*
+ * Interrupt relay thread.  Blocks in poll() on the viona descriptor and, for
+ * each readiness event, injects the MSI-X vector recorded for the matching
+ * queue (POLLIN -> RX, POLLOUT -> TX) and then issues the corresponding
+ * interrupt-clear ioctl.  The loop only ends when poll() fails with
+ * something other than EINTR/EAGAIN.
+ *
+ * param is the device's struct pci_viona_softc.  The thread exits with a
+ * NULL status.
+ */
+static void *
+pci_viona_poll_thread(void *param)
+{
+	struct pci_viona_softc *sc = param;
+	pollfd_t	pollset;
+	int		error;
+
+	pollset.fd = sc->vsc_vnafd;
+	pollset.events = POLLIN | POLLOUT;
+	pollset.revents = 0;
+
+	for (;;) {
+		if (poll(&pollset, 1, -1) < 0) {
+			if (errno == EINTR || errno == EAGAIN) {
+				continue;
+			} else {
+				/* Note the space: the literals are concatenated */
+				WPRINTF(("pci_viona_poll_thread poll() "
+				    "error %d\n", errno));
+				break;
+			}
+		}
+		if (pollset.revents & POLLIN) {
+			pci_generate_msix(sc->vsc_pi,
+			    sc->vsc_msix_table_idx[VIONA_RXQ]);
+			error = ioctl(sc->vsc_vnafd, VNA_IOC_RX_INTR_CLR);
+			if (error != 0) {
+				WPRINTF(("ioctl viona rx intr clear failed"
+				    " %d\n", error));
+			}
+		}
+
+		if (pollset.revents & POLLOUT) {
+			pci_generate_msix(sc->vsc_pi,
+			    sc->vsc_msix_table_idx[VIONA_TXQ]);
+			error = ioctl(sc->vsc_vnafd, VNA_IOC_TX_INTR_CLR);
+			if (error != 0) {
+				WPRINTF(("ioctl viona tx intr clear failed"
+				    " %d\n", error));
+			}
+		}
+	}
+
+	pthread_exit(NULL);
+}
+
+/*
+ * Queue-notify handler for the RX queue: forwards the kick to the viona
+ * driver and logs a warning if the ioctl reports failure.
+ */
+static void
+pci_viona_ping_rxq(struct pci_viona_softc *sc)
+{
+	const int rv = ioctl(sc->vsc_vnafd, VNA_IOC_RX_RING_KICK);
+
+	if (rv == 0)
+		return;
+
+	WPRINTF(("ioctl viona rx ring kick failed %d\n", rv));
+}
+
+/*
+ * TX worker thread.  Sleeps on tx_cond and, each time it is woken, issues a
+ * VNA_IOC_TX_RING_KICK ioctl.  tx_mtx is held for the whole loop except
+ * while blocked in pthread_cond_wait(), and vsc_tx_kick_lock_held is set
+ * for the duration of the ioctl so pci_viona_ping_txq() can skip the wakeup.
+ *
+ * NOTE(review): the wait has no predicate, so a wakeup that is skipped or
+ * sent before this thread first blocks is not replayed, and a spurious
+ * wakeup causes an extra kick -- confirm this is acceptable.
+ */
+static void *
+pci_viona_tx_thread(void *param)
+{
+	struct pci_viona_softc *sc = (struct pci_viona_softc *)param;
+	int error;
+
+	pthread_mutex_lock(&sc->tx_mtx);
+	for (;;) {
+		error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+		assert(error == 0);
+		sc->vsc_tx_kick_lock_held = B_TRUE;
+		error = ioctl(sc->vsc_vnafd, VNA_IOC_TX_RING_KICK);
+		if (error != 0) {
+			WPRINTF(("ioctl viona tx ring kick failed %d\n",
+			    error));
+		}
+		sc->vsc_tx_kick_lock_held = B_FALSE;
+	}
+	/* Not reached: the loop above never exits */
+	pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Queue-notify handler for the TX queue: wakes the TX worker thread unless
+ * it is already in the middle of a kick.  The flag is read without tx_mtx
+ * held.
+ */
+static void
+pci_viona_ping_txq(struct pci_viona_softc *sc)
+{
+	if (!sc->vsc_tx_kick_lock_held) {
+		/* Signal the tx thread for processing */
+		pthread_mutex_lock(&sc->tx_mtx);
+		pthread_cond_signal(&sc->tx_cond);
+		pthread_mutex_unlock(&sc->tx_mtx);
+	}
+}
+
+/*
+ * Queue-notify handler for the control queue.  There is no control queue
+ * support (pci_viona_qsize() reports it as empty), so this only emits a
+ * debug message.
+ */
+static void
+pci_viona_ping_ctlq(struct pci_viona_softc *sc)
+{
+	DPRINTF(("viona: control qnotify!\n\r"));
+}
+
+/*
+ * Program the ring for the currently selected queue.  The page frame
+ * number written by the guest is converted to an address (pfn << VRING_PFN),
+ * cached in vsc_pfn[] and handed to the viona driver together with the
+ * queue size.  Only the RX and TX queues are passed to the driver.
+ */
+static void
+pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn)
+{
+	const int		q = sc->vsc_curq;
+	vioc_ring_init_t	ring;
+	int			rv;
+
+	assert(q < VIONA_MAXQ);
+
+	sc->vsc_pfn[q] = (pfn << VRING_PFN);
+
+	ring.ri_qsize = pci_viona_qsize(q);
+	ring.ri_qaddr = (pfn << VRING_PFN);
+
+	if (q == VIONA_RXQ) {
+		rv = ioctl(sc->vsc_vnafd, VNA_IOC_RX_RING_INIT, &ring);
+		if (rv != 0) {
+			WPRINTF(("ioctl viona rx ring init failed %d\n",
+			    rv));
+		}
+	} else if (q == VIONA_TXQ) {
+		rv = ioctl(sc->vsc_vnafd, VNA_IOC_TX_RING_INIT, &ring);
+		if (rv != 0) {
+			WPRINTF(("ioctl viona tx ring init failed %d\n",
+			    rv));
+		}
+	}
+	/* VIONA_CTLQ and anything else: nothing to hand to the driver */
+}
+
+/*
+ * Open the viona control device and create the in-kernel instance for this
+ * guest.  The create request carries the link id in sc->vsc_linkid, the VM
+ * name and the sizes obtained from vm_get_memory_seg() for the 1MB and 4GB
+ * guest-physical addresses.
+ *
+ * On success returns 0 with the open descriptor in sc->vsc_vnafd.  On
+ * failure returns -1 with sc->vsc_vnafd set to -1; the descriptor is closed
+ * if it had been opened.
+ */
+static int
+pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc)
+{
+	vioc_create_t		vna_create;
+	int			error;
+
+	sc->vsc_vnafd = open("/devices/pseudo/viona@0:ctl", O_RDWR | O_EXCL);
+	if (sc->vsc_vnafd == -1) {
+		WPRINTF(("open viona ctl failed\n"));
+		return (-1);
+	}
+
+	/* Anything not explicitly set below must not be stack garbage */
+	memset(&vna_create, 0, sizeof (vna_create));
+
+	vna_create.c_linkid = sc->vsc_linkid;
+	strlcpy(vna_create.c_vmname, vmname,
+	    sizeof (vna_create.c_vmname));
+	vm_get_memory_seg(ctx, 1 * (1024 * 1024UL), &vna_create.c_lomem_size,
+	    NULL);
+	vm_get_memory_seg(ctx, 4 * (1024 * 1024 * 1024UL),
+	    &vna_create.c_himem_size, NULL);
+	error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create);
+	if (error != 0) {
+		WPRINTF(("ioctl viona create failed %d\n", error));
+		/* Do not leak the control descriptor on failure */
+		(void) close(sc->vsc_vnafd);
+		sc->vsc_vnafd = -1;
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Device init entry point for "virtio-net-viona".
+ *
+ * opts names the vnic to attach to and is required.  The vnic's link id and
+ * MAC address are looked up through libdladm, the viona instance is
+ * created, PCI config space / MSI-X / the I/O BAR are set up, and finally
+ * the interrupt poll thread and the TX kick thread are started.
+ *
+ * The threads are started last so that every failure path can free the
+ * softc without a live thread still referencing it, and so that the MSI-X
+ * table indices are initialized before the poll thread can read them.
+ *
+ * Returns 0 on success and 1 on failure (pi->pi_arg is cleared on failure).
+ */
+static int
+pci_viona_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	dladm_handle_t		handle;
+	dladm_status_t		status;
+	dladm_vnic_attr_t	attr;
+	char			errmsg[DLADM_STRSIZE];
+	pthread_t		poll_tid;
+	struct pci_viona_softc	*sc;
+	int			error;
+	int			i;
+
+	if (opts == NULL) {
+		printf("virtio-viona: vnic required\n");
+		return (1);
+	}
+
+	sc = calloc(1, sizeof (struct pci_viona_softc));
+	if (sc == NULL) {
+		WPRINTF(("virtio-viona: out of memory\n"));
+		return (1);
+	}
+
+	pi->pi_arg = sc;
+	sc->vsc_pi = pi;
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	strlcpy(sc->vsc_linkname, opts, MAXLINKNAMELEN);
+
+	if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) {
+		WPRINTF(("could not open /dev/dld\n"));
+		goto fail;
+	}
+
+	/* Keep the status so the message reports the actual failure */
+	status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid,
+	    NULL, NULL, NULL);
+	if (status != DLADM_STATUS_OK) {
+		WPRINTF(("dladm_name2info() for %s failed: %s\n", opts,
+		    dladm_status2str(status, errmsg)));
+		dladm_close(handle);
+		goto fail;
+	}
+
+	status = dladm_vnic_info(handle, sc->vsc_linkid, &attr,
+	    DLADM_OPT_ACTIVE);
+	if (status != DLADM_STATUS_OK) {
+		WPRINTF(("dladm_vnic_info() for %s failed: %s\n", opts,
+		    dladm_status2str(status, errmsg)));
+		dladm_close(handle);
+		goto fail;
+	}
+
+	sc->vsc_tx_kick_lock_held = B_FALSE;
+	memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL);
+
+	dladm_close(handle);
+
+	error = pci_viona_viona_init(ctx, sc);
+	if (error != 0)
+		goto fail;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+
+	/* MSI-X support */
+	for (i = 0; i < VIONA_MAXQ; i++)
+		sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;
+
+	/*
+	 * BAR 1 used to map MSI-X table and PBA
+	 */
+	if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1))
+		goto fail_close;
+
+	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ);
+
+	/*
+	 * Initialize tx semaphore & spawn TX processing thread
+	 * As of now, only one thread for TX desc processing is
+	 * spawned.
+	 */
+	pthread_mutex_init(&sc->tx_mtx, NULL);
+	pthread_cond_init(&sc->tx_cond, NULL);
+
+	error = pthread_create(&poll_tid, NULL, pci_viona_poll_thread, sc);
+	assert(error == 0);
+
+	error = pthread_create(&sc->tx_tid, NULL, pci_viona_tx_thread,
+	    (void *)sc);
+	assert(error == 0);
+
+	return (0);
+
+fail_close:
+	(void) close(sc->vsc_vnafd);
+fail:
+	pi->pi_arg = NULL;
+	free(sc);
+	return (1);
+}
+
+/*
+ * Queue-notify dispatch table, indexed by the queue number the guest
+ * writes to VTCFG_R_QNOTIFY.
+ */
+static void (*pci_viona_qnotify[VIONA_MAXQ])(struct pci_viona_softc *) = {
+	[VIONA_RXQ] =	pci_viona_ping_rxq,
+	[VIONA_TXQ] =	pci_viona_ping_txq,
+	[VIONA_CTLQ] =	pci_viona_ping_ctlq
+};
+
+/*
+ * Translate a guest BAR 0 offset into the MSI-X register layout used by the
+ * handlers.  Device specific offsets used by the guest change based on
+ * whether the MSI-X capability is enabled: when it is not, everything at or
+ * beyond VTCFG_R_MSIX is shifted up by (VTCFG_R_CFG1 - VTCFG_R_MSIX).
+ */
+static uint64_t
+viona_adjust_offset(struct pci_devinst *pi, uint64_t offset)
+{
+	const int shifted = (!pci_msix_enabled(pi) && offset >= VTCFG_R_MSIX);
+
+	return (shifted ? offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX) : offset);
+}
+
+/*
+ * BAR write handler.  Accesses to the MSI-X table/PBA BAR are forwarded to
+ * pci_emul_msix_twrite(); everything else must target BAR 0 and is decoded
+ * as a virtio register write under vsc_mtx.  Writes that extend past the
+ * current I/O size are dropped; writes to read-only or unknown registers
+ * are only logged.
+ */
+static void
+pci_viona_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct pci_viona_softc *sc = pi->pi_arg;
+	int err = 0;
+
+	if (baridx == pci_msix_table_bar(pi) ||
+	    baridx == pci_msix_pba_bar(pi)) {
+		pci_emul_msix_twrite(pi, offset, size, value);
+		return;
+	}
+
+	assert(baridx == 0);
+
+	if (offset + size > pci_viona_iosize(pi)) {
+		DPRINTF(("viona_write: 2big, offset %ld size %d\n",
+		    offset, size));
+		return;
+	}
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+
+	/* Decode against the MSI-X layout regardless of the guest's view */
+	offset = viona_adjust_offset(pi, offset);
+
+	switch (offset) {
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value);
+		if (err != 0)
+			WPRINTF(("ioctl feature negotiation returned"
+			    " err = %d\n", err));
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		pci_viona_ring_init(sc, value);
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		assert(value < VIONA_MAXQ);
+		sc->vsc_curq = value;
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		assert(value < VIONA_MAXQ);
+		(*pci_viona_qnotify[value])(sc);
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		pci_viona_update_status(sc, value);
+		break;
+	case VTCFG_R_CFGVEC:
+		assert(size == 2);
+		/* The config vector is kept in the control queue slot */
+		sc->vsc_msix_table_idx[VIONA_CTLQ] = value;
+		break;
+	case VTCFG_R_QVEC:
+		assert(size == 2);
+		assert(sc->vsc_curq != VIONA_CTLQ);
+		sc->vsc_msix_table_idx[sc->vsc_curq] = value;
+		break;
+	case VIONA_R_CFG0:
+	case VIONA_R_CFG1:
+	case VIONA_R_CFG2:
+	case VIONA_R_CFG3:
+	case VIONA_R_CFG4:
+	case VIONA_R_CFG5:
+		assert((size + offset) <= (VIONA_R_CFG5 + 1));
+		/*
+		 * The driver is allowed to change the MAC address.  Copy
+		 * the low-order 'size' bytes of the value (little-endian
+		 * host) with memcpy rather than a typed store, since the
+		 * destination may be misaligned for wider types.
+		 */
+		memcpy(&sc->vsc_macaddr[offset - VIONA_R_CFG0], &value, size);
+		break;
+	case VTCFG_R_HOSTCAP:
+	case VTCFG_R_QNUM:
+	case VTCFG_R_ISR:
+	case VIONA_R_CFG6:
+	case VIONA_R_CFG7:
+		DPRINTF(("viona: write to readonly reg %ld\n\r", offset));
+		break;
+	default:
+		DPRINTF(("viona: unknown i/o write offset %ld\n\r", offset));
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+/*
+ * BAR read handler.  Accesses to the MSI-X table/PBA BAR are forwarded to
+ * pci_emul_msix_tread(); everything else must target BAR 0 and is decoded
+ * as a virtio register read under vsc_mtx.  Reads that extend past the
+ * current I/O size, and reads of unknown registers, return 0.
+ *
+ * The result starts out as 0 so that a failed or short VNA_IOC_GET_FEATURES
+ * never leaks uninitialized stack contents to the guest.
+ */
+uint64_t
+pci_viona_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+    int baridx, uint64_t offset, int size)
+{
+	struct pci_viona_softc *sc = pi->pi_arg;
+	uint64_t value = 0;
+	int err = 0;
+
+	if (baridx == pci_msix_table_bar(pi) ||
+	    baridx == pci_msix_pba_bar(pi)) {
+		return (pci_emul_msix_tread(pi, offset, size));
+	}
+
+	assert(baridx == 0);
+
+	if (offset + size > pci_viona_iosize(pi)) {
+		DPRINTF(("viona_read: 2big, offset %ld size %d\n",
+		    offset, size));
+		return (0);
+	}
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+
+	/* Decode against the MSI-X layout regardless of the guest's view */
+	offset = viona_adjust_offset(pi, offset);
+
+	switch (offset) {
+	case VTCFG_R_HOSTCAP:
+		assert(size == 4);
+		err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value);
+		if (err != 0)
+			WPRINTF(("ioctl get host features returned"
+			    " err = %d\n", err));
+		break;
+	case VTCFG_R_GUESTCAP:
+		assert(size == 4);
+		value = sc->vsc_features; /* XXX never read ? */
+		break;
+	case VTCFG_R_PFN:
+		assert(size == 4);
+		value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
+		break;
+	case VTCFG_R_QNUM:
+		assert(size == 2);
+		value = pci_viona_qsize(sc->vsc_curq);
+		break;
+	case VTCFG_R_QSEL:
+		assert(size == 2);
+		value = sc->vsc_curq;  /* XXX never read ? */
+		break;
+	case VTCFG_R_QNOTIFY:
+		assert(size == 2);
+		value = sc->vsc_curq;  /* XXX never read ? */
+		break;
+	case VTCFG_R_STATUS:
+		assert(size == 1);
+		value = sc->vsc_status;
+		break;
+	case VTCFG_R_ISR:
+		assert(size == 1);
+		value = sc->vsc_isr;
+		sc->vsc_isr = 0;	/* a read clears this flag */
+		break;
+	case VTCFG_R_CFGVEC:
+		assert(size == 2);
+		/* The config vector is kept in the control queue slot */
+		value = sc->vsc_msix_table_idx[VIONA_CTLQ];
+		break;
+	case VTCFG_R_QVEC:
+		assert(size == 2);
+		assert(sc->vsc_curq != VIONA_CTLQ);
+		value = sc->vsc_msix_table_idx[sc->vsc_curq];
+		break;
+	case VIONA_R_CFG0:
+	case VIONA_R_CFG1:
+	case VIONA_R_CFG2:
+	case VIONA_R_CFG3:
+	case VIONA_R_CFG4:
+	case VIONA_R_CFG5:
+		assert((size + offset) <= (VIONA_R_CFG5 + 1));
+		/*
+		 * Copy 'size' MAC bytes into the low-order bytes of the
+		 * zeroed result (little-endian host) with memcpy rather
+		 * than a typed load, since the source may be misaligned.
+		 */
+		memcpy(&value, &sc->vsc_macaddr[offset - VIONA_R_CFG0], size);
+		break;
+	case VIONA_R_CFG6:
+		assert(size != 4);
+		value = 0x01;	/* XXX link always up */
+		break;
+	case VIONA_R_CFG7:
+		assert(size == 1);
+		value = 0;	/* XXX link status in LSB */
+		break;
+	default:
+		DPRINTF(("viona: unknown i/o read offset %ld\n\r", offset));
+		value = 0;
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->vsc_mtx);
+
+	return (value);
+}
+
+/*
+ * Device-emulation descriptor for the "virtio-net-viona" model, wiring the
+ * init entry point and the BAR read/write handlers defined above.
+ * PCI_EMUL_SET() presumably registers the descriptor with the PCI emulation
+ * core -- see its definition to confirm.
+ */
+struct pci_devemu pci_de_viona = {
+	.pe_emu = 	"virtio-net-viona",
+	.pe_init =	pci_viona_init,
+	.pe_barwrite =	pci_viona_write,
+	.pe_barread =	pci_viona_read
+};
+PCI_EMUL_SET(pci_de_viona);
diff --git a/usr/src/cmd/bhyve/pm.c b/usr/src/cmd/bhyve/pm.c
new file mode 100644
index 0000000000..70c4f1fae8
--- /dev/null
+++ b/usr/src/cmd/bhyve/pm.c
@@ -0,0 +1,333 @@
+/*-
+ * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pm.c 266125 2014-05-15 14:16:55Z jhb $");
+
+#include <sys/types.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#ifndef	__FreeBSD__
+#include <stdlib.h>
+#endif
+#include <signal.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#ifdef	__FreeBSD__
+#include "mevent.h"
+#endif
+#include "pci_irq.h"
+#include "pci_lpc.h"
+
+/*
+ * Taken by the PM1 status/enable handlers, the SMI command handler and the
+ * power-button callback around their updates of the PM1 state.
+ */
+static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
+#ifdef	__FreeBSD__
+/* SIGTERM event registration and the signal disposition it replaced */
+static struct mevent *power_button;
+static sig_t old_power_handler;
+#endif
+
+/*
+ * Reset Control register at I/O port 0xcf9.  Bit 2 forces a system
+ * reset when it transitions from 0 to 1.  Bit 1 selects the type of
+ * reset to attempt: 0 selects a "soft" reset, and 1 selects a "hard"
+ * reset.
+ *
+ * Only single-byte accesses are accepted (-1 otherwise).  Reads return the
+ * last value written; a write with bit 2 set triggers the reset action.
+ *
+ * NOTE(review): the __FreeBSD__ branch uses 'error' and 'errno' without a
+ * visible declaration/include in this file -- confirm before enabling it.
+ */
+static int
+reset_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	static uint8_t reset_control;
+
+	if (bytes != 1)
+		return (-1);
+
+	if (in) {
+		*eax = reset_control;
+		return (0);
+	}
+
+	reset_control = *eax;
+
+	/* Treat hard and soft resets the same. */
+	if ((reset_control & 0x4) != 0) {
+#ifdef	__FreeBSD__
+		error = vm_suspend(ctx, VM_SUSPEND_RESET);
+		assert(error == 0 || errno == EALREADY);
+#else
+		exit(0);
+#endif
+	}
+
+	return (0);
+}
+INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
+
+/*
+ * ACPI's SCI is a level-triggered interrupt.  sci_active remembers the
+ * level currently driven so the line is only toggled on a change.
+ */
+static int sci_active;
+
+/* Raise the SCI line unless it is already asserted. */
+static void
+sci_assert(struct vmctx *ctx)
+{
+
+	if (!sci_active) {
+		vm_isa_assert_irq(ctx, SCI_INT, SCI_INT);
+		sci_active = 1;
+	}
+}
+
+/* Lower the SCI line unless it is already deasserted. */
+static void
+sci_deassert(struct vmctx *ctx)
+{
+
+	if (sci_active) {
+		vm_isa_deassert_irq(ctx, SCI_INT, SCI_INT);
+		sci_active = 0;
+	}
+}
+
+/*
+ * Power Management 1 Event Registers
+ *
+ * The only power management event supported is a power button upon
+ * receiving SIGTERM.
+ */
+static uint16_t pm1_enable, pm1_status;
+
+/* Bits of the PM1 status register (pm1_status) */
+#define	PM1_TMR_STS		0x0001
+#define	PM1_BM_STS		0x0010
+#define	PM1_GBL_STS		0x0020
+#define	PM1_PWRBTN_STS		0x0100
+#define	PM1_SLPBTN_STS		0x0200
+#define	PM1_RTC_STS		0x0400
+#define	PM1_WAK_STS		0x8000
+
+/* Bits of the PM1 enable register (pm1_enable) */
+#define	PM1_TMR_EN		0x0001
+#define	PM1_GBL_EN		0x0020
+#define	PM1_PWRBTN_EN		0x0100
+#define	PM1_SLPBTN_EN		0x0200
+#define	PM1_RTC_EN		0x0400
+
+/*
+ * Recompute the SCI level from the PM1 registers: the interrupt is asserted
+ * when any event has both its enable bit and its status bit set, and
+ * deasserted otherwise.
+ */
+static void
+sci_update(struct vmctx *ctx)
+{
+	static const struct {
+		uint16_t	en;
+		uint16_t	sts;
+	} sources[] = {
+		{ PM1_TMR_EN,		PM1_TMR_STS },
+		{ PM1_GBL_EN,		PM1_GBL_STS },
+		{ PM1_PWRBTN_EN,	PM1_PWRBTN_STS },
+		{ PM1_SLPBTN_EN,	PM1_SLPBTN_STS },
+		{ PM1_RTC_EN,		PM1_RTC_STS }
+	};
+	unsigned int idx;
+
+	/* See if the SCI should be active or not. */
+	for (idx = 0; idx < sizeof (sources) / sizeof (sources[0]); idx++) {
+		if ((pm1_enable & sources[idx].en) != 0 &&
+		    (pm1_status & sources[idx].sts) != 0) {
+			sci_assert(ctx);
+			return;
+		}
+	}
+
+	sci_deassert(ctx);
+}
+
+/*
+ * I/O handler for the PM1 status register.  Only 16-bit accesses are
+ * accepted (-1 otherwise).  Reads return pm1_status; writes clear the
+ * write-1-to-clear bits that are set in the written value and then
+ * re-evaluate the SCI level.
+ */
+static int
+pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/*
+	 * Writes are only permitted to clear certain bits by
+	 * writing 1 to those flags.
+	 */
+	const uint16_t clearable = PM1_WAK_STS | PM1_RTC_STS |
+	    PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS;
+
+	if (bytes != 2)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (!in) {
+		pm1_status &= ~(*eax & clearable);
+		sci_update(ctx);
+	} else {
+		*eax = pm1_status;
+	}
+	pthread_mutex_unlock(&pm_lock);
+
+	return (0);
+}
+
+/*
+ * I/O handler for the PM1 enable register.  Only 16-bit accesses are
+ * accepted (-1 otherwise).  Reads return pm1_enable; writes replace it with
+ * the permitted subset of the written bits and re-evaluate the SCI level.
+ */
+static int
+pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/*
+	 * Only permit certain bits to be set.  We never use
+	 * the global lock, but ACPI-CA whines profusely if it
+	 * can't set GBL_EN.
+	 */
+	const uint16_t settable = PM1_PWRBTN_EN | PM1_GBL_EN;
+
+	if (bytes != 2)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (!in) {
+		pm1_enable = *eax & settable;
+		sci_update(ctx);
+	} else {
+		*eax = pm1_enable;
+	}
+	pthread_mutex_unlock(&pm_lock);
+
+	return (0);
+}
+INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
+INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, pm1_enable_handler);
+
+#ifdef	__FreeBSD__
+/*
+ * Event callback registered for SIGTERM: latches the power button status
+ * bit, if it is not already set, and re-evaluates the SCI level.  arg is
+ * the VM context.
+ */
+static void
+power_button_handler(int signal, enum ev_type type, void *arg)
+{
+	struct vmctx *ctx = arg;
+
+	pthread_mutex_lock(&pm_lock);
+	if ((pm1_status & PM1_PWRBTN_STS) == 0) {
+		pm1_status |= PM1_PWRBTN_STS;
+		sci_update(ctx);
+	}
+	pthread_mutex_unlock(&pm_lock);
+}
+#endif
+
+/*
+ * Power Management 1 Control Register
+ *
+ * This is mostly unimplemented except that we wish to handle writes that
+ * set SLP_EN to handle S5 (soft power off).
+ */
+static uint16_t pm1_control;
+
+#define	PM1_SCI_EN	0x0001	/* set/cleared only via the SMI command port */
+#define	PM1_SLP_TYP	0x1c00	/* sleep type field, bits 10-12 */
+#define	PM1_SLP_EN	0x2000	/* acted upon, never stored in pm1_control */
+#define	PM1_ALWAYS_ZERO	0xc003	/* bits masked out of guest writes */
+
+/*
+ * I/O handler for the PM1 control register.  Only 16-bit accesses are
+ * accepted (-1 otherwise).  Reads return pm1_control.  Writes store the
+ * permitted bits and, when SLP_EN is written with SLP_TYP == 5, power the
+ * guest off.
+ *
+ * NOTE(review): the __FreeBSD__ branch uses 'error' and 'errno' without a
+ * visible declaration/include in this file -- confirm before enabling it.
+ */
+static int
+pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+	if (bytes != 2)
+		return (-1);
+
+	if (in) {
+		*eax = pm1_control;
+		return (0);
+	}
+
+	/*
+	 * Various bits are write-only or reserved, so force them
+	 * to zero in pm1_control.  Always preserve SCI_EN as OSPM
+	 * can never change it.
+	 */
+	pm1_control = (pm1_control & PM1_SCI_EN) |
+	    (*eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO));
+
+	/*
+	 * If SLP_EN is set, check for S5.  Bhyve's _S5_ method
+	 * says that '5' should be stored in SLP_TYP for S5.
+	 */
+	if ((*eax & PM1_SLP_EN) != 0 &&
+	    ((pm1_control & PM1_SLP_TYP) >> 10) == 5) {
+#ifdef	__FreeBSD__
+		error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
+		assert(error == 0 || errno == EALREADY);
+#else
+		exit(0);
+#endif
+	}
+
+	return (0);
+}
+INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
+#ifdef	__FreeBSD__
+SYSRES_IO(PM1A_EVT_ADDR, 8);
+#endif
+
+/*
+ * ACPI SMI Command Register
+ *
+ * This write-only register is used to enable and disable ACPI.  Only
+ * single-byte writes are accepted (-1 otherwise).  Enabling sets SCI_EN in
+ * pm1_control and disabling clears it; on FreeBSD the SIGTERM power-button
+ * event is installed and removed at the same time.  Other command values
+ * are ignored.
+ */
+static int
+smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+
+	assert(!in);
+	if (bytes != 1)
+		return (-1);
+
+	pthread_mutex_lock(&pm_lock);
+	if (*eax == BHYVE_ACPI_ENABLE) {
+		pm1_control |= PM1_SCI_EN;
+#ifdef	__FreeBSD__
+		if (power_button == NULL) {
+			power_button = mevent_add(SIGTERM, EVF_SIGNAL,
+			    power_button_handler, ctx);
+			old_power_handler = signal(SIGTERM, SIG_IGN);
+		}
+#endif
+	} else if (*eax == BHYVE_ACPI_DISABLE) {
+		pm1_control &= ~PM1_SCI_EN;
+#ifdef	__FreeBSD__
+		if (power_button != NULL) {
+			mevent_delete(power_button);
+			power_button = NULL;
+			signal(SIGTERM, old_power_handler);
+		}
+#endif
+	}
+	pthread_mutex_unlock(&pm_lock);
+
+	return (0);
+}
+INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
+#ifdef	__FreeBSD__
+SYSRES_IO(SMI_CMD, 1);
+#endif
+
+/*
+ * One-time SCI setup for the given VM context: reserves SCI_INT through
+ * pci_irq_use() and configures it as level-triggered.
+ */
+void
+sci_init(struct vmctx *ctx)
+{
+
+	/*
+	 * Mark ACPI's SCI as level trigger and bump its use count
+	 * in the PIRQ router.
+	 */
+	pci_irq_use(SCI_INT);
+	vm_isa_set_irq_trigger(ctx, SCI_INT, LEVEL_TRIGGER);
+}
diff --git a/usr/src/cmd/bhyve/pmtmr.c b/usr/src/cmd/bhyve/pmtmr.c
new file mode 100644
index 0000000000..92ab24be57
--- /dev/null
+++ b/usr/src/cmd/bhyve/pmtmr.c
@@ -0,0 +1,212 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/pmtmr.c 259998 2013-12-28 04:01:05Z jhb $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pmtmr.c 259998 2013-12-28 04:01:05Z jhb $");
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <machine/cpufunc.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <pthread.h>
+#ifndef __FreeBSD__
+#include <kstat.h>
+#endif
+
+#include "acpi.h"
+#include "inout.h"
+
+/*
+ * The ACPI Power Management timer is a free-running 24- or 32-bit
+ * timer with a frequency of 3.579545MHz
+ *
+ * This implementation will be 32-bits
+ */
+
+#define PMTMR_FREQ	3579545  /* 3.579545MHz */
+
+static pthread_mutex_t pmtmr_mtx;
+static pthread_once_t pmtmr_once = PTHREAD_ONCE_INIT;
+
+static uint64_t	pmtmr_old;
+
+static uint64_t	pmtmr_tscf;
+static uint64_t	pmtmr_tsc_old;
+
+#ifdef	__FreeBSD__
+static clockid_t clockid = CLOCK_UPTIME_FAST;
+static struct timespec pmtmr_uptime_old;
+
+#define	timespecsub(vvp, uvp)						\
+	do {								\
+		(vvp)->tv_sec -= (uvp)->tv_sec;				\
+		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
+		if ((vvp)->tv_nsec < 0) {				\
+			(vvp)->tv_sec--;				\
+			(vvp)->tv_nsec += 1000000000;			\
+		}							\
+	} while (0)
+
+/*
+ * Convert the time elapsed between 'tsold' and 'tsnew' into PM timer
+ * ticks and add it to the previously computed timer value (pmtmr_old).
+ *
+ * Whole seconds and the sub-second remainder are scaled separately.
+ * Multiplying the full nanosecond count by PMTMR_FREQ overflows a signed
+ * 64-bit integer once the interval exceeds roughly 43 minutes, which
+ * pmtmr_init() reaches because it converts the entire host uptime.  For
+ * intervals that did not overflow the result is unchanged.
+ */
+static uint64_t
+timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold)
+{
+	struct timespec tsdiff;
+	uint64_t ticks;
+
+	tsdiff = *tsnew;
+	timespecsub(&tsdiff, tsold);
+	/*
+	 * For normalized inputs timespecsub() leaves tv_nsec in
+	 * [0, 1000000000), so a negative interval shows up in tv_sec.
+	 */
+	assert(tsdiff.tv_sec >= 0);
+
+	ticks = (uint64_t)tsdiff.tv_sec * PMTMR_FREQ;
+	ticks += (uint64_t)tsdiff.tv_nsec * PMTMR_FREQ / 1000000000;
+
+	return (ticks + pmtmr_old);
+}
+#endif
+
+/*
+ * Convert the TSC delta (tsc_new - tsc_old) into PM timer ticks and add it
+ * to the previously computed timer value (pmtmr_old).
+ *
+ * The delta is split into whole seconds and a sub-second remainder before
+ * scaling.  Multiplying the raw delta by PMTMR_FREQ wraps 64 bits once the
+ * delta exceeds about 5.1e12 cycles (under half an hour at 3GHz); that
+ * happens for the initial conversion in pmtmr_init() and whenever the
+ * timer is left unread for that long.  For deltas that did not wrap the
+ * result is unchanged.
+ */
+static uint64_t
+tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old)
+{
+	uint64_t delta, secs, frac;
+
+	/* A zero TSC frequency would otherwise fault on the division. */
+	assert(pmtmr_tscf != 0);
+
+	delta = tsc_new - tsc_old;
+	secs = delta / pmtmr_tscf;
+	frac = delta % pmtmr_tscf;
+
+	return (secs * PMTMR_FREQ + frac * PMTMR_FREQ / pmtmr_tscf +
+	    pmtmr_old);
+}
+
+/*
+ * One-time initialization of the PM timer state; invoked through
+ * pthread_once() from pmtmr_val().
+ *
+ * Determines the time source, records the starting reference
+ * (pmtmr_tsc_old or pmtmr_uptime_old), seeds pmtmr_old, and creates the
+ * mutex that serializes later reads.
+ */
+static void
+pmtmr_init(void)
+{
+#ifdef	__FreeBSD__
+	size_t len;
+	int smp_tsc, err;
+	struct timespec tsnew, tsold = { 0 };
+
+	len = sizeof(smp_tsc);
+	err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0);
+	assert(err == 0);
+
+	if (smp_tsc) {
+		len = sizeof(pmtmr_tscf);
+		err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
+				   NULL, 0);
+		assert(err == 0);
+
+		pmtmr_tsc_old = rdtsc();
+		pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
+	} else {
+		if (getenv("BHYVE_PMTMR_PRECISE") != NULL)
+			clockid = CLOCK_UPTIME;
+
+		err = clock_gettime(clockid, &tsnew);
+		assert(err == 0);
+
+		pmtmr_uptime_old = tsnew;
+		pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
+	}
+#else
+	kstat_ctl_t *kstat_ctl;
+	kstat_t *kstat;
+	kstat_named_t *kstat_cpu_freq;
+	kid_t kid;
+
+	/*
+	 * Use the clock rate reported by the cpu_info kstat as the TSC
+	 * frequency.  Every step can fail, so check each result instead of
+	 * dereferencing a NULL pointer or dividing by a zero frequency later.
+	 *
+	 * NOTE(review): this assumes current_clock_Hz of cpu_info instance 0
+	 * matches the TSC rate -- confirm on hosts with frequency scaling.
+	 */
+	kstat_ctl = kstat_open();
+	assert(kstat_ctl != NULL);
+	kstat = kstat_lookup(kstat_ctl, "cpu_info", 0, NULL);
+	assert(kstat != NULL);
+	kid = kstat_read(kstat_ctl, kstat, NULL);
+	assert(kid != -1);
+	kstat_cpu_freq = kstat_data_lookup(kstat, "current_clock_Hz");
+	assert(kstat_cpu_freq != NULL);
+	pmtmr_tscf = kstat_cpu_freq->value.ul;
+	assert(pmtmr_tscf != 0);
+	kstat_close(kstat_ctl);
+
+	pmtmr_tsc_old = rdtsc();
+	pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
+#endif
+	pthread_mutex_init(&pmtmr_mtx, NULL);
+}
+
+/*
+ * Return the current value of the emulated 32-bit PM timer.
+ *
+ * The first call initializes the timer state via pmtmr_init().  Each call
+ * advances pmtmr_old by the ticks elapsed since the previous read, under
+ * pmtmr_mtx, and returns the low 32 bits of the running 64-bit count.
+ */
+static uint32_t
+pmtmr_val(void)
+{
+#ifdef	__FreeBSD__
+	/* Only the clock_gettime() fallback below needs these. */
+	struct timespec	tsnew;
+	int		error;
+#endif
+	uint64_t	pmtmr_tsc_new;
+	uint64_t	pmtmr_new;
+
+	pthread_once(&pmtmr_once, pmtmr_init);
+
+	pthread_mutex_lock(&pmtmr_mtx);
+
+#ifdef	__FreeBSD__
+	if (pmtmr_tscf) {
+		pmtmr_tsc_new = rdtsc();
+		pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
+		pmtmr_tsc_old = pmtmr_tsc_new;
+	} else {
+		error = clock_gettime(clockid, &tsnew);
+		assert(error == 0);
+
+		pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old);
+		pmtmr_uptime_old = tsnew;
+	}
+#else
+	pmtmr_tsc_new = rdtsc();
+	pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
+	pmtmr_tsc_old = pmtmr_tsc_new;
+#endif
+	pmtmr_old = pmtmr_new;
+
+	pthread_mutex_unlock(&pmtmr_mtx);
+
+	/* The timer register is 32 bits wide; the upper half is dropped. */
+	return ((uint32_t)pmtmr_new);
+}
+
+/*
+ * I/O port handler for the PM timer register.  Stores the current timer
+ * value in *eax for 4-byte reads and rejects every other access width.
+ */
+static int
+pmtmr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/* The port is registered for input only. */
+	assert(in == 1);
+
+	if (bytes == 4) {
+		*eax = pmtmr_val();
+		return (0);
+	}
+
+	return (-1);
+}
+
+INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
diff --git a/usr/src/cmd/bhyve/post.c b/usr/src/cmd/bhyve/post.c
new file mode 100644
index 0000000000..dcb481aac4
--- /dev/null
+++ b/usr/src/cmd/bhyve/post.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/post.c 260206 2014-01-02 21:26:59Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/post.c 260206 2014-01-02 21:26:59Z jhb $");
+
+#include <sys/types.h>
+
+#include <assert.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+
+/*
+ * I/O port handler for the POST data port.  Single-byte reads yield 0xff;
+ * any other access width is rejected.
+ */
+static int
+post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+    uint32_t *eax, void *arg)
+{
+	/* The port is registered for input only. */
+	assert(in == 1);
+
+	if (bytes == 1) {
+		/* return some garbage */
+		*eax = 0xff;
+		return (0);
+	}
+
+	return (-1);
+}
+
+INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
+SYSRES_IO(0x84, 1);
diff --git a/usr/src/cmd/bhyve/ps2kbd.c b/usr/src/cmd/bhyve/ps2kbd.c
new file mode 100644
index 0000000000..22e566ac21
--- /dev/null
+++ b/usr/src/cmd/bhyve/ps2kbd.c
@@ -0,0 +1,418 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2015 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "atkbdc.h"
+#include "console.h"
+
+/* keyboard device commands */
+#define	PS2KC_RESET_DEV		0xff
+#define	PS2KC_DISABLE		0xf5
+#define	PS2KC_ENABLE		0xf4
+#define	PS2KC_SET_TYPEMATIC	0xf3
+#define	PS2KC_SEND_DEV_ID	0xf2
+#define	PS2KC_SET_SCANCODE_SET	0xf0
+#define	PS2KC_ECHO		0xee
+#define	PS2KC_SET_LEDS		0xed
+
+#define	PS2KC_BAT_SUCCESS	0xaa
+#define	PS2KC_ACK		0xfa
+
+#define	PS2KBD_FIFOSZ		16
+
+/*
+ * Fixed-capacity circular byte queue.  Bytes are appended at 'windex' and
+ * consumed from 'rindex'; both wrap modulo 'size'.
+ */
+struct fifo {
+	uint8_t	buf[PS2KBD_FIFOSZ];
+	int	rindex;		/* index to read from */
+	int	windex;		/* index to write to */
+	int	num;		/* number of bytes in the fifo */
+	int	size;		/* size of the fifo */
+};
+
+/*
+ * Per-instance state of the emulated PS/2 keyboard.  'mtx' is held around
+ * every access to 'enabled', 'fifo' and 'curcmd' in this file.
+ */
+struct ps2kbd_softc {
+	/* controller instance handed to atkbdc_event() after a key event */
+	struct atkbdc_softc	*atkbdc_sc;
+	pthread_mutex_t		mtx;
+
+	/* key events are queued only while this is true */
+	bool			enabled;
+	/* bytes waiting to be fetched with ps2kbd_read() */
+	struct fifo		fifo;
+
+	uint8_t			curcmd;	/* current command for next byte */
+};
+
+/*
+ * Record the capacity of the keyboard FIFO.  The indices and byte count
+ * are left as they are.
+ */
+static void
+fifo_init(struct ps2kbd_softc *sc)
+{
+	sc->fifo.size = sizeof(sc->fifo.buf);
+}
+
+/*
+ * Discard everything queued in the keyboard FIFO and restore its capacity.
+ */
+static void
+fifo_reset(struct ps2kbd_softc *sc)
+{
+	struct fifo *q = &sc->fifo;
+
+	bzero(q, sizeof(*q));
+	q->size = sizeof(q->buf);
+}
+
+/*
+ * Return non-zero when the keyboard FIFO can take at least one more byte.
+ */
+static int
+fifo_available(struct ps2kbd_softc *sc)
+{
+	return (sc->fifo.num < sc->fifo.size);
+}
+
+/*
+ * Append 'val' to the keyboard FIFO.  The byte is silently dropped when
+ * the FIFO is already full.
+ */
+static void
+fifo_put(struct ps2kbd_softc *sc, uint8_t val)
+{
+	struct fifo *q = &sc->fifo;
+
+	if (q->num >= q->size)
+		return;
+
+	q->buf[q->windex] = val;
+	q->windex = (q->windex + 1) % q->size;
+	q->num++;
+}
+
+/*
+ * Remove the oldest byte from the keyboard FIFO and store it in *val.
+ * Returns 0 on success, or -1 (leaving *val untouched) when the FIFO is
+ * empty.
+ */
+static int
+fifo_get(struct ps2kbd_softc *sc, uint8_t *val)
+{
+	struct fifo *q = &sc->fifo;
+
+	if (q->num <= 0)
+		return (-1);
+
+	*val = q->buf[q->rindex];
+	q->rindex = (q->rindex + 1) % q->size;
+	q->num--;
+
+	return (0);
+}
+
+/*
+ * Fetch the next byte the keyboard has queued for the host side.
+ * Returns 0 and fills *val on success, -1 when nothing is queued.
+ */
+int
+ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val)
+{
+	int rc;
+
+	pthread_mutex_lock(&sc->mtx);
+	rc = fifo_get(sc, val);
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (rc);
+}
+
+/*
+ * Process one byte written to the keyboard.
+ *
+ * When a previous command is waiting for its argument (sc->curcmd is set),
+ * 'val' is consumed as that argument; otherwise it is decoded as a new
+ * command.  Responses are queued in the FIFO for ps2kbd_read().
+ */
+void
+ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val)
+{
+	pthread_mutex_lock(&sc->mtx);
+
+	if (sc->curcmd != 0) {
+		/* Argument byte: its value is ignored, only acknowledged. */
+		switch (sc->curcmd) {
+		case PS2KC_SET_TYPEMATIC:
+		case PS2KC_SET_SCANCODE_SET:
+		case PS2KC_SET_LEDS:
+			fifo_put(sc, PS2KC_ACK);
+			break;
+		default:
+			fprintf(stderr, "Unhandled ps2 keyboard current "
+			    "command byte 0x%02x\n", val);
+			break;
+		}
+		sc->curcmd = 0;
+		pthread_mutex_unlock(&sc->mtx);
+		return;
+	}
+
+	switch (val) {
+	case PS2KC_RESET_DEV:
+		fifo_reset(sc);
+		fifo_put(sc, PS2KC_ACK);
+		fifo_put(sc, PS2KC_BAT_SUCCESS);
+		break;
+	case PS2KC_DISABLE:
+		sc->enabled = false;
+		fifo_put(sc, PS2KC_ACK);
+		break;
+	case PS2KC_ENABLE:
+		sc->enabled = true;
+		fifo_reset(sc);
+		fifo_put(sc, PS2KC_ACK);
+		break;
+	case PS2KC_SEND_DEV_ID:
+		fifo_put(sc, PS2KC_ACK);
+		fifo_put(sc, 0xab);
+		fifo_put(sc, 0x83);
+		break;
+	case PS2KC_ECHO:
+		fifo_put(sc, PS2KC_ECHO);
+		break;
+	case PS2KC_SET_TYPEMATIC:
+	case PS2KC_SET_SCANCODE_SET:
+	case PS2KC_SET_LEDS:
+		/* Two-byte commands: remember which one awaits its argument. */
+		sc->curcmd = val;
+		fifo_put(sc, PS2KC_ACK);
+		break;
+	default:
+		fprintf(stderr, "Unhandled ps2 keyboard command "
+		    "0x%02x\n", val);
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Translate keysym to type 2 scancode and insert into keyboard buffer.
+ *
+ * 'down' is non-zero for a key press and zero for a release; a release
+ * is encoded by putting the 0xf0 break prefix in front of the scancode.
+ * Keysyms below 0x80 go through the ASCII table, the remaining supported
+ * keys through the 'special' table.  The caller must hold sc->mtx.
+ *
+ * Both tables are static so they are not rebuilt on the stack for every
+ * key event.
+ */
+static void
+ps2kbd_keysym_queue(struct ps2kbd_softc *sc,
+    int down, uint32_t keysym)
+{
+	/* ASCII to type 2 scancode lookup table */
+	static const uint8_t translation[128] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52,
+		0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a,
+		0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d,
+		0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a,
+		0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34,
+		0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44,
+		0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d,
+		0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e,
+		0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34,
+		0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44,
+		0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d,
+		0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00,
+	};
+	/*
+	 * Keysyms outside the ASCII range.  A 'code' of 0 marks a key that
+	 * is recognized but not translated yet: nothing is queued for it
+	 * and no diagnostic is printed.
+	 */
+	static const struct {
+		uint32_t	keysym;
+		uint8_t		code;	/* scancode, 0 if not emitted */
+		bool		ext;	/* scancode needs the 0xe0 prefix */
+	} special[] = {
+		{ 0xff08, 0x66, false },	/* Back space */
+		{ 0xff09, 0x0d, false },	/* Tab */
+		{ 0xff0d, 0x5a, false },	/* Return  */
+		{ 0xff1b, 0x76, false },	/* Escape */
+		{ 0xff51, 0x6b, true },		/* Left arrow */
+		{ 0xff52, 0x75, true },		/* Up arrow */
+		{ 0xff53, 0x74, true },		/* Right arrow */
+		{ 0xff54, 0x72, true },		/* Down arrow */
+		{ 0xffbe, 0x05, false },	/* F1 */
+		{ 0xffbf, 0x06, false },	/* F2 */
+		{ 0xffc0, 0x04, false },	/* F3 */
+		{ 0xffc1, 0x0c, false },	/* F4 */
+		{ 0xffc2, 0x03, false },	/* F5 */
+		{ 0xffc3, 0x0b, false },	/* F6 */
+		{ 0xffc4, 0x83, false },	/* F7 */
+		{ 0xffc5, 0x0a, false },	/* F8 */
+		{ 0xffc6, 0x01, false },	/* F9 */
+		{ 0xffc7, 0x09, false },	/* F10 */
+		{ 0xffc8, 0x78, false },	/* F11 */
+		{ 0xffc9, 0x07, false },	/* F12 */
+		{ 0xffe1, 0x12, false },	/* Left shift */
+		{ 0xffe2, 0x00, false },	/* Right shift: XXX */
+		{ 0xffe3, 0x14, false },	/* Left control */
+		{ 0xffe4, 0x00, false },	/* Right control: XXX */
+		{ 0xffe7, 0x00, false },	/* Left meta: XXX */
+		{ 0xffe8, 0x00, false },	/* Right meta: XXX */
+		{ 0xffe9, 0x11, false },	/* Left alt */
+		{ 0xffea, 0x00, false },	/* Right alt: XXX */
+	};
+	const size_t nspecial = sizeof(special) / sizeof(special[0]);
+	uint8_t code;
+	bool ext;
+	size_t i;
+
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	if (keysym < sizeof(translation)) {
+		/* ASCII: always queued, even when the table entry is 0x00. */
+		code = translation[keysym];
+		ext = false;
+	} else {
+		for (i = 0; i < nspecial; i++) {
+			if (special[i].keysym == keysym)
+				break;
+		}
+		if (i == nspecial) {
+			fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n",
+			     keysym);
+			return;
+		}
+		if (special[i].code == 0)
+			return;
+		code = special[i].code;
+		ext = special[i].ext;
+	}
+
+	/* Byte order on the wire: [0xe0] [0xf0] scancode. */
+	if (ext)
+		fifo_put(sc, 0xe0);
+	if (!down)
+		fifo_put(sc, 0xf0);
+	fifo_put(sc, code);
+}
+
+/*
+ * Console keyboard callback.  'arg' is the ps2kbd_softc that was passed to
+ * console_kbd_register().  While the keyboard is enabled the key event is
+ * queued and the controller is notified; otherwise the event is dropped.
+ */
+static void
+ps2kbd_event(int down, uint32_t keysym, void *arg)
+{
+	struct ps2kbd_softc *sc = arg;
+	bool queued = false;
+
+	pthread_mutex_lock(&sc->mtx);
+	if (sc->enabled) {
+		ps2kbd_keysym_queue(sc, down, keysym);
+		queued = true;
+	}
+	pthread_mutex_unlock(&sc->mtx);
+
+	/* Notify the controller only after the lock has been released. */
+	if (queued)
+		atkbdc_event(sc->atkbdc_sc);
+}
+
+/*
+ * Allocate and initialize a PS/2 keyboard instance attached to the given
+ * controller, and register it for console keyboard events.
+ *
+ * Returns the new instance.  Allocation failure is treated as fatal
+ * instead of being dereferenced as a NULL pointer.
+ */
+struct ps2kbd_softc *
+ps2kbd_init(struct atkbdc_softc *atkbdc_sc)
+{
+	struct ps2kbd_softc *sc;
+
+	sc = calloc(1, sizeof (struct ps2kbd_softc));
+	assert(sc != NULL);
+	pthread_mutex_init(&sc->mtx, NULL);
+	fifo_init(sc);
+	sc->atkbdc_sc = atkbdc_sc;
+
+	console_kbd_register(ps2kbd_event, sc);
+
+	return (sc);
+}
diff --git a/usr/src/cmd/bhyve/ps2kbd.h b/usr/src/cmd/bhyve/ps2kbd.h
new file mode 100644
index 0000000000..34c31b1ea8
--- /dev/null
+++ b/usr/src/cmd/bhyve/ps2kbd.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PS2KBD_H_
+#define	_PS2KBD_H_
+
+struct atkbdc_softc;
+
+/* Create a keyboard instance attached to the controller 'sc'. */
+struct ps2kbd_softc *ps2kbd_init(struct atkbdc_softc *sc);
+
+/* Fetch the next queued byte into *val; returns 0, or -1 if none is queued. */
+int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val);
+/* Deliver a command or argument byte to the keyboard. */
+void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val);
+
+#endif /* _PS2KBD_H_ */
diff --git a/usr/src/cmd/bhyve/ps2mouse.c b/usr/src/cmd/bhyve/ps2mouse.c
new file mode 100644
index 0000000000..e96fbbf411
--- /dev/null
+++ b/usr/src/cmd/bhyve/ps2mouse.c
@@ -0,0 +1,371 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2015 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "atkbdc.h"
+#include "console.h"
+
+/* mouse device commands */
+#define	PS2MC_RESET_DEV		0xff
+#define	PS2MC_SET_DEFAULTS	0xf6
+#define	PS2MC_DISABLE		0xf5
+#define	PS2MC_ENABLE		0xf4
+#define	PS2MC_SET_SAMPLING_RATE	0xf3
+#define	PS2MC_SEND_DEV_ID	0xf2
+#define	PS2MC_SET_REMOTE_MODE	0xf0
+#define	PS2MC_SEND_DEV_DATA	0xeb
+#define	PS2MC_SET_STREAM_MODE	0xea
+#define	PS2MC_SEND_DEV_STATUS	0xe9
+#define	PS2MC_SET_RESOLUTION	0xe8
+#define	PS2MC_SET_SCALING1	0xe7
+#define	PS2MC_SET_SCALING2	0xe6
+
+#define	PS2MC_BAT_SUCCESS	0xaa
+#define	PS2MC_ACK		0xfa
+
+/* mouse device id */
+#define	PS2MOUSE_DEV_ID		0x0
+
+/* mouse status bits */
+#define	PS2M_STS_REMOTE_MODE	0x40
+#define	PS2M_STS_ENABLE_DEV	0x20
+#define	PS2M_STS_SCALING_21	0x10
+#define	PS2M_STS_MID_BUTTON	0x04
+#define	PS2M_STS_RIGHT_BUTTON	0x02
+#define	PS2M_STS_LEFT_BUTTON	0x01
+
+#define	PS2MOUSE_FIFOSZ		16
+
+/*
+ * Fixed-capacity circular byte queue.  Bytes are appended at 'windex' and
+ * consumed from 'rindex'; both wrap modulo 'size'.
+ */
+struct fifo {
+	uint8_t	buf[PS2MOUSE_FIFOSZ];
+	int	rindex;		/* index to read from */
+	int	windex;		/* index to write to */
+	int	num;		/* number of bytes in the fifo */
+	int	size;		/* size of the fifo */
+};
+
+/*
+ * Per-instance state of the emulated PS/2 mouse.  'mtx' is held around
+ * the accesses to the remaining fields made by the read, write and event
+ * paths in this file.
+ */
+struct ps2mouse_softc {
+	/* controller instance handed to atkbdc_event() after a mouse event */
+	struct atkbdc_softc	*atkbdc_sc;
+	pthread_mutex_t		mtx;
+
+	uint8_t		status;		/* PS2M_STS_* bits */
+	uint8_t		resolution;	/* last value set by the guest */
+	uint8_t		sampling_rate;	/* last value set by the guest */
+	struct fifo	fifo;		/* bytes waiting for ps2mouse_read() */
+
+	uint8_t		curcmd;	/* current command for next byte */
+
+	int		cur_x, cur_y;	/* last reported pointer position */
+	/* movement accumulated since the last movement packet */
+	int		delta_x, delta_y;
+};
+
+/*
+ * Record the capacity of the mouse FIFO.  The indices and byte count are
+ * left as they are.
+ */
+static void
+fifo_init(struct ps2mouse_softc *sc)
+{
+	sc->fifo.size = sizeof(sc->fifo.buf);
+}
+
+/*
+ * Discard everything queued in the mouse FIFO and restore its capacity.
+ */
+static void
+fifo_reset(struct ps2mouse_softc *sc)
+{
+	struct fifo *q = &sc->fifo;
+
+	bzero(q, sizeof(*q));
+	q->size = sizeof(q->buf);
+}
+
+/*
+ * Append 'val' to the mouse FIFO.  The byte is silently dropped when the
+ * FIFO is already full.
+ */
+static void
+fifo_put(struct ps2mouse_softc *sc, uint8_t val)
+{
+	struct fifo *q = &sc->fifo;
+
+	if (q->num >= q->size)
+		return;
+
+	q->buf[q->windex] = val;
+	q->windex = (q->windex + 1) % q->size;
+	q->num++;
+}
+
+/*
+ * Remove the oldest byte from the mouse FIFO and store it in *val.
+ * Returns 0 on success, or -1 (leaving *val untouched) when the FIFO is
+ * empty.
+ */
+static int
+fifo_get(struct ps2mouse_softc *sc, uint8_t *val)
+{
+	struct fifo *q = &sc->fifo;
+
+	if (q->num <= 0)
+		return (-1);
+
+	*val = q->buf[q->rindex];
+	q->rindex = (q->rindex + 1) % q->size;
+	q->num--;
+
+	return (0);
+}
+
+/*
+ * Forget any movement accumulated so far.  The caller must hold sc->mtx.
+ */
+static void
+movement_reset(struct ps2mouse_softc *sc)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	sc->delta_x = sc->delta_y = 0;
+}
+
+/*
+ * Fold a new absolute pointer position into the accumulated movement and
+ * remember it as the current position.  The y axis is inverted: a smaller
+ * 'y' produces a positive delta.
+ */
+static void
+movement_update(struct ps2mouse_softc *sc, int x, int y)
+{
+	const int dx = x - sc->cur_x;
+	const int dy = sc->cur_y - y;
+
+	sc->cur_x = x;
+	sc->cur_y = y;
+	sc->delta_x += dx;
+	sc->delta_y += dy;
+}
+
+/*
+ * Encode one axis of a movement packet.  Returns the data byte for
+ * 'delta' and sets 'signbit' in *flags for a negative delta and 'ovfbit'
+ * when the magnitude exceeds 255 (the data byte is then pinned to 255).
+ */
+static uint8_t
+movement_axis_encode(int delta, uint8_t signbit, uint8_t ovfbit,
+    uint8_t *flags)
+{
+	if (delta < 0)
+		*flags |= signbit;
+
+	if (delta > 255 || delta < -255) {
+		*flags |= ovfbit;
+		return (255);
+	}
+
+	return ((uint8_t)delta);
+}
+
+/*
+ * Queue a three-byte movement packet (button/flag byte, x byte, y byte)
+ * built from the current button state and the accumulated deltas, then
+ * clear the deltas.  The caller must hold sc->mtx.
+ */
+static void
+movement_get(struct ps2mouse_softc *sc)
+{
+	uint8_t flags, xbyte, ybyte;
+
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	/* Only the three button bits of the status are carried over. */
+	flags = sc->status & (PS2M_STS_LEFT_BUTTON |
+	    PS2M_STS_RIGHT_BUTTON | PS2M_STS_MID_BUTTON);
+
+	/* x: sign in bit 4, overflow in bit 6 */
+	xbyte = movement_axis_encode(sc->delta_x, 1 << 4, 1 << 6, &flags);
+	sc->delta_x = 0;
+
+	/* y: sign in bit 5, overflow in bit 7 */
+	ybyte = movement_axis_encode(sc->delta_y, 1 << 5, 1 << 7, &flags);
+	sc->delta_y = 0;
+
+	fifo_put(sc, flags);
+	fifo_put(sc, xbyte);
+	fifo_put(sc, ybyte);
+}
+
+/*
+ * Return the mouse to its power-on state: empty FIFO, no pending movement,
+ * position (0, 0), status 0x8, resolution 4 and sampling rate 100.
+ * The caller must hold sc->mtx.
+ */
+static void
+ps2mouse_reset(struct ps2mouse_softc *sc)
+{
+	assert(pthread_mutex_isowned_np(&sc->mtx));
+
+	fifo_reset(sc);
+	/* Also zeroes delta_x and delta_y. */
+	movement_reset(sc);
+
+	sc->cur_x = 0;
+	sc->cur_y = 0;
+
+	sc->status = 0x8;
+	sc->resolution = 4;
+	sc->sampling_rate = 100;
+}
+
+/*
+ * Fetch the next byte the mouse has queued for the host side.
+ * Returns 0 and fills *val on success, -1 when nothing is queued.
+ */
+int
+ps2mouse_read(struct ps2mouse_softc *sc, uint8_t *val)
+{
+	int rc;
+
+	pthread_mutex_lock(&sc->mtx);
+	rc = fifo_get(sc, val);
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (rc);
+}
+
+/*
+ * Process one byte written to the mouse.
+ *
+ * When a previous command is waiting for its argument (sc->curcmd is set),
+ * 'val' is stored as that argument; otherwise it is decoded as a new
+ * command.  Responses are queued in the FIFO for ps2mouse_read().
+ */
+void
+ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val)
+{
+	pthread_mutex_lock(&sc->mtx);
+
+	if (sc->curcmd != 0) {
+		/* 'val' is the argument of the pending two-byte command. */
+		switch (sc->curcmd) {
+		case PS2MC_SET_SAMPLING_RATE:
+			sc->sampling_rate = val;
+			fifo_put(sc, PS2MC_ACK);
+			break;
+		case PS2MC_SET_RESOLUTION:
+			sc->resolution = val;
+			fifo_put(sc, PS2MC_ACK);
+			break;
+		default:
+			fprintf(stderr, "Unhandled ps2 mouse current "
+			    "command byte 0x%02x\n", val);
+			break;
+		}
+		sc->curcmd = 0;
+		pthread_mutex_unlock(&sc->mtx);
+		return;
+	}
+
+	switch (val) {
+	case PS2MC_RESET_DEV:
+		ps2mouse_reset(sc);
+		fifo_put(sc, PS2MC_ACK);
+		fifo_put(sc, PS2MC_BAT_SUCCESS);
+		fifo_put(sc, PS2MOUSE_DEV_ID);
+		break;
+	case PS2MC_SET_DEFAULTS:
+		ps2mouse_reset(sc);
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_DISABLE:
+		fifo_reset(sc);
+		sc->status &= ~PS2M_STS_ENABLE_DEV;
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_ENABLE:
+		fifo_reset(sc);
+		sc->status |= PS2M_STS_ENABLE_DEV;
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_SET_SAMPLING_RATE:
+	case PS2MC_SET_RESOLUTION:
+		/* Two-byte commands: remember which one awaits its argument. */
+		sc->curcmd = val;
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_SEND_DEV_ID:
+		fifo_put(sc, PS2MC_ACK);
+		fifo_put(sc, PS2MOUSE_DEV_ID);
+		break;
+	case PS2MC_SET_REMOTE_MODE:
+		sc->status |= PS2M_STS_REMOTE_MODE;
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_SEND_DEV_DATA:
+		fifo_put(sc, PS2MC_ACK);
+		movement_get(sc);
+		break;
+	case PS2MC_SET_STREAM_MODE:
+		sc->status &= ~PS2M_STS_REMOTE_MODE;
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	case PS2MC_SEND_DEV_STATUS:
+		fifo_put(sc, PS2MC_ACK);
+		fifo_put(sc, sc->status);
+		fifo_put(sc, sc->resolution);
+		fifo_put(sc, sc->sampling_rate);
+		break;
+	case PS2MC_SET_SCALING1:
+	case PS2MC_SET_SCALING2:
+		/* Acknowledged only; no scaling state is kept. */
+		fifo_put(sc, PS2MC_ACK);
+		break;
+	default:
+		fprintf(stderr, "Unhandled ps2 mouse command "
+		    "0x%02x\n", val);
+		break;
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Console pointer callback.  'arg' is the ps2mouse_softc that was passed
+ * to console_ptr_register().  The position and button state are always
+ * recorded; a movement packet is queued and the controller notified only
+ * while data reporting is enabled.
+ */
+static void
+ps2mouse_event(uint8_t button, int x, int y, void *arg)
+{
+	struct ps2mouse_softc *sc = arg;
+	uint8_t pressed = 0;
+	bool reporting;
+
+	/* Map the console button mask onto the PS/2 status bits. */
+	if (button & (1 << 0))
+		pressed |= PS2M_STS_LEFT_BUTTON;
+	if (button & (1 << 1))
+		pressed |= PS2M_STS_MID_BUTTON;
+	if (button & (1 << 2))
+		pressed |= PS2M_STS_RIGHT_BUTTON;
+
+	pthread_mutex_lock(&sc->mtx);
+	movement_update(sc, x, y);
+
+	sc->status = (sc->status & ~(PS2M_STS_LEFT_BUTTON |
+	    PS2M_STS_RIGHT_BUTTON | PS2M_STS_MID_BUTTON)) | pressed;
+
+	/* Without PS2M_STS_ENABLE_DEV there is no data reporting. */
+	reporting = (sc->status & PS2M_STS_ENABLE_DEV) != 0;
+	if (reporting)
+		movement_get(sc);
+	pthread_mutex_unlock(&sc->mtx);
+
+	/* Notify the controller only after the lock has been released. */
+	if (reporting)
+		atkbdc_event(sc->atkbdc_sc);
+}
+
+/*
+ * Allocate and initialize a PS/2 mouse instance attached to the given
+ * controller, reset it to its default state, and register it for console
+ * pointer events.
+ *
+ * Returns the new instance.  Allocation failure is treated as fatal
+ * instead of being dereferenced as a NULL pointer.
+ */
+struct ps2mouse_softc *
+ps2mouse_init(struct atkbdc_softc *atkbdc_sc)
+{
+	struct ps2mouse_softc *sc;
+
+	sc = calloc(1, sizeof (struct ps2mouse_softc));
+	assert(sc != NULL);
+	pthread_mutex_init(&sc->mtx, NULL);
+	fifo_init(sc);
+	sc->atkbdc_sc = atkbdc_sc;
+
+	/* ps2mouse_reset() asserts that the lock is held. */
+	pthread_mutex_lock(&sc->mtx);
+	ps2mouse_reset(sc);
+	pthread_mutex_unlock(&sc->mtx);
+
+	console_ptr_register(ps2mouse_event, sc);
+
+	return (sc);
+}
+
diff --git a/usr/src/cmd/bhyve/ps2mouse.h b/usr/src/cmd/bhyve/ps2mouse.h
new file mode 100644
index 0000000000..1a78934b98
--- /dev/null
+++ b/usr/src/cmd/bhyve/ps2mouse.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PS2MOUSE_H_
+#define	_PS2MOUSE_H_
+
+struct atkbdc_softc;
+
+/* Create a mouse instance attached to the controller 'sc'. */
+struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc);
+
+/* Fetch the next queued byte into *val; returns 0, or -1 if none is queued. */
+int ps2mouse_read(struct ps2mouse_softc *sc, uint8_t *val);
+/* Deliver a command or argument byte to the mouse. */
+void ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val);
+
+#endif /* _PS2MOUSE_H_ */
diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c
new file mode 100644
index 0000000000..0846316378
--- /dev/null
+++ b/usr/src/cmd/bhyve/rfb.c
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2015 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bhyvegc.h"
+#include "console.h"
+#include "rfb.h"
+
+struct rfb_softc {
+	int		sfd;		/* listening socket (accept()ed on) */
+	pthread_t	tid;		/* thread running rfb_thr() */
+
+	int		width, height;	/* image size at last update */
+
+	bool		enc_raw_ok;	/* client listed RFB_ENCODING_RAW */
+	bool		enc_resize_ok;	/* client listed RFB_ENCODING_RESIZE */
+};
+
+struct rfb_pixfmt {
+	uint8_t		bpp;		/* bits per pixel */
+	uint8_t		depth;		/* significant bits per pixel */
+	uint8_t		bigendian;	/* non-zero: big-endian pixels */
+	uint8_t		truecolor;	/* non-zero: max/shift fields apply */
+	uint16_t	red_max;	/* *_max: network byte order */
+	uint16_t	green_max;
+	uint16_t	blue_max;
+	uint8_t		red_shift;
+	uint8_t		green_shift;
+	uint8_t		blue_shift;
+	uint8_t		pad[3];
+};
+
+struct rfb_srvr_info {
+	uint16_t		width;		/* network byte order */
+	uint16_t		height;		/* network byte order */
+	struct rfb_pixfmt	pixfmt;
+	uint32_t		namelen;	/* name bytes that follow */
+};
+
+struct rfb_pixfmt_msg {
+	uint8_t			type;	/* consumed by rfb_handle() */
+	uint8_t			pad[3];
+	struct rfb_pixfmt	pixfmt;	/* received but not acted upon */
+};
+
+#define	RFB_ENCODING_RAW		0
+#define	RFB_ENCODING_RESIZE		-223
+
+struct rfb_enc_msg {
+	uint8_t		type;		/* consumed by rfb_handle() */
+	uint8_t		pad;
+	uint16_t	numencs;	/* # of 32-bit encodings after */
+};
+
+struct rfb_updt_msg {
+	uint8_t		type;		/* consumed by rfb_handle() */
+	uint8_t		incremental;	/* this and the fields below are */
+	uint16_t	x;		/* received but never examined */
+	uint16_t	y;
+	uint16_t	width;
+	uint16_t	height;
+};
+
+struct rfb_key_msg {
+	uint8_t		type;		/* consumed by rfb_handle() */
+	uint8_t		down;		/* passed to console_key_event() */
+	uint16_t	pad;
+	uint32_t	code;		/* network byte order */
+};
+
+struct rfb_ptr_msg {
+	uint8_t		type;		/* consumed by rfb_handle() */
+	uint8_t		button;		/* passed to console_ptr_event() */
+	uint16_t	x;		/* network byte order */
+	uint16_t	y;		/* network byte order */
+};
+
+struct rfb_srvr_updt_msg {
+	uint8_t		type;		/* always 0 in this file */
+	uint8_t		pad;
+	uint16_t	numrects;	/* network byte order */
+};
+
+struct rfb_srvr_rect_hdr {
+	uint16_t	x;		/* all fields: network byte order */
+	uint16_t	y;
+	uint16_t	width;
+	uint16_t	height;
+	uint32_t	encoding;	/* raw (0) or RFB_ENCODING_RESIZE */
+};
+
+/* Send ServerInit: geometry, fixed 32bpp pixel format, and name "bhyve". */
+static void
+rfb_send_server_init_msg(int cfd)
+{
+	struct bhyvegc_image *gc_image;
+	struct rfb_srvr_info sinfo;
+
+	memset(&sinfo, 0, sizeof(sinfo));	/* no stack bytes in pad[] */
+	gc_image = console_get_image();
+	sinfo.width = htons(gc_image->width);
+	sinfo.height = htons(gc_image->height);
+	sinfo.pixfmt.bpp = 32;
+	sinfo.pixfmt.depth = 32;
+	sinfo.pixfmt.bigendian = 0;
+	sinfo.pixfmt.truecolor = 1;
+	sinfo.pixfmt.red_max = htons(255);
+	sinfo.pixfmt.green_max = htons(255);
+	sinfo.pixfmt.blue_max = htons(255);
+	sinfo.pixfmt.red_shift = 16;
+	sinfo.pixfmt.green_shift = 8;
+	sinfo.pixfmt.blue_shift = 0;
+	sinfo.namelen = htonl(strlen("bhyve"));
+	(void) write(cfd, &sinfo, sizeof(sinfo));
+	(void) write(cfd, "bhyve", strlen("bhyve"));
+}
+
+/* Tell the client the framebuffer is now rc->width x rc->height. */
+static void
+rfb_send_resize_update_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_srvr_updt_msg hdr;
+	struct rfb_srvr_rect_hdr rect;
+
+	/* Update message (type 0) carrying exactly one rectangle. */
+	hdr.numrects = htons(1);
+	hdr.pad = 0;
+	hdr.type = 0;
+	write(cfd, &hdr, sizeof(hdr));
+
+	/* Resize pseudo-rectangle: origin 0,0 plus the new geometry. */
+	rect.encoding = htonl(RFB_ENCODING_RESIZE);
+	rect.height = htons(rc->height);
+	rect.width = htons(rc->width);
+	rect.x = rect.y = htons(0);
+	write(cfd, &rect, sizeof(rect));
+}
+
+/* Read and discard a set-pixel-format body (type byte already consumed). */
+static void
+rfb_recv_set_pixfmt_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_pixfmt_msg msg;
+
+	(void) read(cfd, (char *)&msg + 1, sizeof(msg) - 1);
+}
+
+
+/* Parse SetEncodings: note whether the client accepts raw and resize. */
+static void
+rfb_recv_set_encodings_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_enc_msg enc_msg;
+	int i, nencs;
+	uint32_t encoding;
+
+	/* Type byte already consumed by the caller; bail on a short read. */
+	assert((sizeof(enc_msg) - 1) == 3);
+	if (read(cfd, (char *)&enc_msg + 1, 3) != 3)
+		return;
+	nencs = ntohs(enc_msg.numencs);
+	for (i = 0; i < nencs; i++) {
+		if (read(cfd, &encoding, sizeof(encoding)) != sizeof(encoding))
+			return;
+		if (ntohl(encoding) == RFB_ENCODING_RAW)
+			rc->enc_raw_ok = true;
+		else if (ntohl(encoding) == (uint32_t)RFB_ENCODING_RESIZE)
+			rc->enc_resize_ok = true;
+	}
+}
+
+/*
+ * Send a resize notification for rc's geometry to fd.
+ *
+ *	rc - supplies the width and height to advertise
+ *	fd - connected client socket
+ *
+ * The bytes written are exactly those of rfb_send_resize_update_msg(),
+ * so delegate to it rather than keep a second hand-built copy of the
+ * same two write() calls.
+ *
+ * NOTE(review): nothing in this file calls this static function; it
+ * looks like a leftover from before rfb_send_resize_update_msg() and
+ * is a candidate for removal.
+ */
+static void
+rfb_resize_update(struct rfb_softc *rc, int fd)
+{
+
+	rfb_send_resize_update_msg(rc, fd);
+}
+
+/*
+ * Handle an update request: refresh the console and reply with the
+ * entire framebuffer as one raw rectangle (the requested region is ignored).
+ */
+static void
+rfb_recv_update_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_updt_msg updt_msg;
+	struct rfb_srvr_updt_msg supdt_msg;
+	struct rfb_srvr_rect_hdr srect_hdr;
+	struct bhyvegc_image *gc_image;
+
+	/* Type byte already consumed by the caller; the body is unused. */
+	(void) read(cfd, (char *)&updt_msg + 1, sizeof(updt_msg) - 1);
+
+	console_refresh();
+	gc_image = console_get_image();
+	if (rc->width != gc_image->width || rc->height != gc_image->height) {
+		rc->width = gc_image->width;
+		rc->height = gc_image->height;
+		/* Resize may only go to clients that listed the encoding. */
+		if (rc->enc_resize_ok)
+			rfb_send_resize_update_msg(rc, cfd);
+	}
+
+	/* Update header (type 0): one rectangle follows. */
+	supdt_msg.type = 0;
+	supdt_msg.pad = 0;
+	supdt_msg.numrects = htons(1);
+	write(cfd, &supdt_msg, sizeof(struct rfb_srvr_updt_msg));
+
+	/* Rectangle header covering the whole image, raw encoding. */
+	srect_hdr.x = srect_hdr.y = htons(0);
+	srect_hdr.width = htons(gc_image->width);
+	srect_hdr.height = htons(gc_image->height);
+	srect_hdr.encoding = htonl(RFB_ENCODING_RAW);
+	write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr));
+	write(cfd, gc_image->data, gc_image->width * gc_image->height *
+	    sizeof(uint32_t));
+}
+
+/* KeyEvent: forward to the console unless the read was short or failed. */
+static void
+rfb_recv_key_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_key_msg key_msg;
+	ssize_t want = sizeof(key_msg) - 1;	/* type byte already consumed */
+
+	if (read(cfd, (char *)&key_msg + 1, want) == want)
+		console_key_event(key_msg.down, ntohl(key_msg.code));
+}
+
+/* PointerEvent: forward to the console unless the read was short/failed. */
+static void
+rfb_recv_ptr_msg(struct rfb_softc *rc, int cfd)
+{
+	struct rfb_ptr_msg ptr_msg;
+	ssize_t want = sizeof(ptr_msg) - 1;	/* type byte already consumed */
+	if (read(cfd, (char *)&ptr_msg + 1, want) != want)
+		return;
+	console_ptr_event(ptr_msg.button, ntohs(ptr_msg.x), ntohs(ptr_msg.y));
+}
+
+/*
+ * Serve one RFB client on cfd: run the version/security/init handshake,
+ * then dispatch client messages until EOF, a read error, or an unknown
+ * message type.  This function never closes cfd.
+ */
+void
+rfb_handle(struct rfb_softc *rc, int cfd)
+{
+	const char *vbuf = "RFB 003.008\n";
+	unsigned char buf[80];
+	int len;
+	uint32_t sres;
+
+	/* 1a. Send server version */
+	printf("server vers write: (%s), %d bytes\n", vbuf, (int) strlen(vbuf));
+	write(cfd, vbuf, strlen(vbuf));
+	/* 1b. Read client version */
+	len = read(cfd, buf, sizeof(buf));
+
+	/* 2a. Send security type 'none' */
+	buf[0] = 1;
+	buf[1] = 1; /* none */
+	write(cfd, buf, 2);
+	/* 2b. Read agreed security type */
+	len = read(cfd, buf, 1);
+	/* 2c. Write back a status of 0 */
+	sres = 0;
+	write(cfd, &sres, 4);
+	/* 3a. Read client shared-flag byte */
+	len = read(cfd, buf, 1);
+	/* 4a. Write server-init info */
+	rfb_send_server_init_msg(cfd);
+
+	/* Now read in client requests. 1st byte identifies type */
+	for (;;) {
+		len = read(cfd, buf, 1);
+		if (len <= 0) {
+			printf("exiting\n");
+			break;
+		}
+		switch (buf[0]) {
+		case 0:
+			rfb_recv_set_pixfmt_msg(rc, cfd);
+			break;
+		case 2:
+			rfb_recv_set_encodings_msg(rc, cfd);
+			break;
+		case 3:
+			rfb_recv_update_msg(rc, cfd);
+			break;
+		case 4:
+			rfb_recv_key_msg(rc, cfd);
+			break;
+		case 5:
+			rfb_recv_ptr_msg(rc, cfd);
+			break;
+		default:
+			/* Drop this client; never exit the whole process. */
+			printf("unknown client code!\n");
+			return;
+		}
+	}
+}
+
+/* Listener thread: accept RFB clients on rc->sfd and serve them one by one. */
+static void *
+rfb_thr(void *arg)
+{
+	struct rfb_softc *rc = arg;
+	sigset_t set;
+	int cfd;
+
+	/* Keep SIGPIPE from being delivered to this thread. */
+	sigemptyset(&set);
+	sigaddset(&set, SIGPIPE);
+	if (pthread_sigmask(SIG_BLOCK, &set, NULL) != 0) {
+		perror("pthread_sigmask");
+		return (NULL);
+	}
+	for (;;) {
+		cfd = accept(rc->sfd, NULL, NULL);
+		if (cfd < 0)
+			continue;	/* nothing to serve or close */
+		rfb_handle(rc, cfd);
+		(void) close(cfd);	/* rfb_handle() never closes cfd */
+	}
+	/* NOTREACHED */
+	return (NULL);
+}
+
+/* Listen on TCP `port` and start the server thread; 0 on success, else -1. */
+int
+rfb_init(int port)
+{
+	struct rfb_softc *rc = calloc(1, sizeof(struct rfb_softc));
+	struct sockaddr_in sin;
+	int on = 1;
+
+	if (rc == NULL)
+		return (-1);
+	rc->sfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (rc->sfd < 0) {
+		perror("socket");
+		goto fail;
+	}
+	setsockopt(rc->sfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+#ifdef	__FreeBSD__
+	sin.sin_len = sizeof(sin);
+#endif
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(port);
+	if (bind(rc->sfd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+		perror("bind");
+		goto fail;
+	}
+	if (listen(rc->sfd, 1) < 0) {
+		perror("listen");
+		goto fail;
+	}
+	if (pthread_create(&rc->tid, NULL, rfb_thr, rc) == 0)
+		return (0);
+fail:
+	(void) close(rc->sfd);	/* harmless if socket() itself failed */
+	free(rc);
+	return (-1);
+}
diff --git a/usr/src/cmd/bhyve/rfb.h b/usr/src/cmd/bhyve/rfb.h
new file mode 100644
index 0000000000..5504c333ab
--- /dev/null
+++ b/usr/src/cmd/bhyve/rfb.h
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _RFB_H_
+#define	_RFB_H_
+
+#define	RFB_PORT	5900
+
+int	rfb_init(int port);
+
+#endif /* _RFB_H_ */
diff --git a/usr/src/cmd/bhyve/rtc.c b/usr/src/cmd/bhyve/rtc.c
new file mode 100644
index 0000000000..5ab78e060f
--- /dev/null
+++ b/usr/src/cmd/bhyve/rtc.c
@@ -0,0 +1,380 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/rtc.c 260206 2014-01-02 21:26:59Z jhb $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/rtc.c 260206 2014-01-02 21:26:59Z jhb $");
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "acpi.h"
+#include "inout.h"
+#include "pci_lpc.h"
+#include "rtc.h"
+
+#define	IO_RTC	0x70
+
+#define RTC_SEC		0x00	/* seconds */
+#define	RTC_SEC_ALARM	0x01
+#define	RTC_MIN		0x02
+#define	RTC_MIN_ALARM	0x03
+#define	RTC_HRS		0x04
+#define	RTC_HRS_ALARM	0x05
+#define	RTC_WDAY	0x06
+#define	RTC_DAY		0x07
+#define	RTC_MONTH	0x08
+#define	RTC_YEAR	0x09
+#define	RTC_CENTURY	0x32	/* current century */
+
+#define RTC_STATUSA	0xA
+#define  RTCSA_TUP	 0x80	/* time update, don't look now */
+
+#define	RTC_STATUSB	0xB
+#define	 RTCSB_DST	 0x01
+#define	 RTCSB_24HR	 0x02
+#define	 RTCSB_BIN	 0x04	/* 0 = BCD, 1 = Binary */
+#define	 RTCSB_PINTR	 0x40	/* 1 = enable periodic clock interrupt */
+#define	 RTCSB_HALT      0x80	/* stop clock updates */
+
+#define RTC_INTR	0x0c	/* status register C (R) interrupt source */
+
+#define RTC_STATUSD	0x0d	/* status register D (R) Lost Power */
+#define  RTCSD_PWR	 0x80	/* clock power OK */
+
+#define	RTC_NVRAM_START	0x0e
+#define	RTC_NVRAM_END	0x7f
+#define RTC_NVRAM_SZ	(128 - RTC_NVRAM_START)
+#define	nvoff(x)	((x) - RTC_NVRAM_START)
+
+#define	RTC_DIAG	0x0e
+#define RTC_RSTCODE	0x0f
+#define	RTC_EQUIPMENT	0x14
+#define	RTC_LMEM_LSB	0x34
+#define	RTC_LMEM_MSB	0x35
+#define	RTC_HMEM_LSB	0x5b
+#define	RTC_HMEM_SB	0x5c
+#define	RTC_HMEM_MSB	0x5d
+
+#define m_64KB		(64*1024)
+#define	m_16MB		(16*1024*1024)
+#define	m_4GB		(4ULL*1024*1024*1024)
+
+static int addr;
+
+static uint8_t rtc_nvram[RTC_NVRAM_SZ];
+
+/* XXX initialize these to default values as they would be from BIOS */
+static uint8_t status_a, status_b;
+
+static struct {
+	uint8_t  hours;
+	uint8_t  mins;
+	uint8_t  secs;
+} rtc_alarm;
+
+static u_char const bin2bcd_data[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
+};
+#define	bin2bcd(bin)	(bin2bcd_data[bin])
+
+#define	rtcout(val)	((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
+
+/* Fold a one-second tv_usec underflow or overflow back into tv_sec. */
+static void
+timevalfix(struct timeval *t1)
+{
+	if (t1->tv_usec < 0) {
+		t1->tv_usec += 1000000;
+		t1->tv_sec -= 1;
+	}
+	if (t1->tv_usec >= 1000000) {
+		t1->tv_usec -= 1000000;
+		t1->tv_sec += 1;
+	}
+}
+
+/* *t1 -= *t2, field by field, then renormalise via timevalfix(). */
+static void
+timevalsub(struct timeval *t1, const struct timeval *t2)
+{
+	t1->tv_usec -= t2->tv_usec;
+	t1->tv_sec -= t2->tv_sec;
+	timevalfix(t1);
+}
+
+/*
+ * I/O handler for the RTC index port (IO_RTC).
+ *
+ * A one-byte write latches the low seven bits of *eax into `addr`, which
+ * rtc_data_handler() then uses to pick a register; a one-byte read stores
+ * 0xff in *eax.  Any other access width is rejected with -1.
+ */
+static int
+rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	int reg;
+
+	if (bytes != 1)
+		return (-1);
+
+	if (in) {
+		/* straight read of this register will return 0xFF */
+		*eax = 0xff;
+		return (0);
+	}
+
+	/* Only registers this emulation knows about may be selected. */
+	reg = *eax & 0x7f;
+	switch (reg) {
+	case RTC_SEC ... RTC_YEAR:
+	case RTC_STATUSA:
+	case RTC_STATUSB:
+	case RTC_INTR:
+	case RTC_STATUSD:
+	case RTC_NVRAM_START ... RTC_NVRAM_END:
+		addr = reg;
+		return (0);
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * I/O handler for the RTC data port (IO_RTC + 1): read or write the
+ * register selected by `addr`.  Time-of-day reads come from a broken-down
+ * local time that is re-sampled at most once per second; writes to the
+ * time-of-day registers are ignored.  Returns 0 on success, or -1 for a
+ * non-byte access or an unsupported write (RTC_INTR, RTCSB_PINTR).
+ */
+static int
+rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	int hour;
+	time_t t;
+	struct timeval cur, delta;
+	static struct timeval last;
+	static struct tm tm;
+
+	if (bytes != 1)
+		return (-1);
+	gettimeofday(&cur, NULL);
+
+	/*
+	 * Increment the cached time only once per second so we can guarantee
+	 * that the guest has at least one second to read the hour:min:sec
+	 * separately and still get a coherent view of the time.
+	 */
+	delta = cur;
+	timevalsub(&delta, &last);
+	if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
+		t = cur.tv_sec;
+		localtime_r(&t, &tm);
+		last = cur;
+	}
+
+	if (in) {
+		switch (addr) {
+		case RTC_SEC_ALARM:
+			*eax = rtc_alarm.secs;
+			return (0);
+		case RTC_MIN_ALARM:
+			*eax = rtc_alarm.mins;
+			return (0);
+		case RTC_HRS_ALARM:
+			*eax = rtc_alarm.hours;
+			return (0);
+		case RTC_SEC:
+			*eax = rtcout(tm.tm_sec);
+			return (0);
+		case RTC_MIN:
+			*eax = rtcout(tm.tm_min);
+			return (0);
+		case RTC_HRS:
+			if (status_b & RTCSB_24HR)
+				hour = tm.tm_hour;
+			else	/* 12-hour clock runs 12, 1, ..., 11 */
+				hour = (tm.tm_hour + 11) % 12 + 1;
+			*eax = rtcout(hour);
+			/*
+			 * If we are representing time in the 12-hour format
+			 * then set the MSB to indicate PM.
+			 */
+			if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
+				*eax |= 0x80;
+			return (0);
+		case RTC_WDAY:
+			*eax = rtcout(tm.tm_wday + 1);
+			return (0);
+		case RTC_DAY:
+			*eax = rtcout(tm.tm_mday);
+			return (0);
+		case RTC_MONTH:
+			*eax = rtcout(tm.tm_mon + 1);
+			return (0);
+		case RTC_YEAR:
+			*eax = rtcout(tm.tm_year % 100);
+			return (0);
+		case RTC_STATUSA:
+			*eax = status_a;
+			return (0);
+		case RTC_STATUSB:
+			*eax = status_b;
+			return (0);
+		case RTC_INTR:
+			*eax = 0;
+			return (0);
+		case RTC_STATUSD:
+			*eax = RTCSD_PWR;
+			return (0);
+		case RTC_NVRAM_START ... RTC_NVRAM_END:
+			*eax = rtc_nvram[addr - RTC_NVRAM_START];
+			return (0);
+		default:
+			return (-1);
+		}
+	}
+
+	switch (addr) {
+	case RTC_STATUSA:
+		status_a = *eax & ~RTCSA_TUP;
+		break;
+	case RTC_STATUSB:
+		/* XXX not implemented yet XXX */
+		if (*eax & RTCSB_PINTR)
+			return (-1);
+		status_b = *eax;
+		break;
+	case RTC_STATUSD:
+		/* ignore write */
+		break;
+	case RTC_SEC_ALARM:
+		rtc_alarm.secs = *eax;
+		break;
+	case RTC_MIN_ALARM:
+		rtc_alarm.mins = *eax;
+		break;
+	case RTC_HRS_ALARM:
+		rtc_alarm.hours = *eax;
+		break;
+	case RTC_SEC:
+	case RTC_MIN:
+	case RTC_HRS:
+	case RTC_WDAY:
+	case RTC_DAY:
+	case RTC_MONTH:
+	case RTC_YEAR:
+		/* Ignore writes to the time of day registers */
+		break;
+	case RTC_NVRAM_START ... RTC_NVRAM_END:
+		rtc_nvram[addr - RTC_NVRAM_START] = *eax;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/* Seed the emulated CMOS NVRAM: century byte plus guest memory sizes. */
+void
+rtc_init(struct vmctx *ctx)
+{
+	struct timeval cur;
+	struct tm tm;
+	size_t himem, lomem;
+	int err;
+
+	err = gettimeofday(&cur, NULL);
+	assert(err == 0);
+	(void) localtime_r(&cur.tv_sec, &tm);
+
+	memset(rtc_nvram, 0, sizeof(rtc_nvram));
+	rtc_nvram[nvoff(RTC_CENTURY)] = bin2bcd((tm.tm_year + 1900) / 100);
+
+	/* XXX init diag/reset code/equipment/checksum ? */
+
+	/*
+	 * Report guest memory size in nvram cells as required by UEFI.
+	 * Little-endian encoding.
+	 * 0x34/0x35 - 64KB chunks above 16MB, below 4GB (0 if <= 16MB)
+	 * 0x5b/0x5c/0x5d - 64KB chunks above 4GB
+	 */
+	lomem = vm_get_lowmem_size(ctx);
+	lomem = (lomem > m_16MB) ? (lomem - m_16MB) / m_64KB : 0;
+	rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem;
+	rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8;
+
+	himem = vm_get_highmem_size(ctx) / m_64KB;
+	rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem;
+	rtc_nvram[nvoff(RTC_HMEM_SB)]  = himem >> 8;
+	rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16;
+}
+
+INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler);
+INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
+
+#ifdef	__FreeBSD__
+static void
+rtc_dsdt(void)
+{
+	/* Emit the DSDT "Device (RTC)" node via the dsdt_*() helpers. */
+	dsdt_line("");
+	dsdt_line("Device (RTC)");
+	dsdt_line("{");
+	dsdt_line("  Name (_HID, EisaId (\"PNP0B00\"))");
+	dsdt_line("  Name (_CRS, ResourceTemplate ()");
+	dsdt_line("  {");
+	dsdt_indent(2);
+	dsdt_fixed_ioport(IO_RTC, 2);
+	dsdt_fixed_irq(8);
+	dsdt_unindent(2);
+	dsdt_line("  })");
+	dsdt_line("}");
+}
+LPC_DSDT(rtc_dsdt);
+#endif
+
+SYSRES_IO(0x72, 6);
diff --git a/usr/src/cmd/bhyve/rtc.h b/usr/src/cmd/bhyve/rtc.h
new file mode 100644
index 0000000000..6406d24c37
--- /dev/null
+++ b/usr/src/cmd/bhyve/rtc.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/rtc.h 253181 2013-07-11 03:54:35Z grehan $
+ */
+
+#ifndef _RTC_H_
+#define _RTC_H_
+
+void	rtc_init(struct vmctx *ctx);
+
+#endif /* _RTC_H_ */
diff --git a/usr/src/cmd/bhyve/smbiostbl.c b/usr/src/cmd/bhyve/smbiostbl.c
new file mode 100644
index 0000000000..7ba0f0dfa0
--- /dev/null
+++ b/usr/src/cmd/bhyve/smbiostbl.c
@@ -0,0 +1,827 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/smbiostbl.c 272007 2014-09-23 01:17:22Z grehan $");
+
+#include <sys/param.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <md5.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <uuid.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "smbiostbl.h"
+
+#define	MB			(1024*1024)
+#define	GB			(1024ULL*1024*1024)
+
+#define SMBIOS_BASE		0xF1000
+
+/* (BHYVE_ACPI_BASE - SMBIOS_BASE) */
+#define	SMBIOS_MAX_LENGTH	(0xF2400 - 0xF1000)
+
+#define	SMBIOS_TYPE_BIOS	0
+#define	SMBIOS_TYPE_SYSTEM	1
+#define	SMBIOS_TYPE_CHASSIS	3
+#define	SMBIOS_TYPE_PROCESSOR	4
+#define	SMBIOS_TYPE_MEMARRAY	16
+#define	SMBIOS_TYPE_MEMDEVICE	17
+#define	SMBIOS_TYPE_MEMARRAYMAP	19
+#define	SMBIOS_TYPE_BOOT	32
+#define	SMBIOS_TYPE_EOT		127
+
+struct smbios_structure {
+	uint8_t		type;		/* one of SMBIOS_TYPE_* */
+	uint8_t		length;		/* size of the fixed-format struct */
+	uint16_t	handle;
+} __packed;
+
+typedef int (*initializer_func_t)(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+struct smbios_template_entry {
+	struct smbios_structure	*entry;
+	const char		**strings;
+	initializer_func_t	initializer;
+};
+
+/*
+ * SMBIOS Structure Table Entry Point
+ */
+#define	SMBIOS_ENTRY_EANCHOR	"_SM_"
+#define	SMBIOS_ENTRY_EANCHORLEN	4
+#define	SMBIOS_ENTRY_IANCHOR	"_DMI_"
+#define	SMBIOS_ENTRY_IANCHORLEN	5
+
+struct smbios_entry_point {
+	char		eanchor[4];	/* anchor tag */
+	uint8_t		echecksum;	/* checksum of entry point structure */
+	uint8_t		eplen;		/* length in bytes of entry point */
+	uint8_t		major;		/* major version of the SMBIOS spec */
+	uint8_t		minor;		/* minor version of the SMBIOS spec */
+	uint16_t	maxssize;	/* maximum size in bytes of a struct */
+	uint8_t		revision;	/* entry point structure revision */
+	uint8_t		format[5];	/* entry point rev-specific data */
+	char		ianchor[5];	/* intermediate anchor tag */
+	uint8_t		ichecksum;	/* intermediate checksum */
+	uint16_t	stlen;		/* len in bytes of structure table */
+	uint32_t	staddr;		/* physical addr of structure table */
+	uint16_t	stnum;		/* number of structure table entries */
+	uint8_t		bcdrev;		/* BCD value representing DMI ver */
+} __packed;
+
+/*
+ * BIOS Information
+ */
+#define	SMBIOS_FL_ISA		0x00000010	/* ISA is supported */
+#define	SMBIOS_FL_PCI		0x00000080	/* PCI is supported */
+#define	SMBIOS_FL_SHADOW	0x00001000	/* BIOS shadowing is allowed */
+#define	SMBIOS_FL_CDBOOT	0x00008000	/* Boot from CD is supported */
+#define	SMBIOS_FL_SELBOOT	0x00010000	/* Selectable Boot supported */
+#define	SMBIOS_FL_EDD		0x00080000	/* EDD Spec is supported */
+
+#define	SMBIOS_XB1_FL_ACPI	0x00000001	/* ACPI is supported */
+
+#define	SMBIOS_XB2_FL_BBS	0x00000001	/* BIOS Boot Specification */
+#define	SMBIOS_XB2_FL_VM	0x00000010	/* Virtual Machine */
+
+struct smbios_table_type0 {
+	struct smbios_structure	header;
+	uint8_t			vendor;		/* vendor string */
+	uint8_t			version;	/* version string */
+	uint16_t		segment;	/* address segment location */
+	uint8_t			rel_date;	/* release date */
+	uint8_t			size;		/* rom size */
+	uint64_t		cflags;		/* characteristics */
+	uint8_t			xc_bytes[2];	/* characteristics ext bytes */
+	uint8_t			sb_major_rel;	/* system bios version */
+	uint8_t			sb_minor_rele;
+	uint8_t			ecfw_major_rel;	/* embedded ctrl fw version */
+	uint8_t			ecfw_minor_rel;
+} __packed;
+
+/*
+ * System Information
+ */
+#define	SMBIOS_WAKEUP_SWITCH	0x06	/* power switch */
+
+struct smbios_table_type1 {
+	struct smbios_structure	header;
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			product;	/* product name string */
+	uint8_t			version;	/* version string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			uuid[16];	/* uuid byte array */
+	uint8_t			wakeup;		/* wake-up event */
+	uint8_t			sku;		/* sku number string */
+	uint8_t			family;		/* family name string */
+} __packed;
+
+/*
+ * System Enclosure or Chassis
+ */
+#define	SMBIOS_CHT_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_CHST_SAFE	0x03	/* safe */
+
+#define	SMBIOS_CHSC_NONE	0x03	/* none */
+
+struct smbios_table_type3 {
+	struct smbios_structure	header;
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			type;		/* type */
+	uint8_t			version;	/* version string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			bustate;	/* boot-up state */
+	uint8_t			psstate;	/* power supply state */
+	uint8_t			tstate;		/* thermal state */
+	uint8_t			security;	/* security status */
+	uint8_t			uheight;	/* height in 'u's */
+	uint8_t			cords;		/* number of power cords */
+	uint8_t			elems;		/* number of element records */
+	uint8_t			elemlen;	/* length of records */
+	uint8_t			sku;		/* sku number string */
+} __packed;
+
+/*
+ * Processor Information
+ */
+#define	SMBIOS_PRT_CENTRAL	0x03	/* central processor */
+
+#define	SMBIOS_PRF_OTHER	0x01	/* other */
+
+#define	SMBIOS_PRS_PRESENT	0x40	/* socket is populated */
+#define	SMBIOS_PRS_ENABLED	0x1	/* enabled */
+
+#define	SMBIOS_PRU_NONE		0x06	/* none */
+
+#define	SMBIOS_PFL_64B	0x04	/* 64-bit capable */
+
+struct smbios_table_type4 {
+	struct smbios_structure	header;
+	uint8_t			socket;		/* socket designation string */
+	uint8_t			type;		/* processor type */
+	uint8_t			family;		/* processor family */
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint64_t		cpuid;		/* processor cpuid */
+	uint8_t			version;	/* version string */
+	uint8_t			voltage;	/* voltage */
+	uint16_t		clkspeed;	/* ext clock speed in mhz */
+	uint16_t		maxspeed;	/* maximum speed in mhz */
+	uint16_t		curspeed;	/* current speed in mhz */
+	uint8_t			status;		/* status */
+	uint8_t			upgrade;	/* upgrade */
+	uint16_t		l1handle;	/* l1 cache handle */
+	uint16_t		l2handle;	/* l2 cache handle */
+	uint16_t		l3handle;	/* l3 cache handle */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			part;		/* part number string */
+	uint8_t			cores;		/* cores per socket */
+	uint8_t			ecores;		/* enabled cores */
+	uint8_t			threads;	/* threads per socket */
+	uint16_t		cflags;		/* processor characteristics */
+	uint16_t		family2;	/* processor family 2 */
+} __packed;
+
+/*
+ * Physical Memory Array
+ */
+#define	SMBIOS_MAL_SYSMB	0x03	/* system board or motherboard */
+
+#define	SMBIOS_MAU_SYSTEM	0x03	/* system memory */
+
+#define	SMBIOS_MAE_NONE		0x03	/* none */
+
+struct smbios_table_type16 {
+	struct smbios_structure	header;
+	uint8_t			location;	/* physical device location */
+	uint8_t			use;		/* device functional purpose */
+	uint8_t			ecc;		/* err detect/correct method */
+	uint32_t		size;		/* max mem capacity in kb */
+	uint16_t		errhand;	/* handle of error (if any) */
+	uint16_t		ndevs;		/* num of slots or sockets */
+	uint64_t		xsize;		/* max mem capacity in bytes */
+} __packed;
+
+/*
+ * Memory Device
+ */
+#define	SMBIOS_MDFF_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_MDT_UNKNOWN	0x02	/* unknown */
+
+#define	SMBIOS_MDF_UNKNOWN	0x0004	/* unknown */
+
+struct smbios_table_type17 {
+	struct smbios_structure	header;
+	uint16_t		arrayhand;	/* handle of physl mem array */
+	uint16_t		errhand;	/* handle of mem error data */
+	uint16_t		twidth;		/* total width in bits */
+	uint16_t		dwidth;		/* data width in bits */
+	uint16_t		size;		/* size in mb (bit 15: kb) */
+	uint8_t			form;		/* form factor */
+	uint8_t			set;		/* set */
+	uint8_t			dloc;		/* device locator string */
+	uint8_t			bloc;		/* phys bank locator string */
+	uint8_t			type;		/* memory type */
+	uint16_t		flags;		/* memory characteristics */
+	uint16_t		maxspeed;	/* maximum speed in mhz */
+	uint8_t			manufacturer;	/* manufacturer string */
+	uint8_t			serial;		/* serial number string */
+	uint8_t			asset;		/* asset tag string */
+	uint8_t			part;		/* part number string */
+	uint8_t			attributes;	/* attributes */
+	uint32_t		xsize;		/* extended size in mbs */
+	uint16_t		curspeed;	/* current speed in mhz */
+	uint16_t		minvoltage;	/* minimum voltage */
+	uint16_t		maxvoltage;	/* maximum voltage */
+	uint16_t		curvoltage;	/* configured voltage */
+} __packed;
+
+/*
+ * Memory Array Mapped Address (structure type 19)
+ *
+ * Packed layout as copied into guest memory.  The template stores
+ * 0xffffffff in saddr/eaddr ("use ext" per its comments) and
+ * smbios_type19_initializer() fills in arrayhand, xsaddr and xeaddr.
+ */
+struct smbios_table_type19 {
+	struct smbios_structure	header;
+	uint32_t		saddr;		/* start phys addr in kb */
+	uint32_t		eaddr;		/* end phys addr in kb */
+	uint16_t		arrayhand;	/* physical mem array handle */
+	uint8_t			width;		/* num of dev in row */
+	uint64_t		xsaddr;		/* start phys addr in bytes */
+	uint64_t		xeaddr;		/* end phys addr in bytes */
+} __packed;
+
+/*
+ * System Boot Information (structure type 32)
+ *
+ * SMBIOS_BOOT_NORMAL is the only boot status value this file emits.
+ */
+#define	SMBIOS_BOOT_NORMAL	0	/* no errors detected */
+
+/* Packed layout of the type 32 structure as copied into guest memory. */
+struct smbios_table_type32 {
+	struct smbios_structure	header;
+	uint8_t			reserved[6];
+	uint8_t			status;		/* boot status */
+} __packed;
+
+/*
+ * End-of-Table (structure type 127)
+ *
+ * Consists of the common header only; it is the last entry in
+ * smbios_template[] and therefore the last structure written.
+ */
+struct smbios_table_type127 {
+	struct smbios_structure	header;
+} __packed;
+
+/*
+ * Template for the BIOS information structure (type 0).
+ *
+ * The header handle is written as 0 here and replaced by
+ * smbios_generic_initializer() when the template is copied out.  The
+ * small integers tagged "string" are 1-based positions within
+ * smbios_type0_strings[].
+ */
+struct smbios_table_type0 smbios_type0_template = {
+	{ SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 },
+	1,	/* bios vendor string */
+	2,	/* bios version string */
+	0xF000,	/* bios address segment location */
+	3,	/* bios release date */
+	0x0,	/* bios size (64k * (n + 1) is the size in bytes) */
+	SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW |
+	    SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD,
+	{ SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM },
+	0x0,	/* bios major release */
+	0x0,	/* bios minor release */
+	0xff,	/* embedded controller firmware major release */
+	0xff	/* embedded controller firmware minor release */
+};
+
+/*
+ * String table for the type 0 structure, NULL terminated.  The version
+ * and release date strings are the compiler's __TIME__ and __DATE__, so
+ * they reflect when this file was compiled.
+ */
+const char *smbios_type0_strings[] = {
+	"BHYVE",	/* vendor string */
+	__TIME__,	/* bios version string */
+	__DATE__,	/* bios release date string */
+	NULL
+};
+
+/*
+ * Template for the system information structure (type 1).
+ *
+ * The "{ 0 }" member is the UUID, which smbios_type1_initializer()
+ * overwrites after the template has been copied into guest memory.
+ * String members are 1-based positions within smbios_type1_strings[].
+ */
+struct smbios_table_type1 smbios_type1_template = {
+	{ SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 },
+	1,		/* manufacturer string */
+	2,		/* product string */
+	3,		/* version string */
+	4,		/* serial number string */
+	{ 0 },
+	SMBIOS_WAKEUP_SWITCH,
+	5,		/* sku string */
+	6		/* family string */
+};
+
+/* Defined below; declared here so smbios_template[] can reference it. */
+static int smbios_type1_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/* String table for the type 1 structure, NULL terminated. */
+const char *smbios_type1_strings[] = {
+	" ",		/* manufacturer string */
+	"BHYVE",	/* product name string */
+	"1.0",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* sku string */
+	" ",		/* family name string */
+	NULL
+};
+
+/*
+ * Template for the system enclosure/chassis structure (type 3).
+ *
+ * Copied out unchanged (apart from the handle) by
+ * smbios_generic_initializer().  String members are 1-based positions
+ * within smbios_type3_strings[].
+ */
+struct smbios_table_type3 smbios_type3_template = {
+	{ SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 },
+	1,		/* manufacturer string */
+	SMBIOS_CHT_UNKNOWN,
+	2,		/* version string */
+	3,		/* serial number string */
+	4,		/* asset tag string */
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHST_SAFE,
+	SMBIOS_CHSC_NONE,
+	0,		/* height in 'u's (0=enclosure height unspecified) */
+	0,		/* number of power cords (0=number unspecified) */
+	0,		/* number of contained element records */
+	0,		/* length of records */
+	5		/* sku number string */
+};
+
+/* String table for the type 3 structure, NULL terminated. */
+const char *smbios_type3_strings[] = {
+	" ",		/* manufacturer string */
+	"1.0",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* sku number string */
+	NULL
+};
+
+/*
+ * Template for the processor information structure (type 4).
+ *
+ * smbios_type4_initializer() emits one copy per guest CPU and replaces
+ * the socket designation index with that of a "CPU #<n>" string it
+ * appends after the template strings.  The three cache handles are
+ * initialised with -1, which stores 0xffff in those 16-bit fields.
+ */
+struct smbios_table_type4 smbios_type4_template = {
+	{ SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 },
+	1,		/* socket designation string */
+	SMBIOS_PRT_CENTRAL,
+	SMBIOS_PRF_OTHER,
+	2,		/* manufacturer string */
+	0,		/* cpuid */
+	3,		/* version string */
+	0,		/* voltage */
+	0,		/* external clock frequency in mhz (0=unknown) */
+	0,		/* maximum frequency in mhz (0=unknown) */
+	0,		/* current frequency in mhz (0=unknown) */
+	SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED,
+	SMBIOS_PRU_NONE,
+	-1,		/* l1 cache handle */
+	-1,		/* l2 cache handle */
+	-1,		/* l3 cache handle */
+	4,		/* serial number string */
+	5,		/* asset tag string */
+	6,		/* part number string */
+	0,		/* cores per socket (0=unknown) */
+	0,		/* enabled cores per socket (0=unknown) */
+	0,		/* threads per socket (0=unknown) */
+	SMBIOS_PFL_64B,
+	SMBIOS_PRF_OTHER
+};
+
+/*
+ * String table for the type 4 structure, NULL terminated.  The first
+ * entry is only a placeholder: the per-CPU initializer points the
+ * socket designation at a string it appends itself.
+ */
+const char *smbios_type4_strings[] = {
+	" ",		/* socket designation string */
+	" ",		/* manufacturer string */
+	" ",		/* version string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* part number string */
+	NULL
+};
+
+/* Defined below; declared here so smbios_template[] can reference it. */
+static int smbios_type4_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * Template for the physical memory array structure (type 16).
+ *
+ * It has no string table.  The two fields marked TBD are filled in by
+ * smbios_type16_initializer() from the guest memory sizes; the error
+ * handle is initialised with -1, which stores 0xffff in that 16-bit
+ * field.
+ */
+struct smbios_table_type16 smbios_type16_template = {
+	{ SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16),  0 },
+	SMBIOS_MAL_SYSMB,
+	SMBIOS_MAU_SYSTEM,
+	SMBIOS_MAE_NONE,
+	0x80000000,	/* max mem capacity in kb (0x80000000=use extended) */
+	-1,		/* handle of error (if any) */
+	0,		/* number of slots or sockets (TBD) */
+	0		/* extended maximum memory capacity in bytes (TBD) */
+};
+
+/* Defined below; declared here so smbios_template[] can reference it. */
+static int smbios_type16_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * Template for the memory device structure (type 17).
+ *
+ * smbios_type17_initializer() emits one copy for low memory and, when
+ * the guest has high memory, a second one; it overwrites the memory
+ * array handle and the extended size in each copy.  String members are
+ * 1-based positions within smbios_type17_strings[].
+ */
+struct smbios_table_type17 smbios_type17_template = {
+	{ SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17),  0 },
+	-1,		/* handle of physical memory array */
+	-1,		/* handle of memory error data */
+	64,		/* total width in bits including ecc */
+	64,		/* data width in bits */
+	0x7fff,		/* size in bytes (0x7fff=use extended)*/
+	SMBIOS_MDFF_UNKNOWN,
+	0,		/* set (0x00=none, 0xff=unknown) */
+	1,		/* device locator string */
+	2,		/* physical bank locator string */
+	SMBIOS_MDT_UNKNOWN,
+	SMBIOS_MDF_UNKNOWN,
+	0,		/* maximum memory speed in mhz (0=unknown) */
+	3,		/* manufacturer string */
+	4,		/* serial number string */
+	5,		/* asset tag string */
+	6,		/* part number string */
+	0,		/* attributes (0=unknown rank information) */
+	0,		/* extended size in mb (TBD) */
+	0,		/* current speed in mhz (0=unknown) */
+	0,		/* minimum voltage in mv (0=unknown) */
+	0,		/* maximum voltage in mv (0=unknown) */
+	0		/* configured voltage in mv (0=unknown) */
+};
+
+/* String table for the type 17 structure, NULL terminated. */
+const char *smbios_type17_strings[] = {
+	" ",		/* device locator string */
+	" ",		/* physical bank locator string */
+	" ",		/* manufacturer string */
+	"None",		/* serial number string */
+	"None",		/* asset tag string */
+	"None",		/* part number string */
+	NULL
+};
+
+/* Defined below; declared here so smbios_template[] can reference it. */
+static int smbios_type17_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * Template for the memory array mapped address structure (type 19).
+ *
+ * It has no string table.  smbios_type19_initializer() emits one copy
+ * for low memory and, when the guest has high memory, a second one; it
+ * overwrites the array handle and both extended addresses in each copy.
+ */
+struct smbios_table_type19 smbios_type19_template = {
+	{ SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19),  0 },
+	0xffffffff,	/* starting phys addr in kb (0xffffffff=use ext) */
+	0xffffffff,	/* ending phys addr in kb (0xffffffff=use ext) */
+	-1,		/* physical memory array handle */
+	1,		/* number of devices that form a row */
+	0,		/* extended starting phys addr in bytes (TBD) */
+	0		/* extended ending phys addr in bytes (TBD) */
+};
+
+/* Defined below; declared here so smbios_template[] can reference it. */
+static int smbios_type19_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * Template for the system boot information structure (type 32):
+ * six reserved zero bytes followed by a "no errors detected" status.
+ */
+struct smbios_table_type32 smbios_type32_template = {
+	{ SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32),  0 },
+	{ 0, 0, 0, 0, 0, 0 },
+	SMBIOS_BOOT_NORMAL
+};
+
+/* Template for the end-of-table structure (type 127): header only. */
+struct smbios_table_type127 smbios_type127_template = {
+	{ SMBIOS_TYPE_EOT, sizeof (struct smbios_table_type127),  0 }
+};
+
+static int smbios_generic_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size);
+
+/*
+ * The structures emitted by smbios_build(), in emission order.
+ *
+ * Each entry pairs a structure template with its optional string table
+ * (NULL when the structure has no strings) and the initializer that
+ * copies it into guest memory.  The list ends with an all-NULL entry;
+ * smbios_build() stops at the first NULL template pointer.
+ */
+static struct smbios_template_entry smbios_template[] = {
+	{ (struct smbios_structure *)&smbios_type0_template,
+	  smbios_type0_strings,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type1_template,
+	  smbios_type1_strings,
+	  smbios_type1_initializer },
+	{ (struct smbios_structure *)&smbios_type3_template,
+	  smbios_type3_strings,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type4_template,
+	  smbios_type4_strings,
+	  smbios_type4_initializer },
+	{ (struct smbios_structure *)&smbios_type16_template,
+	  NULL,
+	  smbios_type16_initializer },
+	{ (struct smbios_structure *)&smbios_type17_template,
+	  smbios_type17_strings,
+	  smbios_type17_initializer },
+	{ (struct smbios_structure *)&smbios_type19_template,
+	  NULL,
+	  smbios_type19_initializer },
+	{ (struct smbios_structure *)&smbios_type32_template,
+	  NULL,
+	  smbios_generic_initializer },
+	{ (struct smbios_structure *)&smbios_type127_template,
+	  NULL,
+	  smbios_generic_initializer },
+	{ NULL,NULL, NULL }
+};
+
+/*
+ * State shared between smbios_build() and the initializers: the guest's
+ * low and high memory sizes (set at the top of smbios_build()) and the
+ * handle value recorded by smbios_type16_initializer() for the type 17
+ * and type 19 structures to reference.
+ */
+static uint64_t guest_lomem, guest_himem;
+static uint16_t type16_handle;
+
+/*
+ * Copy one structure template, followed by its string table, to
+ * 'curaddr'.
+ *
+ * The copied header is given handle '*n + 1' and '*n' is then
+ * incremented.  Each template string is written with its terminating
+ * NUL and the list is closed with one further NUL; a structure with no
+ * string table gets two NULs instead.  On return '*endaddr' points at
+ * the first byte after everything written.
+ *
+ * 'size' is accepted for signature compatibility with the other
+ * initializers but is never written.  Always returns 0.
+ */
+static int
+smbios_generic_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_structure *hdr = (struct smbios_structure *)curaddr;
+	const char **strp;
+	char *cursor;
+
+	/* Formatted area first, then stamp the handle into the copy. */
+	memcpy(curaddr, template_entry, template_entry->length);
+	hdr->handle = *n + 1;
+	cursor = curaddr + hdr->length;
+
+	if (template_strings == NULL) {
+		/* Minimum string section is double nul */
+		*cursor++ = '\0';
+		*cursor++ = '\0';
+	} else {
+		for (strp = template_strings; *strp != NULL; strp++) {
+			int nbytes = strlen(*strp) + 1;
+
+			memcpy(cursor, *strp, nbytes);
+			cursor += nbytes;
+		}
+		/* The extra NUL that closes the string list. */
+		*cursor++ = '\0';
+	}
+
+	*n += 1;
+	*endaddr = cursor;
+
+	return (0);
+}
+
+/*
+ * Emit the system information structure (type 1) and fill in its UUID.
+ *
+ * When a UUID string was supplied (guest_uuid_str) it is parsed and
+ * stored in little-endian encoding.  Otherwise a version 3 style UUID
+ * is derived from an MD5 digest of the VM name and the host name so
+ * that the same VM on the same host always reports the same value.
+ *
+ * Returns 0 on success, or -1 if the UUID string does not parse or the
+ * host name cannot be obtained.
+ */
+static int
+smbios_type1_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type1 *type1;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type1 = (struct smbios_table_type1 *)curaddr;
+
+	if (guest_uuid_str != NULL) {
+		uuid_t		uuid;
+		uint32_t	status;
+
+		uuid_from_string(guest_uuid_str, &uuid, &status);
+		if (status != uuid_s_ok)
+			return (-1);
+
+		uuid_enc_le(&type1->uuid, &uuid);
+	} else {
+		MD5_CTX		mdctx;
+		u_char		digest[16];
+		char		hostname[MAXHOSTNAMELEN];
+
+		/*
+		 * Universally unique and yet reproducible are an
+		 * oxymoron, however reproducible is desirable in
+		 * this case.
+		 *
+		 * The whole buffer is fed to MD5 below, so clear it
+		 * first: otherwise the bytes after the host name's NUL
+		 * are whatever happened to be on the stack and the
+		 * digest is not reproducible at all.
+		 */
+		memset(hostname, 0, sizeof (hostname));
+		if (gethostname(hostname, sizeof(hostname)))
+			return (-1);
+
+		MD5Init(&mdctx);
+		MD5Update(&mdctx, vmname, strlen(vmname));
+		MD5Update(&mdctx, hostname, sizeof(hostname));
+		MD5Final(digest, &mdctx);
+
+		/*
+		 * Set the variant and version number.
+		 */
+		digest[6] &= 0x0F;
+		digest[6] |= 0x30;	/* version 3 */
+		digest[8] &= 0x3F;
+		digest[8] |= 0x80;
+
+		memcpy(&type1->uuid, digest, sizeof (digest));
+	}
+
+	return (0);
+}
+
+/*
+ * Emit one processor information structure (type 4) per guest CPU.
+ *
+ * Each copy is written by smbios_generic_initializer() and then
+ * extended in place with an additional "CPU #<i>" string, which becomes
+ * the structure's socket designation.  On return '*endaddr' points
+ * past the last structure written.  Always returns 0.
+ */
+static int
+smbios_type4_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	int i;
+
+	for (i = 0; i < guest_ncpus; i++) {
+		struct smbios_table_type4 *type4;
+		char *p;
+		int nstrings, len;
+
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type4 = (struct smbios_table_type4 *)curaddr;
+		/*
+		 * Count the strings already present by counting NULs in
+		 * the string area, stopping short of the final NUL that
+		 * closes the list.
+		 */
+		p = curaddr + sizeof (struct smbios_table_type4);
+		nstrings = 0;
+		while (p < *endaddr - 1) {
+			if (*p++ == '\0')
+				nstrings++;
+		}
+		/*
+		 * Write the new string over the list terminator; len
+		 * includes the string's own NUL.
+		 */
+		len = sprintf(*endaddr - 1, "CPU #%d", i) + 1;
+		/* Step past the new string and re-add the list terminator. */
+		*endaddr += len - 1;
+		*(*endaddr) = '\0';
+		(*endaddr)++;
+		/* String numbers are 1-based; the new string is the last. */
+		type4->socket = nstrings + 1;
+		curaddr = *endaddr;
+	}
+
+	return (0);
+}
+
+/*
+ * Emit the physical memory array structure (type 16).
+ *
+ * Fills in the extended capacity (low plus high guest memory, in
+ * bytes) and the device count (two when the guest has high memory,
+ * otherwise one), and records the structure's handle in type16_handle
+ * so the type 17 and type 19 structures can refer back to it.
+ * Always returns 0.
+ */
+static int
+smbios_type16_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type16 *type16;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type16 = (struct smbios_table_type16 *)curaddr;
+	/*
+	 * Take the handle from the structure that was just written.
+	 * Sampling '*n' before the call is off by one, because the
+	 * generic initializer assigns '*n + 1' as the handle, and would
+	 * make the back-references name the preceding structure.
+	 */
+	type16_handle = type16->header.handle;
+	type16->xsize = guest_lomem + guest_himem;
+	type16->ndevs = guest_himem > 0 ? 2 : 1;
+
+	return (0);
+}
+
+/*
+ * Convert a byte count into the megabyte units of the type 17
+ * extended-size field, saturating at 0x7fffffff so the top bit of the
+ * 32-bit field is never set.
+ */
+static uint32_t
+smbios_type17_xsize_mb(uint64_t bytes)
+{
+	uint64_t mb = bytes >> 20;
+
+	return (mb > 0x7fffffffULL ? 0x7fffffffU : (uint32_t)mb);
+}
+
+/*
+ * Emit the memory device structures (type 17): one describing low
+ * memory and, when the guest has any, a second describing high memory.
+ *
+ * Each copy is linked to the memory array through type16_handle and
+ * carries its size in the extended-size field.  That field is in
+ * megabytes while guest_lomem/guest_himem are byte counts, so the
+ * values are converted rather than stored raw.  Always returns 0.
+ */
+static int
+smbios_type17_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type17 *type17;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type17 = (struct smbios_table_type17 *)curaddr;
+	type17->arrayhand = type16_handle;
+	type17->xsize = smbios_type17_xsize_mb(guest_lomem);
+
+	if (guest_himem > 0) {
+		/* The second device starts where the first one ended. */
+		curaddr = *endaddr;
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type17 = (struct smbios_table_type17 *)curaddr;
+		type17->arrayhand = type16_handle;
+		type17->xsize = smbios_type17_xsize_mb(guest_himem);
+	}
+
+	return (0);
+}
+
+/*
+ * Emit the memory array mapped address structures (type 19): one for
+ * low memory starting at address 0 and, when the guest has any, a
+ * second for high memory starting at 4GB.
+ *
+ * Each copy is linked to the memory array through type16_handle.
+ * guest_himem is a size, not an address, so the end of the high range
+ * is its start plus that size.  Always returns 0.
+ */
+static int
+smbios_type19_initializer(struct smbios_structure *template_entry,
+    const char **template_strings, char *curaddr, char **endaddr,
+    uint16_t *n, uint16_t *size)
+{
+	struct smbios_table_type19 *type19;
+
+	smbios_generic_initializer(template_entry, template_strings,
+	    curaddr, endaddr, n, size);
+	type19 = (struct smbios_table_type19 *)curaddr;
+	type19->arrayhand = type16_handle;
+	type19->xsaddr = 0;
+	type19->xeaddr = guest_lomem;
+
+	if (guest_himem > 0) {
+		/* The second range starts where the first structure ended. */
+		curaddr = *endaddr;
+		smbios_generic_initializer(template_entry, template_strings,
+		    curaddr, endaddr, n, size);
+		type19 = (struct smbios_table_type19 *)curaddr;
+		type19->arrayhand = type16_handle;
+		type19->xsaddr = 4*GB;
+		type19->xeaddr = type19->xsaddr + guest_himem;
+	}
+
+	return (0);
+}
+
+/*
+ * Initialise the SMBIOS entry point structure at 'smbios_ep'.
+ *
+ * The structure is zeroed, both anchor strings are written, and the
+ * version, entry point length and structure table address ('staddr')
+ * are set.  Table length, structure count, maximum structure size and
+ * both checksums are left zero for smbios_ep_finalizer().
+ */
+static void
+smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr)
+{
+	memset(smbios_ep, 0, sizeof(*smbios_ep));
+	memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR,
+	    SMBIOS_ENTRY_EANCHORLEN);
+	smbios_ep->eplen = 0x1F;
+	assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen);
+	smbios_ep->major = 2;
+	smbios_ep->minor = 6;
+	smbios_ep->revision = 0;
+	memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR,
+	    SMBIOS_ENTRY_IANCHORLEN);
+	smbios_ep->staddr = staddr;
+	/*
+	 * The BCD revision packs major and minor into one nibble each.
+	 * Derive it from the fields above so the two cannot disagree
+	 * (a literal 0x24 here contradicted the advertised 2.6).
+	 */
+	smbios_ep->bcdrev = ((smbios_ep->major & 0x0f) << 4) |
+	    (smbios_ep->minor & 0x0f);
+}
+
+/*
+ * Complete the entry point once the structure table has been written.
+ *
+ * Stores the table length ('len'), the structure count ('num') and the
+ * maximum structure size ('maxssize'), then computes the two checksums
+ * so that the covered bytes sum to zero modulo 256: the intermediate
+ * checksum over offsets 0x10-0x1e first, then the entry point checksum
+ * over offsets 0x00-0x1e, which includes the intermediate one.
+ */
+static void
+smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len,
+    uint16_t num, uint16_t maxssize)
+{
+	const uint8_t	*raw = (const uint8_t *)smbios_ep;
+	uint8_t		sum;
+	int		off;
+
+	smbios_ep->stnum = num;
+	smbios_ep->stlen = len;
+	smbios_ep->maxssize = maxssize;
+
+	/* Intermediate region; must be settled before the outer sum. */
+	sum = 0;
+	off = 0x10;
+	while (off < 0x1f)
+		sum += raw[off++];
+	smbios_ep->ichecksum = (uint8_t)(0 - sum);
+
+	/* Whole entry point, now containing the intermediate checksum. */
+	sum = 0;
+	off = 0;
+	while (off < 0x1f)
+		sum += raw[off++];
+	smbios_ep->echecksum = (uint8_t)(0 - sum);
+}
+
+/*
+ * Build the SMBIOS entry point and structure table in guest memory,
+ * starting at guest physical address SMBIOS_BASE.
+ *
+ * The entry point is written first, then every smbios_template[] entry
+ * is emitted in order by its initializer, and finally the entry point
+ * is completed with the table length, structure count, maximum
+ * structure size and checksums.
+ *
+ * Returns 0 on success, ENOMEM if the SMBIOS region is not mapped in
+ * the guest, or the non-zero value returned by a failing initializer.
+ */
+int
+smbios_build(struct vmctx *ctx)
+{
+	struct smbios_entry_point	*smbios_ep;
+	uint16_t			n;
+	uint16_t			maxssize;
+	char				*curaddr, *startaddr, *ststartaddr;
+	int				i;
+	int				err;
+
+	guest_lomem = vm_get_lowmem_size(ctx);
+	guest_himem = vm_get_highmem_size(ctx);
+
+	startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH);
+	if (startaddr == NULL) {
+		fprintf(stderr, "smbios table requires mapped mem\n");
+		return (ENOMEM);
+	}
+
+	curaddr = startaddr;
+
+	smbios_ep = (struct smbios_entry_point *)curaddr;
+	smbios_ep_initializer(smbios_ep, SMBIOS_BASE +
+	    sizeof(struct smbios_entry_point));
+	curaddr += sizeof(struct smbios_entry_point);
+	ststartaddr = curaddr;
+
+	n = 0;
+	maxssize = 0;
+	for (i = 0; smbios_template[i].entry != NULL; i++) {
+		struct smbios_structure	*entry;
+		const char		**strings;
+		initializer_func_t      initializer;
+		char			*endaddr;
+		uint16_t		size = 0;
+
+		entry = smbios_template[i].entry;
+		strings = smbios_template[i].strings;
+		initializer = smbios_template[i].initializer;
+
+		err = (*initializer)(entry, strings, curaddr, &endaddr,
+		    &n, &size);
+		if (err != 0)
+			return (err);
+
+		/*
+		 * None of the initializers stores through 'size', so
+		 * derive it from the bytes just written instead of
+		 * reading an indeterminate value.  For initializers
+		 * that emit several structures this is their combined
+		 * size, which can only overstate the maximum.
+		 */
+		size = endaddr - curaddr;
+		if (size > maxssize)
+			maxssize = size;
+
+		curaddr = endaddr;
+	}
+
+	assert(curaddr - startaddr < SMBIOS_MAX_LENGTH);
+	smbios_ep_finalizer(smbios_ep, curaddr - ststartaddr, n, maxssize);
+
+	return (0);
+}
diff --git a/usr/src/cmd/bhyve/smbiostbl.h b/usr/src/cmd/bhyve/smbiostbl.h
new file mode 100644
index 0000000000..fd7f86be80
--- /dev/null
+++ b/usr/src/cmd/bhyve/smbiostbl.h
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/smbiostbl.h 262744 2014-03-04 17:12:06Z tychon $
+ */
+
+#ifndef _SMBIOSTBL_H_
+#define _SMBIOSTBL_H_
+
+struct vmctx;
+
+/*
+ * Build the SMBIOS entry point and structure table in the guest memory
+ * of 'ctx'.  Returns 0 on success and a non-zero value on failure.
+ */
+int	smbios_build(struct vmctx *ctx);
+
+#endif /* _SMBIOSTBL_H_ */
diff --git a/usr/src/cmd/bhyve/spinup_ap.c b/usr/src/cmd/bhyve/spinup_ap.c
new file mode 100644
index 0000000000..e1dd562d3f
--- /dev/null
+++ b/usr/src/cmd/bhyve/spinup_ap.c
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/spinup_ap.c 263432 2014-03-20 18:15:37Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/spinup_ap.c 263432 2014-03-20 18:15:37Z neel $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "bhyverun.h"
+#include "spinup_ap.h"
+
+/*
+ * Aim 'newcpu' at real-mode startup code.
+ *
+ * On entry '*rip' holds the address of the startup code; its page
+ * number is used as the startup vector.  The vCPU's %rip is set to 0
+ * and its %cs base and selector are set so that execution begins at
+ * the start of that page.  '*rip' is 0 on return.  Any failure to
+ * update the vCPU state trips an assertion.
+ */
+static void
+spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip)
+{
+	uint64_t cs_base;
+	uint32_t cs_limit, cs_access;
+	uint16_t cs_sel;
+	int page, start, rc;
+
+	/* Page number of the startup code, and that page's address. */
+	page = *rip >> PAGE_SHIFT;
+	start = page << PAGE_SHIFT;
+	*rip = 0;
+
+	/*
+	 * Update the %cs and %rip of the guest so that it starts
+	 * executing real mode code at 'vector << 12'.
+	 */
+	rc = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip);
+	assert(rc == 0);
+
+	/* Read the current descriptor so limit and access are kept. */
+	rc = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &cs_base,
+	    &cs_limit, &cs_access);
+	assert(rc == 0);
+
+	cs_base = start;
+	rc = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS,
+	    cs_base, cs_limit, cs_access);
+	assert(rc == 0);
+
+	/* The selector is the base address divided by 16. */
+	cs_sel = start >> 4;
+	rc = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs_sel);
+	assert(rc == 0);
+}
+
+/*
+ * Start the application processor 'newcpu' on behalf of 'vcpu'.
+ *
+ * The new vCPU is reset, given its capabilities, switched to
+ * unrestricted guest mode, pointed at the real-mode startup code
+ * identified by 'rip' and finally added to the set of running CPUs.
+ * 'newcpu' must be non-zero and below guest_ncpus; that, and every
+ * step that can fail, is checked with an assertion.
+ *
+ * Returns 'newcpu'.
+ */
+int
+spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip)
+{
+	int rc;
+
+	assert(newcpu != 0);
+	assert(newcpu < guest_ncpus);
+
+	rc = vcpu_reset(ctx, newcpu);
+	assert(rc == 0);
+
+	fbsdrun_set_capabilities(ctx, newcpu);
+
+	/*
+	 * Enable the 'unrestricted guest' mode for 'newcpu'.
+	 *
+	 * Set up the processor state in power-on 16-bit mode, with the CS:IP
+	 * init'd to the specified low-mem 4K page.
+	 */
+	rc = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+	assert(rc == 0);
+
+	/* This rewrites 'rip' to the value the vCPU actually starts at. */
+	spinup_ap_realmode(ctx, newcpu, &rip);
+
+	fbsdrun_addcpu(ctx, vcpu, newcpu, rip);
+
+	return (newcpu);
+}
diff --git a/usr/src/cmd/bhyve/spinup_ap.h b/usr/src/cmd/bhyve/spinup_ap.h
new file mode 100644
index 0000000000..090de091ba
--- /dev/null
+++ b/usr/src/cmd/bhyve/spinup_ap.h
@@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/spinup_ap.h 240912 2012-09-25 02:33:25Z neel $
+ */
+
+#ifndef	_SPINUP_AP_H_
+#define	_SPINUP_AP_H_
+
+/*
+ * Start application processor 'newcpu' at the real-mode code addressed
+ * by 'rip', on behalf of 'vcpu'.  Returns 'newcpu'.
+ */
+int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip);
+
+#endif
diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c
new file mode 100644
index 0000000000..a8b5d40356
--- /dev/null
+++ b/usr/src/cmd/bhyve/uart_emul.c
@@ -0,0 +1,1042 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/uart_emul.c 257293 2013-10-29 00:18:11Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/uart_emul.c 257293 2013-10-29 00:18:11Z neel $");
+
+#include <sys/types.h>
+#include <dev/ic/ns16550.h>
+
+#ifndef	__FreeBSD__
+#include <sys/socket.h>
+#include <sys/stat.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <pthread.h>
+#ifndef	__FreeBSD__
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#endif
+
+#ifndef	__FreeBSD__
+#include <bhyve.h>
+
+#include "bhyverun.h"
+#endif
+#ifdef	__FreeBSD__
+#include "mevent.h"
+#endif
+#include "uart_emul.h"
+
+#define	COM1_BASE	0x3F8
+#define	COM1_IRQ	4
+#define	COM2_BASE      	0x2F8
+#define COM2_IRQ	3
+
+#define	DEFAULT_RCLK	1843200
+#define	DEFAULT_BAUD	9600
+
+#define	FCR_RX_MASK	0xC0
+
+#define	MCR_OUT1	0x04
+#define	MCR_OUT2	0x08
+
+#define	MSR_DELTA_MASK	0x0f
+
+#ifndef REG_SCR
+#define REG_SCR		com_scr
+#endif
+
+#define	FIFOSZ	16
+
+static bool uart_stdio;		/* stdio in use for i/o */
+#ifndef	__FreeBSD__
+static bool uart_bcons;		/* bhyveconsole in use for i/o */
+#endif
+
+/*
+ * Legacy COM port resources: the I/O base address and IRQ of COM1 and
+ * COM2.  Every entry starts out with 'inuse' false.
+ *
+ * NOTE(review): 'inuse' presumably marks a port that has been handed
+ * out; the code that sets it is outside this part of the file.
+ */
+static struct {
+	int	baseaddr;
+	int	irq;
+	bool	inuse;
+} uart_lres[] = {
+	{ COM1_BASE, COM1_IRQ, false},
+	{ COM2_BASE, COM2_IRQ, false},
+};
+
+#define	UART_NLDEVS	(sizeof(uart_lres) / sizeof(uart_lres[0]))
+
+/*
+ * Ring buffer of received bytes.  'size' is the capacity currently in
+ * effect (at most FIFOSZ) and is set by fifo_reset(); rindex and windex
+ * wrap modulo 'size'.
+ */
+struct fifo {
+	uint8_t	buf[FIFOSZ];
+	int	rindex;		/* index to read from */
+	int	windex;		/* index to write to */
+	int	num;		/* number of characters in the fifo */
+	int	size;		/* size of the fifo */
+};
+
+/*
+ * Per-UART emulation state: the register file, the receive FIFO, the
+ * backend in use (stdio, or the bhyve console sockets on non-FreeBSD
+ * builds) and the callbacks used to raise and lower the interrupt.
+ */
+struct uart_softc {
+	pthread_mutex_t mtx;	/* protects all softc elements */
+	uint8_t data;		/* Data register (R/W) */
+	uint8_t ier;		/* Interrupt enable register (R/W) */
+	uint8_t lcr;		/* Line control register (R/W) */
+	uint8_t mcr;		/* Modem control register (R/W) */
+	uint8_t lsr;		/* Line status register (R/W) */
+	uint8_t msr;		/* Modem status register (R/W) */
+	uint8_t fcr;		/* FIFO control register (W) */
+	uint8_t scr;		/* Scratch register (R/W) */
+
+	uint8_t dll;		/* Baudrate divisor latch LSB */
+	uint8_t dlh;		/* Baudrate divisor latch MSB */
+
+	struct fifo rxfifo;
+
+	bool	opened;		/* set once uart_write() has opened the tty */
+	bool	stdio;		/* backend is the process's stdin/stdout tty */
+#ifndef	__FreeBSD__
+	bool	bcons;
+	struct {
+		pid_t	clipid;
+		int	clifd;		/* console client unix domain socket */
+		int	servfd;		/* console server unix domain socket */
+	} usc_bcons;
+#endif
+
+	bool	thre_int_pending;	/* THRE interrupt pending */
+
+	void	*arg;		/* passed to intr_assert/intr_deassert */
+	uart_intr_func_t intr_assert;
+	uart_intr_func_t intr_deassert;
+};
+
+#ifdef	__FreeBSD__
+static void uart_drain(int fd, enum ev_type ev, void *arg);
+#else
+static void uart_tty_drain(struct uart_softc *sc);
+static int uart_bcons_drain(struct uart_softc *sc);
+#endif
+
+/*
+ * Terminal settings for stdin: tio_orig is what ttyopen() found and
+ * ttyclose() restores; tio_new is the raw-mode copy ttyopen() installs.
+ */
+static struct termios tio_orig, tio_new;	/* I/O Terminals */
+
+/*
+ * Put stdin back into the terminal mode saved by ttyopen().  Registered
+ * with atexit(), so it takes no arguments and reports nothing.
+ */
+static void
+ttyclose(void)
+{
+	(void) tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+/*
+ * Switch stdin to raw mode, remembering the previous settings and
+ * arranging for ttyclose() to restore them when the process exits.
+ */
+static void
+ttyopen(void)
+{
+	/* Remember the current settings so ttyclose() can restore them. */
+	(void) tcgetattr(STDIN_FILENO, &tio_orig);
+
+	/* Derive a raw-mode copy and make it take effect immediately. */
+	tio_new = tio_orig;
+	cfmakeraw(&tio_new);
+	(void) tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+
+	(void) atexit(ttyclose);
+}
+
+/*
+ * Report whether stdin is readable right now.  select() is given a zero
+ * timeout so the call never blocks; an error from select() is reported
+ * as "nothing available".
+ */
+static bool
+tty_char_available(void)
+{
+	struct timeval nowait;
+	fd_set readable;
+
+	nowait.tv_sec = 0;
+	nowait.tv_usec = 0;
+	FD_ZERO(&readable);
+	FD_SET(STDIN_FILENO, &readable);
+
+	return (select(STDIN_FILENO + 1, &readable, NULL, NULL, &nowait) > 0);
+}
+
+/*
+ * Read one byte from stdin without blocking.
+ *
+ * Returns the byte as a value in the range 0-255, or -1 when no input
+ * is pending or the read did not deliver a byte (error or end of
+ * file).
+ */
+static int
+ttyread(void)
+{
+	unsigned char rb;
+
+	if (!tty_char_available())
+		return (-1);
+
+	/*
+	 * stdin can be reported readable at end of file or on error, in
+	 * which case read() stores nothing.  Only hand back a byte that
+	 * was actually read instead of an uninitialised one.
+	 */
+	if (read(STDIN_FILENO, &rb, 1) != 1)
+		return (-1);
+
+	return (rb);
+}
+
+/*
+ * Send one byte to the terminal.  The byte is written to the stdin
+ * descriptor and the result of write() is deliberately ignored.
+ */
+static void
+ttywrite(unsigned char wb)
+{
+	const unsigned char out = wb;
+
+	(void) write(STDIN_FILENO, &out, 1);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Send one byte to the console client socket of 'sc'.  The result of
+ * write() is deliberately ignored.
+ */
+static void
+bconswrite(struct uart_softc *sc, unsigned char wb)
+{
+	const int fd = sc->usc_bcons.clifd;
+
+	(void) write(fd, &wb, 1);
+}
+#endif
+
+/*
+ * Empty 'fifo' and set its capacity to 'size'.  All state, including
+ * any buffered bytes and both indexes, is cleared.
+ */
+static void
+fifo_reset(struct fifo *fifo, int size)
+{
+	memset(fifo, 0, sizeof (*fifo));
+	fifo->size = size;
+}
+
+/*
+ * Append 'ch' to 'fifo'.  Returns 0 on success, or -1 without storing
+ * anything when the fifo is already full.
+ */
+static int
+fifo_putchar(struct fifo *fifo, uint8_t ch)
+{
+	int slot;
+
+	/* Refuse the byte when every slot is already occupied. */
+	if (fifo->num >= fifo->size)
+		return (-1);
+
+	slot = fifo->windex;
+	fifo->buf[slot] = ch;
+	fifo->windex = (slot + 1) % fifo->size;
+	fifo->num += 1;
+
+	return (0);
+}
+
+/*
+ * Remove and return the oldest byte in 'fifo', or return -1 when the
+ * fifo is empty.
+ */
+static int
+fifo_getchar(struct fifo *fifo)
+{
+	int slot, byte;
+
+	if (fifo->num <= 0)
+		return (-1);
+
+	slot = fifo->rindex;
+	byte = fifo->buf[slot];
+	fifo->rindex = (slot + 1) % fifo->size;
+	fifo->num -= 1;
+
+	return (byte);
+}
+
+/* Return the number of bytes currently buffered in 'fifo'. */
+static int
+fifo_numchars(struct fifo *fifo)
+{
+	const int pending = fifo->num;
+
+	return (pending);
+}
+
+/* Return non-zero when 'fifo' has room for at least one more byte. */
+static int
+fifo_available(struct fifo *fifo)
+{
+	return (fifo->size > fifo->num);
+}
+
+/*
+ * Prepare the stdio backend of 'sc' for use: switch the terminal to
+ * raw mode and, on FreeBSD, register uart_drain() to be called when
+ * stdin becomes readable.
+ *
+ * Must only be called for a stdio-backed UART that has not been opened
+ * yet; the caller is responsible for setting sc->opened afterwards.
+ */
+static void
+uart_opentty(struct uart_softc *sc)
+{
+#ifdef	__FreeBSD__
+	struct mevent *mev;
+#endif
+
+	assert(!sc->opened && sc->stdio);
+
+	ttyopen();
+#ifdef	__FreeBSD__
+	mev = mevent_add(STDIN_FILENO, EVF_READ, uart_drain, sc);
+	/*
+	 * Only meaningful where the event was registered.  Elsewhere
+	 * 'mev' would never be assigned and asserting on it would read
+	 * an indeterminate pointer.
+	 */
+	assert(mev);
+#endif
+}
+
+/*
+ * The IIR returns a prioritized interrupt reason.  In decreasing order
+ * of priority, as tested below:
+ * - receiver line status (overrun error)
+ * - receive data available
+ * - transmit holding register empty
+ * - modem status change
+ *
+ * A condition is only reported when its enable bit is set in the IER.
+ * Returns the reason for the highest priority pending condition, or
+ * IIR_NOPEND when there is none.
+ */
+static int
+uart_intr_reason(struct uart_softc *sc)
+{
+	const uint8_t enabled = sc->ier;
+
+	if ((enabled & IER_ERLS) != 0 && (sc->lsr & LSR_OE) != 0)
+		return (IIR_RLS);
+
+	if ((enabled & IER_ERXRDY) != 0 && fifo_numchars(&sc->rxfifo) > 0)
+		return (IIR_RXTOUT);
+
+	if ((enabled & IER_ETXRDY) != 0 && sc->thre_int_pending)
+		return (IIR_TXRDY);
+
+	if ((enabled & IER_EMSC) != 0 && (sc->msr & MSR_DELTA_MASK) != 0)
+		return (IIR_MLSC);
+
+	return (IIR_NOPEND);
+}
+
+/*
+ * Put 'sc' into its reset state: load the divisor latch for the
+ * default baud rate and shrink the receive FIFO to a single entry.
+ */
+static void
+uart_reset(struct uart_softc *sc)
+{
+	uint16_t divisor;
+
+	divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
+	/*
+	 * Split the 16-bit divisor across the two 8-bit latch
+	 * registers.  The high byte is bits 15:8; shifting by 16 would
+	 * always yield 0.
+	 */
+	sc->dll = divisor & 0xff;
+	sc->dlh = divisor >> 8;
+
+	fifo_reset(&sc->rxfifo, 1);	/* no fifo until enabled by software */
+}
+
+/*
+ * Drive the COM port's interrupt pin to match the current state: the
+ * assert callback is invoked when uart_intr_reason() reports a pending
+ * condition and the deassert callback otherwise.
+ */
+static void
+uart_toggle_intr(struct uart_softc *sc)
+{
+	uart_intr_func_t handler;
+	uint8_t pending;
+
+	pending = uart_intr_reason(sc);
+	handler = (pending == IIR_NOPEND) ?
+	    sc->intr_deassert : sc->intr_assert;
+
+	(*handler)(sc->arg);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * mevent callback: move pending terminal input into the receive FIFO.
+ *
+ * 'arg' is the uart_softc; 'fd' must be stdin and 'ev' a read event.
+ * In loopback mode one byte of input is read and discarded.  Otherwise
+ * bytes are transferred until the FIFO is full or no more input is
+ * pending, and the interrupt pin is then updated.
+ */
+static void
+uart_drain(int fd, enum ev_type ev, void *arg)
+{
+	struct uart_softc *sc = arg;
+	int byte;
+
+	assert(fd == STDIN_FILENO);
+	assert(ev == EVF_READ);
+
+	/*
+	 * This routine is called in the context of the mevent thread
+	 * to take out the softc lock to protect against concurrent
+	 * access from a vCPU i/o exit
+	 */
+	pthread_mutex_lock(&sc->mtx);
+
+	if ((sc->mcr & MCR_LOOPBACK) == 0) {
+		/* Never read a byte there is no room to store. */
+		for (;;) {
+			if (!fifo_available(&sc->rxfifo))
+				break;
+			byte = ttyread();
+			if (byte == -1)
+				break;
+			fifo_putchar(&sc->rxfifo, byte);
+		}
+		uart_toggle_intr(sc);
+	} else {
+		(void) ttyread();
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+#else
+/*
+ * Move pending terminal input into the receive fifo (or discard one
+ * character while the UART is in loopback mode), then refresh the
+ * interrupt pin.
+ */
+static void
+uart_tty_drain(struct uart_softc *sc)
+{
+	int c;
+
+	/*
+	 * Hold the softc lock so a concurrent vCPU i/o exit cannot touch
+	 * the same state.
+	 */
+	pthread_mutex_lock(&sc->mtx);
+
+	if ((sc->mcr & MCR_LOOPBACK) == 0) {
+		for (;;) {
+			if (!fifo_available(&sc->rxfifo))
+				break;
+			c = ttyread();
+			if (c == -1)
+				break;
+			fifo_putchar(&sc->rxfifo, c);
+		}
+		uart_toggle_intr(sc);
+	} else {
+		/* Loopback: external input is read and thrown away. */
+		(void) ttyread();
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Pull input from the connected console client into the receive fifo.
+ *
+ * Returns 0 when the descriptor has been drained (or the read was merely
+ * interrupted / would block), 1 when the client closed the connection
+ * (read returned 0), and -1 on any other read error.
+ *
+ * Characters that arrive while the fifo is full are dropped.  In loopback
+ * mode a single character is read and discarded, but EOF and hard errors
+ * are still reported; otherwise a closed client would never be noticed and
+ * the polling thread would spin on a permanently readable descriptor.
+ */
+static int
+uart_bcons_drain(struct uart_softc *sc)
+{
+	char ch;
+	int nbytes;
+	int ret = 0;
+
+	/*
+	 * Take the softc lock to protect against concurrent
+	 * access from a vCPU i/o exit
+	 */
+	pthread_mutex_lock(&sc->mtx);
+
+	if ((sc->mcr & MCR_LOOPBACK) != 0) {
+		nbytes = read(sc->usc_bcons.clifd, &ch, 1);
+		if (nbytes == 0)
+			ret = 1;
+		else if (nbytes == -1 && errno != EINTR && errno != EAGAIN)
+			ret = -1;
+	} else {
+		for (;;) {
+			nbytes = read(sc->usc_bcons.clifd, &ch, 1);
+			if (nbytes == 0) {
+				ret = 1;
+				break;
+			}
+			if (nbytes == -1 &&
+			    errno != EINTR && errno != EAGAIN) {
+				ret = -1;
+				break;
+			}
+			if (nbytes == -1) {
+				break;
+			}
+
+			if (fifo_available(&sc->rxfifo)) {
+				fifo_putchar(&sc->rxfifo, ch);
+			}
+		}
+		uart_toggle_intr(sc);
+	}
+
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (ret);
+}
+#endif
+
+/*
+ * Handle a guest write of 'value' to the UART register at 'offset'.
+ *
+ * The whole update runs under sc->mtx.  A stdio-backed UART has its
+ * terminal opened on first access.  While LCR_DLAB is set, writes to
+ * REG_DLL/REG_DLH update the divisor latch instead of the register that
+ * normally lives at that offset.  The interrupt pin is re-evaluated before
+ * the lock is dropped, whichever path was taken.
+ */
+void
+uart_write(struct uart_softc *sc, int offset, uint8_t value)
+{
+	int fifosz;
+	uint8_t msr;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/* Open terminal */
+	if (!sc->opened && sc->stdio) {
+		uart_opentty(sc);
+		sc->opened = true;
+	}
+
+	/*
+	 * Take care of the special case DLAB accesses first
+	 */
+	if ((sc->lcr & LCR_DLAB) != 0) {
+		if (offset == REG_DLL) {
+			sc->dll = value;
+			goto done;
+		}
+
+		if (offset == REG_DLH) {
+			sc->dlh = value;
+			goto done;
+		}
+	}
+
+        switch (offset) {
+	case REG_DATA:
+		/*
+		 * In loopback mode the byte goes straight into our own
+		 * receive fifo; a failed insert is recorded as an overrun.
+		 * Otherwise it is handed to whichever backend is configured.
+		 */
+		if (sc->mcr & MCR_LOOPBACK) {
+			if (fifo_putchar(&sc->rxfifo, value) != 0)
+				sc->lsr |= LSR_OE;
+		} else if (sc->stdio) {
+			ttywrite(value);
+#ifndef	__FreeBSD__
+		} else if (sc->bcons) {
+				bconswrite(sc, value);
+#endif
+		} /* else drop on floor */
+		/* Every data write leaves a THRE interrupt pending. */
+		sc->thre_int_pending = true;
+		break;
+	case REG_IER:
+		/*
+		 * Apply mask so that bits 4-7 are 0
+		 * Also enables bits 0-3 only if they're 1
+		 */
+		sc->ier = value & 0x0F;
+		break;
+		case REG_FCR:
+			/*
+			 * When moving from FIFO and 16450 mode and vice versa,
+			 * the FIFO contents are reset.
+			 */
+			if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) {
+				fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1;
+				fifo_reset(&sc->rxfifo, fifosz);
+			}
+
+			/*
+			 * The FCR_ENABLE bit must be '1' for the programming
+			 * of other FCR bits to be effective.
+			 */
+			if ((value & FCR_ENABLE) == 0) {
+				sc->fcr = 0;
+			} else {
+				if ((value & FCR_RCV_RST) != 0)
+					fifo_reset(&sc->rxfifo, FIFOSZ);
+
+				sc->fcr = value &
+					 (FCR_ENABLE | FCR_DMA | FCR_RX_MASK);
+			}
+			break;
+		case REG_LCR:
+			sc->lcr = value;
+			break;
+		case REG_MCR:
+			/* Apply mask so that bits 5-7 are 0 */
+			sc->mcr = value & 0x1F;
+
+			/* Outside loopback mode the new MSR status is 0. */
+			msr = 0;
+			if (sc->mcr & MCR_LOOPBACK) {
+				/*
+				 * In the loopback mode certain bits from the
+				 * MCR are reflected back into MSR
+				 */
+				if (sc->mcr & MCR_RTS)
+					msr |= MSR_CTS;
+				if (sc->mcr & MCR_DTR)
+					msr |= MSR_DSR;
+				if (sc->mcr & MCR_OUT1)
+					msr |= MSR_RI;
+				if (sc->mcr & MCR_OUT2)
+					msr |= MSR_DCD;
+			}
+
+			/*
+			 * Detect if there has been any change between the
+			 * previous and the new value of MSR. If there is
+			 * then assert the appropriate MSR delta bit.
+			 */
+			if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS))
+				sc->msr |= MSR_DCTS;
+			if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR))
+				sc->msr |= MSR_DDSR;
+			if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD))
+				sc->msr |= MSR_DDCD;
+			/* TERI is only raised when RI goes from set to clear. */
+			if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0)
+				sc->msr |= MSR_TERI;
+
+			/*
+			 * Update the value of MSR while retaining the delta
+			 * bits.
+			 */
+			sc->msr &= MSR_DELTA_MASK;
+			sc->msr |= msr;
+			break;
+		case REG_LSR:
+			/*
+			 * Line status register is not meant to be written to
+			 * during normal operation.
+			 */
+			break;
+		case REG_MSR:
+			/*
+			 * As far as I can tell MSR is a read-only register.
+			 */
+			break;
+		case REG_SCR:
+			sc->scr = value;
+			break;
+		default:
+			break;
+	}
+
+done:
+	uart_toggle_intr(sc);
+	pthread_mutex_unlock(&sc->mtx);
+}
+
+/*
+ * Handle a guest read of the UART register at 'offset' and return its value.
+ *
+ * Runs under sc->mtx.  A stdio-backed UART has its terminal opened on first
+ * access.  While LCR_DLAB is set, REG_DLL/REG_DLH return the divisor latch.
+ * Several registers have read side effects (see the individual cases), so
+ * the interrupt pin is re-evaluated before returning.  Unknown offsets read
+ * as 0xFF.
+ */
+uint8_t
+uart_read(struct uart_softc *sc, int offset)
+{
+	uint8_t value, pending;
+
+	pthread_mutex_lock(&sc->mtx);
+
+	/* Open terminal */
+	if (sc->stdio && !sc->opened) {
+		uart_opentty(sc);
+		sc->opened = true;
+	}
+
+	/*
+	 * Divisor latch accesses take precedence over the normal registers
+	 * that share their offsets.
+	 */
+	if ((sc->lcr & LCR_DLAB) != 0 &&
+	    (offset == REG_DLL || offset == REG_DLH)) {
+		value = (offset == REG_DLL) ? sc->dll : sc->dlh;
+		goto out;
+	}
+
+	switch (offset) {
+	case REG_DATA:
+		value = fifo_getchar(&sc->rxfifo);
+		break;
+	case REG_IER:
+		value = sc->ier;
+		break;
+	case REG_IIR:
+		pending = uart_intr_reason(sc);
+
+		/*
+		 * Reading the IIR while it reports TXRDY acknowledges that
+		 * interrupt.
+		 */
+		if (pending == IIR_TXRDY)
+			sc->thre_int_pending = false;
+
+		value = pending;
+		if ((sc->fcr & FCR_ENABLE) != 0)
+			value |= IIR_FIFO_MASK;
+		break;
+	case REG_LCR:
+		value = sc->lcr;
+		break;
+	case REG_MCR:
+		value = sc->mcr;
+		break;
+	case REG_LSR:
+		/* Transmitter is always ready for more data */
+		sc->lsr |= LSR_TEMT | LSR_THRE;
+
+		/* Reflect whether receive data is waiting */
+		sc->lsr &= ~LSR_RXRDY;
+		if (fifo_numchars(&sc->rxfifo) > 0)
+			sc->lsr |= LSR_RXRDY;
+
+		value = sc->lsr;
+
+		/* The LSR_OE bit is cleared on LSR read */
+		sc->lsr &= ~LSR_OE;
+		break;
+	case REG_MSR:
+		/* The delta bits are returned once and then cleared */
+		value = sc->msr;
+		sc->msr &= ~MSR_DELTA_MASK;
+		break;
+	case REG_SCR:
+		value = sc->scr;
+		break;
+	default:
+		value = 0xFF;
+		break;
+	}
+
+out:
+	uart_toggle_intr(sc);
+	pthread_mutex_unlock(&sc->mtx);
+
+	return (value);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Thread body for the stdio backend: block in poll() on stdin and drain the
+ * terminal into the UART each time poll() returns successfully.  The loop
+ * only ends if poll() fails with something other than EINTR.
+ */
+static void *
+uart_tty_thread(void *param)
+{
+	struct uart_softc *sc = param;
+	pollfd_t pfd;
+
+	pfd.fd = STDIN_FILENO;
+	pfd.events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND;
+
+	while (1) {
+		if (poll(&pfd, 1, -1) >= 0) {
+			uart_tty_drain(sc);
+			continue;
+		}
+
+		/* An interrupted poll is simply retried. */
+		if (errno == EINTR)
+			continue;
+
+		perror("poll failed");
+		break;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Read the "ident" string from the client's descriptor and parse it.
+ *
+ * The expected message is a single line of the form "IDENT <pid>\n".  On
+ * success 0 is returned and, if 'pid' is non-NULL, the parsed process ID is
+ * stored through it.  -1 is returned if the read fails, the line does not
+ * start with "IDENT ", or no valid number follows; '*pid' is left untouched
+ * in that case.
+ *
+ * The routine also tolerates being called with pid == NULL, for times when
+ * you want to "eat" the ident string from a client without saving it.  In
+ * that mode it returns 0 once a newline has been consumed and -1 if the
+ * stream ends or fails first.
+ */
+static int
+get_client_ident(int clifd, pid_t *pid)
+{
+	char buf[BUFSIZ], *bufp, *endp;
+	size_t buflen = sizeof (buf);
+	long long val;
+	char c = '\0';
+	int i = 0, r = 0;
+
+	/* "eat up the ident string" case, for simplicity */
+	if (pid == NULL) {
+		while (read(clifd, &c, 1) == 1) {
+			if (c == '\n')
+				return (0);
+		}
+		/*
+		 * EOF or a read error before the newline.  Do not fall
+		 * through: the parsing code below stores through 'pid'.
+		 */
+		return (-1);
+	}
+
+	bzero(buf, sizeof (buf));
+	while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) {
+		buflen--;
+		if (c == '\n')
+			break;
+
+		buf[i] = c;
+		i++;
+	}
+	if (r == -1)
+		return (-1);
+
+	/*
+	 * We've filled the buffer, but still haven't seen \n.  Keep eating
+	 * until we find it; we don't expect this to happen, but this is
+	 * defensive.
+	 */
+	if (c != '\n') {
+		while ((r = read(clifd, &c, sizeof (c))) > 0)
+			if (c == '\n')
+				break;
+	}
+
+	/*
+	 * Parse buffer for message of the form: IDENT <pid>
+	 */
+	bufp = buf;
+	if (strncmp(bufp, "IDENT ", 6) != 0)
+		return (-1);
+	bufp += 6;
+	errno = 0;
+	val = strtoll(bufp, &endp, 10);
+	/* Reject range errors and idents with no digits at all. */
+	if (errno != 0 || endp == bufp)
+		return (-1);
+
+	*pid = val;
+
+	return (0);
+}
+
+/*
+ * Accept a pending connection on the console server socket and make it the
+ * active client.
+ *
+ * The client must first identify itself (see get_client_ident()); it is
+ * then switched to non-blocking mode and sent "OK\n".  On success the
+ * client's descriptor and pid are recorded in the softc and 0 is returned.
+ * On any failure the new connection, if one was accepted, is closed and -1
+ * is returned.
+ */
+static int
+uart_bcons_accept_client(struct uart_softc *sc)
+{
+	int connfd;
+	struct sockaddr_un cliaddr;
+	socklen_t clilen;
+	pid_t pid;
+
+	clilen = sizeof (cliaddr);
+	connfd = accept(sc->usc_bcons.servfd,
+			(struct sockaddr *)&cliaddr, &clilen);
+	if (connfd == -1)
+		return (-1);
+	if (get_client_ident(connfd, &pid) == -1) {
+		(void) shutdown(connfd, SHUT_RDWR);
+		(void) close(connfd);
+		return (-1);
+	}
+
+	if (fcntl(connfd, F_SETFL, O_NONBLOCK) < 0) {
+		(void) shutdown(connfd, SHUT_RDWR);
+		(void) close(connfd);
+		return (-1);
+	}
+	/* Best-effort acknowledgement; a dead peer shows up on the next read. */
+	(void) write(connfd, "OK\n", 3);
+
+	sc->usc_bcons.clipid = pid;
+	sc->usc_bcons.clifd = connfd;
+
+	/* pid_t need not be long-sized, so cast to match the %lu conversion. */
+	printf("Connection from process ID %lu.\n", (unsigned long)pid);
+
+	return (0);
+}
+
+/*
+ * Turn away a new connection because a client is already attached.
+ *
+ * The connection is accepted and its ident string consumed.  If that
+ * succeeds, the pid of the current client is sent back as a one-line reply
+ * before the connection is closed.
+ */
+static void
+uart_bcons_reject_client(struct uart_softc *sc)
+{
+	int connfd;
+	struct sockaddr_un cliaddr;
+	socklen_t clilen;
+	char nak[MAXPATHLEN];
+
+	clilen = sizeof (cliaddr);
+	connfd = accept(sc->usc_bcons.servfd,
+			(struct sockaddr *)&cliaddr, &clilen);
+	/* Nothing to reject (or to close) if the accept itself failed. */
+	if (connfd == -1)
+		return;
+
+	/*
+	 * After hearing its ident string, tell the client to get lost.
+	 */
+	if (get_client_ident(connfd, NULL) == 0) {
+		/* Cast so the argument matches %lu whatever the pid's width. */
+		(void) snprintf(nak, sizeof (nak), "%lu\n",
+		    (unsigned long)sc->usc_bcons.clipid);
+		(void) write(connfd, nak, strlen(nak));
+	}
+	(void) shutdown(connfd, SHUT_RDWR);
+	(void) close(connfd);
+}
+
+/*
+ * React to activity on the client descriptor by draining it.  Returns -1
+ * if the drain reported an error; otherwise 0, after closing the client
+ * connection if the drain reported that the peer went away.
+ */
+static int
+uart_bcons_client_event(struct uart_softc *sc)
+{
+	int status = uart_bcons_drain(sc);
+
+	if (status < 0)
+		return (-1);
+
+	if (status == 0)
+		return (0);
+
+	fprintf(stderr, "Closing connection with bhyve console\n");
+	(void) shutdown(sc->usc_bcons.clifd, SHUT_RDWR);
+	(void) close(sc->usc_bcons.clifd);
+	sc->usc_bcons.clifd = -1;
+
+	return (0);
+}
+
+/*
+ * React to activity on the server socket, i.e. a new incoming connection.
+ * If a client is already attached the newcomer is rejected; otherwise it is
+ * accepted and anyone waiting on bcons_wait_done is signalled.
+ */
+static void
+uart_bcons_server_event(struct uart_softc *sc)
+{
+
+	if (sc->usc_bcons.clifd != -1) {
+		/* we're already handling a client */
+		uart_bcons_reject_client(sc);
+		return;
+	}
+
+	if (uart_bcons_accept_client(sc) == 0) {
+		pthread_mutex_lock(&bcons_wait_lock);
+		bcons_connected = B_TRUE;
+		pthread_cond_signal(&bcons_wait_done);
+		pthread_mutex_unlock(&bcons_wait_lock);
+	}
+}
+
+/*
+ * Thread body for the bcons backend.  Polls the client descriptor and the
+ * server socket, handing client input to uart_bcons_client_event() and new
+ * connections to uart_bcons_server_event().  The loop ends on a poll
+ * failure other than EINTR, on a client-event error, or when either
+ * descriptor reports an event that is not plain readability; any attached
+ * client is closed on the way out.
+ */
+static void *
+uart_bcons_thread(void *param)
+{
+	struct uart_softc *sc = param;
+	struct pollfd pollfds[2];
+	struct pollfd *cli = &pollfds[0];	/* read from client, write to vm */
+	struct pollfd *srv = &pollfds[1];	/* server socket: new connections */
+	int res;
+
+	cli->events = POLLIN | POLLRDNORM | POLLRDBAND |
+	    POLLPRI | POLLERR | POLLHUP;
+	srv->events = cli->events;
+
+	for (;;) {
+		/* The client descriptor may have changed since last time. */
+		cli->fd = sc->usc_bcons.clifd;
+		srv->fd = sc->usc_bcons.servfd;
+		cli->revents = 0;
+		srv->revents = 0;
+
+		res = poll(pollfds,
+		    sizeof (pollfds) / sizeof (struct pollfd), -1);
+
+		if (res == -1 && errno != EINTR) {
+			perror("poll failed");
+			/* we are hosed, close connection */
+			break;
+		}
+
+		/* event from client side */
+		if (cli->revents) {
+			if ((cli->revents &
+			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) == 0)
+				break;
+			if (uart_bcons_client_event(sc) < 0)
+				break;
+		}
+
+		/* event from server socket */
+		if (srv->revents) {
+			if ((srv->revents & (POLLIN | POLLRDNORM)) == 0)
+				break;
+			uart_bcons_server_event(sc);
+		}
+	}
+
+	if (sc->usc_bcons.clifd != -1) {
+		fprintf(stderr, "Closing connection with bhyve console\n");
+		(void) shutdown(sc->usc_bcons.clifd, SHUT_RDWR);
+		(void) close(sc->usc_bcons.clifd);
+		sc->usc_bcons.clifd = -1;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Create the listening UNIX-domain socket for the bcons backend.
+ *
+ * BHYVE_TMPDIR is created if it does not exist, any stale socket file at
+ * the per-VM path is removed, and a stream socket is bound there and put
+ * into the listening state.  Returns the listening descriptor, or -1 after
+ * printing a diagnostic to stderr.
+ */
+static int
+init_bcons_sock(void)
+{
+	int servfd;
+	struct sockaddr_un servaddr;
+
+	if (mkdir(BHYVE_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
+		/* One conversion per argument, and terminate the line. */
+		fprintf(stderr, "bhyve console setup: "
+		    "could not mkdir %s: %s\n", BHYVE_TMPDIR, strerror(errno));
+		return (-1);
+	}
+
+	bzero(&servaddr, sizeof (servaddr));
+	servaddr.sun_family = AF_UNIX;
+	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
+	    BHYVE_CONS_SOCKPATH, vmname);
+
+	if ((servfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+		fprintf(stderr, "bhyve console setup: "
+		    "could not create socket\n");
+		return (-1);
+	}
+	/* Remove a leftover socket file so that bind() can succeed. */
+	(void) unlink(servaddr.sun_path);
+
+	if (bind(servfd, (struct sockaddr *)&servaddr,
+	    sizeof (servaddr)) == -1) {
+		fprintf(stderr, "bhyve console setup: "
+		    "could not bind to socket\n");
+		goto out;
+	}
+
+	if (listen(servfd, 4) == -1) {
+		fprintf(stderr, "bhyve console setup: "
+		    "could not listen on socket\n");
+		goto out;
+	}
+	return (servfd);
+
+out:
+	(void) unlink(servaddr.sun_path);
+	(void) close(servfd);
+	return (-1);
+}
+#endif
+
+/*
+ * Claim one of the legacy UART resource slots.  On success the slot is
+ * marked in use, its base address and irq are stored through 'baseaddr'
+ * and 'irq', and 0 is returned.  Returns -1 if 'which' is out of range or
+ * the slot has already been claimed.
+ */
+int
+uart_legacy_alloc(int which, int *baseaddr, int *irq)
+{
+
+	if (which < 0 || which >= UART_NLDEVS)
+		return (-1);
+
+	if (uart_lres[which].inuse)
+		return (-1);
+
+	*baseaddr = uart_lres[which].baseaddr;
+	*irq = uart_lres[which].irq;
+	uart_lres[which].inuse = true;
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the state for one emulated UART.
+ *
+ * 'intr_assert' and 'intr_deassert' are called with 'arg' whenever the
+ * interrupt pin needs to be raised or lowered.  The new softc starts out
+ * zeroed, with its mutex initialized and the UART reset.
+ *
+ * Returns the new softc, or NULL if memory could not be allocated.
+ */
+struct uart_softc *
+uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert,
+    void *arg)
+{
+	struct uart_softc *sc;
+
+	/* calloc() zeroes the structure; do not touch it if allocation fails */
+	sc = calloc(1, sizeof (struct uart_softc));
+	if (sc == NULL)
+		return (NULL);
+
+	sc->arg = arg;
+	sc->intr_assert = intr_assert;
+	sc->intr_deassert = intr_deassert;
+
+	pthread_mutex_init(&sc->mtx, NULL);
+
+	uart_reset(sc);
+
+	return (sc);
+}
+
+/*
+ * Attach a backend to the UART according to the option string 'opts'.
+ *
+ * A NULL 'opts' leaves the UART without a backend and returns 0.  "stdio"
+ * selects the terminal backend.  When not built for FreeBSD, an option
+ * containing "bcons" selects the console-socket backend, and "bcons,wait"
+ * additionally sets bcons_wait.  Only one backend may be active in the
+ * process, so a second request is refused.
+ *
+ * Returns 0 on success and -1 for an unrecognized or refused option, or if
+ * the console socket could not be set up.
+ */
+int
+uart_set_backend(struct uart_softc *sc, const char *opts)
+{
+#ifndef	__FreeBSD__
+	pthread_t tid;
+	int error;
+#endif
+	/*
+	 * XXX one stdio backend supported at this time.
+	 */
+	if (opts == NULL)
+		return (0);
+
+#ifdef	__FreeBSD__
+	if (strcmp("stdio", opts) == 0 && !uart_stdio) {
+		sc->stdio = true;
+		uart_stdio = true;
+		return (0);
+#else
+	if (strcmp("stdio", opts) == 0 && !uart_stdio && !uart_bcons) {
+		sc->stdio = true;
+		uart_stdio = true;
+
+		/*
+		 * POSIX requires a valid pthread_t pointer here, so hand
+		 * it a local even though the thread ID is not used.
+		 */
+		error = pthread_create(&tid, NULL, uart_tty_thread, sc);
+		assert(error == 0);
+
+		return (0);
+	} else if (strstr(opts, "bcons") != NULL &&
+	    !uart_stdio && !uart_bcons) {
+		/*
+		 * Bring the socket up before committing to the backend, so
+		 * that a failure does not leave the bcons flags set.
+		 */
+		sc->usc_bcons.clifd = -1;
+		if ((sc->usc_bcons.servfd = init_bcons_sock()) == -1) {
+			fprintf(stderr, "bhyve console setup: "
+			    "socket initialization failed\n");
+			return (-1);
+		}
+
+		sc->bcons = true;
+		uart_bcons = true;
+
+		if (strstr(opts, "bcons,wait") != NULL) {
+			bcons_wait = true;
+		}
+
+		error = pthread_create(&tid, NULL, uart_bcons_thread, sc);
+		assert(error == 0);
+
+		return (0);
+#endif
+	} else
+		return (-1);
+}
diff --git a/usr/src/cmd/bhyve/uart_emul.h b/usr/src/cmd/bhyve/uart_emul.h
new file mode 100644
index 0000000000..ecff957991
--- /dev/null
+++ b/usr/src/cmd/bhyve/uart_emul.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/uart_emul.h 257293 2013-10-29 00:18:11Z neel $
+ */
+
+#ifndef _UART_EMUL_H_
+#define	_UART_EMUL_H_
+
+
+/* NOTE(review): presumably the size in bytes of the UART's I/O BAR. */
+#define	UART_IO_BAR_SIZE	8
+
+/* Opaque per-UART state; the definition is private to the emulation. */
+struct uart_softc;
+
+/* Callback used to raise or lower the UART's interrupt line. */
+typedef void (*uart_intr_func_t)(void *arg);
+
+/*
+ * Create a UART instance.  'intr_assert' and 'intr_deassert' are invoked
+ * with 'arg' to drive the interrupt line.
+ */
+struct uart_softc *uart_init(uart_intr_func_t intr_assert,
+		uart_intr_func_t intr_deassert, void *arg);
+
+/*
+ * Claim legacy UART 'unit', returning its I/O base address and irq through
+ * the pointer arguments.  Returns 0 on success and -1 if the unit is out of
+ * range or already in use.
+ */
+int	uart_legacy_alloc(int unit, int *ioaddr, int *irq);
+/* Read the UART register at 'offset'. */
+uint8_t	uart_read(struct uart_softc *sc, int offset);
+/* Write 'value' to the UART register at 'offset'. */
+void	uart_write(struct uart_softc *sc, int offset, uint8_t value);
+/* Select the backend named by 'opt'; returns 0 on success, -1 otherwise. */
+int	uart_set_backend(struct uart_softc *sc, const char *opt);
+#endif
diff --git a/usr/src/cmd/bhyve/vga.c b/usr/src/cmd/bhyve/vga.c
new file mode 100644
index 0000000000..4330741042
--- /dev/null
+++ b/usr/src/cmd/bhyve/vga.c
@@ -0,0 +1,1289 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <machine/vmm.h>
+
+#include "bhyvegc.h"
+#include "console.h"
+#include "inout.h"
+#include "mem.h"
+#include "vga.h"
+
+/* Byte-count multipliers, typed unsigned long. */
+#define	KB	(1024UL)
+#define	MB	(1024 * 1024UL)
+
+/*
+ * State of the emulated VGA device: the graphics console it renders to,
+ * the backing video memory, and the software copies of each register
+ * group.
+ */
+struct vga_softc {
+	struct mem_range	mr;
+
+	/*
+	 * Graphics console and the display size last computed from the
+	 * CRTC/sequencer registers (see vga_check_size()).
+	 */
+	struct bhyvegc		*gc;
+	int			gc_width;
+	int			gc_height;
+	struct bhyvegc_image	*gc_image;
+
+	/* Video memory; the planes are addressed at 64KB strides. */
+	uint8_t			*vga_ram;
+
+	/*
+	 * General registers
+	 */
+	uint8_t			vga_misc;
+	uint8_t			vga_sts1;
+
+	/*
+	 * Sequencer
+	 */
+	struct {
+		int		seq_index;
+		uint8_t		seq_reset;
+		uint8_t		seq_clock_mode;
+		int		seq_cm_dots;	/* dots per character clock */
+		uint8_t		seq_map_mask;	/* planes enabled for writes */
+		uint8_t		seq_cmap_sel;
+		int		seq_cmap_pri_off;	/* font offset used when attr bit 3 is set */
+		int		seq_cmap_sec_off;	/* font offset used when attr bit 3 is clear */
+		uint8_t		seq_mm;
+	} vga_seq;
+
+	/*
+	 * CRT Controller
+	 */
+	struct {
+		int		crtc_index;
+		uint8_t		crtc_mode_ctrl;
+		uint8_t		crtc_horiz_total;
+		uint8_t		crtc_horiz_disp_end;
+		uint8_t		crtc_start_horiz_blank;
+		uint8_t		crtc_end_horiz_blank;
+		uint8_t		crtc_start_horiz_retrace;
+		uint8_t		crtc_end_horiz_retrace;
+		uint8_t		crtc_vert_total;
+		uint8_t		crtc_overflow;
+		uint8_t		crtc_present_row_scan;
+		uint8_t		crtc_max_scan_line;
+		uint8_t		crtc_cursor_start;
+		uint8_t		crtc_cursor_on;
+		uint8_t		crtc_cursor_end;
+		uint8_t		crtc_start_addr_high;
+		uint8_t		crtc_start_addr_low;
+		uint16_t	crtc_start_addr;
+		uint8_t		crtc_cursor_loc_low;
+		uint8_t		crtc_cursor_loc_high;
+		uint16_t	crtc_cursor_loc;
+		uint8_t		crtc_vert_retrace_start;
+		uint8_t		crtc_vert_retrace_end;
+		uint8_t		crtc_vert_disp_end;
+		uint8_t		crtc_offset;
+		uint8_t		crtc_underline_loc;
+		uint8_t		crtc_start_vert_blank;
+		uint8_t		crtc_end_vert_blank;
+		uint8_t		crtc_line_compare;
+	} vga_crtc;
+
+	/*
+	 * Graphics Controller
+	 */
+	struct {
+		int		gc_index;
+		uint8_t		gc_set_reset;
+		uint8_t		gc_enb_set_reset;
+		uint8_t		gc_color_compare;
+		uint8_t		gc_rotate;
+		uint8_t		gc_op;
+		uint8_t		gc_read_map_sel;
+		uint8_t		gc_mode;
+		bool		gc_mode_c4;		/* chain 4 */
+		bool		gc_mode_oe;		/* odd/even */
+		uint8_t		gc_mode_rm;		/* read mode */
+		uint8_t		gc_mode_wm;		/* write mode */
+		uint8_t		gc_misc;
+		uint8_t		gc_misc_gm;		/* graphics mode */
+		uint8_t		gc_misc_mm;		/* memory map */
+		uint8_t		gc_color_dont_care;
+		uint8_t		gc_bit_mask;
+		/* One latch per plane, filled on every aperture read. */
+		uint8_t		gc_latch0;
+		uint8_t		gc_latch1;
+		uint8_t		gc_latch2;
+		uint8_t		gc_latch3;
+	} vga_gc;
+
+	/*
+	 * Attribute Controller
+	 */
+	struct {
+		int		atc_flipflop;
+		int		atc_index;
+		uint8_t		atc_palette[16];
+		uint8_t		atc_mode;
+		uint8_t		atc_overscan_color;
+		uint8_t		atc_color_plane_enb;
+		uint8_t		atc_horiz_pixel_panning;
+		uint8_t		atc_color_select;
+		uint8_t		atc_color_select_45;
+		uint8_t		atc_color_select_67;
+	} vga_atc;
+
+	/*
+	 * DAC
+	 */
+	struct {
+		uint8_t		dac_state;
+		int		dac_rd_index;
+		int		dac_rd_subindex;
+		int		dac_wr_index;
+		int		dac_wr_subindex;
+		uint8_t		dac_palette[3 * 256];
+		/* Per-index pixel values handed to the console image. */
+		uint32_t	dac_palette_rgb[256];
+	} vga_dac;
+};
+
+/*
+ * Report whether the display should be treated as being in reset: that is
+ * the case when SEQ_CM_SO is set in the sequencer clock mode, when either
+ * sequencer reset bit (async or sync) is clear, or when CRTC_MC_TE is clear
+ * in the CRTC mode control register.
+ */
+static bool
+vga_in_reset(struct vga_softc *sc)
+{
+	if ((sc->vga_seq.seq_clock_mode & SEQ_CM_SO) != 0)
+		return (true);
+
+	if ((sc->vga_seq.seq_reset & SEQ_RESET_ASYNC) == 0)
+		return (true);
+
+	if ((sc->vga_seq.seq_reset & SEQ_RESET_SYNC) == 0)
+		return (true);
+
+	if ((sc->vga_crtc.crtc_mode_ctrl & CRTC_MC_TE) == 0)
+		return (true);
+
+	return (false);
+}
+
+/*
+ * Recompute the display dimensions from the CRTC and sequencer registers
+ * and resize the graphics console if they changed.  Nothing is done while
+ * the device is in reset.
+ */
+static void
+vga_check_size(struct bhyvegc *gc, struct vga_softc *sc)
+{
+	int new_width, new_height, vde;
+
+	if (vga_in_reset(sc))
+		return;
+
+	/*
+	 * Horizontal Display End counts characters in text modes; in
+	 * graphics modes it is the pixels per scanline divided by the
+	 * pixels per character clock.  Either way, scaling by the dots per
+	 * character clock yields the width in pixels.
+	 */
+	new_width = (sc->vga_crtc.crtc_horiz_disp_end + 1) *
+	    sc->vga_seq.seq_cm_dots;
+
+	/* Vertical Display End: low 8 bits plus bits 8 and 9 from overflow. */
+	vde = sc->vga_crtc.crtc_vert_disp_end;
+	vde |= ((sc->vga_crtc.crtc_overflow & CRTC_OF_VDE8) >>
+	    CRTC_OF_VDE8_SHIFT) << 8;
+	vde |= ((sc->vga_crtc.crtc_overflow & CRTC_OF_VDE9) >>
+	    CRTC_OF_VDE9_SHIFT) << 9;
+	new_height = vde + 1;
+
+	if (new_width == sc->gc_width && new_height == sc->gc_height)
+		return;
+
+	sc->gc_width = new_width;
+	sc->gc_height = new_height;
+	bhyvegc_resize(gc, sc->gc_width, sc->gc_height);
+}
+
+/*
+ * Compute the colour of graphics-mode pixel (x, y).  One bit is gathered
+ * from each of the four planes to form a 4-bit value, which is masked by
+ * the colour plane enable register, translated through the attribute
+ * palette and colour select bits, and finally looked up in the DAC palette.
+ */
+static uint32_t
+vga_get_pixel(struct vga_softc *sc, int x, int y)
+{
+	int byte_off, shift, plane;
+	uint8_t pix, idx;
+
+	/* Eight pixels per byte, most significant bit leftmost. */
+	byte_off = (y * sc->gc_width / 8) + (x / 8);
+	shift = 7 - (x % 8);
+
+	pix = 0;
+	for (plane = 0; plane < 4; plane++) {
+		pix |= ((sc->vga_ram[byte_off + plane * 64*KB] >> shift) &
+		    0x1) << plane;
+	}
+
+	pix &= sc->vga_atc.atc_color_plane_enb;
+
+	idx = sc->vga_atc.atc_palette[pix];
+	if (sc->vga_atc.atc_mode & ATC_MC_IPS) {
+		/* Bits 4-5 come from the colour select register instead. */
+		idx &= 0x0f;
+		idx |= sc->vga_atc.atc_color_select_45;
+	}
+	idx |= sc->vga_atc.atc_color_select_67;
+
+	return (sc->vga_dac.dac_palette_rgb[idx]);
+}
+
+/*
+ * Fill the console image with the current graphics-mode contents, one
+ * pixel at a time in row-major order.
+ */
+static void
+vga_render_graphics(struct vga_softc *sc)
+{
+	int row, col, row_base;
+
+	for (row = 0; row < sc->gc_height; row++) {
+		row_base = row * sc->gc_width;
+		for (col = 0; col < sc->gc_width; col++) {
+			sc->gc_image->data[row_base + col] =
+			    vga_get_pixel(sc, col, row);
+		}
+	}
+}
+
+/*
+ * Compute the colour of text-mode pixel (x, y).
+ *
+ * The character cell containing the pixel is located using 16-row cells
+ * that are seq_cm_dots pixels wide; its character code is read from plane 0
+ * and its attribute from plane 1.  Inside the cursor's scanline range the
+ * cell's foreground colour is returned.  Otherwise the glyph row is fetched
+ * from plane 2 (32 bytes per glyph) and the pixel takes the foreground
+ * colour (attribute low nibble) if its glyph bit is set, else the
+ * background colour (attribute high nibble).
+ */
+static uint32_t
+vga_get_text_pixel(struct vga_softc *sc, int x, int y)
+{
+	int dots, offset, bit, font_offset;
+	uint8_t ch, attr, font;
+	uint8_t idx;
+
+	dots = sc->vga_seq.seq_cm_dots;
+
+	offset = 2 * sc->vga_crtc.crtc_start_addr;
+	offset += (y / 16 * sc->gc_width / dots) * 2 + (x / dots) * 2;
+
+	/* Bit 7 is the leftmost glyph column; goes negative past column 7. */
+	bit = 7 - (x % dots);
+
+	ch = sc->vga_ram[offset + 0 * 64*KB];
+	attr = sc->vga_ram[offset + 1 * 64*KB];
+
+	if (sc->vga_crtc.crtc_cursor_on &&
+	    (offset == (sc->vga_crtc.crtc_cursor_loc * 2)) &&
+	    ((y % 16) >= (sc->vga_crtc.crtc_cursor_start & CRTC_CS_CS)) &&
+	    ((y % 16) <= (sc->vga_crtc.crtc_cursor_end & CRTC_CE_CE))) {
+		idx = sc->vga_atc.atc_palette[attr & 0xf];
+		return (sc->vga_dac.dac_palette_rgb[idx]);
+	}
+
+	if ((sc->vga_seq.seq_mm & SEQ_MM_EM) &&
+	    sc->vga_seq.seq_cmap_pri_off != sc->vga_seq.seq_cmap_sec_off) {
+		/*
+		 * Two distinct fonts are selected: attribute bit 3 picks
+		 * the font and therefore no longer takes part in the
+		 * foreground colour.
+		 */
+		if (attr & 0x8)
+			font_offset = sc->vga_seq.seq_cmap_pri_off +
+				(ch << 5) + y % 16;
+		else
+			font_offset = sc->vga_seq.seq_cmap_sec_off +
+				(ch << 5) + y % 16;
+		attr &= ~0x8;
+	} else {
+		font_offset = (ch << 5) + y % 16;
+	}
+
+	font = sc->vga_ram[font_offset + 2 * 64*KB];
+
+	/*
+	 * Bit 0 is a real glyph column (the rightmost of the eight), so only
+	 * negative positions, i.e. a ninth dot with no glyph data, are
+	 * forced to the background.
+	 */
+	if ((bit >= 0) && (font & (1 << bit)))
+		idx = sc->vga_atc.atc_palette[attr & 0xf];
+	else
+		idx = sc->vga_atc.atc_palette[attr >> 4];
+
+	return (sc->vga_dac.dac_palette_rgb[idx]);
+}
+
+/*
+ * Fill the console image with the current text-mode contents, one pixel at
+ * a time in row-major order.
+ */
+static void
+vga_render_text(struct vga_softc *sc)
+{
+	int row, col, row_base;
+
+	for (row = 0; row < sc->gc_height; row++) {
+		row_base = row * sc->gc_width;
+		for (col = 0; col < sc->gc_width; col++) {
+			sc->gc_image->data[row_base + col] =
+			    vga_get_text_pixel(sc, col, row);
+		}
+	}
+}
+
+/*
+ * Redraw the console image from the VGA state passed as 'arg'.  The display
+ * size is refreshed first.  While the device is in reset the image is
+ * cleared to zero; otherwise it is drawn in graphics or text mode depending
+ * on the graphics controller and attribute controller mode bits.
+ */
+static void
+vga_render(struct bhyvegc *gc, void *arg)
+{
+	struct vga_softc *sc = arg;
+
+	vga_check_size(gc, sc);
+
+	if (!vga_in_reset(sc)) {
+		if (sc->vga_gc.gc_misc_gm &&
+		    (sc->vga_atc.atc_mode & ATC_MC_GA))
+			vga_render_graphics(sc);
+		else
+			vga_render_text(sc);
+		return;
+	}
+
+	memset(sc->gc_image->data, 0,
+	    sc->gc_image->width * sc->gc_image->height *
+	     sizeof (uint32_t));
+}
+
+/*
+ * Emulate a single byte read by the guest from the VGA aperture.
+ *
+ * 'addr' is reduced to an offset within a plane according to the memory
+ * map select field, all four latches are loaded from that offset, and the
+ * byte from the plane chosen by the read map select register is returned.
+ * With odd/even addressing, the low offset bit is folded into the plane
+ * number.  Read mode 1 is not implemented and asserts.
+ */
+static uint64_t
+vga_mem_rd_handler(struct vmctx *ctx, uint64_t addr, void *arg1)
+{
+	struct vga_softc *sc = arg1;
+	uint8_t plane;
+	int off;
+
+	off = addr;
+	switch (sc->vga_gc.gc_misc_mm) {
+	case 0x0:
+		/* extended mode: base 0xa0000 size 128k */
+		off -= 0xa0000;
+		off &= (128 * KB - 1);
+		break;
+	case 0x1:
+		/* EGA/VGA mode: base 0xa0000 size 64k */
+		off -= 0xa0000;
+		off &= (64 * KB - 1);
+		break;
+	case 0x2:
+		/*
+		 * monochrome text mode: base 0xb0000 size 32kb (not
+		 * implemented; control continues into the next case when
+		 * assertions are compiled out)
+		 */
+		assert(0);
+	case 0x3:
+		/* color text mode and CGA: base 0xb8000 size 32kb */
+		off -= 0xb8000;
+		off &= (32 * KB - 1);
+		break;
+	}
+
+	/* Every read refills the latches from all four planes. */
+	sc->vga_gc.gc_latch0 = sc->vga_ram[off + 0*64*KB];
+	sc->vga_gc.gc_latch1 = sc->vga_ram[off + 1*64*KB];
+	sc->vga_gc.gc_latch2 = sc->vga_ram[off + 2*64*KB];
+	sc->vga_gc.gc_latch3 = sc->vga_ram[off + 3*64*KB];
+
+	/* read mode 1 */
+	if (sc->vga_gc.gc_mode_rm)
+		assert(0);
+
+	plane = sc->vga_gc.gc_read_map_sel;
+	if (sc->vga_gc.gc_mode_oe) {
+		plane |= (off & 1);
+		off &= ~1;
+	}
+
+	/* read mode 0: return the byte from the selected plane. */
+	off += plane * 64*KB;
+
+	return (sc->vga_ram[off]);
+}
+
+/*
+ * Emulate a single byte write by the guest into the VGA aperture.
+ *
+ * 'addr' is reduced to an offset within a plane according to the memory
+ * map select field (gc_misc_mm).  The four latch values are then combined
+ * with 'val' according to the write mode (gc_mode_wm) and the logical
+ * operation (gc_op), and the results are stored to the planes enabled in
+ * the sequencer map mask.  With odd/even addressing (gc_mode_oe), odd
+ * offsets go to planes 1 and 3 and even offsets to planes 0 and 2.
+ */
+static void
+vga_mem_wr_handler(struct vmctx *ctx, uint64_t addr, uint8_t val, void *arg1)
+{
+	struct vga_softc *sc = arg1;
+	uint8_t c0, c1, c2, c3;		/* per-plane bytes to be stored */
+	uint8_t m0, m1, m2, m3;		/* per-plane operand for gc_op */
+	uint8_t set_reset;
+	uint8_t enb_set_reset;
+	uint8_t	mask;
+	int offset;
+
+	offset = addr;
+	switch (sc->vga_gc.gc_misc_mm) {
+	case 0x0:
+		/*
+		 * extended mode: base 0xa0000 size 128kb
+		 */
+		offset -=0xa0000;
+		offset &= (128 * KB - 1);
+		break;
+	case 0x1:
+		/*
+		 * EGA/VGA mode: base 0xa0000 size 64kb
+		 */
+		offset -=0xa0000;
+		offset &= (64 * KB - 1);
+		break;
+	case 0x2:
+		/*
+		 * monochrome text mode: base 0xb0000 size 32kb
+		 * (not implemented; control continues into the next case
+		 * when assertions are compiled out)
+		 */
+		assert(0);
+	case 0x3:
+		/*
+		 * color text mode and CGA: base 0xb8000 size 32kb
+		 */
+		offset -=0xb8000;
+		offset &= (32 * KB - 1);
+		break;
+	}
+
+	set_reset = sc->vga_gc.gc_set_reset;
+	enb_set_reset = sc->vga_gc.gc_enb_set_reset;
+
+	/* Start from the latch contents captured by the last read. */
+	c0 = sc->vga_gc.gc_latch0;
+	c1 = sc->vga_gc.gc_latch1;
+	c2 = sc->vga_gc.gc_latch2;
+	c3 = sc->vga_gc.gc_latch3;
+
+	switch (sc->vga_gc.gc_mode_wm) {
+	case 0:
+		/* write mode 0 */
+		mask = sc->vga_gc.gc_bit_mask;
+
+		/* Rotate the written byte right by gc_rotate bits. */
+		val = (val >> sc->vga_gc.gc_rotate) |
+		    (val << (8 - sc->vga_gc.gc_rotate));
+
+		switch (sc->vga_gc.gc_op) {
+		case 0x00:		/* replace */
+			m0 = (set_reset & 1) ? mask : 0x00;
+			m1 = (set_reset & 2) ? mask : 0x00;
+			m2 = (set_reset & 4) ? mask : 0x00;
+			m3 = (set_reset & 8) ? mask : 0x00;
+
+			c0 = (enb_set_reset & 1) ? (c0 & ~mask) : (val & mask);
+			c1 = (enb_set_reset & 2) ? (c1 & ~mask) : (val & mask);
+			c2 = (enb_set_reset & 4) ? (c2 & ~mask) : (val & mask);
+			c3 = (enb_set_reset & 8) ? (c3 & ~mask) : (val & mask);
+
+			c0 |= m0;
+			c1 |= m1;
+			c2 |= m2;
+			c3 |= m3;
+			break;
+		case 0x08:		/* AND */
+			m0 = set_reset & 1 ? 0xff : ~mask;
+			m1 = set_reset & 2 ? 0xff : ~mask;
+			m2 = set_reset & 4 ? 0xff : ~mask;
+			m3 = set_reset & 8 ? 0xff : ~mask;
+
+			c0 = enb_set_reset & 1 ? c0 & m0 : val & m0;
+			c1 = enb_set_reset & 2 ? c1 & m1 : val & m1;
+			c2 = enb_set_reset & 4 ? c2 & m2 : val & m2;
+			c3 = enb_set_reset & 8 ? c3 & m3 : val & m3;
+			break;
+		case 0x10:		/* OR */
+			m0 = set_reset & 1 ? mask : 0x00;
+			m1 = set_reset & 2 ? mask : 0x00;
+			m2 = set_reset & 4 ? mask : 0x00;
+			m3 = set_reset & 8 ? mask : 0x00;
+
+			c0 = enb_set_reset & 1 ? c0 | m0 : val | m0;
+			c1 = enb_set_reset & 2 ? c1 | m1 : val | m1;
+			c2 = enb_set_reset & 4 ? c2 | m2 : val | m2;
+			c3 = enb_set_reset & 8 ? c3 | m3 : val | m3;
+			break;
+		case 0x18:		/* XOR */
+			m0 = set_reset & 1 ? mask : 0x00;
+			m1 = set_reset & 2 ? mask : 0x00;
+			m2 = set_reset & 4 ? mask : 0x00;
+			m3 = set_reset & 8 ? mask : 0x00;
+
+			c0 = enb_set_reset & 1 ? c0 ^ m0 : val ^ m0;
+			c1 = enb_set_reset & 2 ? c1 ^ m1 : val ^ m1;
+			c2 = enb_set_reset & 4 ? c2 ^ m2 : val ^ m2;
+			c3 = enb_set_reset & 8 ? c3 ^ m3 : val ^ m3;
+			break;
+		}
+		break;
+	case 1:
+		/* write mode 1: the latch contents are stored unmodified */
+		break;
+	case 2:
+		/* write mode 2: bits 0-3 of val select a fill per plane */
+		mask = sc->vga_gc.gc_bit_mask;
+
+		switch (sc->vga_gc.gc_op) {
+		case 0x00:		/* replace */
+			m0 = (val & 1 ? 0xff : 0x00) & mask;
+			m1 = (val & 2 ? 0xff : 0x00) & mask;
+			m2 = (val & 4 ? 0xff : 0x00) & mask;
+			m3 = (val & 8 ? 0xff : 0x00) & mask;
+
+			c0 &= ~mask;
+			c1 &= ~mask;
+			c2 &= ~mask;
+			c3 &= ~mask;
+
+			c0 |= m0;
+			c1 |= m1;
+			c2 |= m2;
+			c3 |= m3;
+			break;
+		case 0x08:		/* AND */
+			m0 = (val & 1 ? 0xff : 0x00) | ~mask;
+			m1 = (val & 2 ? 0xff : 0x00) | ~mask;
+			m2 = (val & 4 ? 0xff : 0x00) | ~mask;
+			m3 = (val & 8 ? 0xff : 0x00) | ~mask;
+
+			c0 &= m0;
+			c1 &= m1;
+			c2 &= m2;
+			c3 &= m3;
+			break;
+		case 0x10:		/* OR */
+			m0 = (val & 1 ? 0xff : 0x00) & mask;
+			m1 = (val & 2 ? 0xff : 0x00) & mask;
+			m2 = (val & 4 ? 0xff : 0x00) & mask;
+			m3 = (val & 8 ? 0xff : 0x00) & mask;
+
+			c0 |= m0;
+			c1 |= m1;
+			c2 |= m2;
+			c3 |= m3;
+			break;
+		case 0x18:		/* XOR */
+			m0 = (val & 1 ? 0xff : 0x00) & mask;
+			m1 = (val & 2 ? 0xff : 0x00) & mask;
+			m2 = (val & 4 ? 0xff : 0x00) & mask;
+			m3 = (val & 8 ? 0xff : 0x00) & mask;
+
+			c0 ^= m0;
+			c1 ^= m1;
+			c2 ^= m2;
+			c3 ^= m3;
+			break;
+		}
+		break;
+	case 3:
+		/* write mode 3: val narrows the bit mask; set/reset fills */
+		mask = sc->vga_gc.gc_bit_mask & val;
+
+		val = (val >> sc->vga_gc.gc_rotate) |
+		    (val << (8 - sc->vga_gc.gc_rotate));
+
+		switch (sc->vga_gc.gc_op) {
+		case 0x00:		/* replace */
+			m0 = (set_reset & 1 ? 0xff : 0x00) & mask;
+			m1 = (set_reset & 2 ? 0xff : 0x00) & mask;
+			m2 = (set_reset & 4 ? 0xff : 0x00) & mask;
+			m3 = (set_reset & 8 ? 0xff : 0x00) & mask;
+
+			c0 &= ~mask;
+			c1 &= ~mask;
+			c2 &= ~mask;
+			c3 &= ~mask;
+
+			c0 |= m0;
+			c1 |= m1;
+			c2 |= m2;
+			c3 |= m3;
+			break;
+		case 0x08:		/* AND */
+			m0 = (set_reset & 1 ? 0xff : 0x00) | ~mask;
+			m1 = (set_reset & 2 ? 0xff : 0x00) | ~mask;
+			m2 = (set_reset & 4 ? 0xff : 0x00) | ~mask;
+			m3 = (set_reset & 8 ? 0xff : 0x00) | ~mask;
+
+			c0 &= m0;
+			c1 &= m1;
+			c2 &= m2;
+			c3 &= m3;
+			break;
+		case 0x10:		/* OR */
+			m0 = (set_reset & 1 ? 0xff : 0x00) & mask;
+			m1 = (set_reset & 2 ? 0xff : 0x00) & mask;
+			m2 = (set_reset & 4 ? 0xff : 0x00) & mask;
+			m3 = (set_reset & 8 ? 0xff : 0x00) & mask;
+
+			c0 |= m0;
+			c1 |= m1;
+			c2 |= m2;
+			c3 |= m3;
+			break;
+		case 0x18:		/* XOR */
+			m0 = (set_reset & 1 ? 0xff : 0x00) & mask;
+			m1 = (set_reset & 2 ? 0xff : 0x00) & mask;
+			m2 = (set_reset & 4 ? 0xff : 0x00) & mask;
+			m3 = (set_reset & 8 ? 0xff : 0x00) & mask;
+
+			c0 ^= m0;
+			c1 ^= m1;
+			c2 ^= m2;
+			c3 ^= m3;
+			break;
+		}
+		break;
+	}
+
+	/* Store the results to the planes enabled in the map mask. */
+	if (sc->vga_gc.gc_mode_oe) {
+		if (offset & 1) {
+			/* odd address: planes 1 and 3, at the even offset */
+			offset &= ~1;
+			if (sc->vga_seq.seq_map_mask & 2)
+				sc->vga_ram[offset + 1*64*KB] = c1;
+			if (sc->vga_seq.seq_map_mask & 8)
+				sc->vga_ram[offset + 3*64*KB] = c3;
+		} else {
+			/* even address: planes 0 and 2 */
+			if (sc->vga_seq.seq_map_mask & 1)
+				sc->vga_ram[offset + 0*64*KB] = c0;
+			if (sc->vga_seq.seq_map_mask & 4)
+				sc->vga_ram[offset + 2*64*KB] = c2;
+		}
+	} else {
+		if (sc->vga_seq.seq_map_mask & 1)
+			sc->vga_ram[offset + 0*64*KB] = c0;
+		if (sc->vga_seq.seq_map_mask & 2)
+			sc->vga_ram[offset + 1*64*KB] = c1;
+		if (sc->vga_seq.seq_map_mask & 4)
+			sc->vga_ram[offset + 2*64*KB] = c2;
+		if (sc->vga_seq.seq_map_mask & 8)
+			sc->vga_ram[offset + 3*64*KB] = c3;
+	}
+}
+
+/*
+ * Memory-mapped access entry point for the VGA window.
+ *
+ * An access of 1, 2, 4 or 8 bytes is broken up into single-byte calls to
+ * vga_mem_wr_handler() / vga_mem_rd_handler(), lowest address first; byte i
+ * of *val corresponds to address addr + i.  Any other access size is
+ * ignored (on a read *val is then left untouched).  Always returns 0.
+ */
+static int
+vga_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+		int size, uint64_t *val, void *arg1, long arg2)
+{
+	int i;
+
+	/* Only these widths are serviced; everything else is a no-op. */
+	if (size != 1 && size != 2 && size != 4 && size != 8)
+		return (0);
+
+	if (dir == MEM_F_WRITE) {
+		for (i = 0; i < size; i++) {
+			vga_mem_wr_handler(ctx, addr + i, *val >> (8 * i),
+			    arg1);
+		}
+		return (0);
+	}
+
+	/* Read: byte 0 initialises *val, the remaining bytes are OR-ed in. */
+	*val = vga_mem_rd_handler(ctx, addr, arg1);
+	for (i = 1; i < size; i++)
+		*val |= vga_mem_rd_handler(ctx, addr + i, arg1) << (8 * i);
+
+	return (0);
+}
+
+/*
+ * Service a single-byte read from one of the VGA I/O ports.
+ *
+ * The byte read is stored in *val; arg is the device's struct vga_softc.
+ * Index ports return the currently latched index, data ports return the
+ * softc field selected by that index.
+ *
+ * Side effects visible here:
+ *  - reading DAC_DATA_PORT advances the DAC read sub-index/index;
+ *  - reading Input Status #1 clears the attribute controller flip-flop and
+ *    toggles the GEN_IS1_VR/GEN_IS1_DE bits of the value returned.
+ *
+ * Returns 0 on success.  An unknown port is logged, asserted on, and yields
+ * -1.  Reading an unimplemented register index, GEN_INPUT_STS0_PORT or
+ * GEN_FEATURE_CTRL_PORT trips assert(0).
+ */
+static int
+vga_port_in_handler(struct vmctx *ctx, int in, int port, int bytes,
+		    uint8_t *val, void *arg)
+{
+	struct vga_softc *sc = arg;
+
+	switch (port) {
+	case CRTC_IDX_MONO_PORT:
+	case CRTC_IDX_COLOR_PORT:
+		*val = sc->vga_crtc.crtc_index;
+		break;
+	case CRTC_DATA_MONO_PORT:
+	case CRTC_DATA_COLOR_PORT:
+		switch (sc->vga_crtc.crtc_index) {
+		case CRTC_HORIZ_TOTAL:
+			*val = sc->vga_crtc.crtc_horiz_total;
+			break;
+		case CRTC_HORIZ_DISP_END:
+			*val = sc->vga_crtc.crtc_horiz_disp_end;
+			break;
+		case CRTC_START_HORIZ_BLANK:
+			*val = sc->vga_crtc.crtc_start_horiz_blank;
+			break;
+		case CRTC_END_HORIZ_BLANK:
+			*val = sc->vga_crtc.crtc_end_horiz_blank;
+			break;
+		case CRTC_START_HORIZ_RETRACE:
+			*val = sc->vga_crtc.crtc_start_horiz_retrace;
+			break;
+		case CRTC_END_HORIZ_RETRACE:
+			*val = sc->vga_crtc.crtc_end_horiz_retrace;
+			break;
+		case CRTC_VERT_TOTAL:
+			*val = sc->vga_crtc.crtc_vert_total;
+			break;
+		case CRTC_OVERFLOW:
+			*val = sc->vga_crtc.crtc_overflow;
+			break;
+		case CRTC_PRESET_ROW_SCAN:
+			*val = sc->vga_crtc.crtc_present_row_scan;
+			break;
+		case CRTC_MAX_SCAN_LINE:
+			*val = sc->vga_crtc.crtc_max_scan_line;
+			break;
+		case CRTC_CURSOR_START:
+			*val = sc->vga_crtc.crtc_cursor_start;
+			break;
+		case CRTC_CURSOR_END:
+			*val = sc->vga_crtc.crtc_cursor_end;
+			break;
+		case CRTC_START_ADDR_HIGH:
+			*val = sc->vga_crtc.crtc_start_addr_high;
+			break;
+		case CRTC_START_ADDR_LOW:
+			*val = sc->vga_crtc.crtc_start_addr_low;
+			break;
+		case CRTC_CURSOR_LOC_HIGH:
+			*val = sc->vga_crtc.crtc_cursor_loc_high;
+			break;
+		case CRTC_CURSOR_LOC_LOW:
+			*val = sc->vga_crtc.crtc_cursor_loc_low;
+			break;
+		case CRTC_VERT_RETRACE_START:
+			*val = sc->vga_crtc.crtc_vert_retrace_start;
+			break;
+		case CRTC_VERT_RETRACE_END:
+			*val = sc->vga_crtc.crtc_vert_retrace_end;
+			break;
+		case CRTC_VERT_DISP_END:
+			*val = sc->vga_crtc.crtc_vert_disp_end;
+			break;
+		case CRTC_OFFSET:
+			*val = sc->vga_crtc.crtc_offset;
+			break;
+		case CRTC_UNDERLINE_LOC:
+			*val = sc->vga_crtc.crtc_underline_loc;
+			break;
+		case CRTC_START_VERT_BLANK:
+			*val = sc->vga_crtc.crtc_start_vert_blank;
+			break;
+		case CRTC_END_VERT_BLANK:
+			*val = sc->vga_crtc.crtc_end_vert_blank;
+			break;
+		case CRTC_MODE_CONTROL:
+			*val = sc->vga_crtc.crtc_mode_ctrl;
+			break;
+		case CRTC_LINE_COMPARE:
+			*val = sc->vga_crtc.crtc_line_compare;
+			break;
+		default:
+			//printf("XXX VGA CRTC: inb 0x%04x at index %d\n", port, sc->vga_crtc.crtc_index);
+			assert(0);
+			break;
+		}
+		break;
+	case ATC_IDX_PORT:
+		*val = sc->vga_atc.atc_index;
+		break;
+	case ATC_DATA_PORT:
+		switch (sc->vga_atc.atc_index) {
+		case ATC_PALETTE0 ... ATC_PALETTE15:
+			*val = sc->vga_atc.atc_palette[sc->vga_atc.atc_index];
+			break;
+		case ATC_MODE_CONTROL:
+			*val = sc->vga_atc.atc_mode;
+			break;
+		case ATC_OVERSCAN_COLOR:
+			*val = sc->vga_atc.atc_overscan_color;
+			break;
+		case ATC_COLOR_PLANE_ENABLE:
+			*val = sc->vga_atc.atc_color_plane_enb;
+			break;
+		case ATC_HORIZ_PIXEL_PANNING:
+			*val = sc->vga_atc.atc_horiz_pixel_panning;
+			break;
+		case ATC_COLOR_SELECT:
+			*val = sc->vga_atc.atc_color_select;
+			break;
+		default:
+			//printf("XXX VGA ATC inb 0x%04x at index %d\n", port , sc->vga_atc.atc_index);
+			assert(0);
+			break;
+		}
+		break;
+	case SEQ_IDX_PORT:
+		*val = sc->vga_seq.seq_index;
+		break;
+	case SEQ_DATA_PORT:
+		switch (sc->vga_seq.seq_index) {
+		case SEQ_RESET:
+			*val = sc->vga_seq.seq_reset;
+			break;
+		case SEQ_CLOCKING_MODE:
+			*val = sc->vga_seq.seq_clock_mode;
+			break;
+		case SEQ_MAP_MASK:
+			*val = sc->vga_seq.seq_map_mask;
+			break;
+		case SEQ_CHAR_MAP_SELECT:
+			*val = sc->vga_seq.seq_cmap_sel;
+			break;
+		case SEQ_MEMORY_MODE:
+			*val = sc->vga_seq.seq_mm;
+			break;
+		default:
+			//printf("XXX VGA SEQ: inb 0x%04x at index %d\n", port, sc->vga_seq.seq_index);
+			assert(0);
+			break;
+		}
+		/*
+		 * Must not fall through: the DAC case below would replace
+		 * the sequencer value in *val with a palette byte and
+		 * advance the DAC read pointer.
+		 */
+		break;
+	case DAC_DATA_PORT:
+		/* Palette entries are stored as 3 consecutive bytes each. */
+		*val = sc->vga_dac.dac_palette[3 * sc->vga_dac.dac_rd_index +
+					       sc->vga_dac.dac_rd_subindex];
+		sc->vga_dac.dac_rd_subindex++;
+		if (sc->vga_dac.dac_rd_subindex == 3) {
+			sc->vga_dac.dac_rd_index++;
+			sc->vga_dac.dac_rd_subindex = 0;
+		}
+		break;
+	case GC_IDX_PORT:
+		*val = sc->vga_gc.gc_index;
+		break;
+	case GC_DATA_PORT:
+		switch (sc->vga_gc.gc_index) {
+		case GC_SET_RESET:
+			*val = sc->vga_gc.gc_set_reset;
+			break;
+		case GC_ENABLE_SET_RESET:
+			*val = sc->vga_gc.gc_enb_set_reset;
+			break;
+		case GC_COLOR_COMPARE:
+			*val = sc->vga_gc.gc_color_compare;
+			break;
+		case GC_DATA_ROTATE:
+			*val = sc->vga_gc.gc_rotate;
+			break;
+		case GC_READ_MAP_SELECT:
+			*val = sc->vga_gc.gc_read_map_sel;
+			break;
+		case GC_MODE:
+			*val = sc->vga_gc.gc_mode;
+			break;
+		case GC_MISCELLANEOUS:
+			*val = sc->vga_gc.gc_misc;
+			break;
+		case GC_COLOR_DONT_CARE:
+			*val = sc->vga_gc.gc_color_dont_care;
+			break;
+		case GC_BIT_MASK:
+			*val = sc->vga_gc.gc_bit_mask;
+			break;
+		default:
+			//printf("XXX VGA GC: inb 0x%04x at index %d\n", port, sc->vga_gc.gc_index);
+			assert(0);
+			break;
+		}
+		break;
+	case GEN_MISC_OUTPUT_PORT:
+		*val = sc->vga_misc;
+		break;
+	case GEN_INPUT_STS0_PORT:
+		assert(0);
+		break;
+	case GEN_INPUT_STS1_MONO_PORT:
+	case GEN_INPUT_STS1_COLOR_PORT:
+		/* Reading status #1 puts port 0x3c0 back into index mode. */
+		sc->vga_atc.atc_flipflop = 0;
+		/* Flip VR/DE on every read so the value keeps changing. */
+		sc->vga_sts1 ^= (GEN_IS1_VR | GEN_IS1_DE);
+		*val = sc->vga_sts1;
+		break;
+	case GEN_FEATURE_CTRL_PORT:
+		assert(0);
+		break;
+	default:
+		printf("XXX vga_port_in_handler() unhandled port 0x%x\n", port);
+		assert(0);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Service a single-byte write of val to one of the VGA I/O ports.
+ *
+ * arg is the device's struct vga_softc.  Index ports latch the register
+ * index; data ports store val in the softc field selected by the current
+ * index and refresh any derived fields cached alongside it.
+ *
+ * Returns 0 on success, or -1 (after logging) for a port that is not
+ * handled here.  Writing an unimplemented register index trips assert(0).
+ */
+static int
+vga_port_out_handler(struct vmctx *ctx, int in, int port, int bytes,
+		     uint8_t val, void *arg)
+{
+	struct vga_softc *sc = arg;
+
+	switch (port) {
+	case CRTC_IDX_MONO_PORT:
+	case CRTC_IDX_COLOR_PORT:
+		sc->vga_crtc.crtc_index = val;
+		break;
+	case CRTC_DATA_MONO_PORT:
+	case CRTC_DATA_COLOR_PORT:
+		switch (sc->vga_crtc.crtc_index) {
+		case CRTC_HORIZ_TOTAL:
+			sc->vga_crtc.crtc_horiz_total = val;
+			break;
+		case CRTC_HORIZ_DISP_END:
+			sc->vga_crtc.crtc_horiz_disp_end = val;
+			break;
+		case CRTC_START_HORIZ_BLANK:
+			sc->vga_crtc.crtc_start_horiz_blank = val;
+			break;
+		case CRTC_END_HORIZ_BLANK:
+			sc->vga_crtc.crtc_end_horiz_blank = val;
+			break;
+		case CRTC_START_HORIZ_RETRACE:
+			sc->vga_crtc.crtc_start_horiz_retrace = val;
+			break;
+		case CRTC_END_HORIZ_RETRACE:
+			sc->vga_crtc.crtc_end_horiz_retrace = val;
+			break;
+		case CRTC_VERT_TOTAL:
+			sc->vga_crtc.crtc_vert_total = val;
+			break;
+		case CRTC_OVERFLOW:
+			sc->vga_crtc.crtc_overflow = val;
+			break;
+		case CRTC_PRESET_ROW_SCAN:
+			sc->vga_crtc.crtc_present_row_scan = val;
+			break;
+		case CRTC_MAX_SCAN_LINE:
+			sc->vga_crtc.crtc_max_scan_line = val;
+			break;
+		case CRTC_CURSOR_START:
+			sc->vga_crtc.crtc_cursor_start = val;
+			/* CRTC_CS_CO set means "cursor off". */
+			sc->vga_crtc.crtc_cursor_on = (val & CRTC_CS_CO) == 0;
+			break;
+		case CRTC_CURSOR_END:
+			sc->vga_crtc.crtc_cursor_end = val;
+			break;
+		case CRTC_START_ADDR_HIGH:
+			/* Keep the combined 16-bit start address in sync. */
+			sc->vga_crtc.crtc_start_addr_high = val;
+			sc->vga_crtc.crtc_start_addr &= 0x00ff;
+			sc->vga_crtc.crtc_start_addr |= (val << 8);
+			break;
+		case CRTC_START_ADDR_LOW:
+			sc->vga_crtc.crtc_start_addr_low = val;
+			sc->vga_crtc.crtc_start_addr &= 0xff00;
+			sc->vga_crtc.crtc_start_addr |= (val & 0xff);
+			break;
+		case CRTC_CURSOR_LOC_HIGH:
+			/* Keep the combined 16-bit cursor location in sync. */
+			sc->vga_crtc.crtc_cursor_loc_high = val;
+			sc->vga_crtc.crtc_cursor_loc &= 0x00ff;
+			sc->vga_crtc.crtc_cursor_loc |= (val << 8);
+			break;
+		case CRTC_CURSOR_LOC_LOW:
+			sc->vga_crtc.crtc_cursor_loc_low = val;
+			sc->vga_crtc.crtc_cursor_loc &= 0xff00;
+			sc->vga_crtc.crtc_cursor_loc |= (val & 0xff);
+			break;
+		case CRTC_VERT_RETRACE_START:
+			sc->vga_crtc.crtc_vert_retrace_start = val;
+			break;
+		case CRTC_VERT_RETRACE_END:
+			sc->vga_crtc.crtc_vert_retrace_end = val;
+			break;
+		case CRTC_VERT_DISP_END:
+			sc->vga_crtc.crtc_vert_disp_end = val;
+			break;
+		case CRTC_OFFSET:
+			sc->vga_crtc.crtc_offset = val;
+			break;
+		case CRTC_UNDERLINE_LOC:
+			sc->vga_crtc.crtc_underline_loc = val;
+			break;
+		case CRTC_START_VERT_BLANK:
+			sc->vga_crtc.crtc_start_vert_blank = val;
+			break;
+		case CRTC_END_VERT_BLANK:
+			sc->vga_crtc.crtc_end_vert_blank = val;
+			break;
+		case CRTC_MODE_CONTROL:
+			sc->vga_crtc.crtc_mode_ctrl = val;
+			break;
+		case CRTC_LINE_COMPARE:
+			sc->vga_crtc.crtc_line_compare = val;
+			break;
+		default:
+			//printf("XXX VGA CRTC: outb 0x%04x, 0x%02x at index %d\n", port, val, sc->vga_crtc.crtc_index);
+			assert(0);
+			break;
+		}
+		break;
+	case ATC_IDX_PORT:
+		/*
+		 * This port alternates between index and data writes; the
+		 * flip-flop tracks which one is expected next and is reset
+		 * by a read of Input Status #1 in vga_port_in_handler().
+		 */
+		if (sc->vga_atc.atc_flipflop == 0) {
+			if (sc->vga_atc.atc_index & 0x20)
+				assert(0);
+			sc->vga_atc.atc_index = val & ATC_IDX_MASK;
+		} else {
+			switch (sc->vga_atc.atc_index) {
+			case ATC_PALETTE0 ... ATC_PALETTE15:
+				sc->vga_atc.atc_palette[sc->vga_atc.atc_index] = val & 0x3f;
+				break;
+			case ATC_MODE_CONTROL:
+				sc->vga_atc.atc_mode = val;
+				break;
+			case ATC_OVERSCAN_COLOR:
+				sc->vga_atc.atc_overscan_color = val;
+				break;
+			case ATC_COLOR_PLANE_ENABLE:
+				sc->vga_atc.atc_color_plane_enb = val;
+				break;
+			case ATC_HORIZ_PIXEL_PANNING:
+				sc->vga_atc.atc_horiz_pixel_panning = val;
+				break;
+			case ATC_COLOR_SELECT:
+				sc->vga_atc.atc_color_select = val;
+				/*
+				 * Pre-position the two fields as colour
+				 * index bits: register bits 0-1 become
+				 * bits 4-5 and register bits 2-3 become
+				 * bits 6-7, so both move up by four.
+				 */
+				sc->vga_atc.atc_color_select_45 =
+					(val & ATC_CS_C45) << 4;
+				sc->vga_atc.atc_color_select_67 =
+					(val & ATC_CS_C67) << 4;
+				break;
+			default:
+				//printf("XXX VGA ATC: outb 0x%04x, 0x%02x at index %d\n", port, val, sc->vga_atc.atc_index);
+				assert(0);
+				break;
+			}
+		}
+		sc->vga_atc.atc_flipflop ^= 1;
+		break;
+	case ATC_DATA_PORT:
+		break;
+	case SEQ_IDX_PORT:
+		sc->vga_seq.seq_index = val & 0x1f;
+		break;
+	case SEQ_DATA_PORT:
+		switch (sc->vga_seq.seq_index) {
+		case SEQ_RESET:
+			sc->vga_seq.seq_reset = val;
+			break;
+		case SEQ_CLOCKING_MODE:
+			sc->vga_seq.seq_clock_mode = val;
+			sc->vga_seq.seq_cm_dots = (val & SEQ_CM_89) ? 8 : 9;
+			break;
+		case SEQ_MAP_MASK:
+			sc->vga_seq.seq_map_mask = val;
+			break;
+		case SEQ_CHAR_MAP_SELECT:
+			sc->vga_seq.seq_cmap_sel = val;
+
+			/*
+			 * Cache the byte offsets of the primary (A) and
+			 * secondary (B) character maps; each selector step
+			 * is 8KB.
+			 */
+			sc->vga_seq.seq_cmap_pri_off = ((((val & SEQ_CMS_SA) >> SEQ_CMS_SA_SHIFT) * 2) + ((val & SEQ_CMS_SAH) >> SEQ_CMS_SAH_SHIFT)) * 8 * KB;
+			sc->vga_seq.seq_cmap_sec_off = ((((val & SEQ_CMS_SB) >> SEQ_CMS_SB_SHIFT) * 2) + ((val & SEQ_CMS_SBH) >> SEQ_CMS_SBH_SHIFT)) * 8 * KB;
+			break;
+		case SEQ_MEMORY_MODE:
+			sc->vga_seq.seq_mm = val;
+			/* Chain-4 addressing is not supported. */
+			assert((sc->vga_seq.seq_mm & SEQ_MM_C4) == 0);
+			break;
+		default:
+			//printf("XXX VGA SEQ: outb 0x%04x, 0x%02x at index %d\n", port, val, sc->vga_seq.seq_index);
+			assert(0);
+			break;
+		}
+		break;
+	case DAC_MASK:
+		break;
+	case DAC_IDX_RD_PORT:
+		sc->vga_dac.dac_rd_index = val;
+		sc->vga_dac.dac_rd_subindex = 0;
+		break;
+	case DAC_IDX_WR_PORT:
+		sc->vga_dac.dac_wr_index = val;
+		sc->vga_dac.dac_wr_subindex = 0;
+		break;
+	case DAC_DATA_PORT:
+		sc->vga_dac.dac_palette[3 * sc->vga_dac.dac_wr_index +
+					sc->vga_dac.dac_wr_subindex] = val;
+		sc->vga_dac.dac_wr_subindex++;
+		if (sc->vga_dac.dac_wr_subindex == 3) {
+			/*
+			 * A full triple has been written: rebuild the packed
+			 * colour for this entry.  Each component is shifted
+			 * left by two and its lowest bit replicated into the
+			 * two vacated bits; components 0/1/2 land at bits
+			 * 16/8/0 respectively.
+			 */
+			sc->vga_dac.dac_palette_rgb[sc->vga_dac.dac_wr_index] =
+				((((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 0] << 2) |
+				   ((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 0] & 0x1) << 1) |
+				   (sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 0] & 0x1)) << 16) |
+				 (((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 1] << 2) |
+				   ((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 1] & 0x1) << 1) |
+				   (sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 1] & 0x1)) << 8) |
+				 (((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 2] << 2) |
+				   ((sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 2] & 0x1) << 1) |
+				   (sc->vga_dac.dac_palette[3*sc->vga_dac.dac_wr_index + 2] & 0x1)) << 0));
+
+			sc->vga_dac.dac_wr_index++;
+			sc->vga_dac.dac_wr_subindex = 0;
+		}
+		break;
+	case GC_IDX_PORT:
+		sc->vga_gc.gc_index = val;
+		break;
+	case GC_DATA_PORT:
+		switch (sc->vga_gc.gc_index) {
+		case GC_SET_RESET:
+			sc->vga_gc.gc_set_reset = val;
+			break;
+		case GC_ENABLE_SET_RESET:
+			sc->vga_gc.gc_enb_set_reset = val;
+			break;
+		case GC_COLOR_COMPARE:
+			sc->vga_gc.gc_color_compare = val;
+			break;
+		case GC_DATA_ROTATE:
+			/*
+			 * gc_rotate keeps the whole register so that reads
+			 * return what was written.
+			 * NOTE(review): vga_mem_wr_handler() uses gc_rotate
+			 * directly as a shift count - confirm whether it
+			 * should be masked to the rotate-count bits there.
+			 */
+			sc->vga_gc.gc_rotate = val;
+			/*
+			 * Leave the logical-operation field in place (bits
+			 * 3-4): vga_mem_wr_handler() switches on the values
+			 * 0x00/0x08/0x10/0x18, which a right-shifted 0..3
+			 * encoding would never match for AND/OR/XOR.
+			 */
+			sc->vga_gc.gc_op = val & 0x18;
+			break;
+		case GC_READ_MAP_SELECT:
+			sc->vga_gc.gc_read_map_sel = val;
+			break;
+		case GC_MODE:
+			sc->vga_gc.gc_mode = val;
+			sc->vga_gc.gc_mode_c4 = (val & GC_MODE_C4) != 0;
+			assert(!sc->vga_gc.gc_mode_c4);
+			sc->vga_gc.gc_mode_oe = (val & GC_MODE_OE) != 0;
+			sc->vga_gc.gc_mode_rm = (val >> 3) & 0x1;
+			sc->vga_gc.gc_mode_wm = val & 0x3;
+			break;
+		case GC_MISCELLANEOUS:
+			sc->vga_gc.gc_misc = val;
+			sc->vga_gc.gc_misc_gm = val & GC_MISC_GM;
+			sc->vga_gc.gc_misc_mm = (val & GC_MISC_MM) >>
+			    GC_MISC_MM_SHIFT;
+			break;
+		case GC_COLOR_DONT_CARE:
+			sc->vga_gc.gc_color_dont_care = val;
+			break;
+		case GC_BIT_MASK:
+			sc->vga_gc.gc_bit_mask = val;
+			break;
+		default:
+			//printf("XXX VGA GC: outb 0x%04x, 0x%02x at index %d\n", port, val, sc->vga_gc.gc_index);
+			assert(0);
+			break;
+		}
+		break;
+	case GEN_INPUT_STS0_PORT:
+		/* write to Miscellaneous Output Register */
+		sc->vga_misc = val;
+		break;
+	case GEN_INPUT_STS1_MONO_PORT:
+	case GEN_INPUT_STS1_COLOR_PORT:
+		/* write to Feature Control Register */
+		break;
+	default:
+		printf("XXX vga_port_out_handler() unhandled port 0x%x\n", port);
+		//assert(0);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * I/O port dispatch routine registered for every VGA port by vga_init().
+ *
+ * Splits the access into single-byte operations on vga_port_in_handler() /
+ * vga_port_out_handler().  A 2-byte access touches "port" with the low byte
+ * of *eax and "port + 1" with the next byte.  On reads the affected bytes
+ * of *eax are cleared first and only filled in for byte reads that
+ * succeeded.
+ *
+ * Returns 0 on success.  For a 2-byte access both halves are always
+ * attempted and the first non-zero handler result is returned, so a failure
+ * on the low byte is not masked by a successful high byte.  Any other
+ * access width asserts and returns -1.
+ */
+static int
+vga_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+		 uint32_t *eax, void *arg)
+{
+	uint8_t val;
+	int error, error_hi;
+
+	switch (bytes) {
+	case 1:
+		if (in) {
+			*eax &= ~0xff;
+			error = vga_port_in_handler(ctx, in, port, 1,
+						    &val, arg);
+			if (!error) {
+				*eax |= val & 0xff;
+			}
+		} else {
+			val = *eax & 0xff;
+			error = vga_port_out_handler(ctx, in, port, 1,
+						     val, arg);
+		}
+		break;
+	case 2:
+		if (in) {
+			*eax &= ~0xffff;
+			error = vga_port_in_handler(ctx, in, port, 1,
+						    &val, arg);
+			if (!error) {
+				*eax |= val & 0xff;
+			}
+			error_hi = vga_port_in_handler(ctx, in, port + 1, 1,
+						       &val, arg);
+			if (!error_hi) {
+				*eax |= (val & 0xff) << 8;
+			}
+		} else {
+			val = *eax & 0xff;
+			error = vga_port_out_handler(ctx, in, port, 1,
+						     val, arg);
+			val = (*eax >> 8) & 0xff;
+			error_hi = vga_port_out_handler(ctx, in, port + 1, 1,
+							val, arg);
+		}
+		/* Report the low-byte failure if there was one. */
+		if (error == 0)
+			error = error_hi;
+		break;
+	default:
+		assert(0);
+		return (-1);
+	}
+
+	return (error);
+}
+
+/*
+ * Create the VGA device instance.
+ *
+ * Allocates the softc and 256KB of zeroed plane memory (four planes of
+ * 64KB, as indexed by the memory handlers), registers vga_port_handler()
+ * for every port in VGA_IOPORT_START..VGA_IOPORT_END, registers
+ * vga_mem_handler() as the fallback for the 128KB window starting at 640KB,
+ * and hooks vga_render() up to the console.
+ *
+ * Returns 0 on success or -1 if memory could not be allocated.  Failures
+ * of the registration calls are asserted on.
+ */
+int
+vga_init(void)
+{
+	struct inout_port iop;
+	struct vga_softc *sc;
+	int port, error;
+
+	/*
+	 * Allocate everything before publishing any handler so that a
+	 * failure leaves nothing registered that points at a partial softc.
+	 */
+	sc = calloc(1, sizeof(struct vga_softc));
+	if (sc == NULL)
+		return (-1);
+
+	sc->vga_ram = calloc(1, 256 * KB);
+	if (sc->vga_ram == NULL) {
+		free(sc);
+		return (-1);
+	}
+
+	bzero(&iop, sizeof(struct inout_port));
+	iop.name = "VGA";
+	for (port = VGA_IOPORT_START; port <= VGA_IOPORT_END; port++) {
+		iop.port = port;
+		iop.size = 1;
+		iop.flags = IOPORT_F_INOUT;
+		iop.handler = vga_port_handler;
+		iop.arg = sc;
+
+		error = register_inout(&iop);
+		assert(error == 0);
+	}
+
+	sc->mr.name = "VGA memory";
+	sc->mr.flags = MEM_F_RW;
+	sc->mr.base = 640 * KB;
+	sc->mr.size = 128 * KB;
+	sc->mr.handler = vga_mem_handler;
+	sc->mr.arg1 = sc;
+	error = register_mem_fallback(&sc->mr);
+	assert(error == 0);
+
+	sc->gc_image = console_get_image();
+	console_fb_register(vga_render, sc);
+
+	return (0);
+}
diff --git a/usr/src/cmd/bhyve/vga.h b/usr/src/cmd/bhyve/vga.h
new file mode 100644
index 0000000000..14637b12b3
--- /dev/null
+++ b/usr/src/cmd/bhyve/vga.h
@@ -0,0 +1,160 @@
+/*-
+ * Copyright (c) 2015 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Register definitions for the emulated VGA device (see vga.c).
+ *
+ * Layout convention used below: a name indented by one extra space after
+ * "#define" is a bit or mask within the register (index or port) defined
+ * immediately above it.
+ */
+#ifndef _VGA_H_
+#define	_VGA_H_
+
+/*
+ * Inclusive range of I/O ports that vga_init() registers.  Note that the
+ * "mono" ports defined below (0x3b4, 0x3b5, 0x3ba) lie outside this range.
+ */
+#define	VGA_IOPORT_START		0x3c0
+#define	VGA_IOPORT_END			0x3df
+
+/*
+ * General registers.  In vga.c a write to GEN_INPUT_STS0_PORT is treated as
+ * a write to the Miscellaneous Output register, and a write to either
+ * GEN_INPUT_STS1_*_PORT as a (discarded) write to Feature Control.
+ */
+#define	GEN_INPUT_STS0_PORT		0x3c2
+#define	GEN_FEATURE_CTRL_PORT		0x3ca
+#define	GEN_MISC_OUTPUT_PORT		0x3cc
+#define	GEN_INPUT_STS1_MONO_PORT	0x3ba
+#define	GEN_INPUT_STS1_COLOR_PORT	0x3da
+#define	 GEN_IS1_VR			0x08	/* Vertical retrace */
+#define	 GEN_IS1_DE			0x01	/* Display enable not */
+
+/* Attribute controller registers. */
+#define	ATC_IDX_PORT			0x3c0
+#define	ATC_DATA_PORT			0x3c1
+
+/* ATC_IDX_MASK is applied to the value written as the ATC index. */
+#define	ATC_IDX_MASK			0x1f
+#define	ATC_PALETTE0			0
+#define	ATC_PALETTE15			15
+#define	ATC_MODE_CONTROL		16
+#define	 ATC_MC_IPS			0x80	/* Internal palette size */
+#define	 ATC_MC_GA			0x01	/* Graphics/alphanumeric */
+#define	ATC_OVERSCAN_COLOR		17
+#define	ATC_COLOR_PLANE_ENABLE		18
+#define	ATC_HORIZ_PIXEL_PANNING		19
+#define	ATC_COLOR_SELECT		20
+#define	 ATC_CS_C67			0x0c	/* Color select bits 6+7 */
+#define	 ATC_CS_C45			0x03	/* Color select bits 4+5 */
+
+/* Sequencer registers. */
+#define	SEQ_IDX_PORT			0x3c4
+#define	SEQ_DATA_PORT			0x3c5
+
+#define	SEQ_RESET			0
+/* NOTE(review): presumably bits of the SEQ_RESET register - confirm. */
+#define	SEQ_RESET_ASYNC			0x1
+#define	SEQ_RESET_SYNC			0x2
+#define	SEQ_CLOCKING_MODE		1
+#define	 SEQ_CM_SO			0x20	/* Screen off */
+#define	 SEQ_CM_89			0x01	/* 8/9 dot clock */
+#define	SEQ_MAP_MASK			2
+#define	SEQ_CHAR_MAP_SELECT		3
+#define	 SEQ_CMS_SAH			0x20	/* Char map A bit 2 */
+#define	 SEQ_CMS_SAH_SHIFT		5
+#define	 SEQ_CMS_SA			0x0c	/* Char map A bits 0+1 */
+#define	 SEQ_CMS_SA_SHIFT		2
+#define	 SEQ_CMS_SBH			0x10	/* Char map B bit 2 */
+#define	 SEQ_CMS_SBH_SHIFT		4
+#define	 SEQ_CMS_SB			0x03	/* Char map B bits 0+1 */
+#define	 SEQ_CMS_SB_SHIFT		0
+#define	SEQ_MEMORY_MODE			4
+#define	 SEQ_MM_C4			0x08	/* Chain 4 */
+#define	 SEQ_MM_OE			0x04	/* Odd/even */
+#define	 SEQ_MM_EM			0x02	/* Extended memory */
+
+/* Graphics controller registers. */
+#define	GC_IDX_PORT			0x3ce
+#define	GC_DATA_PORT			0x3cf
+
+#define	GC_SET_RESET			0
+#define	GC_ENABLE_SET_RESET		1
+#define	GC_COLOR_COMPARE		2
+#define	GC_DATA_ROTATE			3
+#define	GC_READ_MAP_SELECT		4
+#define	GC_MODE				5
+#define	 GC_MODE_OE			0x10	/* Odd/even */
+#define	 GC_MODE_C4			0x04	/* Chain 4 */
+
+#define	GC_MISCELLANEOUS		6
+#define	 GC_MISC_GM			0x01	/* Graphics/alphanumeric */
+#define	 GC_MISC_MM			0x0c	/* memory map */
+#define	 GC_MISC_MM_SHIFT	2
+#define	GC_COLOR_DONT_CARE		7
+#define	GC_BIT_MASK			8
+
+/* CRT controller registers. */
+#define	CRTC_IDX_MONO_PORT		0x3b4
+#define	CRTC_DATA_MONO_PORT		0x3b5
+#define	CRTC_IDX_COLOR_PORT		0x3d4
+#define	CRTC_DATA_COLOR_PORT		0x3d5
+
+#define	CRTC_HORIZ_TOTAL		0
+#define	CRTC_HORIZ_DISP_END		1
+#define	CRTC_START_HORIZ_BLANK		2
+#define	CRTC_END_HORIZ_BLANK		3
+#define	CRTC_START_HORIZ_RETRACE	4
+#define	CRTC_END_HORIZ_RETRACE		5
+#define	CRTC_VERT_TOTAL			6
+#define	CRTC_OVERFLOW			7
+#define	 CRTC_OF_VRS9			0x80	/* VRS bit 9 */
+#define	 CRTC_OF_VRS9_SHIFT		7
+#define	 CRTC_OF_VDE9			0x40	/* VDE bit 9 */
+#define	 CRTC_OF_VDE9_SHIFT		6
+#define	 CRTC_OF_VRS8			0x04	/* VRS bit 8 */
+#define	 CRTC_OF_VRS8_SHIFT		2
+#define	 CRTC_OF_VDE8			0x02	/* VDE bit 8 */
+#define	 CRTC_OF_VDE8_SHIFT		1
+#define	CRTC_PRESET_ROW_SCAN		8
+#define	CRTC_MAX_SCAN_LINE		9
+#define	 CRTC_MSL_MSL			0x1f
+#define	CRTC_CURSOR_START		10
+#define	 CRTC_CS_CO			0x20	/* Cursor off */
+#define	 CRTC_CS_CS			0x1f	/* Cursor start */
+#define	CRTC_CURSOR_END			11
+#define	 CRTC_CE_CE			0x1f	/* Cursor end */
+#define	CRTC_START_ADDR_HIGH		12
+#define	CRTC_START_ADDR_LOW		13
+#define	CRTC_CURSOR_LOC_HIGH		14
+#define	CRTC_CURSOR_LOC_LOW		15
+#define	CRTC_VERT_RETRACE_START		16
+#define	CRTC_VERT_RETRACE_END		17
+#define	 CRTC_VRE_MASK			0xf
+#define	CRTC_VERT_DISP_END		18
+#define	CRTC_OFFSET			19
+#define	CRTC_UNDERLINE_LOC		20
+#define	CRTC_START_VERT_BLANK		21
+#define	CRTC_END_VERT_BLANK		22
+#define	CRTC_MODE_CONTROL		23
+#define	 CRTC_MC_TE			0x80	/* Timing enable */
+#define	CRTC_LINE_COMPARE		24
+
+/* DAC registers */
+#define	DAC_MASK			0x3c6
+#define	DAC_IDX_RD_PORT			0x3c7
+#define	DAC_IDX_WR_PORT			0x3c8
+#define	DAC_DATA_PORT			0x3c9
+
+/* Creates and registers the VGA device; defined in vga.c. */
+int	vga_init(void);
+
+#endif /* _VGA_H_ */
diff --git a/usr/src/cmd/bhyve/virtio.c b/usr/src/cmd/bhyve/virtio.c
new file mode 100644
index 0000000000..c3b11dc439
--- /dev/null
+++ b/usr/src/cmd/bhyve/virtio.c
@@ -0,0 +1,755 @@
+/*-
+ * Copyright (c) 2013  Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/virtio.c 270326 2014-08-22 13:01:22Z tychon $");
+
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+/*
+ * Functions for dealing with generalized "virtual devices" as
+ * defined by <https://www.google.com/#output=search&q=virtio+spec>
+ */
+
+/*
+ * Convert a struct virtio_softc pointer into the owning device's softc
+ * pointer.  Today this is a plain cast, because the virtio softc must
+ * sit at the front of every virtio-based device softc (vi_softc_linkup()
+ * asserts that the two addresses match).  Keeping the conversion in one
+ * macro means only this definition has to change if that constraint is
+ * ever relaxed.
+ */
+#define DEV_SOFTC(vs) ((void *)(vs))
+
+/*
+ * Wire a virtio_softc up to its per-device-type constants, to the PCI
+ * device instance that fronts it, and to its array of virtqueues.
+ *
+ * dev_softc must be the same address as vs (checked by assertion).
+ * The first vc->vc_nvq entries of queues[] each receive a back-pointer
+ * to vs and their own queue number.
+ */
+void
+vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+		void *dev_softc, struct pci_devinst *pi,
+		struct vqueue_info *queues)
+{
+	struct vqueue_info *vq;
+	int qidx;
+
+	/* The virtio softc has to be the first thing in the device softc. */
+	assert(dev_softc == (void *)vs);
+
+	/* Cross-link the softc and the PCI device instance. */
+	pi->pi_arg = vs;
+	vs->vs_pi = pi;
+	vs->vs_vc = vc;
+	vs->vs_queues = queues;
+
+	/* Give every queue its number and a way back to the softc. */
+	qidx = 0;
+	while (qidx < vc->vc_nvq) {
+		vq = &queues[qidx];
+		vq->vq_num = qidx;
+		vq->vq_vs = vs;
+		qidx++;
+	}
+}
+
+/*
+ * Device-wide reset.
+ *
+ * Every queue is invalidated: its flags (and with them VQ_ALLOC) are
+ * cleared, its last-seen avail index and page frame number go back to
+ * zero, and its MSI-X vector is set to VIRTIO_MSI_NO_VECTOR.  The
+ * internal ring pointers are left as they are.
+ *
+ * The negotiated feature set returns to "none", queue 0 becomes the
+ * selected queue, any pending interrupt status is dropped (deasserting
+ * the legacy interrupt if it was raised) and the config-change MSI-X
+ * vector is set to VIRTIO_MSI_NO_VECTOR.
+ *
+ * If the device has a mutex, the caller must hold it.
+ */
+void
+vi_reset_dev(struct virtio_softc *vs)
+{
+	struct vqueue_info *queues;
+	int qidx, nqueues;
+
+	if (vs->vs_mtx)
+		assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
+	queues = vs->vs_queues;
+	nqueues = vs->vs_vc->vc_nvq;
+	for (qidx = 0; qidx < nqueues; qidx++) {
+		queues[qidx].vq_flags = 0;
+		queues[qidx].vq_last_avail = 0;
+		queues[qidx].vq_pfn = 0;
+		queues[qidx].vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
+	}
+
+	vs->vs_negotiated_caps = 0;
+	vs->vs_curq = 0;
+	/* vs_status is left alone: clearing it here would be redundant. */
+
+	/* Withdraw a pending legacy interrupt before forgetting about it. */
+	if (vs->vs_isr != 0)
+		pci_lintr_deassert(vs->vs_pi);
+	vs->vs_isr = 0;
+	vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
+}
+
+/*
+ * Allocate the I/O BAR (usually BAR 0) through which the guest reaches
+ * the virtio configuration registers and the device-specific config.
+ */
+void
+vi_set_io_bar(struct virtio_softc *vs, int barnum)
+{
+	size_t barlen;
+
+	/*
+	 * The BAR is always sized for the larger register layout
+	 * (VTCFG_R_CFG1) followed by the device-specific config area.
+	 * ??? should we use CFG0 if MSI-X is disabled?
+	 * Existing code did not...
+	 */
+	barlen = vs->vs_vc->vc_cfgsize;
+	barlen += VTCFG_R_CFG1;
+	pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, barlen);
+}
+
+/*
+ * Set up the interrupt capabilities of a virtio device.
+ *
+ * With use_msix set, the device is flagged VIRTIO_USE_MSIX, reset so
+ * that every vector starts out as NO_VECTOR, and given an MSI-X
+ * capability in BAR `barnum' with one vector per queue plus one for
+ * config changes.  Without it, the VIRTIO_USE_MSIX flag is cleared.
+ * In both cases a single-vector MSI capability is added as well.
+ *
+ * Returns 1 if the MSI-X capability could not be added, 0 otherwise.
+ */
+int
+vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
+{
+
+	if (!use_msix) {
+		vs->vs_flags &= ~VIRTIO_USE_MSIX;
+	} else {
+		vs->vs_flags |= VIRTIO_USE_MSIX;
+
+		/* vi_reset_dev() sets all vectors to NO_VECTOR. */
+		VS_LOCK(vs);
+		vi_reset_dev(vs);
+		VS_UNLOCK(vs);
+
+		/* One vector for each queue and one for the config vec. */
+		if (pci_emul_add_msixcap(vs->vs_pi,
+		    vs->vs_vc->vc_nvq + 1, barnum) != 0)
+			return (1);
+	}
+
+	/* Only 1 MSI vector for bhyve */
+	pci_emul_add_msicap(vs->vs_pi, 1);
+	return (0);
+}
+
+/*
+ * Set up the currently-selected virtqueue (vs->vs_curq) from the page
+ * frame number the guest just handed us.  The pfn gives the guest
+ * physical address of the ring; from its host mapping we derive the
+ * locations of the descriptor table, the "avail" ring and the "used"
+ * ring.
+ */
+void
+vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
+{
+	struct vqueue_info *vq;
+	char *desc_area, *avail_area, *used_area;
+	uint64_t gpa;
+
+	vq = &vs->vs_queues[vs->vs_curq];
+	vq->vq_pfn = pfn;
+
+	/* Map the whole ring into our address space. */
+	gpa = (uint64_t)pfn << VRING_PFN;
+	desc_area = paddr_guest2host(vs->vs_pi->pi_vmctx, gpa,
+	    vring_size(vq->vq_qsize));
+
+	/* The descriptor table comes first... */
+	avail_area = desc_area + vq->vq_qsize * sizeof(struct virtio_desc);
+
+	/*
+	 * ... the "avail" ring (all uint16_t's) follows it directly, and
+	 * the "used" ring starts at the next VRING_ALIGN boundary after
+	 * that.
+	 */
+	used_area = avail_area + (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+	used_area = (char *)roundup2((uintptr_t)used_area, VRING_ALIGN);
+
+	vq->vq_desc = (struct virtio_desc *)desc_area;
+	vq->vq_avail = (struct vring_avail *)avail_area;
+	vq->vq_used = (struct vring_used *)used_area;
+
+	/* The queue is now allocated; consumption starts at entry 0. */
+	vq->vq_flags = VQ_ALLOC;
+	vq->vq_last_avail = 0;
+}
+
+/*
+ * Helper inline for vq_getchain(): record the i'th "real"
+ * descriptor.
+ *
+ * Translates the descriptor's guest address into a host pointer and
+ * stores it, together with the descriptor length, in iov[i].  If
+ * flags is non-NULL the descriptor's vd_flags are copied to flags[i].
+ * Nothing is recorded once i has reached n_iov, so the caller may keep
+ * counting descriptors past the end of its arrays.
+ */
+static inline void
+_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx,
+	   struct iovec *iov, int n_iov, uint16_t *flags) {
+	size_t len;
+
+	if (i >= n_iov)
+		return;
+	/*
+	 * The descriptor is shared with the guest, which may rewrite it
+	 * at any moment.  Fetch vd_len exactly once so that the length
+	 * handed to paddr_guest2host() for translation is the very same
+	 * length that ends up in iov_len.
+	 */
+	len = vd->vd_len;
+	iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, len);
+	iov[i].iov_len = len;
+	if (flags != NULL)
+		flags[i] = vd->vd_flags;
+}
+#define	VQ_MAX_DESCRIPTORS	512	/* see below */
+
+/*
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request.  If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request.  This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1.  Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more.  We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the vd_flags and vd_next field of each
+ * descriptor and tells you how many are involved.  Since some may
+ * be indirect, this also needs the vmctx (in the pci_devinst
+ * at vs->vs_pi) so that it can find indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the vmctx) into the given iov[]
+ * array (of the given size).  If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each vd_flags field after unwinding any
+ * indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1.  If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a vq_ring_ready() if needed (note
+ * that vq_has_descs() does one).
+ */
+int
+vq_getchain(struct vqueue_info *vq,
+	    struct iovec *iov, int n_iov, uint16_t *flags)
+{
+	int i;
+	u_int ndesc, n_indir;
+	u_int idx, head, next;
+	size_t indir_len;
+	volatile struct virtio_desc *vdir, *vindir, *vp;
+	struct vmctx *ctx;
+	struct virtio_softc *vs;
+	const char *name;
+
+	vs = vq->vq_vs;
+	name = vs->vs_vc->vc_name;
+
+	/*
+	 * Note: it's the responsibility of the guest not to
+	 * update vq->vq_avail->va_idx until all of the descriptors
+	 * the guest has written are valid (including all their
+	 * vd_next fields and vd_flags).
+	 *
+	 * Compute (va_idx - last_avail) in integers mod 2**16.  This is
+	 * the number of entries the guest has added to the avail ring
+	 * since the last time we updated vq->vq_last_avail.
+	 *
+	 * We just need to do the subtraction as an unsigned int,
+	 * then trim off excess bits.
+	 */
+	idx = vq->vq_last_avail;
+	ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
+	if (ndesc == 0)
+		return (0);
+	if (ndesc > vq->vq_qsize) {
+		/* XXX need better way to diagnose issues */
+		fprintf(stderr,
+		    "%s: ndesc (%u) out of range, driver confused?\r\n",
+		    name, (u_int)ndesc);
+		return (-1);
+	}
+
+	/*
+	 * Now count/parse "involved" descriptors starting from
+	 * the head of the chain.
+	 *
+	 * To prevent loops, we could be more complicated and
+	 * check whether we're re-visiting a previously visited
+	 * index, but we just abort if the count gets excessive.
+	 */
+	ctx = vs->vs_pi->pi_vmctx;
+	head = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
+	next = head;
+	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
+		if (next >= vq->vq_qsize) {
+			fprintf(stderr,
+			    "%s: descriptor index %u out of range, "
+			    "driver confused?\r\n",
+			    name, next);
+			return (-1);
+		}
+		vdir = &vq->vq_desc[next];
+		if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
+			_vq_record(i, vdir, ctx, iov, n_iov, flags);
+			i++;
+		} else if ((vs->vs_negotiated_caps &
+		    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+			fprintf(stderr,
+			    "%s: descriptor has forbidden INDIRECT flag, "
+			    "driver confused?\r\n",
+			    name);
+			return (-1);
+		} else {
+			/*
+			 * The descriptor is shared with the guest, so
+			 * read its length only once: the entry count
+			 * that bounds "next" below and the range we
+			 * ask paddr_guest2host() to translate must be
+			 * derived from the same value.
+			 */
+			indir_len = vdir->vd_len;
+			n_indir = indir_len / 16;
+			if ((indir_len & 0xf) || n_indir == 0) {
+				fprintf(stderr,
+				    "%s: invalid indir len 0x%x, "
+				    "driver confused?\r\n",
+				    name, (u_int)indir_len);
+				return (-1);
+			}
+			vindir = paddr_guest2host(ctx,
+			    vdir->vd_addr, indir_len);
+			/*
+			 * Indirects start at the 0th, then follow
+			 * their own embedded "next"s until those run
+			 * out.  Each one's indirect flag must be off
+			 * (we don't really have to check, could just
+			 * ignore errors...).
+			 */
+			next = 0;
+			for (;;) {
+				vp = &vindir[next];
+				if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
+					fprintf(stderr,
+					    "%s: indirect desc has INDIR flag,"
+					    " driver confused?\r\n",
+					    name);
+					return (-1);
+				}
+				_vq_record(i, vp, ctx, iov, n_iov, flags);
+				if (++i > VQ_MAX_DESCRIPTORS)
+					goto loopy;
+				if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
+					break;
+				next = vp->vd_next;
+				if (next >= n_indir) {
+					fprintf(stderr,
+					    "%s: invalid next %u > %u, "
+					    "driver confused?\r\n",
+					    name, (u_int)next, n_indir);
+					return (-1);
+				}
+			}
+		}
+		if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
+			return (i);
+	}
+loopy:
+	fprintf(stderr,
+	    "%s: descriptor loop? count > %d - driver confused?\r\n",
+	    name, i);
+	return (-1);
+}
+
+/*
+ * Return the currently-first request chain to the guest, setting
+ * its I/O length to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint32_t iolen)
+{
+	uint16_t head, uidx, mask;
+	volatile struct vring_used *vuh;
+	volatile struct virtio_used *vue;
+
+	/*
+	 * Notes:
+	 *  - mask is N-1 where N is a power of 2 so computes x % N
+	 *  - vuh points to the "used" data shared with guest
+	 *  - vue points to the "used" ring entry we want to update
+	 *  - head is the same value we compute in vq_getchain();
+	 *    fetching it here also advances vq_last_avail, i.e. this
+	 *    is where the chain is actually consumed from "avail".
+	 *
+	 * (I apologize for the two fields named vu_idx; the
+	 * virtio spec calls the one that vue points to, "id"...)
+	 */
+	mask = vq->vq_qsize - 1;
+	vuh = vq->vq_used;
+	head = vq->vq_avail->va_ring[vq->vq_last_avail++ & mask];
+
+	uidx = vuh->vu_idx;
+	vue = &vuh->vu_ring[uidx++ & mask];
+	vue->vu_idx = head; /* ie, vue->id = head */
+	vue->vu_tlen = iolen;
+	vuh->vu_idx = uidx;	/* written last, after the entry is filled in */
+}
+
+/*
+ * Driver has finished processing "available" chains and calling
+ * vq_relchain on each one.  If driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt.  Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it's
+ * a snapshot of the ring state when the caller decided to finish
+ * interrupt processing -- it's possible that descriptors became
+ * available after that point.  (It's also typically a constant
+ * 1/True as well.)
+ */
+void
+vq_endchains(struct vqueue_info *vq, int used_all_avail)
+{
+	struct virtio_softc *vs;
+	uint16_t event_idx, new_idx, old_idx;
+	int intr;
+
+	/*
+	 * Interrupt generation: if we're using EVENT_IDX,
+	 * interrupt if we've crossed the event threshold.
+	 * Otherwise interrupt is generated if we added "used" entries,
+	 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+	 *
+	 * In any case, though, if NOTIFY_ON_EMPTY is set and the
+	 * entire avail was processed, we need to interrupt always.
+	 */
+	vs = vq->vq_vs;
+	/*
+	 * vq_save_used is the used index as it stood at the end of the
+	 * previous call.  Refresh it now, so that the next call measures
+	 * only the entries added after this point; otherwise old_idx
+	 * would never advance and "did the used index move?" below
+	 * would be answered against a stale value.
+	 */
+	old_idx = vq->vq_save_used;
+	vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
+	if (used_all_avail &&
+	    (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
+		intr = 1;
+	else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
+		event_idx = VQ_USED_EVENT_IDX(vq);
+		/*
+		 * This calculation is per docs and the kernel
+		 * (see src/sys/dev/virtio/virtio_ring.h).
+		 */
+		intr = (uint16_t)(new_idx - event_idx - 1) <
+			(uint16_t)(new_idx - old_idx);
+	} else {
+		intr = new_idx != old_idx &&
+		    !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
+	}
+	if (intr)
+		vq_interrupt(vs, vq);
+}
+
+/*
+ * Table of the standard virtio configuration registers that
+ * vi_pci_read() and vi_pci_write() handle themselves.
+ *
+ * Note: these are in sorted order to make for a fast search;
+ * vi_find_cr() binary-searches on cr_offset, so any new entry has
+ * to be inserted in ascending cr_offset order.
+ */
+static struct config_reg {
+	uint16_t	cr_offset;	/* register offset */
+	uint8_t		cr_size;	/* size (bytes) */
+	uint8_t		cr_ro;		/* true => reg is read only */
+	const char	*cr_name;	/* name of reg */
+} config_regs[] = {
+	{ VTCFG_R_HOSTCAP,	4, 1, "HOSTCAP" },
+	{ VTCFG_R_GUESTCAP,	4, 0, "GUESTCAP" },
+	{ VTCFG_R_PFN,		4, 0, "PFN" },
+	{ VTCFG_R_QNUM,		2, 1, "QNUM" },
+	{ VTCFG_R_QSEL,		2, 0, "QSEL" },
+	{ VTCFG_R_QNOTIFY,	2, 0, "QNOTIFY" },
+	{ VTCFG_R_STATUS,	1, 0, "STATUS" },
+	{ VTCFG_R_ISR,		1, 0, "ISR" },
+	{ VTCFG_R_CFGVEC,	2, 0, "CFGVEC" },
+	{ VTCFG_R_QVEC,		2, 0, "QVEC" },
+};
+
+/*
+ * Find the standard config register that starts exactly at `offset'
+ * by binary search over config_regs[].  Returns a pointer to the
+ * table entry, or NULL if no register starts at that offset.
+ */
+static inline struct config_reg *
+vi_find_cr(int offset) {
+	int hi, lo, mid;
+	struct config_reg *cr;
+
+	/*
+	 * The bounds are deliberately signed.  When the search has to
+	 * move below element 0, hi becomes -1 and the loop ends; with
+	 * unsigned bounds it would wrap to a huge value and the loop
+	 * would go on to index far outside the table.
+	 */
+	lo = 0;
+	hi = (int)(sizeof(config_regs) / sizeof(*config_regs)) - 1;
+	while (hi >= lo) {
+		mid = lo + (hi - lo) / 2;
+		cr = &config_regs[mid];
+		if (cr->cr_offset == offset)
+			return (cr);
+		if (cr->cr_offset < offset)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+	return (NULL);
+}
+
+/*
+ * Handle pci config space reads.
+ * If it's to the MSI-X info, do that.
+ * If it's part of the virtio standard stuff, do that.
+ * Otherwise dispatch to the actual driver.
+ *
+ * Returns the value read.  The return value starts out as all-ones
+ * (0xff, 0xffff or 0xffffffff depending on size); an access with a bad
+ * size or to an unknown register prints a diagnostic and leaves it
+ * alone.  vs->vs_mtx, when set, is held from after the MSI-X check
+ * until return.
+ */
+uint64_t
+vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	    int baridx, uint64_t offset, int size)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct virtio_consts *vc;
+	struct config_reg *cr;
+	uint64_t virtio_config_size, max;
+	const char *name;
+	uint32_t newoff;
+	uint32_t value;
+	int error;
+
+	if (vs->vs_flags & VIRTIO_USE_MSIX) {
+		if (baridx == pci_msix_table_bar(pi) ||
+		    baridx == pci_msix_pba_bar(pi)) {
+			return (pci_emul_msix_tread(pi, offset, size));
+		}
+	}
+
+	/* XXX probably should do something better than just assert() */
+	assert(baridx == 0);
+
+	if (vs->vs_mtx)
+		pthread_mutex_lock(vs->vs_mtx);
+
+	vc = vs->vs_vc;
+	name = vc->vc_name;
+	value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
+
+	if (size != 1 && size != 2 && size != 4)
+		goto bad;
+
+	if (pci_msix_enabled(pi))
+		virtio_config_size = VTCFG_R_CFG1;
+	else
+		virtio_config_size = VTCFG_R_CFG0;
+
+	if (offset >= virtio_config_size) {
+		/*
+		 * Subtract off the standard size (including MSI-X
+		 * registers if enabled) and dispatch to underlying driver.
+		 * If that fails, fall into general code.
+		 */
+		newoff = offset - virtio_config_size;
+		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
+		if (newoff + size > max)
+			goto bad;
+		error = (*vc->vc_cfgread)(DEV_SOFTC(vs), newoff, size, &value);
+		if (!error)
+			goto done;
+	}
+
+bad:	/* reads of the standard registers fall through to here as well */
+	cr = vi_find_cr(offset);
+	if (cr == NULL || cr->cr_size != size) {
+		if (cr != NULL) {
+			/* offset must be OK, so size must be bad */
+			fprintf(stderr,
+			    "%s: read from %s: bad size %d\r\n",
+			    name, cr->cr_name, size);
+		} else {
+			fprintf(stderr,
+			    "%s: read from bad offset/size %jd/%d\r\n",
+			    name, (uintmax_t)offset, size);
+		}
+		goto done;
+	}
+
+	switch (offset) {
+	case VTCFG_R_HOSTCAP:
+		value = vc->vc_hv_caps;
+		break;
+	case VTCFG_R_GUESTCAP:
+		value = vs->vs_negotiated_caps;
+		break;
+	case VTCFG_R_PFN:
+		if (vs->vs_curq < vc->vc_nvq)
+			value = vs->vs_queues[vs->vs_curq].vq_pfn;
+		break;
+	case VTCFG_R_QNUM:
+		value = vs->vs_curq < vc->vc_nvq ?
+		    vs->vs_queues[vs->vs_curq].vq_qsize : 0;
+		break;
+	case VTCFG_R_QSEL:
+		value = vs->vs_curq;
+		break;
+	case VTCFG_R_QNOTIFY:
+		value = 0;	/* XXX */
+		break;
+	case VTCFG_R_STATUS:
+		value = vs->vs_status;
+		break;
+	case VTCFG_R_ISR:
+		value = vs->vs_isr;
+		vs->vs_isr = 0;		/* a read clears this flag */
+		if (value)
+			pci_lintr_deassert(pi);
+		break;
+	case VTCFG_R_CFGVEC:
+		value = vs->vs_msix_cfg_idx;
+		break;
+	case VTCFG_R_QVEC:
+		value = vs->vs_curq < vc->vc_nvq ?
+		    vs->vs_queues[vs->vs_curq].vq_msix_idx :
+		    VIRTIO_MSI_NO_VECTOR;
+		break;
+	}
+done:
+	if (vs->vs_mtx)
+		pthread_mutex_unlock(vs->vs_mtx);
+	return (value);
+}
+
+/*
+ * Handle pci config space writes.
+ * If it's to the MSI-X info, do that.
+ * If it's part of the virtio standard stuff, do that.
+ * Otherwise dispatch to the actual driver.
+ *
+ * A write with a bad size, to an unknown offset or to a read-only
+ * register is reported on stderr and otherwise ignored, as is a PFN or
+ * QVEC write while an out-of-range queue is selected.  vs->vs_mtx, when
+ * set, is held from after the MSI-X check until return.
+ */
+void
+vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+	     int baridx, uint64_t offset, int size, uint64_t value)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct vqueue_info *vq;
+	struct virtio_consts *vc;
+	struct config_reg *cr;
+	uint64_t virtio_config_size, max;
+	const char *name;
+	uint32_t newoff;
+	int error;
+
+	if (vs->vs_flags & VIRTIO_USE_MSIX) {
+		if (baridx == pci_msix_table_bar(pi) ||
+		    baridx == pci_msix_pba_bar(pi)) {
+			pci_emul_msix_twrite(pi, offset, size, value);
+			return;
+		}
+	}
+
+	/* XXX probably should do something better than just assert() */
+	assert(baridx == 0);
+
+	if (vs->vs_mtx)
+		pthread_mutex_lock(vs->vs_mtx);
+
+	vc = vs->vs_vc;
+	name = vc->vc_name;
+
+	if (size != 1 && size != 2 && size != 4)
+		goto bad;
+
+	if (pci_msix_enabled(pi))
+		virtio_config_size = VTCFG_R_CFG1;
+	else
+		virtio_config_size = VTCFG_R_CFG0;
+
+	if (offset >= virtio_config_size) {
+		/*
+		 * Subtract off the standard size (including MSI-X
+		 * registers if enabled) and dispatch to underlying driver.
+		 */
+		newoff = offset - virtio_config_size;
+		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
+		if (newoff + size > max)
+			goto bad;
+		error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), newoff, size, value);
+		if (!error)
+			goto done;
+	}
+
+bad:	/* writes to the standard registers fall through to here as well */
+	cr = vi_find_cr(offset);
+	if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
+		if (cr != NULL) {
+			/* offset must be OK, wrong size and/or reg is R/O */
+			if (cr->cr_size != size)
+				fprintf(stderr,
+				    "%s: write to %s: bad size %d\r\n",
+				    name, cr->cr_name, size);
+			if (cr->cr_ro)
+				fprintf(stderr,
+				    "%s: write to read-only reg %s\r\n",
+				    name, cr->cr_name);
+		} else {
+			fprintf(stderr,
+			    "%s: write to bad offset/size %jd/%d\r\n",
+			    name, (uintmax_t)offset, size);
+		}
+		goto done;
+	}
+
+	switch (offset) {
+	case VTCFG_R_GUESTCAP:
+		vs->vs_negotiated_caps = value & vc->vc_hv_caps;
+		break;
+	case VTCFG_R_PFN:
+		if (vs->vs_curq >= vc->vc_nvq)
+			goto bad_qindex;
+		vi_vq_init(vs, value);
+		break;
+	case VTCFG_R_QSEL:
+		/*
+		 * Note that the guest is allowed to select an
+		 * invalid queue; we just need to return a QNUM
+		 * of 0 while the bad queue is selected.
+		 */
+		vs->vs_curq = value;
+		break;
+	case VTCFG_R_QNOTIFY:
+		if (value >= vc->vc_nvq) {
+			fprintf(stderr, "%s: queue %d notify out of range\r\n",
+				name, (int)value);
+			goto done;
+		}
+		vq = &vs->vs_queues[value];
+		if (vq->vq_notify)	/* per-queue handler takes precedence */
+			(*vq->vq_notify)(DEV_SOFTC(vs), vq);
+		else if (vc->vc_qnotify)
+			(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
+		else
+			fprintf(stderr,
+			    "%s: qnotify queue %d: missing vq/vc notify\r\n",
+				name, (int)value);
+		break;
+	case VTCFG_R_STATUS:
+		vs->vs_status = value;
+		if (value == 0)		/* a status of 0 invokes the device's reset hook */
+			(*vc->vc_reset)(DEV_SOFTC(vs));
+		break;
+	case VTCFG_R_CFGVEC:
+		vs->vs_msix_cfg_idx = value;
+		break;
+	case VTCFG_R_QVEC:
+		if (vs->vs_curq >= vc->vc_nvq)
+			goto bad_qindex;
+		vq = &vs->vs_queues[vs->vs_curq];
+		vq->vq_msix_idx = value;
+		break;
+	}
+	goto done;
+
+bad_qindex:
+	fprintf(stderr,
+	    "%s: write config reg %s: curq %d >= max %d\r\n",
+	    name, cr->cr_name, vs->vs_curq, vc->vc_nvq);
+done:
+	if (vs->vs_mtx)
+		pthread_mutex_unlock(vs->vs_mtx);
+}
diff --git a/usr/src/cmd/bhyve/virtio.h b/usr/src/cmd/bhyve/virtio.h
new file mode 100644
index 0000000000..1a2ebe8118
--- /dev/null
+++ b/usr/src/cmd/bhyve/virtio.h
@@ -0,0 +1,475 @@
+/*-
+ * Copyright (c) 2013  Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/virtio.h 268276 2014-07-05 02:38:53Z grehan $
+ */
+
+#ifndef	_VIRTIO_H_
+#define	_VIRTIO_H_
+
+/*
+ * These are derived from several virtio specifications.
+ *
+ * Some useful links:
+ *    https://github.com/rustyrussell/virtio-spec
+ *    http://people.redhat.com/pbonzini/virtio-spec.pdf
+ */
+
+/*
+ * A virtual device has zero or more "virtual queues" (virtqueue).
+ * Each virtqueue uses at least two 4096-byte pages, laid out thus:
+ *
+ *      +-----------------------------------------------+
+ *      |    "desc":  <N> descriptors, 16 bytes each    |
+ *      |   -----------------------------------------   |
+ *      |   "avail":   2 uint16; <N> uint16; 1 uint16   |
+ *      |   -----------------------------------------   |
+ *      |              pad to 4k boundary               |
+ *      +-----------------------------------------------+
+ *      |   "used": 2 x uint16; <N> elems; 1 uint16     |
+ *      |   -----------------------------------------   |
+ *      |              pad to 4k boundary               |
+ *      +-----------------------------------------------+
+ *
+ * The number <N> that appears here is always a power of two and is
+ * limited to no more than 32768 (as it must fit in a 16-bit field).
+ * If <N> is sufficiently large, the above will occupy more than
+ * two pages.  In any case, all pages must be physically contiguous
+ * within the guest's physical address space.
+ *
+ * The <N> 16-byte "desc" descriptors consist of a 64-bit guest
+ * physical address <addr>, a 32-bit length <len>, a 16-bit
+ * <flags>, and a 16-bit <next> field (all in guest byte order).
+ *
+ * There are three flags that may be set :
+ *	NEXT    descriptor is chained, so use its "next" field
+ *	WRITE   descriptor is for host to write into guest RAM
+ *		(else host is to read from guest RAM)
+ *	INDIRECT   descriptor address field is (guest physical)
+ *		address of a linear array of descriptors
+ *
+ * Unless INDIRECT is set, <len> is the number of bytes that may
+ * be read/written from guest physical address <addr>.  If
+ * INDIRECT is set, WRITE is ignored and <len> provides the length
+ * of the indirect descriptors (and <len> must be a multiple of
+ * 16).  Note that NEXT may still be set in the main descriptor
+ * pointing to the indirect, and should be set in each indirect
+ * descriptor that uses the next descriptor (these should generally
+ * be numbered sequentially).  However, INDIRECT must not be set
+ * in the indirect descriptors.  Upon reaching an indirect descriptor
+ * without a NEXT bit, control returns to the direct descriptors.
+ *
+ * Except inside an indirect, each <next> value must be in the
+ * range [0 .. N) (i.e., the half-open interval).  (Inside an
+ * indirect, each <next> must be in the range [0 .. <len>/16).)
+ *
+ * The "avail" data structures reside in the same pages as the
+ * "desc" structures since both together are used by the device to
+ * pass information to the hypervisor's virtual driver.  These
+ * begin with a 16-bit <flags> field and 16-bit index <idx>, then
+ * have <N> 16-bit <ring> values, followed by one final 16-bit
+ * field <used_event>.  The <N> <ring> entries are simply indices
+ * into the descriptor ring (and thus must meet the same
+ * constraints as each <next> value).  However, <idx> is counted
+ * up from 0 (initially) and simply wraps around after 65535; it
+ * is taken mod <N> to find the next available entry.
+ *
+ * The "used" ring occupies a separate page or pages, and contains
+ * values written from the virtual driver back to the guest OS.
+ * This begins with a 16-bit <flags> and 16-bit <idx>, then there
+ * are <N> "vring_used" elements, followed by a 16-bit <avail_event>.
+ * The <N> "vring_used" elements consist of a 32-bit <id> and a
+ * 32-bit <len> (vu_tlen below).  The <id> is simply the index of
+ * the head of a descriptor chain the guest made available
+ * earlier, and the <len> is the number of bytes actually written,
+ * e.g., in the case of a network driver that provided a large
+ * receive buffer but received only a small amount of data.
+ *
+ * The two event fields, <used_event> and <avail_event>, in the
+ * avail and used rings (respectively -- note the reversal!), are
+ * always provided, but are used only if the virtual device
+ * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature
+ * negotiation.  Similarly, both rings provide a flag --
+ * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in
+ * their <flags> field, indicating that the guest does not need an
+ * interrupt, or that the hypervisor driver does not need a
+ * notify, when descriptors are added to the corresponding ring.
+ * (These are provided only for interrupt optimization and need
+ * not be implemented.)
+ */
+#define VRING_ALIGN	4096
+
+#define VRING_DESC_F_NEXT	(1 << 0)
+#define VRING_DESC_F_WRITE	(1 << 1)
+#define VRING_DESC_F_INDIRECT	(1 << 2)
+
+/*
+ * One entry of the descriptor table: a single guest buffer segment,
+ * optionally chained to another entry through vd_next.
+ */
+struct virtio_desc {			/* AKA vring_desc */
+	uint64_t	vd_addr;	/* guest physical address */
+	uint32_t	vd_len;		/* length of scatter/gather seg */
+	uint16_t	vd_flags;	/* VRING_DESC_F_* */
+	uint16_t	vd_next;	/* next desc if F_NEXT */
+} __packed;
+
+/*
+ * One element of the "used" ring: identifies a descriptor chain being
+ * handed back to the guest and the number of bytes written into it.
+ */
+struct virtio_used {			/* AKA vring_used_elem */
+	uint32_t	vu_idx;		/* head of used descriptor chain */
+	uint32_t	vu_tlen;	/* length written-to */
+} __packed;
+
+#define VRING_AVAIL_F_NO_INTERRUPT   1
+
+/*
+ * Header of the "avail" ring.  The ring entries follow as a flexible
+ * array member; the used_event word sits directly after the last entry
+ * and therefore cannot be declared as a field.
+ */
+struct vring_avail {
+	uint16_t	va_flags;	/* VRING_AVAIL_F_* */
+	uint16_t	va_idx;		/* counts to 65535, then cycles */
+	uint16_t	va_ring[];	/* size N, reported in QNUM value */
+/*	uint16_t	va_used_event;	-- after N ring entries */
+} __packed;
+
+#define	VRING_USED_F_NO_NOTIFY		1
+/*
+ * Header of the "used" ring.  The ring elements follow as a flexible
+ * array member; the avail_event word sits directly after the last
+ * element and therefore cannot be declared as a field.
+ */
+struct vring_used {
+	uint16_t	vu_flags;	/* VRING_USED_F_* */
+	uint16_t	vu_idx;		/* counts to 65535, then cycles */
+	struct virtio_used vu_ring[];	/* size N */
+/*	uint16_t	vu_avail_event;	-- after N ring entries */
+} __packed;
+
+/*
+ * The address of any given virtual queue is determined by a single
+ * Page Frame Number register.  The guest writes the PFN into the
+ * PCI config space.  However, a device that has two or more
+ * virtqueues can have a different PFN, and size, for each queue.
+ * The number of queues is determinable via the PCI config space
+ * VTCFG_R_QSEL register.  Writes to QSEL select the queue: 0 means
+ * queue #0, 1 means queue#1, etc.  Once a queue is selected, the
+ * remaining PFN and QNUM registers refer to that queue.
+ *
+ * QNUM is a read-only register containing a nonzero power of two
+ * that indicates the (hypervisor's) queue size.  Or, if reading it
+ * produces zero, the hypervisor does not have a corresponding
+ * queue.  (The number of possible queues depends on the virtual
+ * device.  The block device has just one; the network device
+ * provides either two -- 0 = receive, 1 = transmit -- or three,
+ * with 2 = control.)
+ *
+ * PFN is a read/write register giving the physical page address of
+ * the virtqueue in guest memory (the guest must allocate enough space
+ * based on the hypervisor's provided QNUM).
+ *
+ * QNOTIFY is effectively write-only: when the guest writes a queue
+ * number to the register, the hypervisor should scan the specified
+ * virtqueue. (Reading QNOTIFY currently always gets 0).
+ */
+
+/*
+ * PFN register shift amount
+ */
+#define VRING_PFN               12
+
+/*
+ * Virtio device types
+ *
+ * XXX Should really be merged with <dev/virtio/virtio.h> defines
+ */
+#define	VIRTIO_TYPE_NET		1
+#define	VIRTIO_TYPE_BLOCK	2
+#define	VIRTIO_TYPE_CONSOLE	3
+#define	VIRTIO_TYPE_ENTROPY	4
+#define	VIRTIO_TYPE_BALLOON	5
+#define	VIRTIO_TYPE_IOMEMORY	6
+#define	VIRTIO_TYPE_RPMSG	7
+#define	VIRTIO_TYPE_SCSI	8
+#define	VIRTIO_TYPE_9P		9
+
+/* experimental IDs start at 65535 and work down */
+
+/*
+ * PCI vendor/device IDs
+ */
+#define	VIRTIO_VENDOR		0x1AF4
+#define	VIRTIO_DEV_NET		0x1000
+#define	VIRTIO_DEV_BLOCK	0x1001
+#define	VIRTIO_DEV_RANDOM	0x1002
+
+/*
+ * PCI config space constants.
+ *
+ * If MSI-X is enabled, the ISR register is generally not used,
+ * and the configuration vector and queue vector appear at offsets
+ * 20 and 22 with the remaining configuration registers at 24.
+ * If MSI-X is not enabled, those two registers disappear and
+ * the remaining configuration registers start at offset 20.
+ */
+#define VTCFG_R_HOSTCAP		0
+#define VTCFG_R_GUESTCAP	4
+#define VTCFG_R_PFN		8
+#define VTCFG_R_QNUM		12
+#define VTCFG_R_QSEL		14
+#define VTCFG_R_QNOTIFY		16
+#define VTCFG_R_STATUS		18
+#define VTCFG_R_ISR		19
+#define VTCFG_R_CFGVEC		20
+#define VTCFG_R_QVEC		22
+#define VTCFG_R_CFG0		20	/* No MSI-X */
+#define VTCFG_R_CFG1		24	/* With MSI-X */
+#define VTCFG_R_MSIX		20
+
+/*
+ * Bits in VTCFG_R_STATUS.  Guests need not actually set any of these,
+ * but a guest writing 0 to this register means "please reset".
+ */
+#define	VTCFG_STATUS_ACK	0x01	/* guest OS has acknowledged dev */
+#define	VTCFG_STATUS_DRIVER	0x02	/* guest OS driver is loaded */
+#define	VTCFG_STATUS_DRIVER_OK	0x04	/* guest OS driver ready */
+#define	VTCFG_STATUS_FAILED	0x80	/* guest has given up on this dev */
+
+/*
+ * Bits in VTCFG_R_ISR.  These apply only if not using MSI-X.
+ *
+ * (We don't [yet?] ever use CONF_CHANGED.)
+ */
+#define	VTCFG_ISR_QUEUES	0x01	/* re-scan queues */
+#define	VTCFG_ISR_CONF_CHANGED	0x80	/* configuration changed */
+
+#define VIRTIO_MSI_NO_VECTOR	0xFFFF
+
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define	VIRTIO_F_NOTIFY_ON_EMPTY	(1 << 24)
+#define	VIRTIO_RING_F_INDIRECT_DESC	(1 << 28)
+#define	VIRTIO_RING_F_EVENT_IDX		(1 << 29)
+
+/*
+ * Number of bytes of guest memory occupied by a virtqueue holding <qsz>
+ * entries.  From section 2.3, "Virtqueue Configuration", of the virtio
+ * specification.
+ */
+static inline size_t
+vring_size(u_int qsz)
+{
+	size_t desc_bytes, avail_bytes, used_bytes, total;
+
+	desc_bytes = sizeof(struct virtio_desc) * qsz;
+	/* va_flags, va_idx and va_used_event, plus one slot per entry */
+	avail_bytes = sizeof(uint16_t) * (3 + qsz);
+	/* vu_flags, vu_idx and vu_avail_event, plus one element per entry */
+	used_bytes = sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz;
+
+	/* descriptors and "avail" share pages; "used" starts a new one */
+	total = roundup2(desc_bytes + avail_bytes, VRING_ALIGN);
+	total = roundup2(total + used_bytes, VRING_ALIGN);
+
+	return (total);
+}
+
+struct vmctx;
+struct pci_devinst;
+struct vqueue_info;
+
+/*
+ * A virtual device, with some number (possibly 0) of virtual
+ * queues and some size (possibly 0) of configuration-space
+ * registers private to the device.  The virtio_softc should come
+ * at the front of each "derived class", so that a pointer to the
+ * virtio_softc is also a pointer to the more specific, derived-
+ * from-virtio driver's softc.
+ *
+ * Note: inside each hypervisor virtio driver, changes to these
+ * data structures must be locked against other threads, if any.
+ * Except for PCI config space register read/write, we assume each
+ * driver does the required locking, but we need a pointer to the
+ * lock (if there is one) for PCI config space read/write ops.
+ *
+ * When the guest reads or writes the device's config space, the
+ * generic layer checks for operations on the special registers
+ * described above.  If the offset of the register(s) being read
+ * or written is past the CFG area (CFG0 or CFG1), the request is
+ * passed on to the virtual device, after subtracting off the
+ * generic-layer size.  (So, drivers can just use the offset as
+ * an offset into "struct config", for instance.)
+ *
+ * (The virtio layer also makes sure that the read or write is to/
+ * from a "good" config offset, hence vc_cfgsize, and on BAR #0.
+ * However, the driver must verify the read or write size and offset
+ * and that no one is writing a readonly register.)
+ *
+ * The BROKED flag ("this thing done gone and broked") is for future
+ * use.
+ */
+#define	VIRTIO_USE_MSIX		0x01
+#define	VIRTIO_EVENT_IDX	0x02	/* use the event-index values */
+#define	VIRTIO_BROKED		0x08	/* ??? */
+
+/*
+ * Generic per-device virtio state.  As described above, this is meant
+ * to be the first member of each driver's own softc.
+ */
+struct virtio_softc {
+	struct virtio_consts *vs_vc;	/* constants (see below) */
+	int	vs_flags;		/* VIRTIO_* flags from above */
+	pthread_mutex_t *vs_mtx;	/* POSIX mutex, if any */
+	struct pci_devinst *vs_pi;	/* PCI device instance */
+	uint32_t vs_negotiated_caps;	/* negotiated capabilities */
+	struct vqueue_info *vs_queues;	/* one per vc_nvq */
+	int	vs_curq;		/* current queue */
+	uint8_t	vs_status;		/* value from last status write */
+	uint8_t	vs_isr;			/* ISR flags, if not MSI-X */
+	uint16_t vs_msix_cfg_idx;	/* MSI-X vector for config event */
+};
+
+/*
+ * Take / drop the softc's mutex, if the driver supplied one (vs_mtx may
+ * be NULL, in which case these do nothing).  The argument is
+ * parenthesized so that any pointer expression, e.g. "sc + 1" or
+ * "&dev->vs", expands correctly; note that it is evaluated twice.
+ */
+#define	VS_LOCK(vs)							\
+do {									\
+	if ((vs)->vs_mtx)						\
+		pthread_mutex_lock((vs)->vs_mtx);			\
+} while (0)
+
+#define	VS_UNLOCK(vs)							\
+do {									\
+	if ((vs)->vs_mtx)						\
+		pthread_mutex_unlock((vs)->vs_mtx);			\
+} while (0)
+
+/*
+ * Per-driver constants and callbacks consulted by the generic virtio
+ * layer.  The void * argument of each callback is the driver's softc.
+ */
+struct virtio_consts {
+	const char *vc_name;		/* name of driver (for diagnostics) */
+	int	vc_nvq;			/* number of virtual queues */
+	size_t	vc_cfgsize;		/* size of dev-specific config regs */
+	void	(*vc_reset)(void *);	/* called on virtual device reset */
+	void	(*vc_qnotify)(void *, struct vqueue_info *);
+					/* called on QNOTIFY if no VQ notify */
+	int	(*vc_cfgread)(void *, int, int, uint32_t *);
+					/* called to read config regs */
+	int	(*vc_cfgwrite)(void *, int, int, uint32_t);
+					/* called to write config regs */
+	uint64_t vc_hv_caps;		/* hypervisor-provided capabilities */
+};
+
+/*
+ * Data structure allocated (statically) per virtual queue.
+ *
+ * Drivers may change vq_qsize after a reset.  When the guest OS
+ * requests a device reset, the hypervisor first calls
+ * vs->vs_vc->vc_reset(); then the data structure below is
+ * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq).
+ *
+ * The remaining fields should only be fussed-with by the generic
+ * code.
+ *
+ * Note: the addresses of vq_desc, vq_avail, and vq_used are all
+ * computable from each other, but it's a lot simpler if we just
+ * keep a pointer to each one.  The event indices are similarly
+ * (but more easily) computable, and this time we'll compute them:
+ * they're just XX_ring[N].
+ */
+#define	VQ_ALLOC	0x01	/* set once we have a pfn */
+#define	VQ_BROKED	0x02	/* ??? */
+struct vqueue_info {
+	uint16_t vq_qsize;	/* size of this queue (a power of 2) */
+	void	(*vq_notify)(void *, struct vqueue_info *);
+				/* called instead of vc_qnotify, if not NULL */
+
+	struct virtio_softc *vq_vs;	/* backpointer to softc */
+	uint16_t vq_num;	/* we're the num'th queue in the softc */
+
+	uint16_t vq_flags;	/* VQ_* flags (see above) */
+	uint16_t vq_last_avail;	/* a recent value of vq_avail->va_idx */
+	uint16_t vq_save_used;	/* saved vq_used->vu_idx; see vq_endchains */
+	uint16_t vq_msix_idx;	/* MSI-X index, or VIRTIO_MSI_NO_VECTOR */
+
+	uint32_t vq_pfn;	/* PFN of virt queue (not shifted!) */
+
+	volatile struct virtio_desc *vq_desc;	/* descriptor array */
+	volatile struct vring_avail *vq_avail;	/* the "avail" ring */
+	volatile struct vring_used *vq_used;	/* the "used" ring */
+
+};
+/*
+ * As noted above, these are sort of backwards, name-wise: the
+ * avail_event word is the slot just past the last element of the
+ * "used" ring, and the used_event word is the slot just past the last
+ * entry of the "avail" ring.  Both index by the queue size, vq_qsize.
+ */
+#define VQ_AVAIL_EVENT_IDX(vq) \
+	(*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize])
+#define VQ_USED_EVENT_IDX(vq) \
+	((vq)->vq_avail->va_ring[(vq)->vq_qsize])
+
+/*
+ * Is this ring ready for I/O?  Yields the VQ_ALLOC bit of the queue's
+ * flags: nonzero once the flag has been set, zero before that.
+ */
+static inline int
+vq_ring_ready(struct vqueue_info *vq)
+{
+	const uint16_t alloc_bit = vq->vq_flags & VQ_ALLOC;
+
+	return (alloc_bit);
+}
+
+/*
+ * Are there "available" descriptors?  This does not count them; it
+ * only reports (1 or 0) whether the guest's avail index has moved past
+ * the last value we recorded.  A ring that is not ready never has any.
+ */
+static inline int
+vq_has_descs(struct vqueue_info *vq)
+{
+
+	if (!vq_ring_ready(vq))
+		return (0);
+	return (vq->vq_avail->va_idx != vq->vq_last_avail);
+}
+
+/*
+ * Called by a virtio driver as it starts processing chains: snapshots
+ * the current "used" index into vq_save_used.  Each completed chain
+ * (obtained from vq_getchain()) is released with vq_relchain(); once
+ * all are done, vq_endchains() can tell whether and how many chains
+ * were processed and so whether and how to generate an interrupt.
+ */
+static inline void
+vq_startchains(struct vqueue_info *vq)
+{
+	volatile struct vring_used *used_ring = vq->vq_used;
+
+	vq->vq_save_used = used_ring->vu_idx;
+}
+
+/*
+ * Deliver an interrupt to the guest for the given virtual queue.  With
+ * MSI-X enabled the queue's own vector is used; otherwise the
+ * "queues" bit is set in the ISR and a generic MSI / legacy line
+ * interrupt is raised, all under the softc lock.
+ */
+static inline void
+vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+{
+
+	if (pci_msix_enabled(vs->vs_pi)) {
+		pci_generate_msix(vs->vs_pi, vq->vq_msix_idx);
+		return;
+	}
+
+	/* not MSI-X: the ISR update and the interrupt happen under the lock */
+	VS_LOCK(vs);
+	vs->vs_isr |= VTCFG_ISR_QUEUES;
+	pci_generate_msi(vs->vs_pi, 0);
+	pci_lintr_assert(vs->vs_pi);
+	VS_UNLOCK(vs);
+}
+
+struct iovec;
+
+/* Device-level setup and reset entry points of the generic layer. */
+void	vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+			void *dev_softc, struct pci_devinst *pi,
+			struct vqueue_info *queues);
+int	vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix);
+void	vi_reset_dev(struct virtio_softc *);
+void	vi_set_io_bar(struct virtio_softc *, int);
+
+/*
+ * Descriptor-chain processing; see vq_startchains() above for the
+ * expected calling sequence.
+ */
+int	vq_getchain(struct vqueue_info *vq,
+		    struct iovec *iov, int n_iov, uint16_t *flags);
+void	vq_relchain(struct vqueue_info *vq, uint32_t iolen);
+void	vq_endchains(struct vqueue_info *vq, int used_all_avail);
+
+/* BAR access handlers for the virtio config registers described above. */
+uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		     int baridx, uint64_t offset, int size);
+void	vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
+		     int baridx, uint64_t offset, int size, uint64_t value);
+#endif	/* _VIRTIO_H_ */
diff --git a/usr/src/cmd/bhyve/xmsr.c b/usr/src/cmd/bhyve/xmsr.c
new file mode 100644
index 0000000000..0c097251e0
--- /dev/null
+++ b/usr/src/cmd/bhyve/xmsr.c
@@ -0,0 +1,237 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/xmsr.c 279227 2015-02-24 05:15:40Z neel $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyve/xmsr.c 279227 2015-02-24 05:15:40Z neel $");
+
+#include <sys/types.h>
+
+#include <machine/cpufunc.h>
+#include <machine/vmm.h>
+#include <machine/specialreg.h>
+
+#include <vmmapi.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xmsr.h"
+
+static int cpu_vendor_intel, cpu_vendor_amd;
+
+/*
+ * Decide what to do with a guest write to MSR <num>.  Writes to the
+ * MSRs listed for the detected CPU vendor are silently discarded and 0
+ * is returned; any other MSR, or an unrecognized vendor, yields -1.
+ * ctx, vcpu and val are not examined.
+ */
+int
+emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val)
+{
+	int rv = -1;
+
+	if (cpu_vendor_intel) {
+		switch (num) {
+#ifndef	__FreeBSD__
+		case MSR_PERFCTR0:
+		case MSR_PERFCTR1:
+		case MSR_EVNTSEL0:
+		case MSR_EVNTSEL1:
+#endif
+		case 0xd04:		/* Sandy Bridge uncore PMCs */
+		case 0xc24:
+		case MSR_BIOS_UPDT_TRIG:
+		case MSR_BIOS_SIGN:
+			rv = 0;
+			break;
+		default:
+			break;
+		}
+	} else if (cpu_vendor_amd) {
+		switch (num) {
+		case MSR_HWCR:		/* hardware configuration MSR */
+		case MSR_NB_CFG1:
+		case MSR_IC_CFG:
+		case MSR_PERFEVSEL0:	/* the PerfEvtSel MSRs */
+		case MSR_PERFEVSEL1:
+		case MSR_PERFEVSEL2:
+		case MSR_PERFEVSEL3:
+		case MSR_K7_PERFCTR0:	/* the PerfCtr MSRs */
+		case MSR_K7_PERFCTR1:
+		case MSR_K7_PERFCTR2:
+		case MSR_K7_PERFCTR3:
+		case MSR_P_STATE_CONTROL:	/* request to change P-state */
+			rv = 0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (rv);
+}
+
+/*
+ * Supply a value for a guest read of MSR <num>.  For the MSRs listed
+ * for the detected CPU vendor a fixed value is stored through <val>
+ * and 0 is returned.  For any other MSR, or an unrecognized vendor,
+ * -1 is returned and *val is left untouched.  ctx and vcpu are not
+ * examined.
+ */
+int
+emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
+{
+	uint64_t msrval = 0;
+
+	if (cpu_vendor_intel) {
+		switch (num) {
+		case MSR_BIOS_SIGN:
+		case MSR_IA32_PLATFORM_ID:
+		case MSR_PKG_ENERGY_STATUS:
+		case MSR_PP0_ENERGY_STATUS:
+		case MSR_PP1_ENERGY_STATUS:
+		case MSR_DRAM_ENERGY_STATUS:
+			/* reads as zero */
+			break;
+		case MSR_RAPL_POWER_UNIT:
+			/*
+			 * Use the default value documented in section
+			 * "RAPL Interfaces" in Intel SDM vol3.
+			 */
+			msrval = 0x000a1003;
+			break;
+		default:
+			return (-1);
+		}
+	} else if (cpu_vendor_amd) {
+		switch (num) {
+		case MSR_BIOS_SIGN:
+		/*
+		 * The reset value of the next two is processor family
+		 * dependent, so just return 0.
+		 */
+		case MSR_NB_CFG1:
+		case MSR_IC_CFG:
+		/*
+		 * PerfEvtSel and PerfCtr MSRs are not properly virtualized
+		 * so just return zero.
+		 */
+		case MSR_PERFEVSEL0:
+		case MSR_PERFEVSEL1:
+		case MSR_PERFEVSEL2:
+		case MSR_PERFEVSEL3:
+		case MSR_K7_PERFCTR0:
+		case MSR_K7_PERFCTR1:
+		case MSR_K7_PERFCTR2:
+		case MSR_K7_PERFCTR3:
+		/*
+		 * Return the reset value defined in the AMD Bios and
+		 * Kernel Developer's Guide.
+		 */
+		case MSR_SMM_ADDR:
+		case MSR_SMM_MASK:
+		case MSR_P_STATE_LIMIT:
+		case MSR_P_STATE_CONTROL:
+		case MSR_P_STATE_STATUS:
+		case MSR_P_STATE_CONFIG(0):	/* P0 configuration */
+			/* reads as zero */
+			break;
+
+		case MSR_HWCR:
+			/*
+			 * Bios and Kernel Developer's Guides for AMD Families
+			 * 12H, 14H, 15H and 16H.
+			 */
+			msrval = 0x01000010;	/* Reset value */
+			msrval |= 1 << 9;	/* MONITOR/MWAIT disable */
+			break;
+
+		/*
+		 * OpenBSD guests test bit 0 of this MSR to detect if the
+		 * workaround for erratum 721 is already applied.
+		 * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf
+		 */
+		case 0xC0011029:
+			msrval = 1;
+			break;
+
+		default:
+			return (-1);
+		}
+	} else {
+		return (-1);
+	}
+
+	*val = msrval;
+	return (0);
+}
+
+/*
+ * Identify the host CPU vendor from CPUID leaf 0 and record it in
+ * cpu_vendor_amd / cpu_vendor_intel for the MSR emulation above.
+ * Returns 0 on success, or -1 (after printing a diagnostic) when the
+ * vendor is neither AMD nor Intel.
+ */
+int
+init_msr(void)
+{
+	int error;
+	u_int regs[4];
+	char cpu_vendor[13];
+
+	do_cpuid(0, regs);
+	/*
+	 * Assemble the 12-character vendor string from regs[1], regs[3]
+	 * and regs[2], in that order.  The bytes are copied with memcpy()
+	 * rather than stored through a casted u_int pointer: the char
+	 * array has no alignment guarantee for u_int, and writing it
+	 * through another type breaks the C aliasing rules.
+	 */
+	memcpy(&cpu_vendor[0], &regs[1], sizeof(regs[1]));
+	memcpy(&cpu_vendor[4], &regs[3], sizeof(regs[3]));
+	memcpy(&cpu_vendor[8], &regs[2], sizeof(regs[2]));
+	cpu_vendor[12] = '\0';
+
+	error = 0;
+	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+		cpu_vendor_amd = 1;
+	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+		cpu_vendor_intel = 1;
+	} else {
+		fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
+		error = -1;
+	}
+	return (error);
+}
diff --git a/usr/src/cmd/bhyve/xmsr.h b/usr/src/cmd/bhyve/xmsr.h
new file mode 100644
index 0000000000..ac3c147442
--- /dev/null
+++ b/usr/src/cmd/bhyve/xmsr.h
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyve/xmsr.h 271888 2014-09-20 02:35:21Z neel $
+ */
+
+#ifndef	_XMSR_H_
+#define	_XMSR_H_
+
+/*
+ * Forward declaration, so that this header does not depend on the
+ * header defining struct vmctx having been included first.  Without
+ * it, "struct vmctx" in the prototypes below would be a new type with
+ * prototype scope only.
+ */
+struct vmctx;
+
+/* Detect the host CPU vendor; 0 on success, -1 if it is not recognized. */
+int init_msr(void);
+/* Emulate a guest MSR write / read; 0 if handled, -1 if not. */
+int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
+int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val);
+
+#endif	/* _XMSR_H_ */
diff --git a/usr/src/cmd/bhyveconsole/Makefile b/usr/src/cmd/bhyveconsole/Makefile
new file mode 100644
index 0000000000..11d34e6599
--- /dev/null
+++ b/usr/src/cmd/bhyveconsole/Makefile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+# The command name has to be set, as the sibling bhyvectl Makefile does:
+# the install rule below removes and re-links $(ROOTUSRSBINPROG).
+# NOTE(review): that macro is presumably derived from PROG by
+# Makefile.cmd and would expand to nothing without this -- confirm.
+PROG =		bhyveconsole
+
+include ../Makefile.cmd
+
+# The binary itself is built in the per-architecture subdirectory.
+SUBDIRS= $(MACH)
+
+# Conditional macros: record which target the $(SUBDIRS) rule forwards.
+all	:=	TARGET = all
+install	:=	TARGET = install
+clean	:=	TARGET = clean
+clobber	:=	TARGET = clobber
+lint	:=	TARGET = lint
+
+.KEEP_STATE:
+
+all:	$(SUBDIRS)
+
+clean clobber lint:	$(SUBDIRS)
+
+# After the subdirectory install, (re)create the link to $(ISAEXEC)
+# under the command's name.  Failures of rm/ln are deliberately ignored.
+install:	$(SUBDIRS)
+	-$(RM) $(ROOTUSRSBINPROG)
+	-$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+
+# Recurse into each subdirectory with the target selected above.
+$(SUBDIRS):	FRC
+	@cd $@; pwd; $(MAKE) CW_NO_SHADOW=true __GNUC= $(TARGET)
+
+# Empty target used to force the $(SUBDIRS) rule to run every time.
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/bhyveconsole/bhyveconsole.c b/usr/src/cmd/bhyveconsole/bhyveconsole.c
new file mode 100644
index 0000000000..7f237a72f6
--- /dev/null
+++ b/usr/src/cmd/bhyveconsole/bhyveconsole.c
@@ -0,0 +1,360 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/param.h>
+#include <sys/signal.h>
+#include <sys/socket.h>
+#include <sys/termios.h>
+#include <assert.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <bhyve.h>
+
+static int masterfd;
+static struct termios save_termios;
+static int save_fd;
+
+static int nocmdchar = 0;
+static char cmdchar = '~';
+
+static const char *pname;
+
+#define	BCONS_BUFSIZ		8192
+
+/*
+ * Print the one-line usage message to stderr and exit with status 2.
+ * Does not return.
+ */
+static void
+usage(void)
+{
+	(void) fprintf(stderr, "usage: %s vmname\n", pname);
+	exit(2);
+}
+
+/*
+ * printf-style diagnostic: writes "<pname>: ", the formatted message
+ * and a trailing newline to stderr.
+ */
+static void
+bcons_error(const char *fmt, ...)
+{
+	va_list alist;
+
+	(void) fprintf(stderr, "%s: ", pname);
+	va_start(alist, fmt);
+	(void) vfprintf(stderr, fmt, alist);
+	va_end(alist);
+	(void) fprintf(stderr, "\n");
+}
+
+/*
+ * perror()-style diagnostic: writes "<pname>: <str>: <error text>" to
+ * stderr for the current errno, falling back to the numeric value if
+ * strerror() has no text for it.  errno is captured on entry, so that
+ * the report cannot be skewed by strerror() changing it, and is
+ * restored on exit for the benefit of the caller.
+ */
+static void
+bcons_perror(const char *str)
+{
+	const int saved_errno = errno;
+	const char *estr;
+
+	if ((estr = strerror(saved_errno)) != NULL)
+		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
+	else
+		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str,
+		    saved_errno);
+
+	errno = saved_errno;
+}
+
+/*
+ * Create the unix domain socket and call bhyve; handshake
+ * with it to determine whether it will allow us to connect.
+ *
+ * The handshake consists of sending "IDENT <pid>\n" and reading back a
+ * single line: "OK" grants access, anything else is reported as the
+ * process ID currently holding the console.
+ *
+ * On success masterfd refers to the connected socket and 0 is
+ * returned.  On failure a diagnostic is printed, the socket is closed,
+ * masterfd is set to -1 and -1 is returned.
+ */
+static int
+get_console(const char *vmname)
+{
+	int sockfd = -1;
+	struct sockaddr_un servaddr;
+	char clientid[MAXPATHLEN];
+	char handshake[MAXPATHLEN], c;
+	int msglen, pathlen;
+	int i = 0, err = 0;
+
+	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+		bcons_perror("could not create socket");
+		return (-1);
+	}
+
+	bzero(&servaddr, sizeof (servaddr));
+	servaddr.sun_family = AF_UNIX;
+	pathlen = snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
+	    BHYVE_CONS_SOCKPATH, vmname);
+	if (pathlen < 0 || pathlen >= sizeof (servaddr.sun_path)) {
+		/* a truncated path could name a different socket */
+		bcons_error("vm name '%s' is too long", vmname);
+		goto bad;
+	}
+
+	if (connect(sockfd, (struct sockaddr *)&servaddr,
+	    sizeof (servaddr)) == -1) {
+		bcons_perror("Could not connect to console server");
+		goto bad;
+	}
+	masterfd = sockfd;
+
+	/* pid_t is not necessarily a long; convert to match the format */
+	msglen = snprintf(clientid, sizeof (clientid), "IDENT %ld\n",
+	    (long)getpid());
+	assert(msglen > 0 && msglen < sizeof (clientid));
+
+	if (write(masterfd, clientid, msglen) != msglen) {
+		bcons_error("protocol error");
+		goto bad;
+	}
+
+	/*
+	 * Take care not to accumulate more than our fill, and leave room for
+	 * the NUL at the end.
+	 */
+	while ((err = read(masterfd, &c, 1)) == 1) {
+		if (i >= (sizeof (handshake) - 1))
+			break;
+		if (c == '\n')
+			break;
+		handshake[i] = c;
+		i++;
+	}
+	handshake[i] = '\0';
+
+	/*
+	 * If something went wrong during the handshake we bail; perhaps
+	 * the server died off.
+	 */
+	if (err == -1) {
+		bcons_perror("Could not connect to console server");
+		goto bad;
+	}
+
+	/*
+	 * End-of-file before a single byte of reply is not a "console in
+	 * use" answer; report it as what it is.
+	 */
+	if (err == 0 && i == 0) {
+		bcons_error("Console server closed the connection");
+		goto bad;
+	}
+
+	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
+		return (0);
+
+	bcons_error("Console is already in use by process ID %s.",
+	    handshake);
+bad:
+	(void) close(sockfd);
+	masterfd = -1;
+	return (-1);
+}
+
+/*
+ * Place the terminal open on <fd> into raw mode.  The settings found
+ * on entry are stashed in save_termios / save_fd so that reset_tty()
+ * can restore them later.
+ *
+ * Returns 0 on success, or -1 after printing a diagnostic if the
+ * settings could not be read or changed.
+ */
+static int
+set_tty_rawmode(int fd)
+{
+	struct termios term;
+
+	if (tcgetattr(fd, &term) < 0) {
+		bcons_perror("failed to get user terminal settings");
+		return (-1);
+	}
+
+	/* Stash for later, so we can revert back to previous mode */
+	save_termios = term;
+	save_fd = fd;
+
+	/* disable 8->7 bit strip, start/stop, enable any char to restart */
+	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
+	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
+	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
+	/* disable output post-processing */
+	term.c_oflag &= ~OPOST;
+	/* disable canonical mode, signal chars, echo & extended functions */
+	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
+
+	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
+	term.c_cc[VTIME] = 0;
+
+	/*
+	 * Apply to the descriptor we were given -- the same one whose
+	 * settings were read and saved above -- not to a fixed one.
+	 */
+	if (tcsetattr(fd, TCSAFLUSH, &term)) {
+		bcons_perror("failed to set user terminal to raw mode");
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Reset terminal settings for global environment: re-apply the
+ * settings stashed in save_termios to save_fd (both recorded by
+ * set_tty_rawmode()), once pending output has drained.  Failure is
+ * ignored.
+ */
+static void
+reset_tty(void)
+{
+	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
+}
+
+/*
+ * process_user_input watches the input stream for the escape sequence for
+ * 'quit' (by default, tilde-period).  Because we might be fed just one
+ * keystroke at a time, state associated with the user input (are we at the
+ * beginning of the line?  are we locally echoing the next character?) is
+ * maintained by beginning_of_line and local_echo across calls to the routine.
+ *
+ * Everything that is not part of an escape sequence is forwarded, one
+ * byte at a time, to out_fd.
+ *
+ * This routine returns -1 when the 'quit' escape sequence has been issued,
+ * when end-of-file is reached on in_fd, or when an error is encountered,
+ * and 0 otherwise.
+ */
+static int
+process_user_input(int out_fd, int in_fd)
+{
+	static boolean_t beginning_of_line = B_TRUE;
+	static boolean_t local_echo = B_FALSE;
+	char ibuf[BCONS_BUFSIZ];
+	int nbytes;
+	int i;
+	char c;
+
+	nbytes = read(in_fd, ibuf, sizeof (ibuf));
+	if (nbytes == -1 && errno != EINTR)
+		return (-1);
+
+	if (nbytes == -1)	/* The read was interrupted. */
+		return (0);
+
+	/*
+	 * EOF: there will never be more input, so stop rather than have
+	 * the caller poll a permanently-readable descriptor forever.
+	 */
+	if (nbytes == 0)
+		return (-1);
+
+	/*
+	 * Index the buffer directly so that no byte beyond the nbytes
+	 * actually read is ever examined.
+	 */
+	for (i = 0; i < nbytes; i++) {
+		c = ibuf[i];
+		if (beginning_of_line && !nocmdchar) {
+			beginning_of_line = B_FALSE;
+			if (c == cmdchar) {
+				/* hold the command char back for now */
+				local_echo = B_TRUE;
+				continue;
+			}
+		} else if (local_echo) {
+			local_echo = B_FALSE;
+			if (c == '.') {
+				(void) write(STDOUT_FILENO, &cmdchar, 1);
+				(void) write(STDOUT_FILENO, &c, 1);
+				return (-1);
+			}
+		}
+
+		(void) write(out_fd, &c, 1);
+
+		beginning_of_line = (c == '\r' || c == '\n');
+	}
+
+	return (0);
+}
+
+/*
+ * Copy one buffer-full of data from in_fd to out_fd, retrying short or
+ * interrupted writes until everything that was read has been written.
+ *
+ * Returns 0 on success or if the read was interrupted, and -1 on
+ * end-of-file or on a read/write error.
+ */
+static int
+process_output(int in_fd, int out_fd)
+{
+	char ibuf[BCONS_BUFSIZ];
+	char *next = ibuf;
+	int remaining;
+	int n;
+
+	remaining = read(in_fd, ibuf, sizeof (ibuf));
+	if (remaining == 0)	/* EOF */
+		return (-1);
+	if (remaining == -1) {
+		/* an interrupted read is not an error */
+		return (errno == EINTR ? 0 : -1);
+	}
+
+	while (remaining > 0) {
+		n = write(out_fd, next, remaining);
+		if (n == -1) {
+			if (errno != EINTR)
+				return (-1);
+			continue;	/* interrupted: try again */
+		}
+		next += n;
+		remaining -= n;
+	}
+
+	return (0);
+}
+
+/*
+ * This is the main I/O loop: shuttle data between the vm console
+ * socket and the user's terminal until either side reports something
+ * other than readable data, a transfer fails or asks to stop, or
+ * poll() itself fails.
+ */
+static void
+doio(void)
+{
+	const short rdevents = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
+	struct pollfd pollfds[2];
+	struct pollfd *vmside = &pollfds[0];
+	struct pollfd *userside = &pollfds[1];
+	int res;
+
+	/* read from vm and write to stdout */
+	vmside->fd = masterfd;
+	vmside->events = rdevents;
+
+	/* read from stdin and write to vm */
+	userside->fd = STDIN_FILENO;
+	userside->events = rdevents;
+
+	for (;;) {
+		vmside->revents = 0;
+		userside->revents = 0;
+
+		res = poll(pollfds,
+		    sizeof (pollfds) / sizeof (struct pollfd), -1);
+
+		if (res == -1 && errno != EINTR) {
+			bcons_perror("poll failed");
+			/* we are hosed, close connection */
+			break;
+		}
+
+		/* event from master side stdout */
+		if (vmside->revents != 0) {
+			if ((vmside->revents & rdevents) == 0)
+				break;
+			if (process_output(masterfd, STDOUT_FILENO) != 0)
+				break;
+		}
+
+		/* event from user stdin side */
+		if (userside->revents != 0) {
+			if ((userside->revents & rdevents) == 0)
+				break;
+			if (process_user_input(masterfd, STDIN_FILENO) != 0)
+				break;
+		}
+	}
+}
+
+/*
+ * bhyveconsole vmname
+ *
+ * Attach the user's terminal to the console of the named vm until the
+ * connection ends or the user types the quit escape sequence.  Exits
+ * with 0 after a completed session, 1 if the console could not be
+ * attached, and 2 (via usage()) on a bad command line.
+ */
+int
+main(int argc, char **argv)
+{
+	char *vmname;
+
+	pname = basename(argv[0]);
+
+	if (argc != 2)
+		usage();	/* does not return */
+	vmname = argv[1];
+
+	/*
+	 * Make contact with bhyve
+	 */
+	if (get_console(vmname) == -1)
+		return (1);
+
+	(void) printf("[Connected to vm '%s' console]\n", vmname);
+
+	if (set_tty_rawmode(STDIN_FILENO) == -1) {
+		/*
+		 * set_tty_rawmode() has already reported the cause, so only
+		 * a plain message is added here.  reset_tty() must not be
+		 * called: either the original settings were never saved
+		 * (restoring would apply garbage), or tcsetattr() failed
+		 * and therefore changed nothing.
+		 */
+		bcons_error("failed to set stdin pty to raw mode");
+		return (1);
+	}
+
+	/*
+	 * Run the I/O loop until we get disconnected.
+	 */
+	doio();
+	reset_tty();
+	(void) printf("\n[Connection to vm '%s' console closed]\n", vmname);
+
+	return (0);
+}
diff --git a/usr/src/cmd/bhyveconsole/i386/Makefile b/usr/src/cmd/bhyveconsole/i386/Makefile
new file mode 100644
index 0000000000..c4f317a9fa
--- /dev/null
+++ b/usr/src/cmd/bhyveconsole/i386/Makefile
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+PROG=	bhyveconsole
+OBJS=	bhyveconsole.o
+SRCS=	$(OBJS:%.o=../%.c)
+
+include ../../Makefile.cmd
+
+CFLAGS	+= $(CCVERBOSE)
+LDLIBS += -lsocket
+
+.KEEP_STATE:
+
+# The sources live one directory up; objects are built here.
+%.o:	../%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+	$(POST_PROCESS)
+
+install: all $(ROOTUSRSBINPROG32)
+
+clean:
+	$(RM) $(OBJS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/bhyvectl/Makefile b/usr/src/cmd/bhyvectl/Makefile
new file mode 100644
index 0000000000..fe98204056
--- /dev/null
+++ b/usr/src/cmd/bhyvectl/Makefile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+PROG =		bhyvectl
+
+include ../Makefile.cmd
+
+$(BUILD64)SUBDIRS += $(MACH64)
+# Each entry point records its own name in TARGET for the recursive make.
+all	:=	TARGET = all
+install	:=	TARGET = install
+clean	:=	TARGET = clean
+clobber	:=	TARGET = clobber
+lint	:=	TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint:	$(SUBDIRS)
+# Once the subdirectories are done, relink $(ROOTUSRSBINPROG) to $(ISAEXEC).
+install: $(SUBDIRS)
+	-$(RM) $(ROOTUSRSBINPROG)
+	-$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+# Depending on FRC keeps the subdirectory make from being skipped.
+$(SUBDIRS):	FRC
+	@cd $@; pwd; $(MAKE) CW_NO_SHADOW=true __GNUC= $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/bhyvectl/Makefile.com b/usr/src/cmd/bhyvectl/Makefile.com
new file mode 100644
index 0000000000..03ca34792c
--- /dev/null
+++ b/usr/src/cmd/bhyvectl/Makefile.com
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+PROG= bhyvectl
+
+SRCS = bhyvectl.c
+OBJS = $(SRCS:.c=.o)
+
+include ../../Makefile.cmd
+
+.KEEP_STATE:
+# CPPFLAGS is rebuilt so the FreeBSD compat/contrib headers are searched first.
+CFLAGS +=	$(CCVERBOSE)
+CPPFLAGS =	-I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd $(CPPFLAGS.master) \
+	-I$(ROOT)/usr/platform/i86pc/include \
+	-I$(SRC)/uts/i86pc/io/vmm
+LDLIBS +=	-lvmmapi
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS)
+	$(POST_PROCESS)
+
+install: all $(ROOTUSRSBINPROG)
+
+clean:
+	$(RM) $(OBJS)
+
+lint:	lint_SRCS
+
+include ../../Makefile.targ
+# Sources are one directory above the directory make runs in.
+%.o: ../%.c
+	$(COMPILE.c) -I$(SRC)/common $<
+	$(POST_PROCESS_O)
diff --git a/usr/src/cmd/bhyvectl/amd64/Makefile b/usr/src/cmd/bhyvectl/amd64/Makefile
new file mode 100644
index 0000000000..b602c50d05
--- /dev/null
+++ b/usr/src/cmd/bhyvectl/amd64/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+# Add the amd64-specific FreeBSD compat and contrib header directories.
+CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+
+install: all $(ROOTUSRSBINPROG64)
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
new file mode 100644
index 0000000000..07d0a83df5
--- /dev/null
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -0,0 +1,1523 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/bhyvectl/bhyvectl.c 273375 2014-10-21 07:10:43Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/bhyvectl/bhyvectl.c 273375 2014-10-21 07:10:43Z neel $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/mman.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <libutil.h>
+#include <fcntl.h>
+#include <string.h>
+#include <getopt.h>
+#include <assert.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include "intel/vmcs.h"
+
+#define	MB	(1UL << 20)	/* bytes in a megabyte */
+#define	GB	(1UL << 30)	/* bytes in a gigabyte */
+/* Shorthand for the getopt_long() has_arg values used in the option table. */
+#define	REQ_ARG		required_argument
+#define	NO_ARG		no_argument
+#define	OPT_ARG		optional_argument
+
+static const char *progname;	/* basename of argv[0]; printed by usage() */
+
+static void
+usage(void)
+{
+	/* Print the complete option summary to stderr, then exit(1). */
+	(void)fprintf(stderr, "Usage: %s --vm=<vmname>\n"
+	"       [--cpu=<vcpu_number>]\n"
+	"       [--create]\n"
+	"       [--destroy]\n"
+	"       [--get-all]\n"
+	"       [--get-stats]\n"
+	"       [--set-desc-ds]\n"
+	"       [--get-desc-ds]\n"
+	"       [--set-desc-es]\n"
+	"       [--get-desc-es]\n"
+	"       [--set-desc-gs]\n"
+	"       [--get-desc-gs]\n"
+	"       [--set-desc-fs]\n"
+	"       [--get-desc-fs]\n"
+	"       [--set-desc-cs]\n"
+	"       [--get-desc-cs]\n"
+	"       [--set-desc-ss]\n"
+	"       [--get-desc-ss]\n"
+	"       [--set-desc-tr]\n"
+	"       [--get-desc-tr]\n"
+	"       [--set-desc-ldtr]\n"
+	"       [--get-desc-ldtr]\n"
+	"       [--set-desc-gdtr]\n"
+	"       [--get-desc-gdtr]\n"
+	"       [--set-desc-idtr]\n"
+	"       [--get-desc-idtr]\n"
+	"       [--run]\n"
+	"       [--capname=<capname>]\n"
+	"       [--getcap]\n"
+	"       [--setcap=<0|1>]\n"
+	"       [--desc-base=<BASE>]\n"
+	"       [--desc-limit=<LIMIT>]\n"
+	"       [--desc-access=<ACCESS>]\n"
+	"       [--set-efer=<EFER>]\n" "       [--get-efer]\n"
+	"       [--set-cr0=<CR0>]\n"
+	"       [--get-cr0]\n"
+	"       [--set-cr3=<CR3>]\n"
+	"       [--get-cr3]\n"
+	"       [--set-cr4=<CR4>]\n"
+	"       [--get-cr4]\n"
+	"       [--set-dr7=<DR7>]\n"
+	"       [--get-dr7]\n"
+	"       [--set-rsp=<RSP>]\n"
+	"       [--get-rsp]\n"
+	"       [--set-rip=<RIP>]\n"
+	"       [--get-rip]\n"
+	"       [--get-rax]\n"
+	"       [--set-rax=<RAX>]\n"
+	"       [--get-rbx]\n"
+	"       [--get-rcx]\n"
+	"       [--get-rdx]\n"
+	"       [--get-rsi]\n"
+	"       [--get-rdi]\n"
+	"       [--get-rbp]\n"
+	"       [--get-r8]\n"
+	"       [--get-r9]\n"
+	"       [--get-r10]\n"
+	"       [--get-r11]\n"
+	"       [--get-r12]\n"
+	"       [--get-r13]\n"
+	"       [--get-r14]\n"
+	"       [--get-r15]\n"
+	"       [--set-rflags=<RFLAGS>]\n"
+	"       [--get-rflags]\n"
+	"       [--set-cs=<CS>]\n"
+	"       [--get-cs]\n"
+	"       [--set-ds=<DS>]\n"
+	"       [--get-ds]\n"
+	"       [--set-es=<ES>]\n"
+	"       [--get-es]\n"
+	"       [--set-fs=<FS>]\n"
+	"       [--get-fs]\n"
+	"       [--set-gs=<GS>]\n"
+	"       [--get-gs]\n"
+	"       [--set-ss=<SS>]\n"
+	"       [--get-ss]\n"
+	"       [--set-tr=<TR>]\n" "       [--get-tr]\n"
+	"       [--set-ldtr=<LDTR>]\n" "       [--get-ldtr]\n"
+	"       [--get-vmcs-pinbased-ctls]\n"
+	"       [--get-vmcs-procbased-ctls]\n"
+	"       [--get-vmcs-procbased-ctls2]\n"
+	"       [--get-vmcs-entry-interruption-info]\n"
+	"       [--set-vmcs-entry-interruption-info=<info>]\n"
+	"       [--get-vmcs-eptp]\n"
+	"       [--get-vmcs-guest-physical-address]\n"
+	"       [--get-vmcs-guest-linear-address]\n"
+	"       [--set-vmcs-exception-bitmap=<bitmap>]\n"
+	"       [--get-vmcs-exception-bitmap]\n"
+	"       [--get-vmcs-io-bitmap-address]\n"
+	"       [--get-vmcs-tsc-offset]\n"
+	"       [--get-vmcs-guest-pat]\n"
+	"       [--get-vmcs-host-pat]\n"
+	"       [--get-vmcs-host-cr0]\n"
+	"       [--get-vmcs-host-cr3]\n"
+	"       [--get-vmcs-host-cr4]\n"
+	"       [--get-vmcs-host-rip]\n"
+	"       [--get-vmcs-host-rsp]\n"
+	"       [--get-vmcs-cr0-mask]\n"
+	"       [--get-vmcs-cr0-shadow]\n"
+	"       [--get-vmcs-cr4-mask]\n"
+	"       [--get-vmcs-cr4-shadow]\n"
+	"       [--get-vmcs-cr3-targets]\n"
+	"       [--get-vmcs-apic-access-address]\n"
+	"       [--get-vmcs-virtual-apic-address]\n"
+	"       [--get-vmcs-tpr-threshold]\n"
+	"       [--get-vmcs-msr-bitmap]\n"
+	"       [--get-vmcs-msr-bitmap-address]\n"
+	"       [--get-vmcs-vpid]\n"
+	"       [--get-vmcs-ple-gap]\n"
+	"       [--get-vmcs-ple-window]\n"
+	"       [--get-vmcs-instruction-error]\n"
+	"       [--get-vmcs-exit-ctls]\n"
+	"       [--get-vmcs-entry-ctls]\n"
+	"       [--get-vmcs-guest-sysenter]\n"
+	"       [--get-vmcs-link]\n"
+	"       [--get-vmcs-exit-reason]\n"
+	"       [--get-vmcs-exit-qualification]\n"
+	"       [--get-vmcs-exit-interruption-info]\n"
+	"       [--get-vmcs-exit-interruption-error]\n"
+	"       [--get-vmcs-interruptibility]\n"
+	"       [--set-x2apic-state=<state>]\n"
+	"       [--get-x2apic-state]\n"
+	"       [--unassign-pptdev=<bus/slot/func>]\n"
+	"       [--set-mem=<memory in units of MB>]\n"
+	"       [--get-lowmem]\n"
+	"       [--get-highmem]\n",
+	progname);
+	exit(1);
+}
+
+static int get_stats, getcap, setcap, capval;
+static const char *capname;	/* --capname argument */
+static int create, destroy, get_lowmem, get_highmem;
+static uint64_t memsize;	/* --set-mem size in bytes (2MB multiple) */
+static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4;
+static int set_efer, get_efer;
+static int set_dr7, get_dr7;
+static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags;
+static int set_rax, get_rax;
+static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp;
+static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15;
+static int set_desc_ds, get_desc_ds;
+static int set_desc_es, get_desc_es;
+static int set_desc_fs, get_desc_fs;
+static int set_desc_gs, get_desc_gs;
+static int set_desc_cs, get_desc_cs;
+static int set_desc_ss, get_desc_ss;
+static int set_desc_gdtr, get_desc_gdtr;
+static int set_desc_idtr, get_desc_idtr;
+static int set_desc_tr, get_desc_tr;
+static int set_desc_ldtr, get_desc_ldtr;
+static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
+static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
+static int set_x2apic_state, get_x2apic_state;
+enum x2apic_state x2apic_state;	/* --set-x2apic-state value */
+static int unassign_pptdev, bus, slot, func;	/* --unassign-pptdev b/s/f */
+static int run;
+
+/*
+ * Flags and values for the VMCS-specific options.
+ */
+static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2;
+static int get_eptp, get_io_bitmap, get_tsc_offset;
+static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info;
+static int get_vmcs_interruptibility;
+uint32_t vmcs_entry_interruption_info;
+static int get_vmcs_gpa, get_vmcs_gla;
+static int get_exception_bitmap, set_exception_bitmap, exception_bitmap;
+static int get_cr0_mask, get_cr0_shadow;
+static int get_cr4_mask, get_cr4_shadow;
+static int get_cr3_targets;
+static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold;
+static int get_msr_bitmap, get_msr_bitmap_address;
+static int get_vpid, get_ple_gap, get_ple_window;
+static int get_inst_err, get_exit_ctls, get_entry_ctls;
+static int get_host_cr0, get_host_cr3, get_host_cr4;
+static int get_host_rip, get_host_rsp;
+static int get_guest_pat, get_host_pat;
+static int get_guest_sysenter, get_vmcs_link;
+static int get_vmcs_exit_reason, get_vmcs_exit_qualification;
+static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error;
+
+static uint64_t desc_base;	/* --desc-base */
+static uint32_t desc_limit, desc_access;	/* --desc-{limit,access} */
+
+static int get_all;	/* --get-all; ORed into the get-* checks in main() */
+
+/*
+ * Print the exit described by *vmexit for the given vcpu to stdout,
+ * expanding the INOUT and VMX payloads; other codes are reported as unknown.
+ */
+static void
+dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
+{
+	printf("vm exit[%d]\n", vcpu);
+	printf("\trip\t\t0x%016lx\n", vmexit->rip);
+	printf("\tinst_length\t%d\n", vmexit->inst_length);
+	if (vmexit->exitcode == VM_EXITCODE_INOUT) {
+		printf("\treason\t\tINOUT\n");
+		printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT");
+		printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes);
+		printf("\tflags\t\t%s%s\n",
+		    vmexit->u.inout.string ? "STRING " : "",
+		    vmexit->u.inout.rep ? "REP " : "");
+		printf("\tport\t\t0x%04x\n", vmexit->u.inout.port);
+		printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax);
+	} else if (vmexit->exitcode == VM_EXITCODE_VMX) {
+		printf("\treason\t\tVMX\n");
+		printf("\tstatus\t\t%d\n", vmexit->u.vmx.status);
+		printf("\texit_reason\t0x%08x (%u)\n",
+		    vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason);
+		printf("\tqualification\t0x%016lx\n",
+		    vmexit->u.vmx.exit_qualification);
+		printf("\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
+		printf("\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
+	} else {
+		printf("*** unknown vm run exitcode %d\n", vmexit->exitcode);
+	}
+}
+
+/*
+ * Print each MSR whose read or write bit is clear in the MSR bitmap page
+ * mapped from /dev/mem at offset addr.  Returns 0, or -1 on open/mmap error.
+ */
+static int
+dump_vmcs_msr_bitmap(int vcpu, u_long addr)
+{
+	int error = -1, fd, byte, bit, readable, writeable;
+	u_int msr;
+	const char *bitmap = MAP_FAILED;
+
+	fd = open("/dev/mem", O_RDONLY, 0);
+	if (fd < 0)
+		goto done;
+
+	/* POSIX mmap() requires MAP_SHARED or MAP_PRIVATE in flags. */
+	bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, addr);
+	if (bitmap == MAP_FAILED)
+		goto done;
+
+	for (msr = 0; msr < 0x2000; msr++) {
+		byte = msr / 8;
+		bit = msr & 0x7;
+		/* Look at MSRs in the range 0x00000000 to 0x00001FFF */
+		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
+		writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1;
+		if (readable || writeable) {
+			printf("msr 0x%08x[%d]\t\t%c%c\n", msr, vcpu,
+				readable ? 'R' : '-',
+				writeable ? 'W' : '-');
+		}
+
+		/* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
+		byte += 1024;
+		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
+		writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1;
+		if (readable || writeable) {
+			printf("msr 0x%08x[%d]\t\t%c%c\n",
+				0xc0000000 + msr, vcpu,
+				readable ? 'R' : '-',
+				writeable ? 'W' : '-');
+		}
+	}
+	error = 0;
+done:
+	if (bitmap != MAP_FAILED)
+		munmap((void *)bitmap, PAGE_SIZE);
+	if (fd >= 0)
+		close(fd);
+	return (error);
+}
+
+static int
+vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
+{
+	/* VMCS fields are read through the generic register interface. */
+	return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val));
+}
+
+static int
+vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
+{
+	/* VMCS fields are written through the generic register interface. */
+	return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val));
+}
+
+enum {	/* getopt_long() codes for the opts[] entries that take a value */
+	VMNAME = 1000,	/* avoid collision with return values from getopt */
+	VCPU,		/* --cpu */
+	SET_MEM,
+	SET_EFER,
+	SET_CR0,
+	SET_CR3,
+	SET_CR4,
+	SET_DR7,
+	SET_RSP,
+	SET_RIP,
+	SET_RAX,
+	SET_RFLAGS,
+	DESC_BASE,
+	DESC_LIMIT,
+	DESC_ACCESS,
+	SET_CS,
+	SET_DS,
+	SET_ES,
+	SET_FS,
+	SET_GS,
+	SET_SS,
+	SET_TR,
+	SET_LDTR,
+	SET_X2APIC_STATE,
+	SET_VMCS_EXCEPTION_BITMAP,
+	SET_VMCS_ENTRY_INTERRUPTION_INFO,
+	SET_CAP,	/* --setcap */
+	CAPNAME,
+	UNASSIGN_PPTDEV,
+};
+
+int
+main(int argc, char *argv[])
+{
+	char *vmname;
+	int error, ch, vcpu;
+	vm_paddr_t gpa;
+	size_t len;
+	struct vm_exit vmexit;
+	uint64_t ctl, eptp, bm, addr, u64;
+	struct vmctx *ctx;
+	int wired;
+
+	uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
+	uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
+	uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
+	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
+
+	struct option opts[] = {
+		{ "vm",		REQ_ARG,	0,	VMNAME },
+		{ "cpu",	REQ_ARG,	0,	VCPU },
+		{ "set-mem",	REQ_ARG,	0,	SET_MEM },
+		{ "set-efer",	REQ_ARG,	0,	SET_EFER },
+		{ "set-cr0",	REQ_ARG,	0,	SET_CR0 },
+		{ "set-cr3",	REQ_ARG,	0,	SET_CR3 },
+		{ "set-cr4",	REQ_ARG,	0,	SET_CR4 },
+		{ "set-dr7",	REQ_ARG,	0,	SET_DR7 },
+		{ "set-rsp",	REQ_ARG,	0,	SET_RSP },
+		{ "set-rip",	REQ_ARG,	0,	SET_RIP },
+		{ "set-rax",	REQ_ARG,	0,	SET_RAX },
+		{ "set-rflags",	REQ_ARG,	0,	SET_RFLAGS },
+		{ "desc-base",	REQ_ARG,	0,	DESC_BASE },
+		{ "desc-limit",	REQ_ARG,	0,	DESC_LIMIT },
+		{ "desc-access",REQ_ARG,	0,	DESC_ACCESS },
+		{ "set-cs",	REQ_ARG,	0,	SET_CS },
+		{ "set-ds",	REQ_ARG,	0,	SET_DS },
+		{ "set-es",	REQ_ARG,	0,	SET_ES },
+		{ "set-fs",	REQ_ARG,	0,	SET_FS },
+		{ "set-gs",	REQ_ARG,	0,	SET_GS },
+		{ "set-ss",	REQ_ARG,	0,	SET_SS },
+		{ "set-tr",	REQ_ARG,	0,	SET_TR },
+		{ "set-ldtr",	REQ_ARG,	0,	SET_LDTR },
+		{ "set-x2apic-state",REQ_ARG,	0,	SET_X2APIC_STATE },
+		{ "set-vmcs-exception-bitmap",
+				REQ_ARG,	0, SET_VMCS_EXCEPTION_BITMAP },
+		{ "set-vmcs-entry-interruption-info",
+				REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO },
+		{ "capname",	REQ_ARG,	0,	CAPNAME },
+		{ "unassign-pptdev", REQ_ARG,	0,	UNASSIGN_PPTDEV },
+		{ "setcap",	REQ_ARG,	0,	SET_CAP },
+		{ "getcap",	NO_ARG,		&getcap,	1 },
+		{ "get-stats",	NO_ARG,		&get_stats,	1 },
+		{ "get-desc-ds",NO_ARG,		&get_desc_ds,	1 },
+		{ "set-desc-ds",NO_ARG,		&set_desc_ds,	1 },
+		{ "get-desc-es",NO_ARG,		&get_desc_es,	1 },
+		{ "set-desc-es",NO_ARG,		&set_desc_es,	1 },
+		{ "get-desc-ss",NO_ARG,		&get_desc_ss,	1 },
+		{ "set-desc-ss",NO_ARG,		&set_desc_ss,	1 },
+		{ "get-desc-cs",NO_ARG,		&get_desc_cs,	1 },
+		{ "set-desc-cs",NO_ARG,		&set_desc_cs,	1 },
+		{ "get-desc-fs",NO_ARG,		&get_desc_fs,	1 },
+		{ "set-desc-fs",NO_ARG,		&set_desc_fs,	1 },
+		{ "get-desc-gs",NO_ARG,		&get_desc_gs,	1 },
+		{ "set-desc-gs",NO_ARG,		&set_desc_gs,	1 },
+		{ "get-desc-tr",NO_ARG,		&get_desc_tr,	1 },
+		{ "set-desc-tr",NO_ARG,		&set_desc_tr,	1 },
+		{ "set-desc-ldtr", NO_ARG,	&set_desc_ldtr,	1 },
+		{ "get-desc-ldtr", NO_ARG,	&get_desc_ldtr,	1 },
+		{ "set-desc-gdtr", NO_ARG,	&set_desc_gdtr, 1 },
+		{ "get-desc-gdtr", NO_ARG,	&get_desc_gdtr, 1 },
+		{ "set-desc-idtr", NO_ARG,	&set_desc_idtr, 1 },
+		{ "get-desc-idtr", NO_ARG,	&get_desc_idtr, 1 },
+		{ "get-lowmem", NO_ARG,		&get_lowmem,	1 },
+		{ "get-highmem",NO_ARG,		&get_highmem,	1 },
+		{ "get-efer",	NO_ARG,		&get_efer,	1 },
+		{ "get-cr0",	NO_ARG,		&get_cr0,	1 },
+		{ "get-cr3",	NO_ARG,		&get_cr3,	1 },
+		{ "get-cr4",	NO_ARG,		&get_cr4,	1 },
+		{ "get-dr7",	NO_ARG,		&get_dr7,	1 },
+		{ "get-rsp",	NO_ARG,		&get_rsp,	1 },
+		{ "get-rip",	NO_ARG,		&get_rip,	1 },
+		{ "get-rax",	NO_ARG,		&get_rax,	1 },
+		{ "get-rbx",	NO_ARG,		&get_rbx,	1 },
+		{ "get-rcx",	NO_ARG,		&get_rcx,	1 },
+		{ "get-rdx",	NO_ARG,		&get_rdx,	1 },
+		{ "get-rsi",	NO_ARG,		&get_rsi,	1 },
+		{ "get-rdi",	NO_ARG,		&get_rdi,	1 },
+		{ "get-rbp",	NO_ARG,		&get_rbp,	1 },
+		{ "get-r8",	NO_ARG,		&get_r8,	1 },
+		{ "get-r9",	NO_ARG,		&get_r9,	1 },
+		{ "get-r10",	NO_ARG,		&get_r10,	1 },
+		{ "get-r11",	NO_ARG,		&get_r11,	1 },
+		{ "get-r12",	NO_ARG,		&get_r12,	1 },
+		{ "get-r13",	NO_ARG,		&get_r13,	1 },
+		{ "get-r14",	NO_ARG,		&get_r14,	1 },
+		{ "get-r15",	NO_ARG,		&get_r15,	1 },
+		{ "get-rflags",	NO_ARG,		&get_rflags,	1 },
+		{ "get-cs",	NO_ARG,		&get_cs,	1 },
+		{ "get-ds",	NO_ARG,		&get_ds,	1 },
+		{ "get-es",	NO_ARG,		&get_es,	1 },
+		{ "get-fs",	NO_ARG,		&get_fs,	1 },
+		{ "get-gs",	NO_ARG,		&get_gs,	1 },
+		{ "get-ss",	NO_ARG,		&get_ss,	1 },
+		{ "get-tr",	NO_ARG,		&get_tr,	1 },
+		{ "get-ldtr",	NO_ARG,		&get_ldtr,	1 },
+		{ "get-vmcs-pinbased-ctls",
+				NO_ARG,		&get_pinbased_ctls, 1 },
+		{ "get-vmcs-procbased-ctls",
+				NO_ARG,		&get_procbased_ctls, 1 },
+		{ "get-vmcs-procbased-ctls2",
+				NO_ARG,		&get_procbased_ctls2, 1 },
+		{ "get-vmcs-guest-linear-address",
+				NO_ARG,		&get_vmcs_gla,	1 },
+		{ "get-vmcs-guest-physical-address",
+				NO_ARG,		&get_vmcs_gpa,	1 },
+		{ "get-vmcs-entry-interruption-info",
+				NO_ARG, &get_vmcs_entry_interruption_info, 1},
+		{ "get-vmcs-eptp", NO_ARG,	&get_eptp,	1 },
+		{ "get-vmcs-exception-bitmap",
+				NO_ARG,		&get_exception_bitmap, 1 },
+		{ "get-vmcs-io-bitmap-address",
+				NO_ARG,		&get_io_bitmap,	1 },
+		{ "get-vmcs-tsc-offset", NO_ARG,&get_tsc_offset, 1 },
+		{ "get-vmcs-cr0-mask", NO_ARG,	&get_cr0_mask,	1 },
+		{ "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 },
+		{ "get-vmcs-cr4-mask", NO_ARG,	&get_cr4_mask,	1 },
+		{ "get-vmcs-cr4-shadow", NO_ARG,&get_cr4_shadow, 1 },
+		{ "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1},
+		{ "get-vmcs-apic-access-address",
+				NO_ARG,		&get_apic_access_addr, 1},
+		{ "get-vmcs-virtual-apic-address",
+				NO_ARG,		&get_virtual_apic_addr, 1},
+		{ "get-vmcs-tpr-threshold",
+				NO_ARG,		&get_tpr_threshold, 1 },
+		{ "get-vmcs-msr-bitmap",
+				NO_ARG,		&get_msr_bitmap, 1 },
+		{ "get-vmcs-msr-bitmap-address",
+				NO_ARG,		&get_msr_bitmap_address, 1 },
+		{ "get-vmcs-vpid", NO_ARG,	&get_vpid,	1 },
+		{ "get-vmcs-ple-gap", NO_ARG,	&get_ple_gap,	1 },
+		{ "get-vmcs-ple-window", NO_ARG,&get_ple_window,1 },
+		{ "get-vmcs-instruction-error",
+				NO_ARG,		&get_inst_err,	1 },
+		{ "get-vmcs-exit-ctls", NO_ARG,	&get_exit_ctls,	1 },
+		{ "get-vmcs-entry-ctls",
+					NO_ARG,	&get_entry_ctls, 1 },
+		{ "get-vmcs-guest-pat",	NO_ARG,	&get_guest_pat,	1 },
+		{ "get-vmcs-host-pat",	NO_ARG,	&get_host_pat,	1 },
+		{ "get-vmcs-host-cr0",
+				NO_ARG,		&get_host_cr0,	1 },
+		{ "get-vmcs-host-cr3",
+				NO_ARG,		&get_host_cr3,	1 },
+		{ "get-vmcs-host-cr4",
+				NO_ARG,		&get_host_cr4,	1 },
+		{ "get-vmcs-host-rip",
+				NO_ARG,		&get_host_rip,	1 },
+		{ "get-vmcs-host-rsp",
+				NO_ARG,		&get_host_rsp,	1 },
+		{ "get-vmcs-guest-sysenter",
+				NO_ARG,		&get_guest_sysenter, 1 },
+		{ "get-vmcs-link", NO_ARG,	&get_vmcs_link, 1 },
+		{ "get-vmcs-exit-reason",
+				NO_ARG,		&get_vmcs_exit_reason, 1 },
+		{ "get-vmcs-exit-qualification",
+			NO_ARG,		&get_vmcs_exit_qualification, 1 },
+		{ "get-vmcs-exit-interruption-info",
+				NO_ARG,	&get_vmcs_exit_interruption_info, 1},
+		{ "get-vmcs-exit-interruption-error",
+				NO_ARG,	&get_vmcs_exit_interruption_error, 1},
+		{ "get-vmcs-interruptibility",
+				NO_ARG, &get_vmcs_interruptibility, 1 },
+		{ "get-x2apic-state",NO_ARG,	&get_x2apic_state, 1 },
+		{ "get-all",	NO_ARG,		&get_all,	1 },
+		{ "run",	NO_ARG,		&run,		1 },
+		{ "create",	NO_ARG,		&create,	1 },
+		{ "destroy",	NO_ARG,		&destroy,	1 },
+		{ NULL,		0,		NULL,		0 }
+	};
+
+	vcpu = 0;
+	progname = basename(argv[0]);
+
+	while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
+		switch (ch) {
+		case 0:
+			break;
+		case VMNAME:
+			vmname = optarg;
+			break;
+		case VCPU:
+			vcpu = atoi(optarg);
+			break;
+		case SET_MEM:
+			memsize = atoi(optarg) * MB;
+			memsize = roundup(memsize, 2 * MB);
+			break;
+		case SET_EFER:
+			efer = strtoul(optarg, NULL, 0);
+			set_efer = 1;
+			break;
+		case SET_CR0:
+			cr0 = strtoul(optarg, NULL, 0);
+			set_cr0 = 1;
+			break;
+		case SET_CR3:
+			cr3 = strtoul(optarg, NULL, 0);
+			set_cr3 = 1;
+			break;
+		case SET_CR4:
+			cr4 = strtoul(optarg, NULL, 0);
+			set_cr4 = 1;
+			break;
+		case SET_DR7:
+			dr7 = strtoul(optarg, NULL, 0);
+			set_dr7 = 1;
+			break;
+		case SET_RSP:
+			rsp = strtoul(optarg, NULL, 0);
+			set_rsp = 1;
+			break;
+		case SET_RIP:
+			rip = strtoul(optarg, NULL, 0);
+			set_rip = 1;
+			break;
+		case SET_RAX:
+			rax = strtoul(optarg, NULL, 0);
+			set_rax = 1;
+			break;
+		case SET_RFLAGS:
+			rflags = strtoul(optarg, NULL, 0);
+			set_rflags = 1;
+			break;
+		case DESC_BASE:
+			desc_base = strtoul(optarg, NULL, 0);
+			break;
+		case DESC_LIMIT:
+			desc_limit = strtoul(optarg, NULL, 0);
+			break;
+		case DESC_ACCESS:
+			desc_access = strtoul(optarg, NULL, 0);
+			break;
+		case SET_CS:
+			cs = strtoul(optarg, NULL, 0);
+			set_cs = 1;
+			break;
+		case SET_DS:
+			ds = strtoul(optarg, NULL, 0);
+			set_ds = 1;
+			break;
+		case SET_ES:
+			es = strtoul(optarg, NULL, 0);
+			set_es = 1;
+			break;
+		case SET_FS:
+			fs = strtoul(optarg, NULL, 0);
+			set_fs = 1;
+			break;
+		case SET_GS:
+			gs = strtoul(optarg, NULL, 0);
+			set_gs = 1;
+			break;
+		case SET_SS:
+			ss = strtoul(optarg, NULL, 0);
+			set_ss = 1;
+			break;
+		case SET_TR:
+			tr = strtoul(optarg, NULL, 0);
+			set_tr = 1;
+			break;
+		case SET_LDTR:
+			ldtr = strtoul(optarg, NULL, 0);
+			set_ldtr = 1;
+			break;
+		case SET_X2APIC_STATE:
+			x2apic_state = strtol(optarg, NULL, 0);
+			set_x2apic_state = 1;
+			break;
+		case SET_VMCS_EXCEPTION_BITMAP:
+			exception_bitmap = strtoul(optarg, NULL, 0);
+			set_exception_bitmap = 1;
+			break;
+		case SET_VMCS_ENTRY_INTERRUPTION_INFO:
+			vmcs_entry_interruption_info = strtoul(optarg, NULL, 0);
+			set_vmcs_entry_interruption_info = 1;
+			break;
+		case SET_CAP:
+			capval = strtoul(optarg, NULL, 0);
+			setcap = 1;
+			break;
+		case CAPNAME:
+			capname = optarg;
+			break;
+		case UNASSIGN_PPTDEV:
+			unassign_pptdev = 1;
+			if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
+				usage();
+			break;
+		default:
+			usage();
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (vmname == NULL)
+		usage();
+
+	error = 0;
+
+	if (!error && create)
+		error = vm_create(vmname);
+
+	if (!error) {
+		ctx = vm_open(vmname);
+		if (ctx == NULL)
+			error = -1;
+	}
+
+	if (!error && memsize)
+		error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE);
+
+	if (!error && set_efer)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
+
+	if (!error && set_cr0)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0);
+
+	if (!error && set_cr3)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3);
+
+	if (!error && set_cr4)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4);
+
+	if (!error && set_dr7)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7);
+
+	if (!error && set_rsp)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp);
+
+	if (!error && set_rip)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip);
+
+	if (!error && set_rax)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax);
+
+	if (!error && set_rflags) {
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
+					rflags);
+	}
+
+	if (!error && set_desc_ds) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_es) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_ss) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_cs) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_fs) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_gs) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_tr) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_ldtr) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
+				    desc_base, desc_limit, desc_access);
+	}
+
+	if (!error && set_desc_gdtr) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
+				    desc_base, desc_limit, 0);
+	}
+
+	if (!error && set_desc_idtr) {
+		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
+				    desc_base, desc_limit, 0);
+	}
+
+	if (!error && set_cs)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs);
+
+	if (!error && set_ds)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds);
+
+	if (!error && set_es)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es);
+
+	if (!error && set_fs)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs);
+
+	if (!error && set_gs)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs);
+
+	if (!error && set_ss)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss);
+
+	if (!error && set_tr)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr);
+
+	if (!error && set_ldtr)
+		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
+
+	if (!error && set_x2apic_state)
+		error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
+
+#ifdef	__FreeBSD__
+	if (!error && unassign_pptdev)
+		error = vm_unassign_pptdev(ctx, bus, slot, func);
+#endif
+
+	if (!error && set_exception_bitmap) {
+		error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP,
+					  exception_bitmap);
+	}
+
+	if (!error && set_vmcs_entry_interruption_info) {
+		error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,
+					  vmcs_entry_interruption_info);
+	}
+
+	if (!error && (get_lowmem || get_all)) {
+		gpa = 0;
+		error = vm_get_memory_seg(ctx, gpa, &len, &wired);
+		if (error == 0)
+			printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
+			    wired ? " wired" : "");
+	}
+
+	if (!error && (get_highmem || get_all)) {
+		gpa = 4 * GB;
+		error = vm_get_memory_seg(ctx, gpa, &len, &wired);
+		if (error == 0)
+			printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len,
+			    wired ? " wired" : "");
+	}
+
+	if (!error && (get_efer || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer);
+		if (error == 0)
+			printf("efer[%d]\t\t0x%016lx\n", vcpu, efer);
+	}
+
+	if (!error && (get_cr0 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0);
+		if (error == 0)
+			printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
+	}
+
+	if (!error && (get_cr3 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3);
+		if (error == 0)
+			printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
+	}
+
+	if (!error && (get_cr4 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4);
+		if (error == 0)
+			printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
+	}
+
+	if (!error && (get_dr7 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7);
+		if (error == 0)
+			printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7);
+	}
+
+	if (!error && (get_rsp || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp);
+		if (error == 0)
+			printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
+	}
+
+	if (!error && (get_rip || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
+		if (error == 0)
+			printf("rip[%d]\t\t0x%016lx\n", vcpu, rip);
+	}
+
+	if (!error && (get_rax || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax);
+		if (error == 0)
+			printf("rax[%d]\t\t0x%016lx\n", vcpu, rax);
+	}
+
+	if (!error && (get_rbx || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx);
+		if (error == 0)
+			printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx);
+	}
+
+	if (!error && (get_rcx || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx);
+		if (error == 0)
+			printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx);
+	}
+
+	if (!error && (get_rdx || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx);
+		if (error == 0)
+			printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx);
+	}
+
+	if (!error && (get_rsi || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi);
+		if (error == 0)
+			printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi);
+	}
+
+	if (!error && (get_rdi || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi);
+		if (error == 0)
+			printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi);
+	}
+
+	if (!error && (get_rbp || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp);
+		if (error == 0)
+			printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp);
+	}
+
+	if (!error && (get_r8 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8);
+		if (error == 0)
+			printf("r8[%d]\t\t0x%016lx\n", vcpu, r8);
+	}
+
+	if (!error && (get_r9 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9);
+		if (error == 0)
+			printf("r9[%d]\t\t0x%016lx\n", vcpu, r9);
+	}
+
+	if (!error && (get_r10 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10);
+		if (error == 0)
+			printf("r10[%d]\t\t0x%016lx\n", vcpu, r10);
+	}
+
+	if (!error && (get_r11 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11);
+		if (error == 0)
+			printf("r11[%d]\t\t0x%016lx\n", vcpu, r11);
+	}
+
+	if (!error && (get_r12 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12);
+		if (error == 0)
+			printf("r12[%d]\t\t0x%016lx\n", vcpu, r12);
+	}
+
+	if (!error && (get_r13 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13);
+		if (error == 0)
+			printf("r13[%d]\t\t0x%016lx\n", vcpu, r13);
+	}
+
+	if (!error && (get_r14 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14);
+		if (error == 0)
+			printf("r14[%d]\t\t0x%016lx\n", vcpu, r14);
+	}
+
+	if (!error && (get_r15 || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15);
+		if (error == 0)
+			printf("r15[%d]\t\t0x%016lx\n", vcpu, r15);
+	}
+
+	if (!error && (get_rflags || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
+					&rflags);
+		if (error == 0)
+			printf("rflags[%d]\t0x%016lx\n", vcpu, rflags);
+	}
+
+#ifdef	__FreeBSD__
+	if (!error && (get_stats || get_all)) {
+		int i, num_stats;
+		uint64_t *stats;
+		struct timeval tv;
+		const char *desc;
+
+		stats = vm_get_stats(ctx, vcpu, &tv, &num_stats);
+		if (stats != NULL) {
+			printf("vcpu%d\n", vcpu);
+			for (i = 0; i < num_stats; i++) {
+				desc = vm_get_stat_desc(ctx, i);
+				printf("%-40s\t%ld\n", desc, stats[i]);
+			}
+		}
+	}
+#endif
+
+	if (!error && (get_desc_ds || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_es || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_fs || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_gs || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_ss || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_cs || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_tr || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_ldtr || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
+			       vcpu, desc_base, desc_limit, desc_access);	
+		}
+	}
+
+	if (!error && (get_desc_gdtr || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("gdtr[%d]\t\t0x%016lx/0x%08x\n",
+			       vcpu, desc_base, desc_limit);	
+		}
+	}
+
+	if (!error && (get_desc_idtr || get_all)) {
+		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
+				    &desc_base, &desc_limit, &desc_access);
+		if (error == 0) {
+			printf("idtr[%d]\t\t0x%016lx/0x%08x\n",
+			       vcpu, desc_base, desc_limit);	
+		}
+	}
+
+	if (!error && (get_cs || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs);
+		if (error == 0)
+			printf("cs[%d]\t\t0x%04lx\n", vcpu, cs);
+	}
+
+	if (!error && (get_ds || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds);
+		if (error == 0)
+			printf("ds[%d]\t\t0x%04lx\n", vcpu, ds);
+	}
+
+	if (!error && (get_es || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es);
+		if (error == 0)
+			printf("es[%d]\t\t0x%04lx\n", vcpu, es);
+	}
+
+	if (!error && (get_fs || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs);
+		if (error == 0)
+			printf("fs[%d]\t\t0x%04lx\n", vcpu, fs);
+	}
+
+	if (!error && (get_gs || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs);
+		if (error == 0)
+			printf("gs[%d]\t\t0x%04lx\n", vcpu, gs);
+	}
+
+	if (!error && (get_ss || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss);
+		if (error == 0)
+			printf("ss[%d]\t\t0x%04lx\n", vcpu, ss);
+	}
+
+	if (!error && (get_tr || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr);
+		if (error == 0)
+			printf("tr[%d]\t\t0x%04lx\n", vcpu, tr);
+	}
+
+	if (!error && (get_ldtr || get_all)) {
+		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr);
+		if (error == 0)
+			printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
+	}
+
+	if (!error && (get_x2apic_state || get_all)) {
+		error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
+		if (error == 0)
+			printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state);
+	}
+
+	if (!error && (get_pinbased_ctls || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl);
+		if (error == 0)
+			printf("pinbased_ctls[%d]\t0x%08lx\n", vcpu, ctl);
+	}
+
+	if (!error && (get_procbased_ctls || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_PRI_PROC_BASED_CTLS, &ctl);
+		if (error == 0)
+			printf("procbased_ctls[%d]\t0x%08lx\n", vcpu, ctl);
+	}
+
+	if (!error && (get_procbased_ctls2 || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_SEC_PROC_BASED_CTLS, &ctl);
+		if (error == 0)
+			printf("procbased_ctls2[%d]\t0x%08lx\n", vcpu, ctl);
+	}
+
+	if (!error && (get_vmcs_gla || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_LINEAR_ADDRESS, &u64);
+		if (error == 0)
+			printf("gla[%d]\t\t0x%016lx\n", vcpu, u64);
+	}
+
+	if (!error && (get_vmcs_gpa || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_PHYSICAL_ADDRESS, &u64);
+		if (error == 0)
+			printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64);
+	}
+
+	if (!error && (get_vmcs_entry_interruption_info || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64);
+		if (error == 0) {
+			printf("entry_interruption_info[%d]\t0x%08lx\n",
+				vcpu, u64);
+		}
+	}
+
+	if (!error && (get_eptp || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp);
+		if (error == 0)
+			printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp);
+	}
+
+	if (!error && (get_exception_bitmap || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP,
+					  &bm);
+		if (error == 0)
+			printf("exception_bitmap[%d]\t0x%08lx\n", vcpu, bm);
+	}
+
+	if (!error && (get_io_bitmap || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm);
+		if (error == 0)
+			printf("io_bitmap_a[%d]\t0x%08lx\n", vcpu, bm);
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm);
+		if (error == 0)
+			printf("io_bitmap_b[%d]\t0x%08lx\n", vcpu, bm);
+	}
+
+	if (!error && (get_tsc_offset || get_all)) {
+		uint64_t tscoff;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff);
+		if (error == 0)
+			printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff);
+	}
+
+	if (!error && (get_cr0_mask || get_all)) {
+		uint64_t cr0mask;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask);
+		if (error == 0)
+			printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask);
+	}
+
+	if (!error && (get_cr0_shadow || get_all)) {
+		uint64_t cr0shadow;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW,
+					  &cr0shadow);
+		if (error == 0)
+			printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow);
+	}
+
+	if (!error && (get_cr4_mask || get_all)) {
+		uint64_t cr4mask;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask);
+		if (error == 0)
+			printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask);
+	}
+
+	if (!error && (get_cr4_shadow || get_all)) {
+		uint64_t cr4shadow;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW,
+					  &cr4shadow);
+		if (error == 0)
+			printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow);
+	}
+	
+	if (!error && (get_cr3_targets || get_all)) {
+		uint64_t target_count, target_addr;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT,
+					  &target_count);
+		if (error == 0) {
+			printf("cr3_target_count[%d]\t0x%08lx\n",
+				vcpu, target_count);
+		}
+
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0,
+					  &target_addr);
+		if (error == 0) {
+			printf("cr3_target0[%d]\t\t0x%016lx\n",
+				vcpu, target_addr);
+		}
+
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1,
+					  &target_addr);
+		if (error == 0) {
+			printf("cr3_target1[%d]\t\t0x%016lx\n",
+				vcpu, target_addr);
+		}
+
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2,
+					  &target_addr);
+		if (error == 0) {
+			printf("cr3_target2[%d]\t\t0x%016lx\n",
+				vcpu, target_addr);
+		}
+
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3,
+					  &target_addr);
+		if (error == 0) {
+			printf("cr3_target3[%d]\t\t0x%016lx\n",
+				vcpu, target_addr);
+		}
+	}
+
+	if (!error && (get_apic_access_addr || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr);
+		if (error == 0)
+			printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr);
+	}
+
+	if (!error && (get_virtual_apic_addr || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr);
+		if (error == 0)
+			printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr);
+	}
+
+	if (!error && (get_tpr_threshold || get_all)) {
+		uint64_t threshold;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD,
+					  &threshold);
+		if (error == 0)
+			printf("tpr_threshold[%d]\t0x%08lx\n", vcpu, threshold);
+	}
+
+	if (!error && (get_msr_bitmap_address || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr);
+		if (error == 0)
+			printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr);
+	}
+
+	if (!error && (get_msr_bitmap || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr);
+		if (error == 0)
+			error = dump_vmcs_msr_bitmap(vcpu, addr);
+	}
+
+	if (!error && (get_vpid || get_all)) {
+		uint64_t vpid;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid);
+		if (error == 0)
+			printf("vpid[%d]\t\t0x%04lx\n", vcpu, vpid);
+	}
+	
+	if (!error && (get_ple_window || get_all)) {
+		uint64_t window;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window);
+		if (error == 0)
+			printf("ple_window[%d]\t\t0x%08lx\n", vcpu, window);
+	}
+
+	if (!error && (get_ple_gap || get_all)) {
+		uint64_t gap;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap);
+		if (error == 0)
+			printf("ple_gap[%d]\t\t0x%08lx\n", vcpu, gap);
+	}
+
+	if (!error && (get_inst_err || get_all)) {
+		uint64_t insterr;
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR,
+					  &insterr);
+		if (error == 0) {
+			printf("instruction_error[%d]\t0x%08lx\n",
+				vcpu, insterr);
+		}
+	}
+
+	if (!error && (get_exit_ctls || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl);
+		if (error == 0)
+			printf("exit_ctls[%d]\t\t0x%08lx\n", vcpu, ctl);
+	}
+
+	if (!error && (get_entry_ctls || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl);
+		if (error == 0)
+			printf("entry_ctls[%d]\t\t0x%08lx\n", vcpu, ctl);
+	}
+
+	if (!error && (get_host_pat || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat);
+		if (error == 0)
+			printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat);
+	}
+
+	if (!error && (get_guest_pat || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat);
+		if (error == 0)
+			printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat);
+	}
+
+	if (!error && (get_host_cr0 || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0);
+		if (error == 0)
+			printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
+	}
+
+	if (!error && (get_host_cr3 || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3);
+		if (error == 0)
+			printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
+	}
+
+	if (!error && (get_host_cr4 || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4);
+		if (error == 0)
+			printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
+	}
+
+	if (!error && (get_host_rip || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip);
+		if (error == 0)
+			printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip);
+	}
+
+	if (!error && (get_host_rsp || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp);
+		if (error == 0)
+			printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
+	}
+
+	if (!error && (get_guest_sysenter || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_IA32_SYSENTER_CS, &cs);
+		if (error == 0)
+			printf("guest_sysenter_cs[%d]\t0x%08lx\n", vcpu, cs);
+
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_IA32_SYSENTER_ESP, &rsp);
+		if (error == 0)
+			printf("guest_sysenter_sp[%d]\t0x%016lx\n", vcpu, rsp);
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_IA32_SYSENTER_EIP, &rip);
+		if (error == 0)
+			printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip);
+	}
+
+	if (!error && (get_vmcs_link || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr);
+		if (error == 0)
+			printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr);
+	}
+
+	if (!error && (get_vmcs_exit_reason || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64);
+		if (error == 0)
+			printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64);
+	}
+
+	if (!error && (get_vmcs_exit_qualification || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION,
+					  &u64);
+		if (error == 0)
+			printf("vmcs_exit_qualification[%d]\t0x%016lx\n",
+				vcpu, u64);
+	}
+
+	if (!error && (get_vmcs_exit_interruption_info || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_INFO, &u64);
+		if (error == 0) {
+			printf("vmcs_exit_interruption_info[%d]\t0x%08lx\n",
+				vcpu, u64);
+		}
+	}
+
+	if (!error && (get_vmcs_exit_interruption_error || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_ERRCODE,
+		    &u64);
+		if (error == 0) {
+			printf("vmcs_exit_interruption_error[%d]\t0x%08lx\n",
+				vcpu, u64);
+		}
+	}
+
+	if (!error && (get_vmcs_interruptibility || get_all)) {
+		error = vm_get_vmcs_field(ctx, vcpu,
+					  VMCS_GUEST_INTERRUPTIBILITY, &u64);
+		if (error == 0) {
+			printf("vmcs_guest_interruptibility[%d]\t0x%08lx\n",
+				vcpu, u64);
+		}
+	}
+
+	if (!error && setcap) {
+		int captype;
+		captype = vm_capability_name2type(capname);
+		error = vm_set_capability(ctx, vcpu, captype, capval);
+		if (error != 0 && errno == ENOENT)
+			printf("Capability \"%s\" is not available\n", capname);
+	}
+
+	if (!error && (getcap || get_all)) {
+		int captype, val, getcaptype;
+
+		if (getcap && capname)
+			getcaptype = vm_capability_name2type(capname);
+		else
+			getcaptype = -1;
+
+		for (captype = 0; captype < VM_CAP_MAX; captype++) {
+			if (getcaptype >= 0 && captype != getcaptype)
+				continue;
+			error = vm_get_capability(ctx, vcpu, captype, &val);
+			if (error == 0) {
+				printf("Capability \"%s\" is %s on vcpu %d\n",
+					vm_capability_type2name(captype),
+					val ? "set" : "not set", vcpu);
+			} else if (errno == ENOENT) {
+				error = 0;
+				printf("Capability \"%s\" is not available\n",
+					vm_capability_type2name(captype));
+			} else {
+				break;
+			}
+		}
+	}
+
+	if (!error && run) {
+		error = vm_run(ctx, vcpu, &vmexit);
+		if (error == 0)
+			dump_vm_run_exitcode(&vmexit, vcpu);
+		else
+			printf("vm_run error %d\n", error);
+	}
+
+	if (error)
+		printf("errno = %d\n", errno);
+
+	if (!error && destroy)
+		error = vm_destroy(ctx);
+
+	exit(error);
+}
diff --git a/usr/src/cmd/bhyveload-uefi/Makefile b/usr/src/cmd/bhyveload-uefi/Makefile
new file mode 100644
index 0000000000..bbcbacf32f
--- /dev/null
+++ b/usr/src/cmd/bhyveload-uefi/Makefile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# Top-level makefile for bhyveload-uefi.  It builds nothing itself: every
+# target is forwarded to the per-ISA subdirectories listed in SUBDIRS.
+#
+PROG =		bhyveload-uefi
+
+include ../Makefile.cmd
+
+#
+# Only $(MACH64) is ever appended to SUBDIRS here.
+# NOTE(review): $(BUILD64) presumably expands to a comment character when
+# 64-bit builds are disabled, turning this line into a no-op -- confirm in
+# Makefile.master.
+#
+$(BUILD64)SUBDIRS += $(MACH64)
+
+#
+# Each top-level target sets TARGET to its own name so that the $(SUBDIRS)
+# rule below can hand the same target name to the sub-make.
+#
+all	:=	TARGET = all
+install	:=	TARGET = install
+clean	:=	TARGET = clean
+clobber	:=	TARGET = clobber
+lint	:=	TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint:	$(SUBDIRS)
+
+#
+# After the subdirectories have installed, replace $(ROOTUSRSBINPROG) with a
+# link to $(ISAEXEC).  The leading '-' makes make ignore a failure of either
+# command.
+#
+install: $(SUBDIRS)
+	-$(RM) $(ROOTUSRSBINPROG)
+	-$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+
+#
+# Descend into a subdirectory and run the current TARGET there.  FRC has no
+# recipe and no prerequisites, so depending on it forces this rule to run
+# every time.  CW_NO_SHADOW and __GNUC are passed to the sub-make on its
+# command line.
+#
+$(SUBDIRS):	FRC
+	@cd $@; pwd; $(MAKE) CW_NO_SHADOW=true __GNUC= $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/bhyveload-uefi/Makefile.com b/usr/src/cmd/bhyveload-uefi/Makefile.com
new file mode 100644
index 0000000000..7865cca8d8
--- /dev/null
+++ b/usr/src/cmd/bhyveload-uefi/Makefile.com
@@ -0,0 +1,52 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# Common build rules for bhyveload-uefi, included by the per-ISA makefiles
+# one directory below this one (hence the ../ and ../../ relative paths).
+#
+PROG= bhyveload-uefi
+
+#
+# SRCS must name each source by the path it is actually found at from the
+# ISA subdirectory.  expand_number.c is compiled from the FreeBSD libutil
+# sources under $(CONTRIB) (see the pattern rule at the bottom of this file),
+# so list it by that path rather than as a bare file name.
+#
+SRCS = ../bhyveload-uefi.c $(CONTRIB)/freebsd/lib/libutil/expand_number.c
+OBJS = bhyveload-uefi.o expand_number.o
+
+include ../../Makefile.cmd
+
+.KEEP_STATE:
+
+CFLAGS +=	$(CCVERBOSE)
+#
+# CPPFLAGS is replaced, not appended to: the FreeBSD compat and contrib
+# header directories are placed ahead of $(CPPFLAGS.master) so they are
+# searched first.
+#
+CPPFLAGS =	-I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd $(CPPFLAGS.master) \
+	        -I$(ROOT)/usr/platform/i86pc/include
+LDLIBS +=	-lvmmapi
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) -o $@ $(OBJS) $(LDFLAGS) $(LDLIBS)
+	$(POST_PROCESS)
+
+install: all $(ROOTUSRSBINPROG)
+
+clean:
+	$(RM) $(OBJS)
+
+lint:	lint_SRCS
+
+include ../../Makefile.targ
+
+#
+# bhyveload-uefi.o is built from the source in the parent directory.
+#
+%.o: ../%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
+#
+# expand_number.o is built from the FreeBSD libutil sources.
+#
+%.o: $(CONTRIB)/freebsd/lib/libutil/%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
diff --git a/usr/src/cmd/bhyveload-uefi/amd64/Makefile b/usr/src/cmd/bhyveload-uefi/amd64/Makefile
new file mode 100644
index 0000000000..b602c50d05
--- /dev/null
+++ b/usr/src/cmd/bhyveload-uefi/amd64/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# 64-bit build of bhyveload-uefi: the common rules from Makefile.com plus
+# the 64-bit command settings from Makefile.cmd.64.
+#
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+#
+# Add the amd64-specific FreeBSD compat and contrib header directories to
+# the search path set up by Makefile.com.
+#
+CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+
+install: all $(ROOTUSRSBINPROG64)
diff --git a/usr/src/cmd/bhyveload-uefi/bhyveload-uefi.c b/usr/src/cmd/bhyveload-uefi/bhyveload-uefi.c
new file mode 100644
index 0000000000..62a7ca5d0f
--- /dev/null
+++ b/usr/src/cmd/bhyveload-uefi/bhyveload-uefi.c
@@ -0,0 +1,190 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/types.h>
+
+#include <machine/vmm.h>
+
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include <vmmapi.h>
+
+#define	KB	(1024UL)
+#define	MB	(1024 * 1024UL)
+#define	GB	(1024 * 1024 * 1024UL)
+
+#define	UEFI_ROM_ADDR	0xFFE00000
+#define	UEFI_ROM_SIZE	(2 * MB)
+/*
+ * N.B. the UEFI code zeros the first page in memory so use the second.
+ */
+#define	BHYVE_HOB_ADDR		0x00002000
+#define	BHYVE_BO_HOB_ADDR	0x00002080
+
+#define	UEFI_ROM_PATH	"/usr/share/bhyve/uefi-rom.bin"
+
+struct platform_info {
+	uint32_t	ncpus;
+};
+
+/*
+ * Boot order code:
+ * 0 - EFI_CD_HD
+ * 1 - EFI_CD
+ * 2 - EFI_HD_CD
+ * 3 - EFI_HD
+ * 4 - EFI_NET
+ * 5 - EFI_NET_CD_HD
+ * 6 - EFI_HD_HD_CD
+ * 7 - LEGACY_CD_HD
+ * 8 - LEGACY_CD
+ * 9 - LEGACY_HD_CD
+ * 10 - LEGACY_HD
+ * 11 - EFI_SHELL
+ */
+
+struct bootorder_info {
+	uint32_t	guestbootorder;
+};
+
+static char *vmname, *progname;
+static struct vmctx *ctx;
+
+/*
+ * Print a one-line usage synopsis for the program and exit with status 1.
+ * This function does not return.
+ */
+static void
+usage(void)
+{
+	/*
+	 * Adjacent string literals are concatenated, so the option list needs
+	 * its own trailing space to keep "<vmname>" a separate word.
+	 */
+	printf("usage: %s "
+	       "[-c vcpus] [-m mem-size] [-b bootorder] "
+	       "<vmname>\n", progname);
+	exit(1);
+}
+
+/*
+ * Prepare a guest for booting from the UEFI ROM:
+ *   1. parse the -c (vcpu count), -m (memory size) and -b (boot order)
+ *      options and the mandatory VM name;
+ *   2. create the VM (an already existing VM is accepted) and open it;
+ *   3. set up guest memory and the ROM region at UEFI_ROM_ADDR;
+ *   4. copy the ROM image from UEFI_ROM_PATH into the ROM region;
+ *   5. store the vcpu count and boot order at BHYVE_HOB_ADDR and
+ *      BHYVE_BO_HOB_ADDR in guest memory;
+ *   6. reset vcpu 0.
+ *
+ * Returns 0 on success.  Every fatal error prints a diagnostic and exits
+ * with a non-zero status.
+ */
+int
+main(int argc, char** argv)
+{
+	int opt, error, fd;
+	int guest_ncpus;
+	int guest_bootorder = 0;
+	uint64_t mem_size;
+	size_t romoff;
+	ssize_t nread;
+	char *membase, *rombase;
+	struct platform_info *pi;
+	struct bootorder_info *bi;
+
+	progname = argv[0];
+
+	guest_ncpus = 1;
+	mem_size = 256 * MB;
+
+	while ((opt = getopt(argc, argv, "c:m:b:")) != -1) {
+		switch (opt) {
+		case 'c':
+			guest_ncpus = atoi(optarg);
+			/* atoi() yields 0 for non-numeric input. */
+			if (guest_ncpus < 1) {
+				errx(EX_USAGE, "Invalid vcpu count '%s'",
+				    optarg);
+			}
+			break;
+		case 'm':
+			error = vm_parse_memsize(optarg, &mem_size);
+			if (error != 0 || mem_size == 0)
+				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
+			break;
+		case 'b':
+			guest_bootorder = atoi(optarg);
+			if (guest_bootorder < 0 || guest_bootorder > 11) {
+				/* errx() prints the message and exits. */
+				errx(EX_USAGE, "Invalid bootoption: %d\n"
+				    "\tBoot order code:\n"
+				    "\t0 - EFI_CD_HD\n"
+				    "\t1 - EFI_CD\n"
+				    "\t2 - EFI_HD_CD\n"
+				    "\t3 - EFI_HD\n"
+				    "\t4 - EFI_NET\n"
+				    "\t5 - EFI_NET_CD_HD\n"
+				    "\t6 - EFI_HD_HD_CD\n"
+				    "\t7 - LEGACY_CD_HD\n"
+				    "\t8 - LEGACY_CD\n"
+				    "\t9 - LEGACY_HD_CD\n"
+				    "\t10 - LEGACY_HD\n"
+				    "\t11 - EFI_SHELL\n", guest_bootorder);
+			}
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1)
+		usage();
+
+	vmname = argv[0];
+	error = vm_create(vmname);
+	/* A VM that already exists is reused rather than treated as fatal. */
+	if (error != 0 && errno != EEXIST) {
+		perror("vm_create");
+		exit(1);
+	}
+
+	ctx = vm_open(vmname);
+	if (ctx == NULL) {
+		perror("vm_open");
+		exit(1);
+	}
+
+	/* Failure to enable this capability is reported but not fatal. */
+	error = vm_set_capability(ctx, 0, VM_CAP_UNRESTRICTED_GUEST, 1);
+	if (error) {
+		perror("vm_set_capability(VM_CAP_UNRESTRICTED_GUEST)");
+	}
+
+	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
+	if (error) {
+		perror("vm_setup_memory");
+		exit(1);
+	}
+
+	/*
+	 * Both structures written below live at or above offset 0x2000
+	 * (8KB), so the mapping must extend past 8KB to cover them.
+	 */
+	membase = vm_map_gpa(ctx, 0, 12 * KB);
+	if (membase == NULL)
+		errx(1, "vm_map_gpa: unable to map low guest memory");
+
+	error = vm_setup_rom(ctx, UEFI_ROM_ADDR, UEFI_ROM_SIZE);
+	if (error) {
+		perror("vm_setup_rom");
+		exit(1);
+	}
+	rombase = vm_map_gpa(ctx, UEFI_ROM_ADDR, UEFI_ROM_SIZE);
+	if (rombase == NULL)
+		errx(1, "vm_map_gpa: unable to map the ROM region");
+
+	fd = open(UEFI_ROM_PATH, O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	/*
+	 * read(2) may return fewer bytes than requested, so keep reading
+	 * until the ROM region is full or the file ends.
+	 */
+	romoff = 0;
+	while (romoff < UEFI_ROM_SIZE) {
+		nread = read(fd, rombase + romoff, UEFI_ROM_SIZE - romoff);
+		if (nread == -1) {
+			perror("read");
+			exit(1);
+		}
+		if (nread == 0)
+			break;
+		romoff += nread;
+	}
+	close(fd);
+
+	/* A short image is reported but, as before, does not stop the load. */
+	if (romoff != UEFI_ROM_SIZE) {
+		warnx("%s: short ROM image: read %lu of %lu bytes",
+		    UEFI_ROM_PATH, (unsigned long)romoff,
+		    (unsigned long)UEFI_ROM_SIZE);
+	}
+
+	pi = (struct platform_info *)(membase + BHYVE_HOB_ADDR);
+	pi->ncpus = guest_ncpus;
+	bi = (struct bootorder_info *)(membase + BHYVE_BO_HOB_ADDR);
+	bi->guestbootorder = guest_bootorder;
+
+	error = vcpu_reset(ctx, 0);
+	if (error) {
+		perror("vcpu_reset");
+		exit(1);
+	}
+
+	return (0);
+}
diff --git a/usr/src/cmd/bhyveload-uefi/i386/Makefile b/usr/src/cmd/bhyveload-uefi/i386/Makefile
new file mode 100644
index 0000000000..f5b7bb6915
--- /dev/null
+++ b/usr/src/cmd/bhyveload-uefi/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# 32-bit build of bhyveload-uefi, using only the common rules from
+# Makefile.com.
+# NOTE(review): the parent Makefile only ever appends $(MACH64) to SUBDIRS,
+# so it is unclear whether this directory is reached by a normal build --
+# confirm.
+#
+include ../Makefile.com
+
+install: all $(ROOTUSRSBINPROG32)
diff --git a/usr/src/cmd/mdb/intel/amd64/vmm/Makefile b/usr/src/cmd/mdb/intel/amd64/vmm/Makefile
new file mode 100644
index 0000000000..bf9219b435
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/vmm/Makefile
@@ -0,0 +1,20 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Pluribus Networks Inc.
+#
+
+#
+# Subdirectory driver for the vmm mdb module.
+# NOTE(review): MAKEVARS is presumably consumed by Makefile.subdirs and
+# passed to each sub-make -- confirm there.
+#
+MAKEVARS = CW_NO_SHADOW=true __GNUC=
+
+include $(SRC)/Makefile.master
+# Only the $(MACH64) subdirectory is ever added to SUBDIRS here.
+$(BUILD64)SUBDIRS += 	$(MACH64)
+include ../../../Makefile.subdirs
diff --git a/usr/src/cmd/mdb/intel/amd64/vmm/amd64/Makefile b/usr/src/cmd/mdb/intel/amd64/vmm/amd64/Makefile
new file mode 100644
index 0000000000..49ca0c5eb3
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/vmm/amd64/Makefile
@@ -0,0 +1,32 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# Builds the vmm.so mdb module from vmm.c.
+# NOTE(review): MDBTGT = kvm presumably selects the kernel (kvm) target
+# flavour of the module -- confirm against Makefile.module.
+#
+MODULE = vmm.so
+MDBTGT = kvm
+
+MODSRCS = vmm.c
+
+include ../../../../../Makefile.cmd
+include ../../../../../Makefile.cmd.64
+include ../../../Makefile.amd64
+include ../../../../Makefile.module
+
+#
+# CPPFLAGS is assigned with '=' after the includes above, so any value they
+# established is discarded and the preprocessor flags are rebuilt here:
+# kernel-style defines, the FreeBSD compat/contrib headers, the uts headers
+# and the mdb common headers.
+#
+CPPFLAGS = -D_KERNEL -D_MACHDEP
+CPPFLAGS += -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64
+CPPFLAGS += -I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64
+CPPFLAGS += -I$(SRC)/uts/common -I$(SRC)/uts/i86pc
+CPPFLAGS += -I$(SRC)/cmd/mdb/common
+
+#
+# NOTE(review): "-_cc=" looks like a compiler-wrapper option that applies
+# only to one compiler back end, with -xdryrun keeping that back end from
+# actually compiling -- confirm against the cw(1) documentation.
+#
+CPPFLAGS += -_cc=-xdryrun
diff --git a/usr/src/cmd/mdb/intel/amd64/vmm/vmm.c b/usr/src/cmd/mdb/intel/amd64/vmm/vmm.c
new file mode 100644
index 0000000000..9e29d8662a
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/vmm/vmm.c
@@ -0,0 +1,238 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/param.h>
+
+#include <mdb/mdb_modapi.h>
+#include <sys/cpuvar.h>
+#include <sys/varargs.h>
+#include <sys/vmm.h>
+#include <sys/vmm_impl.h>
+
+/*
+ * VMM trace debug walker/dcmd code
+ */
+
+/*
+ * Initialize the vmm_trace_dmsg_t walker by either using the given starting
+ * address, or reading the value of the kernel's vmm_debug_rbuf pointer and
+ * starting from the head message recorded in that ring buffer.
+ * We also allocate a vmm_trace_dmsg_t for storage, and save this using the
+ * walk_data pointer.
+ *
+ * Returns WALK_NEXT on success, or WALK_ERR if the ring buffer cannot be
+ * located or read.
+ */
+static int
+vmm_dmsg_walk_i(mdb_walk_state_t *wsp)
+{
+	uintptr_t rbuf_addr;
+	vmm_trace_rbuf_t rbuf;
+
+	/* walk_addr is an integer address, so compare against 0, not NULL. */
+	if (wsp->walk_addr == 0) {
+		if (mdb_readvar(&rbuf_addr, "vmm_debug_rbuf") == -1) {
+			mdb_warn("failed to read 'vmm_debug_rbuf'");
+			return (WALK_ERR);
+		}
+
+		if (mdb_vread(&rbuf, sizeof (vmm_trace_rbuf_t), rbuf_addr)
+		    == -1) {
+			mdb_warn("failed to read vmm_trace_rbuf_t at %p",
+			    rbuf_addr);
+			return (WALK_ERR);
+		}
+
+		wsp->walk_addr = (uintptr_t)(vmm_trace_dmsg_t *)rbuf.dmsgh;
+	}
+
+	/*
+	 * Save ptr to head of ring buffer to prevent looping.
+	 */
+	wsp->walk_arg = (void *)wsp->walk_addr;
+	wsp->walk_data = mdb_alloc(sizeof (vmm_trace_dmsg_t), UM_SLEEP);
+	return (WALK_NEXT);
+}
+
+/*
+ * At each step, read a vmm_trace_dmsg_t into our private storage, and then
+ * invoke the callback function.  We terminate when we reach a NULL next
+ * pointer, or when the next pointer leads back to the message the walk
+ * started from (the buffer is a ring).
+ *
+ * Returns the callback's status, WALK_DONE when there is nothing left to
+ * visit, or WALK_ERR if a message cannot be read.
+ */
+static int
+vmm_dmsg_walk_s(mdb_walk_state_t *wsp)
+{
+	int status;
+
+	/* walk_addr is an integer address, so compare against 0, not NULL. */
+	if (wsp->walk_addr == 0)
+		return (WALK_DONE);
+
+	if (mdb_vread(wsp->walk_data, sizeof (vmm_trace_dmsg_t),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read vmm_trace_dmsg_t at %p",
+		    wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	status = wsp->walk_callback(wsp->walk_addr, wsp->walk_data,
+	    wsp->walk_cbdata);
+
+	wsp->walk_addr =
+	    (uintptr_t)(((vmm_trace_dmsg_t *)wsp->walk_data)->next);
+
+	/*
+	 * If we've looped then we're done; clearing walk_addr makes the
+	 * next step return WALK_DONE.
+	 */
+	if (wsp->walk_addr == (uintptr_t)wsp->walk_arg)
+		wsp->walk_addr = 0;
+
+	return (status);
+}
+
+/*
+ * Walker teardown, called once a walk has finished.  The only resource the
+ * walker owns is the vmm_trace_dmsg_t scratch buffer that vmm_dmsg_walk_i
+ * allocated and stored in walk_data, so release it here.
+ */
+static void
+vmm_dmsg_walk_f(mdb_walk_state_t *wsp)
+{
+	void *scratch = wsp->walk_data;
+
+	mdb_free(scratch, sizeof (vmm_trace_dmsg_t));
+}
+
+/*
+ * This routine is used by the vmm_dmsg_dump dcmd to dump content of
+ * VMM trace ring buffer.
+ *
+ * Starting at the kernel address 'addr', each message is read from the
+ * target, printed with its timestamp, and its 'next' pointer followed until
+ * the list ends or leads back to the first message.
+ *
+ * print_pathname is accepted for the dcmd's -a option but is not acted on
+ * by this routine.  If 'printed' is non-NULL it is incremented once for
+ * every message printed.
+ *
+ * Returns DCMD_OK, or DCMD_ERR if a message cannot be read.
+ */
+int
+vmm_dmsg_dump(vmm_trace_dmsg_t *addr, int print_pathname, uint_t *printed)
+{
+	vmm_trace_dmsg_t	dmsg, *dmsgh = addr;
+	char			merge[1024];
+
+	while (addr != NULL) {
+		if (mdb_vread(&dmsg, sizeof (dmsg), (uintptr_t)addr) !=
+		    sizeof (dmsg)) {
+			mdb_warn("failed to read message pointer in kernel");
+			return (DCMD_ERR);
+		}
+
+		/*
+		 * The three numeric fields split tv_nsec into its
+		 * millisecond, microsecond and nanosecond components.
+		 */
+		(void) mdb_snprintf(merge, sizeof (merge),
+		    "[%Y:%03d:%03d:%03d] : %s",
+		    dmsg.timestamp.tv_sec,
+		    (int)dmsg.timestamp.tv_nsec/1000000,
+		    (int)(dmsg.timestamp.tv_nsec/1000)%1000,
+		    (int)dmsg.timestamp.tv_nsec%1000,
+		    dmsg.buf);
+
+		mdb_printf("%s", merge);
+
+		if (printed != NULL) {
+			(*printed)++;
+		}
+
+		/* Stop at the end of the list or once the ring wraps. */
+		if (((addr = dmsg.next) == NULL) || (dmsg.next == dmsgh)) {
+			break;
+		}
+	}
+
+	return (DCMD_OK);
+}
+
+/*
+ * Entry point for the vmm_dmsg_dump dcmd.
+ *
+ * 1. Process flag passed to vmm_dmsg_dump dcmd.
+ * 2. Obtain VMM trace ring buffer pointer, either from the address given
+ *    to the dcmd or from the kernel's vmm_debug_rbuf variable.
+ * 3. Pass VMM trace ring buffer pointer to vmm_dmsg_dump()
+ *    to dump content of VMM trace ring buffer.
+ *
+ * Returns DCMD_OK on success (including an empty ring buffer), DCMD_USAGE
+ * for bad arguments, and DCMD_ERR if the buffer cannot be read or nothing
+ * was printed.
+ */
+int
+vmm_rbuf_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	vmm_trace_rbuf_t	rbuf;
+	uint_t		printed = 0; /* have we printed anything? */
+	uint_t		print_pathname = FALSE;
+	int		rval = DCMD_OK;
+
+	if (argc > 1) {
+		return (DCMD_USAGE);
+	}
+
+	/*
+	 * The option list handed to mdb_getopts() must be terminated by
+	 * NULL, and MDB_OPT_SETBITS stores through a uint_t pointer.
+	 */
+	if (mdb_getopts(argc, argv,
+	    'a', MDB_OPT_SETBITS, TRUE, &print_pathname, NULL) != argc) {
+		return (DCMD_USAGE);
+	}
+
+	/*
+	 * If ring buffer address not provided try to obtain
+	 * it using vmm_debug_rbuf global.
+	 */
+	if ((addr == 0) || !(flags & DCMD_ADDRSPEC)) {
+		if (mdb_readvar(&addr, "vmm_debug_rbuf") == -1) {
+			mdb_warn("Failed to read 'vmm_debug_rbuf'.");
+			return (DCMD_ERR);
+		}
+	}
+
+	if (mdb_vread(&rbuf, sizeof (rbuf), addr) != sizeof (rbuf)) {
+		mdb_warn("Failed to read ring buffer in kernel.");
+		return (DCMD_ERR);
+	}
+
+	if (rbuf.dmsgh == NULL) {
+		mdb_printf("The vmm trace ring buffer is empty.\n");
+		return (DCMD_OK);
+	}
+
+	rval = vmm_dmsg_dump((vmm_trace_dmsg_t *)rbuf.dmsgh,
+	    (int)print_pathname, &printed);
+
+	if (rval != DCMD_OK) {
+		return (rval);
+	}
+
+	if (printed == 0) {
+		mdb_warn("Failed to read vmm trace ring buffer.");
+		return (DCMD_ERR);
+	}
+
+	return (rval);
+}
+
+/*
+ * MDB module linkage information:
+ *
+ * We declare a list of structures describing our dcmds, a list of structures
+ * describing our walkers, and a function named _mdb_init to return a pointer
+ * to our module information.
+ */
+
+/*
+ * dcmd table: a single "vmm_dmsg_dump" dcmd implemented by vmm_rbuf_dump().
+ * Its usage string advertises -a; the option is parsed, but the dump routine
+ * in this file does not act on it.  The { NULL } entry ends the table.
+ */
+static const mdb_dcmd_t dcmds[] = {
+	{ "vmm_dmsg_dump", "[-a]", "Dump vmm trace debug messages",
+	    vmm_rbuf_dump },
+	{ NULL }
+};
+
+/*
+ * Walker table: a single "vmm_dmsg" walker, given as its init, step and
+ * fini functions.  The { NULL } entry ends the table.
+ */
+static const mdb_walker_t walkers[] = {
+	{ "vmm_dmsg",
+	    "walk ring buffer containing vmm trace debug messages",
+	    vmm_dmsg_walk_i, vmm_dmsg_walk_s, vmm_dmsg_walk_f },
+	{ NULL }
+};
+
+/* Module description tying the API version to the two tables above. */
+static const mdb_modinfo_t modinfo = {
+	MDB_API_VERSION, dcmds, walkers
+};
+
+/*
+ * Module initialization hook: returns a pointer to this module's
+ * mdb_modinfo_t.
+ */
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+	return (&modinfo);
+}
diff --git a/usr/src/compat/freebsd/amd64/machine/asmacros.h b/usr/src/compat/freebsd/amd64/machine/asmacros.h
new file mode 100644
index 0000000000..fcf35a7b78
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/asmacros.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_ASMACROS_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_ASMACROS_H_
+
+#define	ENTRY(x)	/* start global function x, 16-byte aligned */ \
+	.text; .p2align 4,0x90; \
+	.globl  x; \
+	.type   x, @function; \
+x:
+
+#define	END(x)	/* set symbol size of x: current location minus x */ \
+	.size x, [.-x]
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_ASMACROS_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/atomic.h b/usr/src/compat/freebsd/amd64/machine/atomic.h
new file mode 100644
index 0000000000..5b78143d21
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/atomic.h
@@ -0,0 +1,244 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_ATOMIC_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_ATOMIC_H_
+
+/* Read *p using a locked cmpxchg, which leaves the stored value unchanged. */
+static __inline u_char
+atomic_load_acq_char(volatile u_char *p)
+{
+	u_char val;
+
+	__asm volatile("lock ; cmpxchgb %b0,%1"
+	    : "=a" (val), "=m" (*p) : "m" (*p) : "memory", "cc");
+	return (val);
+}
+
+/* Read *p using a locked cmpxchg, which leaves the stored value unchanged. */
+static __inline u_short
+atomic_load_acq_short(volatile u_short *p)
+{
+	u_short val;
+
+	__asm volatile("lock ; cmpxchgw %w0,%1"
+	    : "=a" (val), "=m" (*p)
+	    : "m" (*p) : "memory", "cc");
+	return (val);
+}
+
+/* Read *p using a locked cmpxchg, which leaves the stored value unchanged. */
+static __inline u_int
+atomic_load_acq_int(volatile u_int *p)
+{
+	u_int val;
+
+	__asm volatile("lock ; cmpxchgl %0,%1"
+	    : "=a" (val), "=m" (*p)
+	    : "m" (*p) : "memory", "cc");
+	return (val);
+}
+
+/* Read *p using a locked cmpxchg, which leaves the stored value unchanged. */
+static __inline u_long
+atomic_load_acq_long(volatile u_long *p)
+{
+	u_long val;
+
+	__asm volatile("lock ; cmpxchgq %0,%1"
+	    : "=a" (val), "=m" (*p)
+	    : "m" (*p) : "memory", "cc");
+	return (val);
+}
+
+static __inline void
+atomic_store_rel_char(volatile u_char *p, u_char v)
+{
+	__asm volatile("" : : : "memory");	/* compiler barrier only */
+	*p = v;		/* plain store, issued after the barrier */
+}
+
+static __inline void
+atomic_store_rel_short(volatile u_short *p, u_short v)
+{
+	__asm volatile("" : : : "memory");	/* compiler barrier only */
+	*p = v;		/* plain store, issued after the barrier */
+}
+
+static __inline void
+atomic_store_rel_int(volatile u_int *p, u_int v)
+{
+	__asm volatile("" : : : "memory");	/* compiler barrier only */
+	*p = v;		/* plain store, issued after the barrier */
+}
+
+static __inline void
+atomic_store_rel_long(volatile u_long *p, u_long v)
+{
+	__asm volatile("" : : : "memory");	/* compiler barrier only */
+	*p = v;		/* plain store, issued after the barrier */
+}
+
+/*
+ * Atomic compare and set.
+ *
+ * if (*dst == expect) *dst = src (all 32 bit words)
+ *
+ * Returns 0 on failure, non-zero on success
+ */
+static __inline int
+atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+{
+	u_char ok;
+
+	/*
+	 * cmpxchgl stores src only if *dst equals %eax (expect); sete then
+	 * turns the resulting ZF into the 0/1 return value.
+	 */
+	__asm __volatile(
+	"	lock ;			"
+	"	cmpxchgl %3,%1 ;	"
+	"       sete	%0 ;		"
+	"# atomic_cmpset_int"
+	: "=q" (ok), "+m" (*dst), "+a" (expect) : "r" (src) : "memory", "cc");
+	return (ok);
+}
+
+static __inline int
+atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
+{
+	u_char ok;
+
+	/*
+	 * cmpxchgq stores src only if *dst equals %rax (expect); sete then
+	 * turns the resulting ZF into the 0/1 return value.
+	 */
+	__asm __volatile(
+	"	lock ;			"
+	"	cmpxchgq %3,%1 ;	"
+	"       sete	%0 ;		"
+	"# atomic_cmpset_long"
+	: "=q" (ok), "+m" (*dst), "+a" (expect) : "r" (src) : "memory", "cc");
+	return (ok);
+}
+
+/*
+ * Atomically add the value of v to the integer pointed to by p and return
+ * the previous value of *p.
+ */
+static __inline u_int
+atomic_fetchadd_int(volatile u_int *p, u_int v)
+{
+	/*
+	 * xaddl leaves the sum in *p and the former value of *p in v.
+	 */
+	__asm __volatile(
+	"	lock ;			"
+	"	xaddl	%0, %1 ;	"
+	"# atomic_fetchadd_int"
+	: "+r" (v), "=m" (*p)		/* 0 (result), 1 */
+	: "m" (*p) : "cc");		/* 2 */
+	return (v);
+}
+
+/* Atomically OR the bits of v into *p. */
+static __inline void
+atomic_set_int(volatile u_int *p, u_int v)
+{
+	__asm volatile("lock ; orl %1,%0"
+	    : "=m" (*p)
+	    : "ir" (v), "m" (*p)
+	    : "cc");
+}
+
+/* Atomically clear in *p every bit that is set in v. */
+static __inline void
+atomic_clear_int(volatile u_int *p, u_int v)
+{
+	__asm volatile("lock ; andl %1,%0"
+	    : "=m" (*p)
+	    : "ir" (~v), "m" (*p)
+	    : "cc");
+}
+
+/* Atomically subtract v from *p. */
+static __inline void
+atomic_subtract_int(volatile u_int *p, u_int v)
+{
+	__asm volatile("lock ; subl %1,%0"
+	    : "=m" (*p)
+	    : "ir" (v), "m" (*p)
+	    : "cc");
+}
+
+/* Atomically OR v into *p; "e" keeps immediates within orq's imm32 range. */
+static __inline void
+atomic_set_long(volatile u_long *p, u_long v)
+{
+	__asm volatile("lock ; orq %1,%0"
+	    : "+m" (*p)
+	    : "er" (v)
+	    : "cc");
+}
+
+/* Atomically clear v's bits in *p; "e" limits immediates to andq's imm32. */
+static __inline void
+atomic_clear_long(volatile u_long *p, u_long v)
+{
+	__asm volatile("lock ; andq %1,%0"
+	    : "+m" (*p)
+	    : "er" (~v) : "cc");
+}
+
+static __inline u_int
+atomic_swap_int(volatile u_int *p, u_int v)
+{
+	/* xchg on memory is implicitly locked; v returns the old *p. */
+	__asm __volatile(
+	"	xchgl	%1,%0 ;		"
+	"# atomic_swap_int"
+	: "+r" (v),			/* 0: new value in, old value out */
+	  "+m" (*p));			/* 1: the word being swapped */
+	return (v);
+}
+
+static __inline u_long
+atomic_swap_long(volatile u_long *p, u_long v)
+{
+	/* xchg on memory is implicitly locked; v returns the old *p. */
+	__asm __volatile(
+	"	xchgq	%1,%0 ;		"
+	"# atomic_swap_long"
+	: "+r" (v),			/* 0: new value in, old value out */
+	  "+m" (*p));			/* 1: the word being swapped */
+	return (v);
+}
+
+#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
+#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
+
+/* Operations on 32-bit double words. */
+#define	atomic_load_acq_32	atomic_load_acq_int
+#define	atomic_store_rel_32	atomic_store_rel_int
+#define	atomic_cmpset_32	atomic_cmpset_int
+
+/* Operations on 64-bit quad words. */
+#define	atomic_cmpset_64	atomic_cmpset_long
+#define	atomic_readandclear_64	atomic_readandclear_long
+
+/* Operations on pointers. */
+#define	atomic_cmpset_ptr	atomic_cmpset_long
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_ATOMIC_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/clock.h b/usr/src/compat/freebsd/amd64/machine/clock.h
new file mode 100644
index 0000000000..f50b42a126
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/clock.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_CLOCK_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_CLOCK_H_
+
+extern uint64_t cpu_freq_hz;
+
+#define	tsc_freq	cpu_freq_hz
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_CLOCK_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/cpufunc.h b/usr/src/compat/freebsd/amd64/machine/cpufunc.h
new file mode 100644
index 0000000000..cf485e947c
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/cpufunc.h
@@ -0,0 +1,165 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_CPUFUNC_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_CPUFUNC_H_
+
+static __inline u_long
+bsfq(u_long mask)
+{
+	u_long	bit;	/* lowest set bit index; undefined for mask == 0 */
+
+	__asm __volatile("bsfq %1,%0" : "=r" (bit) : "rm" (mask));
+	return (bit);
+}
+
+static __inline u_int
+bsrl(u_int mask)
+{
+	u_int	bit;	/* highest set bit index; undefined for mask == 0 */
+
+	__asm __volatile("bsrl %1,%0" : "=r" (bit) : "rm" (mask));
+	return (bit);
+}
+
+static __inline u_long
+bsrq(u_long mask)
+{
+	u_long	bit;	/* highest set bit index; undefined for mask == 0 */
+
+	__asm __volatile("bsrq %1,%0" : "=r" (bit) : "rm" (mask));
+	return (bit);
+}
+
+static __inline void
+clts(void)
+{
+	__asm __volatile("clts");	/* clear CR0.TS (task switched) */
+}
+
+static __inline void
+do_cpuid(u_int ax, u_int *p)
+{
+	/* Run CPUID leaf ax; p[0..3] receive %eax, %ebx, %ecx and %edx. */
+	__asm __volatile("cpuid"
+	    : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax));
+}
+
+static __inline void
+cpuid_count(u_int ax, u_int cx, u_int *p)
+{
+	__asm __volatile("cpuid"	/* leaf in %eax, sub-leaf in %ecx */
+	    : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+	    : "0" (ax), "c" (cx));
+}
+
+static __inline void
+enable_intr(void)
+{
+	__asm __volatile("sti");	/* set RFLAGS.IF */
+}
+
+static __inline int
+ffsl(long mask)
+{
+	return (mask != 0 ? (int)bsfq((u_long)mask) + 1 : 0);	/* 1-based */
+}
+
+static __inline int
+fls(int mask)
+{
+	return (mask != 0 ? (int)bsrl((u_int)mask) + 1 : 0);	/* 1-based */
+}
+
+static __inline int
+flsl(long mask)
+{
+	return (mask != 0 ? (int)bsrq((u_long)mask) + 1 : 0);	/* 1-based */
+}
+
+static __inline int
+flsll(long long mask)
+{
+	return (flsl((long)mask));	/* relies on 64-bit long (LP64) */
+}
+
+static __inline uint64_t
+rdmsr(u_int msr)
+{
+	uint32_t lo, hi;	/* %eax and %edx halves of the MSR value */
+
+	__asm __volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
+	return (((uint64_t)hi << 32) | lo);
+}
+
+static __inline uint64_t
+rdtsc(void)
+{
+	uint32_t lo, hi;	/* %eax and %edx halves of the counter */
+
+	__asm __volatile("rdtsc" : "=a" (lo), "=d" (hi));
+	return (((uint64_t)hi << 32) | lo);
+}
+
+static __inline void
+wrmsr(u_int msr, uint64_t newval)
+{
+	/* WRMSR takes the value in %edx:%eax and the MSR index in %ecx. */
+	const uint32_t lo = (uint32_t)newval;
+	const uint32_t hi = (uint32_t)(newval >> 32);
+
+	__asm __volatile("wrmsr" : : "a" (lo), "d" (hi), "c" (msr));
+}
+
+static __inline void
+load_cr0(u_long data)
+{
+	__asm __volatile("movq %0,%%cr0" : : "r" (data));	/* set %cr0 */
+}
+
+static __inline u_long
+rcr0(void)
+{
+	u_long	cr0;	/* value read from %cr0 */
+
+	__asm __volatile("movq %%cr0,%0" : "=r" (cr0));
+	return (cr0);
+}
+
+static __inline u_long
+rcr3(void)
+{
+	u_long	cr3;	/* value read from %cr3 */
+
+	__asm __volatile("movq %%cr3,%0" : "=r" (cr3));
+	return (cr3);
+}
+
+static __inline void
+load_cr4(u_long data)
+{
+	__asm __volatile("movq %0,%%cr4" : : "r" (data));	/* set %cr4 */
+}
+
+static __inline u_long
+rcr4(void)
+{
+	u_long	cr4;	/* value read from %cr4 */
+
+	__asm __volatile("movq %%cr4,%0" : "=r" (cr4));
+	return (cr4);
+}
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_CPUFUNC_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/fpu.h b/usr/src/compat/freebsd/amd64/machine/fpu.h
new file mode 100644
index 0000000000..48e686780c
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/fpu.h
@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
+
+#define	XSAVE_AREA_ALIGN	64
+
+void	fpuexit(kthread_t *td);
+void	fpurestore(void *);
+void	fpusave(void *);
+
+struct savefpu	*fpu_save_area_alloc(void);
+void	fpu_save_area_free(struct savefpu *fsa);
+void	fpu_save_area_reset(struct savefpu *fsa);
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/md_var.h b/usr/src/compat/freebsd/amd64/machine/md_var.h
new file mode 100644
index 0000000000..60fdd566e5
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/md_var.h
@@ -0,0 +1,24 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_MD_VAR_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_MD_VAR_H_
+
+extern  u_int   cpu_high;		/* Highest arg to CPUID */
+extern	u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
+extern	u_int	cpu_id;			/* Stepping ID */
+extern	char	cpu_vendor[];		/* CPU Origin code */
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_MD_VAR_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/param.h b/usr/src/compat/freebsd/amd64/machine/param.h
new file mode 100644
index 0000000000..eaca5ab8d7
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/param.h
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PARAM_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_PARAM_H_
+
+#ifdef	_KERNEL
+#define	MAXCPU		NCPU
+#endif	/* _KERNEL */
+
+#define	PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
+#define	PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
+#define	PAGE_MASK	(PAGE_SIZE-1)
+
+/* Size of the level 1 page table units */
+#define	NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
+
+/* Size of the level 2 page directory units */
+#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
+
+/* Size of the level 3 page directory pointer table units */
+#define	NPDPEPG		(PAGE_SIZE/(sizeof (pdp_entry_t)))
+
+/* Size of the level 4 page-map level-4 table units */
+#define	NPML4EPG	(PAGE_SIZE/(sizeof (pml4_entry_t)))
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_PARAM_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/pcb.h b/usr/src/compat/freebsd/amd64/machine/pcb.h
new file mode 100644
index 0000000000..75b5de640c
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/pcb.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
+
+#include <machine/fpu.h>
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/pmap.h b/usr/src/compat/freebsd/amd64/machine/pmap.h
new file mode 100644
index 0000000000..d0303bdd56
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/pmap.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_
+
+				/* ---- Intel Nomenclature ---- */
+#define	PG_V		0x001	/* P	Valid			*/
+#define	PG_RW		0x002	/* R/W	Read/Write		*/
+#define	PG_U		0x004	/* U/S	User/Supervisor 	*/
+#define	PG_A		0x020	/* A	Accessed		*/
+#define	PG_M		0x040	/* D	Dirty			*/
+#define	PG_PS		0x080	/* PS	Page size (0=4k,1=2M)	*/
+
+/*
+ * Page Protection Exception bits
+ */
+#define PGEX_P		0x01	/* Protection violation vs. not present */
+#define PGEX_W		0x02	/* during a Write cycle */
+#define PGEX_U		0x04	/* access from User mode (UPL) */
+#define PGEX_RSV	0x08	/* reserved PTE field is non-zero */
+#define PGEX_I		0x10	/* during an instruction fetch */
+
+typedef u_int64_t pd_entry_t;
+typedef u_int64_t pt_entry_t;
+typedef u_int64_t pdp_entry_t;
+typedef u_int64_t pml4_entry_t;
+
+#define	vtophys(va)	pmap_kextract(((vm_offset_t) (va)))
+vm_paddr_t pmap_kextract(vm_offset_t va);
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_PMAP_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/segments.h b/usr/src/compat/freebsd/amd64/machine/segments.h
new file mode 100644
index 0000000000..d0655f4a0e
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/segments.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_SEGMENTS_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_SEGMENTS_H_
+
+#include <x86/segments.h>
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_SEGMENTS_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/smp.h b/usr/src/compat/freebsd/amd64/machine/smp.h
new file mode 100644
index 0000000000..ef719b9684
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/smp.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_SMP_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm.h b/usr/src/compat/freebsd/amd64/machine/vmm.h
new file mode 100644
index 0000000000..79c3ec959e
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/vmm.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_VMM_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_VMM_H_
+
+#include <sys/vmm.h>
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_VMM_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm_dev.h b/usr/src/compat/freebsd/amd64/machine/vmm_dev.h
new file mode 100644
index 0000000000..fe9cb6c705
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/vmm_dev.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_VMM_DEV_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_VMM_DEV_H_
+
+#include <sys/vmm_dev.h>
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_VMM_DEV_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h b/usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h
new file mode 100644
index 0000000000..02c3f391c7
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_VMM_INSTRUCTION_EMUL_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_VMM_INSTRUCTION_EMUL_H_
+
+#include <sys/vmm_instruction_emul.h>
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_VMM_INSTRUCTION_EMUL_H_ */
diff --git a/usr/src/compat/freebsd/amd64/machine/vmparam.h b/usr/src/compat/freebsd/amd64/machine/vmparam.h
new file mode 100644
index 0000000000..c80c2af545
--- /dev/null
+++ b/usr/src/compat/freebsd/amd64/machine/vmparam.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_VMPARAM_H_
+#define	_COMPAT_FREEBSD_AMD64_MACHINE_VMPARAM_H_
+
+#endif	/* _COMPAT_FREEBSD_AMD64_MACHINE_VMPARAM_H_ */
diff --git a/usr/src/compat/freebsd/libutil.h b/usr/src/compat/freebsd/libutil.h
new file mode 100644
index 0000000000..e22ffc0551
--- /dev/null
+++ b/usr/src/compat/freebsd/libutil.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_LIBUTIL_H_
+#define	_COMPAT_FREEBSD_LIBUTIL_H_
+
+int	expand_number(const char *_buf, uint64_t *_num);
+
+#endif	/* _COMPAT_FREEBSD_LIBUTIL_H_ */
diff --git a/usr/src/compat/freebsd/net/ethernet.h b/usr/src/compat/freebsd/net/ethernet.h
new file mode 100644
index 0000000000..a0d5a828c6
--- /dev/null
+++ b/usr/src/compat/freebsd/net/ethernet.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_NET_ETHERNET_H_
+#define	_COMPAT_FREEBSD_SYS_NET_ETHERNET_H_
+
+#include <sys/ethernet.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_NET_ETHERNET_H_ */
diff --git a/usr/src/compat/freebsd/paths.h b/usr/src/compat/freebsd/paths.h
new file mode 100644
index 0000000000..e43c963f93
--- /dev/null
+++ b/usr/src/compat/freebsd/paths.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_PATHS_H_
+#define	_COMPAT_FREEBSD_PATHS_H_
+
+#define	_PATH_TMP	"/tmp/"
+
+#endif	/* _COMPAT_FREEBSD_PATHS_H_ */
diff --git a/usr/src/compat/freebsd/pthread_np.h b/usr/src/compat/freebsd/pthread_np.h
new file mode 100644
index 0000000000..641c58f406
--- /dev/null
+++ b/usr/src/compat/freebsd/pthread_np.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_PTHREAD_NP_H_
+#define	_COMPAT_FREEBSD_PTHREAD_NP_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+
+#include <synch.h>
+
+#define	pthread_set_name_np(thread, name)
+
+#define	pthread_mutex_isowned_np(x)	_mutex_held(x)
+
+#endif	/* _COMPAT_FREEBSD_PTHREAD_NP_H_ */
diff --git a/usr/src/compat/freebsd/string.h b/usr/src/compat/freebsd/string.h
new file mode 100644
index 0000000000..7e0f5c7ddc
--- /dev/null
+++ b/usr/src/compat/freebsd/string.h
@@ -0,0 +1,26 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_STRING_H_
+#define	_COMPAT_FREEBSD_STRING_H_
+
+/*
+ * This is quite a hack; blame bcopy/bcmp/bzero and memcpy/memcmp/memset.
+ */
+#include <strings.h>
+
+#include_next <string.h>
+
+#endif	/* _COMPAT_FREEBSD_STRING_H_ */
diff --git a/usr/src/compat/freebsd/strings.h b/usr/src/compat/freebsd/strings.h
new file mode 100644
index 0000000000..fa3539fb96
--- /dev/null
+++ b/usr/src/compat/freebsd/strings.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_STRINGS_H_
+#define	_COMPAT_FREEBSD_STRINGS_H_
+
+#include <machine/cpufunc.h>
+
+#include_next <strings.h>
+
+#endif	/* _COMPAT_FREEBSD_STRINGS_H_ */
diff --git a/usr/src/compat/freebsd/sys/_iovec.h b/usr/src/compat/freebsd/sys/_iovec.h
new file mode 100644
index 0000000000..b755ae7e21
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/_iovec.h
@@ -0,0 +1,24 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS__IOVEC_H_
+#define	_COMPAT_FREEBSD_SYS__IOVEC_H_
+
+struct iovec {
+	void	*iov_base;	/* start of the buffer */
+	size_t	iov_len;	/* size of the buffer in bytes */
+};
+
+#endif	/* _COMPAT_FREEBSD_SYS__IOVEC_H_ */
diff --git a/usr/src/compat/freebsd/sys/_pthreadtypes.h b/usr/src/compat/freebsd/sys/_pthreadtypes.h
new file mode 100644
index 0000000000..d746da3712
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/_pthreadtypes.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS__PTHREADTYPES_H_
+#define	_COMPAT_FREEBSD_SYS__PTHREADTYPES_H_
+
+#endif	/* _COMPAT_FREEBSD_SYS__PTHREADTYPES_H_ */
diff --git a/usr/src/compat/freebsd/sys/_types.h b/usr/src/compat/freebsd/sys/_types.h
new file mode 100644
index 0000000000..62c327d216
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/_types.h
@@ -0,0 +1,22 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS__TYPES_H_
+#define	_COMPAT_FREEBSD_SYS__TYPES_H_
+
+#include <sys/cdefs.h>
+#include <machine/_types.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS__TYPES_H_ */
diff --git a/usr/src/compat/freebsd/sys/callout.h b/usr/src/compat/freebsd/sys/callout.h
new file mode 100644
index 0000000000..17b6e31507
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/callout.h
@@ -0,0 +1,70 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_CALLOUT_H_
+#define	_COMPAT_FREEBSD_SYS_CALLOUT_H_
+
+#include <sys/cyclic.h>
+
+struct callout {
+	cyclic_id_t	c_cyc_id;	/* cyclic handle (see sys/cyclic.h) */
+	int		c_flags;	/* CALLOUT_ACTIVE, CALLOUT_PENDING */
+	void		(*c_func)(void *);	/* callback function */
+	void		*c_arg;		/* presumably passed to c_func */
+
+};
+
+#define	CALLOUT_ACTIVE		0x0002	/* callout is currently active */
+#define	CALLOUT_PENDING		0x0004	/* callout is waiting for timeout */
+
+#define	C_ABSOLUTE		0x0200	/* event time is absolute. */
+
+#define	callout_active(c)	((c)->c_flags & CALLOUT_ACTIVE)
+#define	callout_deactivate(c)	((c)->c_flags &= ~CALLOUT_ACTIVE)
+#define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
+
+void	vmm_glue_callout_init(struct callout *c, int mpsafe);
+int	vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt,
+    sbintime_t pr, void (*func)(void *), void *arg, int flags);
+int	vmm_glue_callout_stop(struct callout *c);
+int	vmm_glue_callout_drain(struct callout *c);
+
+static __inline void
+callout_init(struct callout *c, int mpsafe)
+{
+	vmm_glue_callout_init(c, mpsafe);	/* forwards to vmm glue */
+}
+
+static __inline int
+callout_stop(struct callout *c)
+{
+	return (vmm_glue_callout_stop(c));	/* forwards to vmm glue */
+}
+
+static __inline int
+callout_drain(struct callout *c)
+{
+	return (vmm_glue_callout_drain(c));	/* forwards to vmm glue */
+}
+
+static __inline int
+callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
+    void (*func)(void *), void *arg, int flags)	/* forwards to vmm glue */
+{
+	return (vmm_glue_callout_reset_sbt(c, sbt, pr, func, arg, flags));
+}
+
+
+#endif	/* _COMPAT_FREEBSD_SYS_CALLOUT_H_ */
diff --git a/usr/src/compat/freebsd/sys/cdefs.h b/usr/src/compat/freebsd/sys/cdefs.h
new file mode 100644
index 0000000000..974e323dbe
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/cdefs.h
@@ -0,0 +1,58 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_CDEFS_H_
+#define	_COMPAT_FREEBSD_SYS_CDEFS_H_
+
+#define	__FBSDID(s)
+
+#ifdef	__GNUC__
+#define	inline		__inline
+
+#define	__GNUCLIKE___SECTION		1
+
+#define	__dead2		__attribute__((__noreturn__))
+#define	__unused	__attribute__((__unused__))
+#define	__used		__attribute__((__used__))
+#define	__packed	__attribute__((__packed__))
+#define	__aligned(x)	__attribute__((__aligned__(x)))
+#define	__section(x)	__attribute__((__section__(x)))
+#endif
+
+/*
+ * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
+ * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
+ * The __CONCAT macro is a bit tricky to use if it must work in non-ANSI
+ * mode -- there must be no spaces between its arguments, and for nested
+ * __CONCAT's, all the __CONCAT's must be at the left.  __CONCAT can also
+ * concatenate double-quoted strings produced by the __STRING macro, but
+ * this only works with ANSI C.
+ *
+ * __XSTRING is like __STRING, but it expands any macros in its argument
+ * first.  It is only available with ANSI C.
+ */
+#if defined(__STDC__) || defined(__cplusplus)
+#define	__P(protos)	protos		/* full-blown ANSI C */
+#define	__CONCAT1(x,y)	x ## y
+#define	__CONCAT(x,y)	__CONCAT1(x,y)
+#define	__STRING(x)	#x		/* stringify without expanding x */
+#define	__XSTRING(x)	__STRING(x)	/* expand x, then stringify */
+#else	/* !(__STDC__ || __cplusplus) */
+#define	__P(protos)	()		/* traditional C preprocessor */
+#define	__CONCAT(x,y)	x/**/y
+#define	__STRING(x)	"x"
+#endif	/* !(__STDC__ || __cplusplus) */
+
+#endif	/* _COMPAT_FREEBSD_SYS_CDEFS_H_ */
diff --git a/usr/src/compat/freebsd/sys/cpuset.h b/usr/src/compat/freebsd/sys/cpuset.h
new file mode 100644
index 0000000000..8527624b5e
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/cpuset.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_CPUSET_H_
+#define	_COMPAT_FREEBSD_SYS_CPUSET_H_
+
+#define	NOCPU			-1
+
+#ifdef	_KERNEL
+#define	CPU_SET(cpu, set)		CPUSET_ADD(*(set), cpu)
+#define	CPU_SETOF(cpu, set)		CPUSET_ONLY(*(set), cpu)
+#define	CPU_ZERO(set)			CPUSET_ZERO(*(set))
+#define	CPU_CLR(cpu, set)		CPUSET_DEL(*(set), cpu)
+#define	CPU_FFS(set)			cpusetobj_ffs(set)
+#define	CPU_ISSET(cpu, set)		CPU_IN_SET(*(set), cpu)
+#define	CPU_CMP(set1, set2)		CPUSET_ISEQUAL(*(set1), *(set2))
+#define	CPU_SET_ATOMIC(cpu, set)	CPUSET_ATOMIC_ADD(*(set), cpu)
+
+#include <sys/cpuvar.h>
+
+int	cpusetobj_ffs(const cpuset_t *set);
+#else
+#include <machine/atomic.h>
+
+typedef int cpuset_t;
+
+#define	CPUSET(cpu)			(1UL << (cpu))
+
+#define	CPU_SET_ATOMIC(cpu, set)	atomic_set_int((set), CPUSET(cpu))
+#endif
+
+#endif	/* _COMPAT_FREEBSD_SYS_CPUSET_H_ */
diff --git a/usr/src/compat/freebsd/sys/disk.h b/usr/src/compat/freebsd/sys/disk.h
new file mode 100644
index 0000000000..c9bdc6a2d8
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/disk.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_DISK_H_
+#define	_COMPAT_FREEBSD_SYS_DISK_H_
+
+#endif	/* _COMPAT_FREEBSD_SYS_DISK_H_ */
diff --git a/usr/src/compat/freebsd/sys/endian.h b/usr/src/compat/freebsd/sys/endian.h
new file mode 100644
index 0000000000..a31bff55d6
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/endian.h
@@ -0,0 +1,125 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_ENDIAN_H_
+#define	_COMPAT_FREEBSD_SYS_ENDIAN_H_
+
+static __inline uint16_t
+be16dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0]: high byte */
+
+	return (b[1] | (b[0] << 8));
+}
+
+static __inline uint32_t
+be32dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0]: high byte */
+
+	return (b[3] | (b[2] << 8) | (b[1] << 16) | ((uint32_t)b[0] << 24));
+}
+
+static __inline uint64_t
+be64dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0..3]: high word */
+
+	return (be32dec(b + 4) | ((uint64_t)be32dec(b) << 32));
+}
+
+static __inline uint16_t
+le16dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0]: low byte */
+
+	return (b[0] | (b[1] << 8));
+}
+
+static __inline uint32_t
+le32dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0]: low byte */
+
+	return (b[0] | (b[1] << 8) | (b[2] << 16) | ((uint32_t)b[3] << 24));
+}
+
+static __inline uint64_t
+le64dec(const void *pp)
+{
+	const uint8_t *b = (const uint8_t *)pp;	/* b[0..3]: low word */
+
+	return (le32dec(b) | ((uint64_t)le32dec(b + 4) << 32));
+}
+
+static __inline void
+be16enc(void *pp, uint16_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+
+	out[1] = (uint8_t)u;		/* low byte goes last */
+	out[0] = (uint8_t)(u >> 8);	/* high byte goes first */
+}
+
+static __inline void
+be32enc(void *pp, uint32_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+	int i;
+
+	/* Peel off the low byte each pass, filling the buffer tail-first. */
+	for (i = 3; i >= 0; i--, u >>= 8)
+		out[i] = (uint8_t)u;
+}
+
+static __inline void
+be64enc(void *pp, uint64_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+
+	be32enc(out + 4, (uint32_t)u);		/* low word: bytes 4..7 */
+	be32enc(out, (uint32_t)(u >> 32));	/* high word: bytes 0..3 */
+}
+
+static __inline void
+le16enc(void *pp, uint16_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+
+	out[1] = (uint8_t)(u >> 8);	/* high byte goes last */
+	out[0] = (uint8_t)u;		/* low byte goes first */
+}
+
+static __inline void
+le32enc(void *pp, uint32_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+	int i;
+
+	/* Emit the low byte first, then shift the next one down. */
+	for (i = 0; i < 4; i++, u >>= 8)
+		out[i] = (uint8_t)u;
+}
+
+static __inline void
+le64enc(void *pp, uint64_t u)
+{
+	uint8_t *out = (uint8_t *)pp;
+
+	le32enc(out + 4, (uint32_t)(u >> 32));	/* high word: bytes 4..7 */
+	le32enc(out, (uint32_t)u);		/* low word: bytes 0..3 */
+}
+
+#endif	/* _COMPAT_FREEBSD_SYS_ENDIAN_H_ */
diff --git a/usr/src/compat/freebsd/sys/errno.h b/usr/src/compat/freebsd/sys/errno.h
new file mode 100644
index 0000000000..bd37f43065
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/errno.h
@@ -0,0 +1,27 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_ERRNO_H_
+#define	_COMPAT_FREEBSD_SYS_ERRNO_H_
+
+#ifndef	_KERNEL
+extern int *___errno(void);	/* prototyped: takes no arguments */
+
+#define	errno	(*(___errno()))
+#endif
+
+#include_next <sys/errno.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_ERRNO_H_ */
diff --git a/usr/src/compat/freebsd/sys/fcntl.h b/usr/src/compat/freebsd/sys/fcntl.h
new file mode 100644
index 0000000000..062a3b84ac
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/fcntl.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_FCNTL_H_
+#define	_COMPAT_FREEBSD_SYS_FCNTL_H_
+
+#define	O_DIRECT	0x0
+
+#include_next <sys/fcntl.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_FCNTL_H_ */
diff --git a/usr/src/compat/freebsd/sys/ioctl.h b/usr/src/compat/freebsd/sys/ioctl.h
new file mode 100644
index 0000000000..e223e1e4c7
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/ioctl.h
@@ -0,0 +1,22 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_IOCTL_H_
+#define	_COMPAT_FREEBSD_SYS_IOCTL_H_
+
+#include <sys/ioccom.h>
+#include_next <sys/ioctl.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_IOCTL_H_ */
diff --git a/usr/src/compat/freebsd/sys/kernel.h b/usr/src/compat/freebsd/sys/kernel.h
new file mode 100644
index 0000000000..b1c07674e4
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/kernel.h
@@ -0,0 +1,25 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_KERNEL_H_
+#define	_COMPAT_FREEBSD_SYS_KERNEL_H_
+
+#define	SYSINIT(uniquifier, subsystem, order, func, ident)
+
+#include <sys/linker_set.h>
+
+#define	ticks	ddi_get_lbolt()
+
+#endif	/* _COMPAT_FREEBSD_SYS_KERNEL_H_ */
diff --git a/usr/src/compat/freebsd/sys/ktr.h b/usr/src/compat/freebsd/sys/ktr.h
new file mode 100644
index 0000000000..96c499ef18
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/ktr.h
@@ -0,0 +1,27 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_KTR_H_
+#define	_COMPAT_FREEBSD_SYS_KTR_H_
+
+#define	CTR0(m, format)
+#define	CTR1(m, format, p1)
+#define	CTR2(m, format, p1, p2)
+#define	CTR3(m, format, p1, p2, p3)
+#define	CTR4(m, format, p1, p2, p3, p4)
+#define	CTR5(m, format, p1, p2, p3, p4, p5)
+#define	CTR6(m, d, p1, p2, p3, p4, p5, p6)
+
+#endif	/* _COMPAT_FREEBSD_SYS_KTR_H_ */
diff --git a/usr/src/compat/freebsd/sys/libkern.h b/usr/src/compat/freebsd/sys/libkern.h
new file mode 100644
index 0000000000..94675a0d66
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/libkern.h
@@ -0,0 +1,25 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_LIBKERN_H_
+#define	_COMPAT_FREEBSD_SYS_LIBKERN_H_
+
+#include <sys/systm.h>
+
+#ifndef	min
+static __inline u_int min(u_int a, u_int b) { return (b > a ? a : b); }
+#endif
+
+#endif	/* _COMPAT_FREEBSD_SYS_LIBKERN_H_ */
diff --git a/usr/src/compat/freebsd/sys/limits.h b/usr/src/compat/freebsd/sys/limits.h
new file mode 100644
index 0000000000..99ae0f4d64
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/limits.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_LIMITS_H_
+#define	_COMPAT_FREEBSD_SYS_LIMITS_H_
+
+#endif	/* _COMPAT_FREEBSD_SYS_LIMITS_H_ */
diff --git a/usr/src/compat/freebsd/sys/malloc.h b/usr/src/compat/freebsd/sys/malloc.h
new file mode 100644
index 0000000000..579df44533
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/malloc.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_MALLOC_H_
+#define	_COMPAT_FREEBSD_SYS_MALLOC_H_
+
+/*
+ * flags to malloc.
+ */
+#define	M_NOWAIT	0x0001		/* do not block */
+#define	M_WAITOK	0x0002		/* ok to block */
+#define	M_ZERO		0x0100		/* bzero the allocation */
+
+struct malloc_type {
+	const char	*ks_shortdesc;	/* Printable type name. */
+};
+
+#ifdef	_KERNEL
+#define	MALLOC_DEFINE(type, shortdesc, longdesc)			\
+	struct malloc_type type[1] = {					\
+		{ shortdesc }						\
+	}
+
+#define	MALLOC_DECLARE(type)						\
+	extern struct malloc_type type[1]
+
+void	free(void *addr, struct malloc_type *type);
+void	*malloc(unsigned long size, struct malloc_type *type, int flags);
+void	*old_malloc(unsigned long size, struct malloc_type *type , int flags);
+#endif	/* _KERNEL */
+
+#endif	/* _COMPAT_FREEBSD_SYS_MALLOC_H_ */
diff --git a/usr/src/compat/freebsd/sys/module.h b/usr/src/compat/freebsd/sys/module.h
new file mode 100644
index 0000000000..87b73e3fa3
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/module.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_MODULE_H_
+#define	_COMPAT_FREEBSD_SYS_MODULE_H_
+
+#endif	/* _COMPAT_FREEBSD_SYS_MODULE_H_ */
diff --git a/usr/src/compat/freebsd/sys/mutex.h b/usr/src/compat/freebsd/sys/mutex.h
new file mode 100644
index 0000000000..b99884b652
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/mutex.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_MUTEX_H_
+#define	_COMPAT_FREEBSD_SYS_MUTEX_H_
+
+#ifdef	_KERNEL
+
+#include <sys/debug.h>
+
+#define	MTX_DEF		0x00000000
+#define	MTX_SPIN	0x00000001
+
+struct mtx;
+
+void mtx_init(struct mtx *, char *name, const char *type_name, int opts);
+void mtx_destroy(struct mtx *);
+
+int mtx_sleep(void *chan, struct mtx *mtx, int priority, const char *wmesg,
+    int timo);
+
+#endif	/* _KERNEL */
+#include_next <sys/mutex.h>
+#ifdef	_KERNEL
+
+struct mtx {
+	kmutex_type_t	t;	/* presumably set by mtx_init(); confirm */
+	kmutex_t	m;	/* lock the mtx_*() inlines operate on */
+};
+
+static __inline void mtx_lock(struct mtx *mtx)
+{
+	mutex_enter(&mtx->m);	/* enter the wrapped kmutex_t */
+}
+
+static __inline void mtx_unlock(struct mtx *mtx)
+{
+	mutex_exit(&mtx->m);	/* exit the wrapped kmutex_t */
+}
+
+static __inline void mtx_lock_spin(struct mtx *mtx)
+{
+	mutex_enter(&mtx->m);	/* same call as mtx_lock() */
+}
+
+static __inline void mtx_unlock_spin(struct mtx *mtx)
+{
+	mutex_exit(&mtx->m);	/* same call as mtx_unlock() */
+}
+
+static __inline int mtx_owned(struct mtx *mtx)
+{
+	return (mutex_owned(&mtx->m));	/* mutex_owned() result, unmodified */
+}
+
+#define	MA_OWNED	0
+
+/* Only MA_OWNED is checked; any other "what" value is ignored. */
+static __inline void mtx_assert(struct mtx *mtx, int what)
+{
+	if (what != MA_OWNED)
+		return;
+
+	ASSERT(mutex_owned(&mtx->m));
+}
+
+#endif	/* _KERNEL */
+
+#endif	/* _COMPAT_FREEBSD_SYS_MUTEX_H_ */
diff --git a/usr/src/compat/freebsd/sys/param.h b/usr/src/compat/freebsd/sys/param.h
new file mode 100644
index 0000000000..f09e9183f6
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/param.h
@@ -0,0 +1,48 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_PARAM_H_
+#define	_COMPAT_FREEBSD_SYS_PARAM_H_
+
+#ifndef	_KERNEL
+#define	MAXCOMLEN	16
+#endif
+#define	MAXHOSTNAMELEN	256
+
+#ifdef	_KERNEL
+#include <sys/time.h>
+
+#ifndef	FALSE
+#define	FALSE	0
+#endif
+#ifndef	TRUE
+#define	TRUE	1
+#endif
+#endif
+
+#include <machine/param.h>
+
+#define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
+#define	rounddown(x,y)	(((x)/(y))*(y))
+#define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))  /* to any y */
+#define	roundup2(x,y)	(((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
+
+/* Macros for min/max. */
+#define	MIN(a,b) (((a)<(b))?(a):(b))
+#define	MAX(a,b) (((a)>(b))?(a):(b))
+
+#include_next <sys/param.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_PARAM_H_ */
diff --git a/usr/src/compat/freebsd/sys/pcpu.h b/usr/src/compat/freebsd/sys/pcpu.h
new file mode 100644
index 0000000000..f29c9c5018
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/pcpu.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_PCPU_H_
+#define	_COMPAT_FREEBSD_SYS_PCPU_H_
+
+#define	curcpu	(CPU->cpu_id)
+
+#endif	/* _COMPAT_FREEBSD_SYS_PCPU_H_ */
diff --git a/usr/src/compat/freebsd/sys/sched.h b/usr/src/compat/freebsd/sys/sched.h
new file mode 100644
index 0000000000..b426ee757e
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/sched.h
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SCHED_H_
+#define	_COMPAT_FREEBSD_SYS_SCHED_H_
+
+#endif	/* _COMPAT_FREEBSD_SYS_SCHED_H_ */
diff --git a/usr/src/compat/freebsd/sys/select.h b/usr/src/compat/freebsd/sys/select.h
new file mode 100644
index 0000000000..fcb40c23b1
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/select.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SELECT_H_
+#define	_COMPAT_FREEBSD_SYS_SELECT_H_
+
+void *memset(void *s, int c, size_t n);	/* presumably for FD_ZERO() */
+
+#include_next <sys/select.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_SELECT_H_ */
diff --git a/usr/src/compat/freebsd/sys/smp.h b/usr/src/compat/freebsd/sys/smp.h
new file mode 100644
index 0000000000..46183e8677
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/smp.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SMP_H_
+#define	_COMPAT_FREEBSD_SYS_SMP_H_
+
+#include <sys/cpuset.h>
+
+void	smp_rendezvous(void (*)(void *),
+		       void (*)(void *),
+		       void (*)(void *),
+		       void *arg);
+
+void	ipi_cpu(int cpu, u_int ipi);
+
+#endif	/* _COMPAT_FREEBSD_SYS_SMP_H_ */
diff --git a/usr/src/compat/freebsd/sys/sysctl.h b/usr/src/compat/freebsd/sys/sysctl.h
new file mode 100644
index 0000000000..9f6a695e34
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/sysctl.h
@@ -0,0 +1,27 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SYSCTL_H_
+#define	_COMPAT_FREEBSD_SYS_SYSCTL_H_
+
+#define	SYSCTL_DECL(name)
+
+#define	SYSCTL_NODE(parent, nbr, name, access, handler, descr)
+
+#define	SYSCTL_INT(parent, nbr, name, access, ptr, val, descr)
+#define	SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr)
+#define	SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr)
+
+#endif	/* _COMPAT_FREEBSD_SYS_SYSCTL_H_ */
diff --git a/usr/src/compat/freebsd/sys/systm.h b/usr/src/compat/freebsd/sys/systm.h
new file mode 100644
index 0000000000..e25acc0e4a
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/systm.h
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SYSTM_H_
+#define	_COMPAT_FREEBSD_SYS_SYSTM_H_
+
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <sys/callout.h>
+#include <sys/queue.h>
+
+struct mtx;
+
+#define	KASSERT(exp,msg) do {						\
+	if (!(exp))							\
+		panic msg;						\
+} while (0)
+
+#define	CTASSERT(x)	_CTASSERT(x, __LINE__)
+#define	_CTASSERT(x,y)	__CTASSERT(x,y)
+#define	__CTASSERT(x,y)	typedef char __assert ## y[(x) ? 1 : -1]
+
+void	critical_enter(void);
+void	critical_exit(void);
+
+int	msleep_spin(void *chan, struct mtx *mutex, const char *wmesg,
+    int timo);	/* not "ticks": <sys/kernel.h> defines that as a macro */
+void	wakeup(void *chan);
+void	wakeup_one(void *chan);
+
+struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
+void delete_unrhdr(struct unrhdr *uh);
+int alloc_unr(struct unrhdr *uh);
+void free_unr(struct unrhdr *uh, u_int item);
+
+#include <sys/libkern.h>
+
+#include_next <sys/systm.h>
+#include <sys/cmn_err.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_SYSTM_H_ */
diff --git a/usr/src/compat/freebsd/sys/time.h b/usr/src/compat/freebsd/sys/time.h
new file mode 100644
index 0000000000..f8f9da5cdf
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/time.h
@@ -0,0 +1,104 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_TIME_H_
+#define	_COMPAT_FREEBSD_SYS_TIME_H_
+
+#include_next <sys/time.h>
+
+#define	tc_precexp	0
+
+struct bintime {
+	ulong_t		sec;		/* seconds */
+	uint64_t	frac;		/* 64 bit fraction of a second */
+};
+
+#define	BT2FREQ(bt)							\
+	(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) /		\
+	    ((bt)->frac >> 1))
+
+#define	FREQ2BT(freq, bt)						\
+{									\
+	(bt)->sec = 0;							\
+	(bt)->frac = ((uint64_t)0x8000000000000000  / (freq)) << 1;	\
+}
+
+static __inline void
+binuptime(struct bintime *bt)
+{
+	const hrtime_t nsec = gethrtime();
+
+	/* 2^64 / 10^9 truncates to 18446744073: one nanosecond of frac. */
+	bt->frac = (uint64_t)18446744073LL * (nsec % 1000000000);
+	bt->sec = nsec / 1000000000;
+}
+
+#define	bintime_cmp(a, b, cmp)						\
+	(((a)->sec == (b)->sec) ?					\
+	    ((a)->frac cmp (b)->frac) :					\
+	    ((a)->sec cmp (b)->sec))
+
+#define	SBT_1US	(1000)
+
+static __inline void
+bintime_add(struct bintime *bt, const struct bintime *bt2)
+{
+	const uint64_t before = bt->frac;
+
+	bt->frac = before + bt2->frac;
+	/* A sum below the starting fraction means it wrapped: carry. */
+	if (bt->frac < before)
+		bt->sec++;
+	bt->sec += bt2->sec;
+}
+
+static __inline void
+bintime_sub(struct bintime *bt, const struct bintime *bt2)
+{
+	const uint64_t before = bt->frac;
+
+	bt->frac = before - bt2->frac;
+	/* A result above the starting fraction means it wrapped: borrow. */
+	if (bt->frac > before)
+		bt->sec--;
+	bt->sec -= bt2->sec;
+}
+
+static __inline void
+bintime_mul(struct bintime *bt, u_int x)
+{
+	/* frac * x as two 32x32 partial products; lo keeps its carry bits. */
+	const uint64_t lo = (bt->frac & 0xffffffffull) * x;
+	const uint64_t hi = (bt->frac >> 32) * x + (lo >> 32);
+
+	/* Whatever spills past bit 31 of hi is whole seconds. */
+	bt->sec = bt->sec * x + (hi >> 32);
+	bt->frac = (hi << 32) | (lo & 0xffffffffull);
+}
+
+static __inline sbintime_t
+bttosbt(const struct bintime bt)
+{
+	const uint64_t ns = ((bt.frac >> 32) * (uint64_t)1000000000) >> 32;
+	return (bt.sec * 1000000000 + ns);	/* sec scaled by 1e9, plus ns */
+}
+
+static __inline sbintime_t
+sbinuptime(void)
+{
+	return (gethrtime());	/* gethrtime() value returned unscaled */
+}
+
+#endif	/* _COMPAT_FREEBSD_SYS_TIME_H_ */
diff --git a/usr/src/compat/freebsd/sys/types.h b/usr/src/compat/freebsd/sys/types.h
new file mode 100644
index 0000000000..6fc8179f2e
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/types.h
@@ -0,0 +1,74 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_TYPES_H_
+#define	_COMPAT_FREEBSD_SYS_TYPES_H_
+
+#include <sys/_types.h>
+
+typedef __uint8_t	u_int8_t;	/* unsigned integrals (deprecated) */
+typedef __uint16_t	u_int16_t;
+typedef __uint32_t	u_int32_t;
+typedef __uint64_t	u_int64_t;
+
+#ifndef	__REGISTER_T_DEFINED
+#define	__REGISTER_T_DEFINED
+typedef __register_t	register_t;
+#endif
+
+#ifndef	__SBINTIME_T_DEFINED
+#define	__SBINTIME_T_DEFINED
+typedef __int64_t	sbintime_t;
+#endif
+
+#ifndef	__VM_MEMATTR_T_DEFINED
+#define	__VM_MEMATTR_T_DEFINED
+typedef char	vm_memattr_t;
+#endif
+
+#ifndef	__VM_OFFSET_T_DEFINED
+#define	__VM_OFFSET_T_DEFINED
+typedef __vm_offset_t	vm_offset_t;
+#endif
+
+#ifndef	__VM_OOFFSET_T_DEFINED
+#define	__VM_OOFFSET_T_DEFINED
+typedef __vm_ooffset_t	vm_ooffset_t;
+#endif
+
+#ifndef	__VM_PADDR_T_DEFINED
+#define	__VM_PADDR_T_DEFINED
+typedef __vm_paddr_t	vm_paddr_t;
+#endif
+
+#ifndef	__VM_MEMATTR_T_DEFINED	/* already defined earlier in this file */
+#define	__VM_MEMATTR_T_DEFINED
+typedef char		vm_memattr_t;	/* NOTE(review): dead duplicate */
+#endif
+
+#ifndef	__bool_true_false_are_defined
+#define	__bool_true_false_are_defined	1
+#define	false	0
+#define	true	1
+typedef _Bool bool;
+#endif
+
+#if defined(_KERNEL) && !defined(offsetof)
+#define	offsetof(s, m)	((size_t)(&(((s *)0)->m)))
+#endif
+
+#include_next <sys/types.h>
+
+#endif	/* _COMPAT_FREEBSD_SYS_TYPES_H_ */
diff --git a/usr/src/compat/freebsd/sys/uio.h b/usr/src/compat/freebsd/sys/uio.h
new file mode 100644
index 0000000000..05c6f2a028
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/uio.h
@@ -0,0 +1,26 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_UIO_H_
+#define	_COMPAT_FREEBSD_SYS_UIO_H_
+
+#include_next <sys/uio.h>
+
+#ifndef	_KERNEL
+ssize_t preadv(int, const struct iovec *, int, off_t);
+ssize_t pwritev(int, const struct iovec *, int, off_t);
+#endif
+
+#endif	/* _COMPAT_FREEBSD_SYS_UIO_H_ */
diff --git a/usr/src/compat/freebsd/termios.h b/usr/src/compat/freebsd/termios.h
new file mode 100644
index 0000000000..feaa705358
--- /dev/null
+++ b/usr/src/compat/freebsd/termios.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_TERMIOS_H_
+#define	_COMPAT_FREEBSD_TERMIOS_H_
+
+#include_next <termios.h>
+
+void	cfmakeraw(struct termios *);
+
+#endif	/* _COMPAT_FREEBSD_TERMIOS_H_ */
diff --git a/usr/src/compat/freebsd/uuid.h b/usr/src/compat/freebsd/uuid.h
new file mode 100644
index 0000000000..72ef2c7787
--- /dev/null
+++ b/usr/src/compat/freebsd/uuid.h
@@ -0,0 +1,55 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_UUID_H_
+#define	_COMPAT_FREEBSD_UUID_H_
+
+#include <sys/endian.h>
+#include <uuid/uuid.h>
+
+/* Status codes returned by the functions. */
+#define	uuid_s_ok			0
+#define	uuid_s_bad_version		1
+#define	uuid_s_invalid_string_uuid	2
+
+/*
+ * FreeBSD-style wrapper around uuid_parse(): parse the textual UUID 'str'
+ * into '*uuidp' and report the outcome through '*status' (uuid_s_ok when
+ * uuid_parse() returns 0, uuid_s_invalid_string_uuid otherwise).
+ */
+static __inline void
+uuid_from_string(char *str, uuid_t *uuidp, uint32_t *status)
+{
+	*status = (uuid_parse(str, *uuidp) != 0) ?
+	    uuid_s_invalid_string_uuid : uuid_s_ok;
+}
+
+/*
+ * Serialize '*uuidp' into the 16 bytes at 'buf': three multi-byte fields
+ * (4 + 2 + 2 bytes), two clock-sequence bytes and the 6-byte node address.
+ *
+ * NOTE(review): despite the "_le" name the multi-byte fields are written
+ * with the big-endian encoders after viewing the uuid_t as a struct uuid.
+ * Presumably this compensates for how uuid_t is stored on this platform --
+ * confirm before "fixing" it.
+ */
+static __inline void
+uuid_enc_le(void *buf, uuid_t *uuidp)
+{
+	struct uuid	*u = (struct uuid *)uuidp;
+	uchar_t		*out = buf;
+	int		idx;
+
+	be32enc(out, u->time_low);
+	be16enc(out + 4, u->time_mid);
+	be16enc(out + 6, u->time_hi_and_version);
+	out[8] = u->clock_seq_hi_and_reserved;
+	out[9] = u->clock_seq_low;
+
+	for (idx = 0; idx < 6; idx++)
+		out[10 + idx] = u->node_addr[idx];
+}
+
+#endif	/* _COMPAT_FREEBSD_UUID_H_ */
diff --git a/usr/src/compat/freebsd/vm/pmap.h b/usr/src/compat/freebsd/vm/pmap.h
new file mode 100644
index 0000000000..5958c4b101
--- /dev/null
+++ b/usr/src/compat/freebsd/vm/pmap.h
@@ -0,0 +1,21 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_VM_PMAP_H_
+#define	_COMPAT_FREEBSD_VM_PMAP_H_
+
+#include <machine/pmap.h>
+
+#endif	/* _COMPAT_FREEBSD_VM_PMAP_H_ */
diff --git a/usr/src/compat/freebsd/vm/vm.h b/usr/src/compat/freebsd/vm/vm.h
new file mode 100644
index 0000000000..7da22099b6
--- /dev/null
+++ b/usr/src/compat/freebsd/vm/vm.h
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _FREEBSD_VM_VM_H_
+#define	_FREEBSD_VM_VM_H_
+
+#include <machine/vm.h>
+
+/* Protection is expressed as a bitmask held in an unsigned char. */
+typedef u_char vm_prot_t;
+
+/* Individual protection bits. */
+#define	VM_PROT_NONE		((vm_prot_t) 0x00)
+#define	VM_PROT_READ		((vm_prot_t) 0x01)
+#define	VM_PROT_WRITE		((vm_prot_t) 0x02)
+#define	VM_PROT_EXECUTE		((vm_prot_t) 0x04)
+
+/* Convenience combinations of the bits above. */
+#define	VM_PROT_ALL		(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+#define	VM_PROT_RW		(VM_PROT_READ|VM_PROT_WRITE)
+
+/*
+ * <sys/promif.h> contains a troublesome preprocessor define for BYTE.
+ * Do this ugly workaround to avoid it.
+ */
+#define	_SYS_PROMIF_H
+#include <vm/hat_i86.h>
+#undef	_SYS_PROMIF_H
+
+#endif	/* _FREEBSD_VM_VM_H_ */
diff --git a/usr/src/compat/freebsd/x86/_types.h b/usr/src/compat/freebsd/x86/_types.h
new file mode 100644
index 0000000000..a07fc017ad
--- /dev/null
+++ b/usr/src/compat/freebsd/x86/_types.h
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _FREEBSD_X86__TYPES_H_
+#define	_FREEBSD_X86__TYPES_H_
+
+/*
+ * Basic types upon which most other types are built.
+ */
+typedef signed char		__int8_t;
+typedef unsigned char		__uint8_t;
+typedef short			__int16_t;
+typedef unsigned short		__uint16_t;
+typedef int			__int32_t;
+typedef unsigned int		__uint32_t;
+#ifdef	_LP64
+typedef long			__int64_t;
+typedef unsigned long		__uint64_t;
+#else
+typedef long long		__int64_t;
+typedef unsigned long long	__uint64_t;
+#endif
+
+/*
+ * Standard type definitions.
+ *
+ * NOTE(review): the non-LP64 branch defines only __register_t and
+ * __vm_paddr_t; __vm_offset_t and __vm_ooffset_t are provided for LP64
+ * only.  Confirm that no 32-bit consumer of this header needs them.
+ */
+#ifdef	_LP64
+typedef __int64_t	__register_t;
+typedef __uint64_t	__vm_offset_t;
+typedef __uint64_t	__vm_paddr_t;
+typedef __int64_t	__vm_ooffset_t;
+#else
+typedef __int32_t	__register_t;
+typedef __uint32_t	__vm_paddr_t;
+#endif
+
+#endif	/* _FREEBSD_X86__TYPES_H_ */
diff --git a/usr/src/compat/freebsd/x86/segments.h b/usr/src/compat/freebsd/x86/segments.h
new file mode 100644
index 0000000000..bc6ba976b8
--- /dev/null
+++ b/usr/src/compat/freebsd/x86/segments.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_X86_SEGMENTS_H_
+#define	_COMPAT_FREEBSD_X86_SEGMENTS_H_
+
+/*
+ * Entries in the Interrupt Descriptor Table (IDT)
+ */
+#define	IDT_BP		3	/* #BP: Breakpoint */
+#define	IDT_UD		6	/* #UD: Undefined/Invalid Opcode */
+#define	IDT_SS		12	/* #SS: Stack Segment Fault */
+#define	IDT_GP		13	/* #GP: General Protection Fault */
+#define	IDT_AC		17	/* #AC: Alignment Check */
+
+#endif	/* _COMPAT_FREEBSD_X86_SEGMENTS_H_ */
diff --git a/usr/src/head/bhyve.h b/usr/src/head/bhyve.h
new file mode 100644
index 0000000000..8c79ca1ccc
--- /dev/null
+++ b/usr/src/head/bhyve.h
@@ -0,0 +1,25 @@
+/*
+ * COPYRIGHT 2013 Pluribus Networks Inc.
+ *
+ * All rights reserved. This copyright notice is Copyright Management
+ * Information under 17 USC 1202 and is included to protect this work and
+ * deter copyright infringement.  Removal or alteration of this Copyright
+ * Management Information without the express written permission from
+ * Pluribus Networks Inc is prohibited, and any such unauthorized removal
+ * or alteration will be a violation of federal law.
+ */
+#ifndef	_BHYVE_H
+#define	_BHYVE_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* Directory holding bhyve runtime files. */
+#define	BHYVE_TMPDIR			"/var/run/bhyve"
+/*
+ * printf-style template for the console socket path under BHYVE_TMPDIR.
+ * NOTE(review): the single %s is presumably the VM name -- confirm against
+ * the consumers of this macro.
+ */
+#define	BHYVE_CONS_SOCKPATH		BHYVE_TMPDIR "/%s.console_sock"
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _BHYVE_H */
diff --git a/usr/src/lib/libvmmapi/Makefile b/usr/src/lib/libvmmapi/Makefile
new file mode 100644
index 0000000000..60621fcb75
--- /dev/null
+++ b/usr/src/lib/libvmmapi/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+include		../Makefile.lib
+
+HDRS =		vmmapi.h
+
+HDRDIR =	common
+
+# Only the 64-bit subdirectory is added; no 32-bit build is listed here.
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# Each top-level target hands its own name to the sub-makes via $(TARGET).
+all:=		TARGET= all
+install:=	TARGET= install
+clean:=		TARGET= clean
+clobber:=	TARGET= clobber
+lint:=		TARGET= lint
+_msg:=		TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint: $(SUBDIRS)
+
+# install rule for install_h target
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+_msg: $(MSGSUBDIRS)
+
+# Descend into each subdirectory and run $(TARGET) there.  CW_NO_SHADOW and
+# an empty __GNUC are forced on the sub-make command line.
+# NOTE(review): presumably needed to build the imported FreeBSD sources with
+# the compiler wrapper -- confirm before removing.
+$(SUBDIRS): FRC
+	cd $@; pwd; $(MAKE) CW_NO_SHADOW=true __GNUC= $(TARGET)
+
+# Empty target used as a prerequisite of the subdirectory rule above.
+FRC:
+
+include ../Makefile.targ
+include ../../Makefile.msg.targ
diff --git a/usr/src/lib/libvmmapi/Makefile.com b/usr/src/lib/libvmmapi/Makefile.com
new file mode 100644
index 0000000000..e41a82f9a2
--- /dev/null
+++ b/usr/src/lib/libvmmapi/Makefile.com
@@ -0,0 +1,53 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+LIBRARY		= libvmmapi.a
+VERS		= .1
+
+# vmmapi.o is built from ../common, expand_number.o from the contrib
+# FreeBSD libutil sources (see the two pics/%.o pattern rules below).
+OBJECTS		= vmmapi.o expand_number.o
+
+# include library definitions
+include ../../Makefile.lib
+
+# install this library in the root filesystem
+include ../../Makefile.rootfs
+
+SRCDIR		=	../common
+
+LIBS		=	$(DYNLIB) $(LINTLIB)
+
+# CPPFLAGS is replaced, not appended to: the FreeBSD compat and contrib
+# include directories are listed ahead of $(CPPFLAGS.master).
+CPPFLAGS	=	-I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
+	$(CPPFLAGS.master) -I$(SRC)/uts/i86pc
+
+$(LINTLIB) :=	SRCS = $(SRCDIR)/$(LINTSRC)
+
+LDLIBS		+=	-lc
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+# Objects whose source lives in the contrib FreeBSD libutil directory.
+pics/%.o: $(CONTRIB)/freebsd/lib/libutil/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+# Objects whose source lives in this library's common directory.
+pics/%.o: ../common/%.c
+	$(COMPILE.c) -o $@ $<
+	$(POST_PROCESS_O)
+
+# include library targets
+include ../../Makefile.targ
diff --git a/usr/src/lib/libvmmapi/amd64/Makefile b/usr/src/lib/libvmmapi/amd64/Makefile
new file mode 100644
index 0000000000..b5cac1ffce
--- /dev/null
+++ b/usr/src/lib/libvmmapi/amd64/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+# Common library rules first, then the 64-bit library definitions.
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+# Add the amd64-specific FreeBSD compat and contrib header directories.
+CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libvmmapi/common/llib-lvmmapi b/usr/src/lib/libvmmapi/common/llib-lvmmapi
new file mode 100644
index 0000000000..221ed3a23e
--- /dev/null
+++ b/usr/src/lib/libvmmapi/common/llib-lvmmapi
@@ -0,0 +1,2 @@
+/* LINTLIBRARY */
+/* PROTOLIB1 */
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
new file mode 100644
index 0000000000..7a8443a2b8
--- /dev/null
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING:  STOP NOW.  DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+#	usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.0 {
+    global:
+	vcpu_reset;
+	vm_activate_cpu;
+	vm_apicid2vcpu;
+	vm_capability_name2type;
+	vm_capability_type2name;
+	vm_copy_setup;
+	vm_copy_teardown;
+	vm_copyin;
+	vm_copyout;
+	vm_create;
+	vm_destroy;
+	vm_get_capability;
+	vm_get_desc;
+	vm_get_highmem_size;
+	vm_get_lowmem_limit;
+	vm_get_lowmem_size;
+	vm_get_memory_seg;
+	vm_get_register;
+	vm_get_seg_desc;
+	vm_get_x2apic_state;
+	vm_gla2gpa;
+	vm_inject_exception;
+	vm_isa_assert_irq;
+	vm_isa_deassert_irq;
+	vm_isa_pulse_irq;
+	vm_isa_set_irq_trigger;
+	vm_ioapic_assert_irq;
+	vm_ioapic_deassert_irq;
+	vm_ioapic_pincount;
+	vm_ioapic_pulse_irq;
+	vm_lapic_irq;
+	vm_lapic_msi;
+	vm_map_gpa;
+	vm_open;
+	vm_parse_memsize;
+	vm_restart_instruction;
+	vm_run;
+	vm_set_capability;
+	vm_set_desc;
+	vm_set_register;
+	vm_set_x2apic_state;
+	vm_setup_memory;
+	vm_setup_rom;
+    local:
+        *;
+};
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
new file mode 100644
index 0000000000..bbab3961a9
--- /dev/null
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -0,0 +1,1257 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/lib/libvmmapi/vmmapi.c 280929 2015-04-01 00:15:31Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/lib/libvmmapi/vmmapi.c 280929 2015-04-01 00:15:31Z tychon $");
+
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/_iovec.h>
+#include <sys/cpuset.h>
+
+#include <machine/specialreg.h>
+
+#ifndef	__FreeBSD__
+#include <errno.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <libutil.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#ifndef	__FreeBSD__
+#include <sys/vmm_impl.h>
+#endif
+
+#include "vmmapi.h"
+
+#define	KB	(1024UL)
+#define	MB	(1024 * 1024UL)
+#define	GB	(1024 * 1024 * 1024UL)
+
+/*
+ * Per-VM handle returned by vm_open().  The structure and the VM name are
+ * allocated as one chunk; 'name' points at the bytes following the struct.
+ */
+struct vmctx {
+	int	fd;		/* from vm_device_open(); -1 until opened */
+	uint32_t lowmem_limit;	/* max bytes placed in the low segment */
+	enum vm_mmap_style vms;	/* style passed to vm_setup_memory() */
+	char	*lowermem_addr;	/* mapping of guest [0, 640KB) */
+	char	*biosmem_addr;	/* mapping of guest [768KB, 1MB) */
+	size_t	lowmem;		/* bytes of guest memory in the low part */
+	char	*lowmem_addr;	/* mapping of guest memory from 1MB up */
+	size_t	highmem;	/* bytes of guest memory placed at 4GB */
+	char	*highmem_addr;	/* mapping of guest memory from 4GB up */
+	uint64_t rombase;	/* guest physical start of the ROM */
+	uint64_t romlimit;	/* rombase + ROM length */
+	char	*rom_addr;	/* mapping of the ROM segment */
+	char	*name;		/* VM name, stored right after the struct */
+};
+
+/*
+ * CREATE(name)/DESTROY(name): create or destroy the VM called 'name'.
+ * On FreeBSD this is a write to the hw.vmm.create / hw.vmm.destroy sysctl;
+ * otherwise it goes through vmm_vm_create() / vmm_vm_destroy().
+ */
+#ifdef	__FreeBSD__
+#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
+#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
+#else
+#define	CREATE(x)	vmm_vm_create(x)
+#define	DESTROY(x)	vmm_vm_destroy(x)
+#endif
+
+/*
+ * Open the device node of the VM called 'name' for reading and writing.
+ * The node path is built in a temporary heap buffer that is released before
+ * returning.  Returns the descriptor from open(2), i.e. -1 on failure.
+ */
+static int
+vm_device_open(const char *name)
+{
+	char *path;
+	int pathlen, devfd;
+
+	/* size of the node prefix, the name and the terminating NUL */
+#ifdef	__FreeBSD__
+	pathlen = strlen("/dev/vmm/") + strlen(name) + 1;
+#else
+	pathlen = strlen("/devices/pseudo/vmm@0:") + strlen(name) + 1;
+#endif
+
+	path = malloc(pathlen);
+	assert(path != NULL);
+
+#ifdef	__FreeBSD__
+	snprintf(path, pathlen, "/dev/vmm/%s", name);
+#else
+	snprintf(path, pathlen, "/devices/pseudo/vmm@0:%s", name);
+#endif
+
+	devfd = open(path, O_RDWR, 0);
+	free(path);
+
+	return (devfd);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Ask the vmm control node to create a VM called 'name'.
+ *
+ * Returns 0 on success, otherwise the errno value of the failing open(2)
+ * or ioctl(2).  A diagnostic is written to stderr on failure.
+ */
+static int
+vmm_vm_create(const char *name)
+{
+	const char vmm_ctl[] = "/devices/pseudo/vmm@0:ctl";
+	struct vmm_ioctl vi;
+	int err = 0;
+	int ctl_fd;
+
+	/* Do not hand uninitialized stack bytes to the driver. */
+	bzero(&vi, sizeof (vi));
+	/*
+	 * strlcpy() already reserves room for the terminating NUL, so it is
+	 * given the full buffer size; subtracting one would drop the last
+	 * usable character of a maximum-length name.
+	 */
+	(void) strlcpy(vi.vmm_name, name, sizeof (vi.vmm_name));
+
+	ctl_fd = open(vmm_ctl, O_EXCL | O_RDWR);
+	if (ctl_fd == -1) {
+		/* Use the saved value: fprintf() may change errno. */
+		err = errno;
+		if ((err == EPERM) || (err == EACCES)) {
+			fprintf(stderr, "you do not have permission to "
+				"perform that operation.\n");
+		} else {
+			fprintf(stderr, "open: %s: %s\n", vmm_ctl,
+				strerror(err));
+		}
+		return (err);
+	}
+	if (ioctl(ctl_fd, VMM_CREATE_VM, &vi) == -1) {
+		err = errno;
+		fprintf(stderr, "couldn't create vm \"%s\"\n", name);
+	}
+	close(ctl_fd);
+
+	return (err);
+}
+#endif
+
+/*
+ * Create a VM called 'name' through the platform's CREATE() mechanism and
+ * return its result.  The cast only drops the const qualifier for CREATE().
+ */
+int
+vm_create(const char *name)
+{
+
+	return (CREATE((char *)name));
+}
+
+/*
+ * Allocate a context for the VM called 'name' and open its device node.
+ *
+ * Returns the new context, or NULL when the device cannot be opened.  On
+ * that failure path the context is released through vm_destroy(), which
+ * also issues DESTROY() for 'name'.
+ */
+struct vmctx *
+vm_open(const char *name)
+{
+	struct vmctx *vm;
+
+	/*
+	 * Zero-fill the context: fields such as rombase/romlimit, lowmem,
+	 * highmem and the mapping addresses are read by vm_map_gpa() and the
+	 * size getters, and must not hold heap garbage before
+	 * vm_setup_memory() / vm_setup_rom() have run.
+	 */
+	vm = calloc(1, sizeof(struct vmctx) + strlen(name) + 1);
+	assert(vm != NULL);
+
+	vm->fd = -1;
+	vm->lowmem_limit = 3 * GB;
+	/* The name is stored in the same allocation, right after the struct. */
+	vm->name = (char *)(vm + 1);
+	strcpy(vm->name, name);
+
+	if ((vm->fd = vm_device_open(vm->name)) < 0)
+		goto err;
+
+	return (vm);
+err:
+	(void) vm_destroy(vm);
+	return (NULL);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Ask the vmm control node to destroy the VM called 'name'.
+ *
+ * Returns 0 on success, otherwise the errno value of the failing open(2)
+ * or ioctl(2).  A diagnostic is written to stderr on failure.
+ */
+static int
+vmm_vm_destroy(const char *name)
+{
+	const char vmm_ctl[] = "/devices/pseudo/vmm@0:ctl";
+	struct vmm_ioctl vi;
+	int ctl_fd;
+	int err = 0;
+
+	/* Do not hand uninitialized stack bytes to the driver. */
+	bzero(&vi, sizeof (vi));
+	/*
+	 * strlcpy() already reserves room for the terminating NUL, so it is
+	 * given the full buffer size; subtracting one would drop the last
+	 * usable character of a maximum-length name.
+	 */
+	(void) strlcpy(vi.vmm_name, name, sizeof (vi.vmm_name));
+
+	ctl_fd = open(vmm_ctl, O_EXCL | O_RDWR);
+	if (ctl_fd == -1) {
+		/* Use the saved value: fprintf() may change errno. */
+		err = errno;
+		if ((err == EPERM) || (err == EACCES)) {
+			fprintf(stderr, "you do not have permission to "
+				"perform that operation.\n");
+		} else {
+			fprintf(stderr, "open: %s: %s\n", vmm_ctl,
+				strerror(err));
+		}
+		return (err);
+	}
+	if (ioctl(ctl_fd, VMM_DESTROY_VM, &vi) == -1) {
+		err = errno;
+		fprintf(stderr, "couldn't destroy vm \"%s\"\n", name);
+	}
+	close(ctl_fd);
+	return (err);
+}
+#endif
+
+/*
+ * Tear down a context: close the device descriptor if one is open, issue
+ * DESTROY() for the VM name and free the context.  Returns the DESTROY()
+ * result.  'vm' must not be NULL.
+ */
+int
+vm_destroy(struct vmctx *vm)
+{
+	int rv;
+
+	assert(vm != NULL);
+
+	if (!(vm->fd < 0))
+		close(vm->fd);
+
+	/* the name lives inside 'vm', so destroy before freeing */
+	rv = DESTROY(vm->name);
+	free(vm);
+
+	return (rv);
+}
+
+/*
+ * Parse a memory size argument into '*ret_memsize'.
+ *
+ * A non-empty string that strtoul() (base 0) consumes completely is taken
+ * as a plain number; anything else is handed to expand_number().  Returns 0
+ * for the plain-number case, otherwise whatever expand_number() returns.
+ */
+int
+vm_parse_memsize(const char *optarg, size_t *ret_memsize)
+{
+	char *tail;
+	size_t value;
+
+	value = strtoul(optarg, &tail, 0);
+
+	/* empty input or trailing characters: let expand_number() decide */
+	if (*optarg == '\0' || *tail != '\0')
+		return (expand_number(optarg, ret_memsize));
+
+	/*
+	 * For the sake of backward compatibility a value smaller than one
+	 * megabyte is interpreted as a count of megabytes.
+	 */
+	if (value < MB)
+		value *= MB;
+	*ret_memsize = value;
+
+	return (0);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * Read the hw.vmm.mem_total sysctl.  Returns its value, or -1 converted to
+ * size_t when the sysctl call fails.
+ */
+size_t
+vmm_get_mem_total(void)
+{
+	size_t total = 0;
+	size_t len = sizeof (total);
+
+	if (sysctlbyname("hw.vmm.mem_total", &total, &len, NULL, 0) != 0)
+		return (-1);
+
+	return (total);
+}
+
+/*
+ * Read the hw.vmm.mem_free sysctl.  Returns its value, or -1 converted to
+ * size_t when the sysctl call fails.
+ */
+size_t
+vmm_get_mem_free(void)
+{
+	size_t avail = 0;
+	size_t len = sizeof (avail);
+
+	if (sysctlbyname("hw.vmm.mem_free", &avail, &len, NULL, 0) != 0)
+		return (-1);
+
+	return (avail);
+}
+#endif
+
+/*
+ * Query the memory segment at guest physical address 'gpa'.
+ *
+ * '*ret_len' receives the segment length and, when 'wired' is not NULL,
+ * '*wired' receives the wired flag.  Both are stored whether or not the
+ * ioctl succeeded.  Returns the ioctl result.
+ */
+int
+vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+    int *wired)
+{
+	struct vm_memory_segment query;
+	int rv;
+
+	(void) memset(&query, 0, sizeof (query));
+	query.gpa = gpa;
+
+	rv = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &query);
+
+	*ret_len = query.len;
+	if (wired != NULL)
+		*wired = query.wired;
+
+	return (rv);
+}
+
+/* Return the current upper bound, in bytes, of the low memory segment. */
+uint32_t
+vm_get_lowmem_limit(struct vmctx *ctx)
+{
+
+	return (ctx->lowmem_limit);
+}
+
+/*
+ * Set the upper bound, in bytes, of the low memory segment.  The value is
+ * consulted by vm_setup_memory() when it splits the guest memory.
+ */
+void
+vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
+{
+
+	ctx->lowmem_limit = limit;
+}
+
+/*
+ * Create 'len' bytes of guest memory at guest physical address 'gpa' and,
+ * when 'addr' is not NULL, map the segment into this process.
+ *
+ * On success '*addr' (if requested) holds the mapping address and 0 is
+ * returned.  A failing ioctl returns its result; a failing mmap(2) returns
+ * -1 with errno set by mmap and leaves '*addr' untouched.
+ */
+static int
+setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
+{
+	int error;
+	void *ptr;
+	struct vm_memory_segment seg;
+
+	/*
+	 * Create and optionally map 'len' bytes of memory at guest
+	 * physical address 'gpa'
+	 */
+	bzero(&seg, sizeof(seg));
+	seg.gpa = gpa;
+	seg.len = len;
+	error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
+	if (error == 0 && addr != NULL) {
+		ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
+				ctx->fd, gpa);
+		/* never publish MAP_FAILED as if it were a usable address */
+		if (ptr == MAP_FAILED)
+			return (-1);
+		*addr = ptr;
+	}
+	return (error);
+}
+
+/*
+ * Split 'memsize' bytes of guest memory into a low part (at most
+ * ctx->lowmem_limit bytes) and a high part placed at 4GB, and create the
+ * backing segments.  With VM_MMAP_ALL every segment is also mapped and its
+ * address recorded in 'ctx'; with VM_MMAP_NONE nothing is mapped.
+ *
+ * Returns 0 on success, or the first non-zero result reported by
+ * setup_memory_segment().
+ */
+int
+vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+{
+	char **addr;
+	int error;
+
+	/* XXX VM_MMAP_SPARSE not implemented yet */
+	assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
+	ctx->vms = vms;
+
+	/*
+	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
+	 * create another 'highmem' segment above 4GB for the remainder.
+	 */
+	if (memsize > ctx->lowmem_limit) {
+		ctx->lowmem = ctx->lowmem_limit;
+		ctx->highmem = memsize - ctx->lowmem;
+	} else {
+		ctx->lowmem = memsize;
+		ctx->highmem = 0;
+	}
+
+	/*
+	 * The low part is created as three segments: [0, 640KB),
+	 * [768KB, 1MB) and [1MB, lowmem).  The range 640KB-768KB gets no
+	 * segment here.
+	 *
+	 * NOTE(review): 'ctx->lowmem - 1*MB' assumes lowmem >= 1MB; a
+	 * non-zero memsize below 1MB would wrap -- confirm callers never
+	 * pass one.
+	 */
+	if (ctx->lowmem > 0) {
+		addr = (vms == VM_MMAP_ALL) ? &ctx->lowermem_addr : NULL;
+		error = setup_memory_segment(ctx, 0, 640*KB, addr);
+		if (error)
+			return (error);
+
+		addr = (vms == VM_MMAP_ALL) ? &ctx->biosmem_addr : NULL;
+		error = setup_memory_segment(ctx, 768*KB, 256*KB, addr);
+		if (error)
+			return (error);
+
+		addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
+		error = setup_memory_segment(ctx, 1*MB, ctx->lowmem - 1*MB, addr);
+		if (error)
+			return (error);
+	}
+
+	/* Whatever did not fit below the limit is placed at 4GB. */
+	if (ctx->highmem > 0) {
+		addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
+		error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Record the ROM range [gpa, gpa + len) in 'ctx', then create and map the
+ * segment through setup_memory_segment(), storing the mapping address in
+ * ctx->rom_addr.  Returns the setup_memory_segment() result; the range is
+ * recorded even when that call fails.
+ */
+int
+vm_setup_rom(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
+{
+	ctx->rombase = gpa;
+	ctx->romlimit = gpa + len;
+
+	return (setup_memory_segment(ctx, gpa, len, &ctx->rom_addr));
+}
+
+/*
+ * Translate the guest physical range [gaddr, gaddr + len) into a pointer
+ * inside this process' mappings of guest memory.
+ *
+ * The range must lie entirely within one of the mapped regions:
+ * [0, 640KB), [768KB, 1MB), [1MB, lowmem), the ROM range, or
+ * [4GB, 4GB + highmem).  Returns NULL when it does not.  Only valid for a
+ * context set up with VM_MMAP_ALL.
+ */
+void *
+vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
+{
+
+	/* XXX VM_MMAP_SPARSE not implemented yet */
+	assert(ctx->vms == VM_MMAP_ALL);
+
+	/* Ranges that end at or below 1MB. */
+	if (gaddr + len <= 1*MB) {
+		if (gaddr + len <= 640*KB)
+			return ((void *)(ctx->lowermem_addr + gaddr));
+
+		if (768*KB <= gaddr && gaddr + len <= 1*MB) {
+			gaddr -= 768*KB;
+			return ((void *)(ctx->biosmem_addr + gaddr));
+		}
+
+		return (NULL);
+	}
+
+	/*
+	 * The lowmem mapping starts at guest address 1MB.  A range that
+	 * begins below 1MB but ends above it straddles separate mappings
+	 * and must be rejected; without the lower-bound check the
+	 * subtraction below would wrap and yield a pointer in front of the
+	 * mapping.
+	 */
+	if (gaddr >= 1*MB && gaddr < ctx->lowmem &&
+	    gaddr + len <= ctx->lowmem) {
+		gaddr -= 1*MB;
+		return ((void *)(ctx->lowmem_addr + gaddr));
+	}
+
+	if (ctx->rombase <= gaddr && gaddr + len <= ctx->romlimit) {
+		gaddr -= ctx->rombase;
+		return ((void *)(ctx->rom_addr + gaddr));
+	}
+
+	/* The highmem mapping starts at guest address 4GB. */
+	if (gaddr >= 4*GB) {
+		gaddr -= 4*GB;
+		if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
+			return ((void *)(ctx->highmem_addr + gaddr));
+	}
+
+	return (NULL);
+}
+
+/* Return the size in bytes of the low memory part recorded in 'ctx'. */
+size_t
+vm_get_lowmem_size(struct vmctx *ctx)
+{
+
+	return (ctx->lowmem);
+}
+
+/* Return the size in bytes of the high memory part recorded in 'ctx'. */
+size_t
+vm_get_highmem_size(struct vmctx *ctx)
+{
+
+	return (ctx->highmem);
+}
+
+/*
+ * Set the segment descriptor 'reg' of 'vcpu' to the given base, limit and
+ * access values.  Returns the ioctl result.
+ */
+int
+vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
+    uint64_t base, uint32_t limit, uint32_t access)
+{
+	struct vm_seg_desc req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.regnum = reg;
+	req.desc.base = base;
+	req.desc.limit = limit;
+	req.desc.access = access;
+
+	return (ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &req));
+}
+
+/*
+ * Fetch the segment descriptor 'reg' of 'vcpu'.  The three output
+ * parameters are written only when the ioctl succeeds.  Returns the ioctl
+ * result.
+ */
+int
+vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
+    uint64_t *base, uint32_t *limit, uint32_t *access)
+{
+	struct vm_seg_desc req;
+	int rv;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.regnum = reg;
+
+	rv = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &req);
+	if (rv != 0)
+		return (rv);
+
+	*base = req.desc.base;
+	*limit = req.desc.limit;
+	*access = req.desc.access;
+
+	return (rv);
+}
+
+/*
+ * Same as vm_get_desc(), but the result is stored into the fields of
+ * '*seg_desc'.
+ */
+int
+vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
+{
+
+	return (vm_get_desc(ctx, vcpu, reg, &seg_desc->base,
+	    &seg_desc->limit, &seg_desc->access));
+}
+
+/* Write 'val' to register 'reg' of 'vcpu'.  Returns the ioctl result. */
+int
+vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
+{
+	struct vm_register req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.regnum = reg;
+	req.regval = val;
+
+	return (ioctl(ctx->fd, VM_SET_REGISTER, &req));
+}
+
+/*
+ * Read register 'reg' of 'vcpu' into '*ret_val'.  The output is stored
+ * whether or not the ioctl succeeded.  Returns the ioctl result.
+ */
+int
+vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
+{
+	struct vm_register req;
+	int rv;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.regnum = reg;
+
+	rv = ioctl(ctx->fd, VM_GET_REGISTER, &req);
+	*ret_val = req.regval;
+
+	return (rv);
+}
+
+/*
+ * Run 'vcpu' through the VM_RUN ioctl and copy the exit information into
+ * '*vmexit'.  The copy happens whether or not the ioctl succeeded.
+ * Returns the ioctl result.
+ */
+int
+vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
+{
+	struct vm_run req;
+	int rv;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+
+	rv = ioctl(ctx->fd, VM_RUN, &req);
+	bcopy(&req.vm_exit, vmexit, sizeof (struct vm_exit));
+
+	return (rv);
+}
+
+/*
+ * Inject exception 'vector' into 'vcpu' with the given error code
+ * settings.  Fields not assigned here stay zero.  Returns the ioctl result.
+ */
+static int
+vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
+    int error_code, int error_code_valid)
+{
+	struct vm_exception req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.vector = vector;
+	req.error_code = error_code;
+	req.error_code_valid = error_code_valid;
+
+	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &req));
+}
+
+/*
+ * Inject exception 'vector' into 'vcpu'.
+ *
+ * 'errcode' is delivered only when 'errcode_valid' is set (as indicated by
+ * the field names); 'restart_instruction' is passed through to the driver
+ * unchanged.  Returns the ioctl result.
+ */
+int
+vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
+    uint32_t errcode, int restart_instruction)
+{
+	struct vm_exception exc;
+
+	/*
+	 * Zero the request like every other ioctl wrapper in this file, so
+	 * that no field or padding byte left unassigned below reaches the
+	 * driver as stack garbage.
+	 */
+	bzero(&exc, sizeof(exc));
+	exc.cpuid = vcpu;
+	exc.vector = vector;
+	exc.error_code = errcode;
+	exc.error_code_valid = errcode_valid;
+	exc.restart_instruction = restart_instruction;
+
+	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
+}
+
+/* Map an APIC id to a vcpu number; 'ctx' is not consulted. */
+int
+vm_apicid2vcpu(struct vmctx *ctx, int apicid)
+{
+	/*
+	 * The apic id associated with the 'vcpu' has the same numerical value
+	 * as the 'vcpu' itself.
+	 */
+	return (apicid);
+}
+
+/* Issue VM_LAPIC_IRQ for 'vector' on 'vcpu'.  Returns the ioctl result. */
+int
+vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
+{
+	struct vm_lapic_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.vector = vector;
+
+	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &req));
+}
+
+/*
+ * Issue VM_LAPIC_LOCAL_IRQ for 'vector' on 'vcpu'.  Returns the ioctl
+ * result.
+ */
+int
+vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
+{
+	struct vm_lapic_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.vector = vector;
+
+	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &req));
+}
+
+/*
+ * Issue VM_LAPIC_MSI with the given address/message pair.  Returns the
+ * ioctl result.
+ */
+int
+vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
+{
+	struct vm_lapic_msi req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.addr = addr;
+	req.msg = msg;
+
+	return (ioctl(ctx->fd, VM_LAPIC_MSI, &req));
+}
+
+/* Issue VM_IOAPIC_ASSERT_IRQ for 'irq'.  Returns the ioctl result. */
+int
+vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
+{
+	struct vm_ioapic_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.irq = irq;
+
+	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &req));
+}
+
+/* Issue VM_IOAPIC_DEASSERT_IRQ for 'irq'.  Returns the ioctl result. */
+int
+vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
+{
+	struct vm_ioapic_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.irq = irq;
+
+	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &req));
+}
+
+/* Issue VM_IOAPIC_PULSE_IRQ for 'irq'.  Returns the ioctl result. */
+int
+vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
+{
+	struct vm_ioapic_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.irq = irq;
+
+	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &req));
+}
+
+/*
+ * Issue VM_IOAPIC_PINCOUNT, passing 'pincount' straight through as the
+ * ioctl argument.  Returns the ioctl result.
+ */
+int
+vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
+{
+
+	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
+}
+
+/*
+ * Issue VM_ISA_ASSERT_IRQ for the given atpic/ioapic irq pair.  Returns the
+ * ioctl result.
+ */
+int
+vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
+{
+	struct vm_isa_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.atpic_irq = atpic_irq;
+	req.ioapic_irq = ioapic_irq;
+
+	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &req));
+}
+
+/*
+ * Issue VM_ISA_DEASSERT_IRQ for the given atpic/ioapic irq pair.  Returns
+ * the ioctl result.
+ */
+int
+vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
+{
+	struct vm_isa_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.atpic_irq = atpic_irq;
+	req.ioapic_irq = ioapic_irq;
+
+	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &req));
+}
+
+/*
+ * Issue VM_ISA_PULSE_IRQ for the given atpic/ioapic irq pair.  Returns the
+ * ioctl result.
+ */
+int
+vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
+{
+	struct vm_isa_irq req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.atpic_irq = atpic_irq;
+	req.ioapic_irq = ioapic_irq;
+
+	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &req));
+}
+
+/*
+ * Issue VM_ISA_SET_IRQ_TRIGGER to set the trigger mode of 'atpic_irq'.
+ * Returns the ioctl result.
+ */
+int
+vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
+    enum vm_intr_trigger trigger)
+{
+	struct vm_isa_irq_trigger req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.atpic_irq = atpic_irq;
+	req.trigger = trigger;
+
+	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &req));
+}
+
+/* Issue VM_INJECT_NMI for 'vcpu'.  Returns the ioctl result. */
+int
+vm_inject_nmi(struct vmctx *ctx, int vcpu)
+{
+	struct vm_nmi req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+
+	return (ioctl(ctx->fd, VM_INJECT_NMI, &req));
+}
+
+/*
+ * Table mapping capability names to their VM_CAP_* type values.  The final
+ * all-zero entry (NULL name) terminates the table for the lookup loops in
+ * vm_capability_name2type() and vm_capability_type2name().
+ */
+static struct {
+	const char	*name;
+	int		type;
+} capstrmap[] = {
+	{ "hlt_exit",		VM_CAP_HALT_EXIT },
+	{ "mtrap_exit",		VM_CAP_MTRAP_EXIT },
+	{ "pause_exit",		VM_CAP_PAUSE_EXIT },
+	{ "unrestricted_guest",	VM_CAP_UNRESTRICTED_GUEST },
+	{ "enable_invpcid",	VM_CAP_ENABLE_INVPCID },
+	{ 0 }
+};
+
+/*
+ * Look up a capability by name.  Returns the matching type from capstrmap,
+ * or -1 when 'capname' is NULL or not present in the table.
+ */
+int
+vm_capability_name2type(const char *capname)
+{
+	int idx;
+
+	if (capname == NULL)
+		return (-1);
+
+	idx = 0;
+	while (capstrmap[idx].name != NULL) {
+		if (strcmp(capstrmap[idx].name, capname) == 0)
+			return (capstrmap[idx].type);
+		idx++;
+	}
+
+	return (-1);
+}
+
+/*
+ * Look up a capability by type.  Returns the matching name from capstrmap,
+ * or NULL when 'type' is not present in the table.
+ */
+const char *
+vm_capability_type2name(int type)
+{
+	int idx = 0;
+
+	while (capstrmap[idx].name != NULL) {
+		if (type == capstrmap[idx].type)
+			return (capstrmap[idx].name);
+		idx++;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Read capability 'cap' of 'vcpu' into '*retval'.  The output is stored
+ * whether or not the ioctl succeeded.  Returns the ioctl result.
+ */
+int
+vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+    int *retval)
+{
+	struct vm_capability req;
+	int rv;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.captype = cap;
+
+	rv = ioctl(ctx->fd, VM_GET_CAPABILITY, &req);
+	*retval = req.capval;
+
+	return (rv);
+}
+
+/* Set capability 'cap' of 'vcpu' to 'val'.  Returns the ioctl result. */
+int
+vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
+{
+	struct vm_capability req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.cpuid = vcpu;
+	req.captype = cap;
+	req.capval = val;
+
+	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &req));
+}
+
+/*
+ * Issue VM_BIND_PPTDEV for the device at bus/slot/func.  Returns the ioctl
+ * result.
+ */
+int
+vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
+{
+	struct vm_pptdev req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.bus = bus;
+	req.slot = slot;
+	req.func = func;
+
+	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &req));
+}
+
+/*
+ * Issue VM_UNBIND_PPTDEV for the device at bus/slot/func.  Returns the
+ * ioctl result.
+ */
+int
+vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
+{
+	struct vm_pptdev req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.bus = bus;
+	req.slot = slot;
+	req.func = func;
+
+	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &req));
+}
+
+/*
+ * Issue VM_MAP_PPTDEV_MMIO for the device at bus/slot/func with the given
+ * guest address 'gpa', length 'len' and host address 'hpa'.  Returns the
+ * ioctl result.
+ */
+int
+vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
+    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+	struct vm_pptdev_mmio req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.bus = bus;
+	req.slot = slot;
+	req.func = func;
+	req.gpa = gpa;
+	req.len = len;
+	req.hpa = hpa;
+
+	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &req));
+}
+
+/*
+ * Issue VM_PPTDEV_MSI for the device at bus/slot/func, passing the target
+ * 'vcpu', the address/message pair and 'numvec'.  Returns the ioctl
+ * result.
+ */
+int
+vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    uint64_t addr, uint64_t msg, int numvec)
+{
+	struct vm_pptdev_msi req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.vcpu = vcpu;
+	req.bus = bus;
+	req.slot = slot;
+	req.func = func;
+	req.addr = addr;
+	req.msg = msg;
+	req.numvec = numvec;
+
+	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &req));
+}
+
+/*
+ * Issue VM_PPTDEV_MSIX for table entry 'idx' of the device at
+ * bus/slot/func, passing the target 'vcpu', the address/message pair and
+ * 'vector_control'.  Returns the ioctl result.
+ */
+int
+vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
+{
+	struct vm_pptdev_msix req;
+
+	(void) memset(&req, 0, sizeof (req));
+	req.vcpu = vcpu;
+	req.bus = bus;
+	req.slot = slot;
+	req.func = func;
+	req.idx = idx;
+	req.addr = addr;
+	req.msg = msg;
+	req.vector_control = vector_control;
+
+	return (ioctl(ctx->fd, VM_PPTDEV_MSIX, &req));
+}
+
+#ifdef	__FreeBSD__
+/*
+ * Fetch the statistics of 'vcpu' with the VM_STATS ioctl.
+ *
+ * On success the entry count and timestamp are stored through 'ret_entries'
+ * and 'ret_tv' (each may be NULL to skip it) and a pointer to the
+ * statistics array is returned.  On ioctl failure NULL is returned and the
+ * output arguments are left untouched.
+ *
+ * The result lives in a function-local static buffer that is overwritten
+ * by every call.
+ */
+uint64_t *
+vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+	     int *ret_entries)
+{
+	static struct vm_stats vmstats;
+
+	vmstats.cpuid = vcpu;
+
+	if (ioctl(ctx->fd, VM_STATS, &vmstats) != 0)
+		return (NULL);
+
+	if (ret_entries)
+		*ret_entries = vmstats.num_entries;
+	if (ret_tv)
+		*ret_tv = vmstats.tv;
+
+	return (vmstats.statbuf);
+}
+
+/*
+ * Look up the description of statistic number 'index' with the
+ * VM_STAT_DESC ioctl.  Returns the description string, or NULL when the
+ * ioctl fails.  The string lives in a function-local static buffer that is
+ * overwritten by every call.
+ */
+const char *
+vm_get_stat_desc(struct vmctx *ctx, int index)
+{
+	static struct vm_stat_desc statdesc;
+
+	statdesc.index = index;
+
+	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) != 0)
+		return (NULL);
+
+	return (statdesc.desc);
+}
+#endif
+
+/*
+ * Query the x2APIC state of 'vcpu' with the VM_GET_X2APIC_STATE ioctl.
+ *
+ * '*state' is always written, even when the ioctl fails; in that case it
+ * receives whatever the zero-initialised request structure holds
+ * afterwards.  Returns whatever ioctl() returns.
+ */
+int
+vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
+{
+	struct vm_x2apic req;
+	int rv;
+
+	bzero(&req, sizeof(req));
+	req.cpuid = vcpu;
+
+	rv = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &req);
+
+	/* Stored unconditionally, matching the long-standing behaviour. */
+	*state = req.state;
+
+	return (rv);
+}
+
+/*
+ * Set the x2APIC state of 'vcpu' to 'state' with the VM_SET_X2APIC_STATE
+ * ioctl.  Returns whatever ioctl() returns.
+ */
+int
+vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
+{
+	struct vm_x2apic req;
+
+	bzero(&req, sizeof(req));
+	req.state = state;
+	req.cpuid = vcpu;
+
+	return (ioctl(ctx->fd, VM_SET_X2APIC_STATE, &req));
+}
+
+/*
+ * From Intel Vol 3a:
+ * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
+ *
+ * Program the register state of 'vcpu' one register at a time using
+ * vm_set_register() and vm_set_desc().  The sequence stops at the first
+ * call that fails and that call's error is returned; registers written
+ * before the failure are not rolled back.  Returns 0 when every write
+ * succeeds.
+ */
+int
+vcpu_reset(struct vmctx *vmctx, int vcpu)
+{
+	int error;
+	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
+	uint32_t desc_access, desc_limit;
+	uint16_t sel;
+
+	/* Named zero so register writes below read uniformly. */
+	zero = 0;
+
+	rflags = 0x2;
+	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
+	if (error)
+		goto done;
+
+	rip = 0xfff0;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
+		goto done;
+
+	cr0 = CR0_NE;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
+		goto done;
+
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
+		goto done;
+	
+	cr4 = 0;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
+		goto done;
+
+	/*
+	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
+	 */
+	desc_base = 0xffff0000;
+	desc_limit = 0xffff;
+	desc_access = 0x0093;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	/* CS selector; the descriptor base above is set separately. */
+	sel = 0xf000;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
+		goto done;
+
+	/*
+	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
+	 */
+	desc_base = 0;
+	desc_limit = 0xffff;
+	desc_access = 0x0093;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
+			    desc_base, desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	/* Selectors for the five data/stack segments are all zero. */
+	sel = 0;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
+		goto done;
+
+	/*
+	 * General purpose registers: all zero except RDX.
+	 * NOTE(review): 0xf00 is presumably the RDX reset value from the
+	 * table cited above -- confirm against the manual.
+	 */
+	rdx = 0xf00;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
+		goto done;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
+		goto done;
+
+	/* GDTR, IDTR */
+	desc_base = 0;
+	desc_limit = 0xffff;
+	desc_access = 0;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
+			    desc_base, desc_limit, desc_access);
+	if (error != 0)
+		goto done;
+
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
+			    desc_base, desc_limit, desc_access);
+	if (error != 0)
+		goto done;
+
+	/*
+	 * TR
+	 * NOTE(review): desc_base and desc_limit are assigned here but the
+	 * call below passes literal 0, 0, so the TR limit actually
+	 * programmed is 0 rather than 0xffff.  Confirm which is intended
+	 * before changing either.
+	 */
+	desc_base = 0;
+	desc_limit = 0xffff;
+	desc_access = 0x0000008b;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
+	if (error)
+		goto done;
+
+	sel = 0;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
+		goto done;
+
+	/* LDTR */
+	desc_base = 0;
+	desc_limit = 0xffff;
+	desc_access = 0x00000082;
+	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
+			    desc_limit, desc_access);
+	if (error)
+		goto done;
+
+	/* 'sel' is assigned but the call below passes a literal 0. */
+	sel = 0;
+	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
+		goto done;
+
+	/* XXX cr2, debug registers */
+
+	error = 0;
+done:
+	return (error);
+}
+
+/*
+ * Fetch the page table entries for guest physical address 'gpa' with the
+ * VM_GET_GPA_PMAP ioctl.
+ *
+ * On success the entry count is stored in '*num' and that many entries are
+ * copied into 'pte', which the caller must have sized accordingly.  On
+ * failure neither output is touched.  Returns whatever ioctl() returns.
+ */
+int
+vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
+{
+	struct vm_gpa_pte req;
+	int rv, idx;
+
+	bzero(&req, sizeof(req));
+	req.gpa = gpa;
+
+	rv = ioctl(ctx->fd, VM_GET_GPA_PMAP, &req);
+	if (rv != 0)
+		return (rv);
+
+	*num = req.ptenum;
+	for (idx = 0; idx < req.ptenum; idx++)
+		pte[idx] = req.pte[idx];
+
+	return (rv);
+}
+
+/*
+ * Query the HPET capabilities with the VM_GET_HPET_CAPABILITIES ioctl.
+ *
+ * When 'capabilities' is non-NULL it is always written, even if the ioctl
+ * failed; in that case it receives whatever the zero-initialised request
+ * structure holds afterwards.  Returns whatever ioctl() returns.
+ */
+int
+vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
+{
+	struct vm_hpet_cap req;
+	int rv;
+
+	bzero(&req, sizeof(struct vm_hpet_cap));
+	rv = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &req);
+
+	if (capabilities == NULL)
+		return (rv);
+
+	*capabilities = req.capabilities;
+	return (rv);
+}
+
+/*
+ * Translate guest linear address 'gla' with the VM_GLA2GPA ioctl, using a
+ * copy of the caller's paging description and the requested protection.
+ *
+ * '*fault' and '*gpa' are written only when the ioctl succeeds; on failure
+ * both are left exactly as the caller passed them in.  Returns whatever
+ * ioctl() returns.
+ */
+static int
+gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, int *fault, uint64_t *gpa)
+{
+	struct vm_gla2gpa req;
+	int rv;
+
+	bzero(&req, sizeof(struct vm_gla2gpa));
+	req.paging = *paging;
+	req.gla = gla;
+	req.prot = prot;
+	req.vcpuid = vcpu;
+
+	rv = ioctl(ctx->fd, VM_GLA2GPA, &req);
+	if (rv != 0)
+		return (rv);
+
+	*fault = req.fault;
+	*gpa = req.gpa;
+	return (rv);
+}
+
+/*
+ * Translate guest linear address 'gla' to a guest physical address.
+ *
+ * Returns the ioctl error when the translation request itself fails.  When
+ * the request succeeds but the translation faulted, the fault indication
+ * is returned instead; otherwise 0 is returned and '*gpa' holds the result.
+ */
+int
+vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa)
+{
+	int error, fault;
+
+	/*
+	 * gla2gpa() only stores the fault indication on success, so start
+	 * from "no fault" to avoid testing an indeterminate value (and
+	 * overwriting the real error) when the ioctl fails.
+	 */
+	fault = 0;
+
+	error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, gpa);
+	if (fault)
+		error = fault;
+	return (error);
+}
+
+#ifndef min
+#define	min(a,b)	(((a) < (b)) ? (a) : (b))
+#endif
+
+/*
+ * Build an iovec describing the guest linear range [gla, gla + len).
+ *
+ * Every entry of 'iov' is cleared first.  The range is then walked in
+ * pieces: each piece is translated with gla2gpa(), clipped to
+ * 'PAGE_SIZE - (gpa & PAGE_MASK)' bytes, mapped with vm_map_gpa() and
+ * recorded in the next iovec entry.  The caller must supply enough
+ * entries; running out trips the assertion.
+ *
+ * Returns 0 on success, 1 when a translation reports a fault and -1 when
+ * the translation request or the mapping fails.
+ */
+int
+vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
+{
+	uint64_t gpa;
+	void *hva;
+	int error, fault, idx, chunk, pgoff;
+
+	for (idx = 0; idx < iovcnt; idx++) {
+		iov[idx].iov_base = 0;
+		iov[idx].iov_len = 0;
+	}
+
+	for (; len != 0; gla += chunk, len -= chunk) {
+		assert(iovcnt > 0);
+
+		error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa);
+		if (error)
+			return (-1);
+		if (fault)
+			return (1);
+
+		/* Never let one segment extend past the current page. */
+		pgoff = gpa & PAGE_MASK;
+		chunk = min(len, PAGE_SIZE - pgoff);
+
+		hva = vm_map_gpa(ctx, gpa, chunk);
+		if (hva == NULL)
+			return (-1);
+
+		iov->iov_base = hva;
+		iov->iov_len = chunk;
+		iov++;
+		iovcnt--;
+	}
+
+	return (0);
+}
+
+/*
+ * Counterpart of vm_copy_setup().  It currently does nothing and ignores
+ * all of its arguments.
+ */
+void
+vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
+{
+}
+
+/*
+ * Copy 'len' bytes from the segments described by 'iov' into the buffer
+ * 'vp'.  Segments are consumed in order, taking at most iov_len bytes from
+ * each; a zero-length segment encountered while bytes remain trips the
+ * assertion.  'ctx' and 'vcpu' are unused.
+ */
+void
+vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
+{
+	char *out = vp;
+	size_t chunk;
+
+	for (; len != 0; iov++, out += chunk, len -= chunk) {
+		assert(iov->iov_len);
+		chunk = min(len, iov->iov_len);
+		bcopy(iov->iov_base, out, chunk);
+	}
+}
+
+/*
+ * Copy 'len' bytes from the buffer 'vp' into the segments described by
+ * 'iov'.  Segments are filled in order, placing at most iov_len bytes in
+ * each; a zero-length segment encountered while bytes remain trips the
+ * assertion.  'ctx' and 'vcpu' are unused.
+ */
+void
+vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
+    size_t len)
+{
+	const char *in = vp;
+	size_t chunk;
+
+	for (; len != 0; iov++, in += chunk, len -= chunk) {
+		assert(iov->iov_len);
+		chunk = min(len, iov->iov_len);
+		bcopy(in, iov->iov_base, chunk);
+	}
+}
+
+/*
+ * Issue the VM_ACTIVATE_CPU ioctl for 'vcpu'.  Returns whatever ioctl()
+ * returns.
+ */
+int
+vm_activate_cpu(struct vmctx *ctx, int vcpu)
+{
+	struct vm_activate_cpu req;
+
+	bzero(&req, sizeof(struct vm_activate_cpu));
+	req.vcpuid = vcpu;
+
+	return (ioctl(ctx->fd, VM_ACTIVATE_CPU, &req));
+}
+
+/*
+ * Issue the VM_RESTART_INSTRUCTION ioctl for 'vcpu'.  'arg' is the
+ * struct vmctx of the VM, passed as an untyped pointer.  Returns whatever
+ * ioctl() returns.
+ */
+int
+vm_restart_instruction(void *arg, int vcpu)
+{
+	struct vmctx *vm = arg;
+	int rv;
+
+	rv = ioctl(vm->fd, VM_RESTART_INSTRUCTION, &vcpu);
+	return (rv);
+}
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
new file mode 100644
index 0000000000..d7eb67aa58
--- /dev/null
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/lib/libvmmapi/vmmapi.h 280929 2015-04-01 00:15:31Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef _VMMAPI_H_
+#define	_VMMAPI_H_
+
+#include <sys/param.h>
+
+struct iovec;
+struct vmctx;
+enum x2apic_state;
+
+/*
+ * Different styles of mapping the memory assigned to a VM into the address
+ * space of the controlling process.  A value of this type is passed as the
+ * last argument of vm_setup_memory().
+ */
+enum vm_mmap_style {
+	VM_MMAP_NONE,		/* no mapping */
+	VM_MMAP_ALL,		/* fully and statically mapped */
+	VM_MMAP_SPARSE,		/* mappings created on-demand */
+};
+
+int	vm_create(const char *name);
+struct vmctx *vm_open(const char *name);
+int	vm_destroy(struct vmctx *ctx);
+int	vm_parse_memsize(const char *optarg, size_t *memsize);
+int	vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+			  int *wired);
+int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
+int	vm_setup_rom(struct vmctx *ctx, vm_paddr_t gpa, size_t len);
+void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+int	vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
+		   uint64_t gla, int prot, uint64_t *gpa);
+uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
+void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
+size_t	vm_get_lowmem_size(struct vmctx *ctx);
+size_t	vm_get_highmem_size(struct vmctx *ctx);
+int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
+		    uint64_t base, uint32_t limit, uint32_t access);
+int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
+		    uint64_t *base, uint32_t *limit, uint32_t *access);
+int	vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
+			struct seg_desc *seg_desc);
+int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
+int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
+int	vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
+int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);
+int	vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
+    int errcode_valid, uint32_t errcode, int restart_instruction);
+int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
+int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
+int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
+int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
+int	vm_ioapic_pincount(struct vmctx *ctx, int *pincount);
+int	vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
+int	vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
+int	vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
+int	vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
+	    enum vm_intr_trigger trigger);
+int	vm_inject_nmi(struct vmctx *ctx, int vcpu);
+int	vm_capability_name2type(const char *capname);
+const char *vm_capability_type2name(int type);
+int	vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+			  int *retval);
+int	vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+			  int val);
+int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
+int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
+int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
+			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, uint64_t addr, uint64_t msg, int numvec);
+int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, int idx, uint64_t addr, uint64_t msg,
+	    uint32_t vector_control);
+
+/*
+ * Return a pointer to the statistics buffer. The buffer is static storage
+ * that is overwritten by every call, so this is not MT-safe.
+ */
+uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+		       int *ret_entries);
+const char *vm_get_stat_desc(struct vmctx *ctx, int index);
+
+int	vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s);
+int	vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s);
+
+int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
+
+/*
+ * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
+ * The 'iovcnt' should be big enough to accommodate all GPA segments.
+ * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
+ */
+int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
+	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
+void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
+	    void *host_dst, size_t len);
+void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
+	    struct iovec *guest_iov, size_t len);
+void	vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov,
+	    int iovcnt);
+
+/* Reset vcpu register state */
+int	vcpu_reset(struct vmctx *ctx, int vcpu);
+
+int	vm_activate_cpu(struct vmctx *ctx, int vcpu);
+
+#ifdef	__FreeBSD__
+/*
+ * FreeBSD specific APIs
+ */
+int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
+				uint64_t rip, uint64_t cr3, uint64_t gdtbase,
+				uint64_t rsp);
+int	vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
+					uint32_t eip, uint32_t gdtbase,
+					uint32_t esp);
+void	vm_setup_freebsd_gdt(uint64_t *gdtr);
+#endif
+#endif	/* _VMMAPI_H_ */
diff --git a/usr/src/tools/scripts/gensetdefs.pl b/usr/src/tools/scripts/gensetdefs.pl
new file mode 100644
index 0000000000..8ca5782feb
--- /dev/null
+++ b/usr/src/tools/scripts/gensetdefs.pl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl -w
+#
+# COPYRIGHT 2013 Pluribus Networks Inc.
+#
+# All rights reserved. This copyright notice is Copyright Management
+# Information under 17 USC 1202 and is included to protect this work and
+# deter copyright infringement.  Removal or alteration of this Copyright
+# Management Information without the express written permission from
+# Pluribus Networks Inc is prohibited, and any such unauthorized removal
+# or alteration will be a violation of federal law.
+
+use strict;
+
+# For every ELF section of the object named by the first argument whose
+# name starts with "set_", rewrite the matching __start_<section> and
+# __stop_<section> symbols so that they are global, belong to that section
+# and carry the section's start address and end address (start + size).
+#
+# NOTE(review): the argument is neither checked for presence nor quoted
+# when it is interpolated into the shell commands below, and the exit
+# status of the elfedit invocations is never examined.
+
+# One section name per line of output.
+my @Sections = split(/\n/, `elfedit -r -e \'shdr:sh_name -osimple\' $ARGV[0] 2>&1`);
+
+foreach my $Section (@Sections) {
+	if ($Section =~ "^set_") {
+		print "\tfixing $Section\n";
+
+		# Both values are run through hex() below, so the "-onum"
+		# output is presumably hexadecimal -- confirm against elfedit.
+		chomp(my $SectionAddr = `elfedit -r -e \'shdr:sh_addr -onum $Section\' $ARGV[0] 2>&1`);
+		chomp(my $SectionSize = `elfedit -r -e \'shdr:sh_size -onum $Section\' $ARGV[0] 2>&1`);
+		my $SectionEnd = hex($SectionAddr) + hex($SectionSize);
+
+		# The start value is handed back exactly as elfedit printed
+		# it; the end value is interpolated as a decimal number.
+		`elfedit -e \'sym:st_bind __start_$Section global\' $ARGV[0] 2>&1`;
+		`elfedit -e \'sym:st_value __start_$Section $SectionAddr\' $ARGV[0] 2>&1`;
+		`elfedit -e \'sym:st_shndx __start_$Section $Section\' $ARGV[0] 2>&1`;
+		`elfedit -e \'sym:st_bind __stop_$Section global\' $ARGV[0] 2>&1`;
+		`elfedit -e \'sym:st_value __stop_$Section $SectionEnd\' $ARGV[0] 2>&1`;
+		`elfedit -e \'sym:st_shndx __stop_$Section $Section\' $ARGV[0] 2>&1`;
+	}
+}
diff --git a/usr/src/uts/i86pc/io/viona/viona.c b/usr/src/uts/i86pc/io/viona/viona.c
new file mode 100644
index 0000000000..40bdd80a6e
--- /dev/null
+++ b/usr/src/uts/i86pc/io/viona/viona.c
@@ -0,0 +1,1404 @@
+/*
+ * Copyright (c) 2013  Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/sysmacros.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <vm/seg_kmem.h>
+
+#include <sys/dls.h>
+#include <sys/mac_client.h>
+
+#include <sys/viona_io.h>
+
+#define	MB	(1024UL * 1024)
+#define	GB	(1024UL * MB)
+
+/*
+ * Min. octets in an ethernet frame minus FCS
+ */
+#define	MIN_BUF_SIZE	60
+
+#define	VIONA_NAME		"Virtio Network Accelerator"
+
+#define	VIONA_CTL_MINOR		0
+#define	VIONA_CTL_NODE_NAME	"ctl"
+
+#define	VIONA_CLI_NAME		"viona"
+
+#define	VTNET_MAXSEGS		32
+
+#define	VRING_ALIGN		4096
+
+#define	VRING_DESC_F_NEXT	(1 << 0)
+#define	VRING_DESC_F_WRITE	(1 << 1)
+#define	VRING_DESC_F_INDIRECT	(1 << 2)
+
+#define	VRING_AVAIL_F_NO_INTERRUPT	1
+
+#define	VRING_USED_F_NO_NOTIFY		1
+
+#define	BCM_NIC_DRIVER		"bnxe"
+/*
+ * Host capabilities
+ */
+#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
+#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
+#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
+
+#define	VIONA_S_HOSTCAPS		\
+	(VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | \
+	VIRTIO_NET_F_STATUS)
+
+/*
+ * One virtio descriptor, packed so that the layout contains no padding.
+ * NOTE(review): vd_flags presumably carries the VRING_DESC_F_* bits
+ * defined above and vd_next the index of the next descriptor in a chain;
+ * the code consuming these fields is not in view -- confirm there.
+ */
+#pragma pack(1)
+struct virtio_desc {
+	uint64_t	vd_addr;
+	uint32_t	vd_len;
+	uint16_t	vd_flags;
+	uint16_t	vd_next;
+};
+#pragma pack()
+
+/*
+ * Element type of the used ring (see hq_used_ring in
+ * viona_vring_hqueue_t), packed so that the layout contains no padding.
+ */
+#pragma pack(1)
+struct virtio_used {
+	uint32_t	vu_idx;
+	uint32_t	vu_tlen;
+};
+#pragma pack()
+
+/*
+ * The two packed virtio-net packet header layouts.  They are identical
+ * except that virtio_net_mrgrxhdr ends with the extra vrh_bufs field.
+ * NOTE(review): the longer form is presumably the one used when
+ * VIRTIO_NET_F_MRG_RXBUF has been negotiated -- confirm in the rx/tx code.
+ */
+#pragma pack(1)
+struct virtio_net_mrgrxhdr {
+	uint8_t		vrh_flags;
+	uint8_t		vrh_gso_type;
+	uint16_t	vrh_hdr_len;
+	uint16_t	vrh_gso_size;
+	uint16_t	vrh_csum_start;
+	uint16_t	vrh_csum_offset;
+	uint16_t	vrh_bufs;
+};
+struct virtio_net_hdr {
+	uint8_t		vrh_flags;
+	uint8_t		vrh_gso_type;
+	uint16_t	vrh_hdr_len;
+	uint16_t	vrh_gso_size;
+	uint16_t	vrh_csum_start;
+	uint16_t	vrh_csum_offset;
+};
+#pragma pack()
+
+/*
+ * Host-side bookkeeping for one virtqueue.  viona_link_t embeds one of
+ * these for the rx ring and one for the tx ring; the two mutexes are
+ * initialised in viona_ioc_create() and destroyed in viona_ioc_delete().
+ * NOTE(review): from the names, hq_a_mutex presumably guards the
+ * "available" side and hq_u_mutex the "used" side -- confirm in the ring
+ * code.
+ */
+typedef struct viona_vring_hqueue {
+	/* Internal state */
+	uint16_t		hq_size;
+	kmutex_t		hq_a_mutex;
+	kmutex_t		hq_u_mutex;
+	uint16_t		hq_cur_aidx;	/* trails behind 'avail_idx' */
+
+	/* Host-context pointers to the queue */
+	caddr_t			hq_baseaddr;
+	uint16_t		*hq_avail_flags;
+	uint16_t		*hq_avail_idx;	/* monotonically increasing */
+	uint16_t		*hq_avail_ring;
+
+	uint16_t		*hq_used_flags;
+	uint16_t		*hq_used_idx;	/* monotonically increasing */
+	struct virtio_used	*hq_used_ring;
+} viona_vring_hqueue_t;
+
+
+/*
+ * Per-open-instance link state, created by viona_ioc_create() and torn
+ * down by viona_ioc_delete().
+ */
+typedef struct viona_link {
+	/* Datalink id supplied by the creator (c_linkid). */
+	datalink_id_t		l_linkid;
+
+	/* VM found by name at create time, and its memory description. */
+	struct vm		*l_vm;
+	size_t			l_vm_lomemsize;	/* from c_lomem_size */
+	caddr_t			l_vm_lomemaddr;
+	size_t			l_vm_himemsize;	/* from c_himem_size */
+	caddr_t			l_vm_himemaddr;
+
+	/* Handles from mac_open_by_linkid() and mac_client_open(). */
+	mac_handle_t		l_mh;
+	mac_client_handle_t	l_mch;
+
+	/* kmem cache of viona_desb_t objects. */
+	kmem_cache_t		*l_desb_kmc;
+
+	/* Pollhead handed out by viona_chpoll(). */
+	pollhead_t		l_pollhead;
+
+	viona_vring_hqueue_t	l_rx_vring;
+	uint_t			l_rx_intr;	/* non-zero: POLLIN ready */
+
+	viona_vring_hqueue_t	l_tx_vring;
+	/*
+	 * l_tx_cv, l_tx_mutex and l_tx_outstanding are only set up when
+	 * copy_tx_mblks is set; viona_ioc_delete() then waits on l_tx_cv
+	 * under l_tx_mutex until l_tx_outstanding reaches zero.
+	 */
+	kcondvar_t		l_tx_cv;
+	uint_t			l_tx_intr;	/* non-zero: POLLOUT ready */
+	kmutex_t		l_tx_mutex;
+	int			l_tx_outstanding;
+	/* Feature bits; always a subset of VIONA_S_HOSTCAPS. */
+	uint32_t		l_features;
+} viona_link_t;
+
+/*
+ * Object type stored in the per-link l_desb_kmc cache.
+ * NOTE(review): it starts with an frtn_t, so it is presumably attached to
+ * mblks as a free routine descriptor -- the users of this type are not in
+ * view; confirm there.
+ */
+typedef struct {
+	frtn_t			d_frtn;
+	viona_link_t		*d_link;
+	uint_t			d_ref;
+	uint16_t		d_cookie;
+	int			d_len;
+} viona_desb_t;
+
+/*
+ * Soft state allocated per minor in viona_open().  ss_link is NULL until
+ * VNA_IOC_CREATE succeeds and becomes NULL again after viona_ioc_delete().
+ */
+typedef struct viona_soft_state {
+	viona_link_t		*ss_link;
+} viona_soft_state_t;
+
+/*
+ * An (id, len) pair.
+ * NOTE(review): presumably describes a completed descriptor chain before
+ * it is published to the used ring -- the users are not in view; confirm.
+ */
+typedef struct used_elem {
+	uint16_t	id;
+	uint32_t	len;
+} used_elem_t;
+
+static void			*viona_state;
+static dev_info_t		*viona_dip;
+static id_space_t		*viona_minor_ids;
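+/*
+ * When B_TRUE, tx mblks are copied out of the virtio ring so that ring
+ * resources can be freed without waiting for packet transmission to
+ * complete.  set_viona_tx_mode() clears this when the BCM_NIC_DRIVER
+ * driver is installed.
+ */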
+static boolean_t		copy_tx_mblks = B_TRUE;
+
+extern struct vm *vm_lookup_by_name(char *name);
+extern uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len);
+
+static int viona_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int viona_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int viona_open(dev_t *devp, int flag, int otype, cred_t *credp);
+static int viona_close(dev_t dev, int flag, int otype, cred_t *credp);
+static int viona_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
+    cred_t *credp, int *rval);
+static int viona_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp);
+
+static int viona_ioc_create(viona_soft_state_t *ss, vioc_create_t *u_create);
+static int viona_ioc_delete(viona_soft_state_t *ss);
+
+static int viona_vm_map(viona_link_t *link);
+static caddr_t viona_gpa2kva(viona_link_t *link, uint64_t gpa);
+static void viona_vm_unmap(viona_link_t *link);
+
+static int viona_ioc_rx_ring_init(viona_link_t *link,
+    vioc_ring_init_t *u_ri);
+static int viona_ioc_tx_ring_init(viona_link_t *link,
+    vioc_ring_init_t *u_ri);
+static int viona_ioc_rx_ring_reset(viona_link_t *link);
+static int viona_ioc_tx_ring_reset(viona_link_t *link);
+static void viona_ioc_rx_ring_kick(viona_link_t *link);
+static void viona_ioc_tx_ring_kick(viona_link_t *link);
+static int viona_ioc_rx_intr_clear(viona_link_t *link);
+static int viona_ioc_tx_intr_clear(viona_link_t *link);
+
+static void viona_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+    boolean_t loopback);
+static void viona_tx(viona_link_t *link, viona_vring_hqueue_t *hq);
+
+/*
+ * Character device entry points.  Only open, close, ioctl and chpoll are
+ * implemented.  The slot names in the comments follow cb_ops(9S).
+ */
+static struct cb_ops viona_cb_ops = {
+	viona_open,		/* cb_open */
+	viona_close,		/* cb_close */
+	nodev,			/* cb_strategy */
+	nodev,			/* cb_print */
+	nodev,			/* cb_dump */
+	nodev,			/* cb_read */
+	nodev,			/* cb_write */
+	viona_ioctl,		/* cb_ioctl */
+	nodev,			/* cb_devmap */
+	nodev,			/* cb_mmap */
+	nodev,			/* cb_segmap */
+	viona_chpoll,		/* cb_chpoll */
+	ddi_prop_op,		/* cb_prop_op */
+	0,			/* cb_str */
+	D_MP | D_NEW | D_HOTPLUG,	/* cb_flag */
+	CB_REV,			/* cb_rev */
+	nodev,			/* cb_aread */
+	nodev			/* cb_awrite */
+};
+
+/*
+ * Driver operations.  The slot names in the comments follow dev_ops(9S).
+ * NOTE(review): no getinfo routine is supplied (the slot is nodev) --
+ * confirm that is acceptable for this driver.
+ */
+static struct dev_ops viona_ops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* devo_refcnt */
+	nodev,			/* devo_getinfo */
+	nulldev,		/* devo_identify */
+	nulldev,		/* devo_probe */
+	viona_attach,		/* devo_attach */
+	viona_detach,		/* devo_detach */
+	nodev,			/* devo_reset */
+	&viona_cb_ops,		/* devo_cb_ops */
+	NULL,			/* devo_bus_ops */
+	ddi_power,		/* devo_power */
+	ddi_quiesce_not_needed	/* devo_quiesce */
+};
+
+/* Module linkage for a device driver: description string and dev_ops. */
+static struct modldrv modldrv = {
+	&mod_driverops,
+	VIONA_NAME,
+	&viona_ops,
+};
+
+/* Passed to mod_install(), mod_remove() and mod_info() below. */
+static struct modlinkage modlinkage = {
+	MODREV_1, &modldrv, NULL
+};
+
+/*
+ * Module load entry point: set up the soft state anchor, then install the
+ * module.  The soft state anchor is torn down again if installation
+ * fails.  Returns 0 on success or the error of the step that failed.
+ */
+int
+_init(void)
+{
+	int	err;
+
+	err = ddi_soft_state_init(&viona_state,
+	    sizeof (viona_soft_state_t), 0);
+	if (err != 0) {
+		return (err);
+	}
+
+	err = mod_install(&modlinkage);
+	if (err != 0) {
+		ddi_soft_state_fini(&viona_state);
+	}
+
+	return (err);
+}
+
+/*
+ * Module unload entry point: remove the module and, only if that worked,
+ * release the soft state anchor.  Returns the mod_remove() result.
+ */
+int
+_fini(void)
+{
+	int	err;
+
+	err = mod_remove(&modlinkage);
+	if (err != 0) {
+		return (err);
+	}
+
+	ddi_soft_state_fini(&viona_state);
+
+	return (0);
+}
+
+/*
+ * Module information entry point; defers entirely to mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Decide whether tx mblks are copied.  If the BCM_NIC_DRIVER driver has a
+ * major number and can be held as an installed driver, copy_tx_mblks is
+ * cleared and the hold is released again; otherwise copy_tx_mblks is left
+ * at its current value.
+ */
+static void
+set_viona_tx_mode()
+{
+	major_t major;
+
+	major = ddi_name_to_major(BCM_NIC_DRIVER);
+	if (major == DDI_MAJOR_T_NONE) {
+		return;
+	}
+
+	if (ddi_hold_installed_driver(major) == NULL) {
+		return;
+	}
+
+	copy_tx_mblks = B_FALSE;
+	ddi_rele_driver(major);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH is supported.
+ *
+ * Creates the id space used to hand out per-open minor numbers (starting
+ * just above the control minor), creates the control minor node, records
+ * the dev_info pointer and selects the tx copy mode.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE for an unsupported command or when
+ * the control node cannot be created.
+ */
+static int
+viona_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	if (cmd != DDI_ATTACH) {
+		return (DDI_FAILURE);
+	}
+
+	viona_minor_ids = id_space_create("viona_minor_id",
+	    VIONA_CTL_MINOR + 1, UINT16_MAX);
+
+	if (ddi_create_minor_node(dip, VIONA_CTL_NODE_NAME,
+	    S_IFCHR, VIONA_CTL_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) {
+		/* Do not leak the id space created above. */
+		id_space_destroy(viona_minor_ids);
+		viona_minor_ids = NULL;
+		return (DDI_FAILURE);
+	}
+
+	viona_dip = dip;
+
+	set_viona_tx_mode();
+	ddi_report_dev(viona_dip);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported.  Destroys the
+ * minor id space, removes every minor node of the recorded dev_info and
+ * forgets that dev_info.
+ */
+static int
+viona_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	id_space_destroy(viona_minor_ids);
+	ddi_remove_minor_node(viona_dip, NULL);
+	viona_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * open(9E) entry point.  Only privileged character opens of the control
+ * minor are accepted.  Every successful open is given its own minor
+ * number and soft state, and '*devp' is rewritten to refer to that minor.
+ *
+ * Returns 0 on success, EINVAL for a non-character open, EPERM without
+ * privilege, ENXIO for any minor other than the control minor, EBUSY when
+ * no minor number is free and ENOMEM when the soft state cannot be
+ * allocated.
+ */
+static int
+viona_open(dev_t *devp, int flag, int otype, cred_t *credp)
+{
+	int	minor;
+
+	if (otype != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	if (drv_priv(credp) != 0) {
+		return (EPERM);
+	}
+
+	if (getminor(*devp) != VIONA_CTL_MINOR) {
+		return (ENXIO);
+	}
+
+	/*
+	 * Use the non-blocking allocator so that exhaustion is reported to
+	 * the caller; it signals failure with -1.  (The id space starts at
+	 * VIONA_CTL_MINOR + 1, so 0 is never a valid result.)
+	 */
+	minor = id_alloc_nosleep(viona_minor_ids);
+	if (minor == -1) {
+		/* All minors are busy */
+		return (EBUSY);
+	}
+
+	if (ddi_soft_state_zalloc(viona_state, minor) != DDI_SUCCESS) {
+		/* Never hand out a minor that has no soft state. */
+		id_free(viona_minor_ids, minor);
+		return (ENOMEM);
+	}
+
+	*devp = makedevice(getmajor(*devp), minor);
+
+	return (0);
+}
+
+/*
+ * close(9E) entry point.  Tears down any link still attached to the
+ * minor, then releases its soft state and minor number.
+ *
+ * Returns 0 on success, EINVAL for a non-character close, EPERM without
+ * privilege and ENXIO when the minor has no soft state.
+ */
+static int
+viona_close(dev_t dev, int flag, int otype, cred_t *credp)
+{
+	viona_soft_state_t	*state;
+	int			id;
+
+	if (otype != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	if (drv_priv(credp) != 0) {
+		return (EPERM);
+	}
+
+	id = getminor(dev);
+	state = ddi_get_soft_state(viona_state, id);
+	if (state == NULL) {
+		return (ENXIO);
+	}
+
+	/* The result is ignored: there may simply be no link to delete. */
+	viona_ioc_delete(state);
+
+	ddi_soft_state_free(viona_state, id);
+	id_free(viona_minor_ids, id);
+
+	return (0);
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * VNA_IOC_CREATE and VNA_IOC_DELETE manage the link attached to this
+ * minor.  Every other known command operates on an existing link and
+ * fails with ENOSYS when none has been created yet.
+ *
+ * Returns 0 on success, ENXIO when the minor has no soft state, ENOSYS as
+ * described above, EFAULT when the feature word cannot be copied to or
+ * from the caller, ENOTTY for an unknown command, or the error of the
+ * handler that was invoked.
+ */
+static int
+viona_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
+    cred_t *credp, int *rval)
+{
+	viona_soft_state_t	*ss;
+	uint32_t		features;
+	int			err = 0;
+
+	ss = ddi_get_soft_state(viona_state, getminor(dev));
+	if (ss == NULL) {
+		return (ENXIO);
+	}
+
+	switch (cmd) {
+	case VNA_IOC_CREATE:
+		err = viona_ioc_create(ss, (vioc_create_t *)data);
+		break;
+	case VNA_IOC_DELETE:
+		err = viona_ioc_delete(ss);
+		break;
+	case VNA_IOC_SET_FEATURES:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		/*
+		 * 'data' is a caller-supplied address; it must be copied
+		 * in rather than dereferenced directly.
+		 */
+		if (ddi_copyin((void *)data, &features, sizeof (features),
+		    mode) != 0) {
+			return (EFAULT);
+		}
+		/* Only features the host offers can be enabled. */
+		ss->ss_link->l_features = features & VIONA_S_HOSTCAPS;
+		break;
+	case VNA_IOC_GET_FEATURES:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		features = VIONA_S_HOSTCAPS;
+		if (ddi_copyout(&features, (void *)data, sizeof (features),
+		    mode) != 0) {
+			return (EFAULT);
+		}
+		break;
+	case VNA_IOC_RX_RING_INIT:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_rx_ring_init(ss->ss_link,
+		    (vioc_ring_init_t *)data);
+		break;
+	case VNA_IOC_RX_RING_RESET:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_rx_ring_reset(ss->ss_link);
+		break;
+	case VNA_IOC_RX_RING_KICK:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		viona_ioc_rx_ring_kick(ss->ss_link);
+		err = 0;
+		break;
+	case VNA_IOC_TX_RING_INIT:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_tx_ring_init(ss->ss_link,
+		    (vioc_ring_init_t *)data);
+		break;
+	case VNA_IOC_TX_RING_RESET:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_tx_ring_reset(ss->ss_link);
+		break;
+	case VNA_IOC_TX_RING_KICK:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		viona_ioc_tx_ring_kick(ss->ss_link);
+		err = 0;
+		break;
+	case VNA_IOC_RX_INTR_CLR:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_rx_intr_clear(ss->ss_link);
+		break;
+	case VNA_IOC_TX_INTR_CLR:
+		if (ss->ss_link == NULL) {
+			return (ENOSYS);
+		}
+		err = viona_ioc_tx_intr_clear(ss->ss_link);
+		break;
+	default:
+		err = ENOTTY;
+		break;
+	}
+
+	return (err);
+}
+
+/*
+ * chpoll(9E) entry point.  Reports POLLIN while the link's rx interrupt
+ * flag is set and POLLOUT while its tx interrupt flag is set, limited to
+ * the events the caller asked about.  When nothing is ready and no other
+ * descriptor has reported an event yet, the link's pollhead is returned
+ * through 'phpp'.
+ *
+ * Returns 0, or ENXIO when the minor has no soft state or no link.
+ */
+static int
+viona_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	viona_soft_state_t	*ss;
+	viona_link_t		*link;
+	short			ready = 0;
+
+	ss = ddi_get_soft_state(viona_state, getminor(dev));
+	if (ss == NULL || ss->ss_link == NULL) {
+		return (ENXIO);
+	}
+	link = ss->ss_link;
+
+	if ((events & POLLIN) && link->l_rx_intr) {
+		ready |= POLLIN;
+	}
+
+	if ((events & POLLOUT) && link->l_tx_intr) {
+		ready |= POLLOUT;
+	}
+
+	*reventsp = ready;
+
+	if (ready == 0 && !anyyet) {
+		*phpp = &link->l_pollhead;
+	}
+
+	return (0);
+}
+
+/*
+ * VNA_IOC_CREATE handler: build the link for this minor.
+ *
+ * Copies the request in from 'u_create', looks the VM up by name, maps
+ * its memory, opens the MAC named by the link id plus a client on it,
+ * creates the viona_desb_t cache and initialises the ring locks.  On
+ * success the new link is stored in the soft state.
+ *
+ * Returns 0 on success, ENOSYS when a link already exists, EFAULT when
+ * the request cannot be copied in, ENXIO when the VM is not found, or the
+ * error of the mapping/MAC step that failed.  On failure everything set
+ * up so far is undone.
+ */
+static int
+viona_ioc_create(viona_soft_state_t *ss, vioc_create_t *u_create)
+{
+	vioc_create_t		k_create;
+	viona_link_t		*link;
+	char			cli_name[MAXNAMELEN];
+	boolean_t		mapped = B_FALSE;
+	int			err;
+
+	if (ss->ss_link != NULL) {
+		return (ENOSYS);
+	}
+	if (copyin(u_create, &k_create, sizeof (k_create)) != 0) {
+		return (EFAULT);
+	}
+
+	link = kmem_zalloc(sizeof (viona_link_t), KM_SLEEP);
+
+	link->l_linkid = k_create.c_linkid;
+	/*
+	 * NOTE(review): c_vmname comes straight from the caller and is not
+	 * checked for NUL termination before being used as a string.
+	 */
+	link->l_vm = vm_lookup_by_name(k_create.c_vmname);
+	if (link->l_vm == NULL) {
+		err = ENXIO;
+		goto bail;
+	}
+
+	link->l_vm_lomemsize = k_create.c_lomem_size;
+	link->l_vm_himemsize = k_create.c_himem_size;
+	err = viona_vm_map(link);
+	if (err != 0) {
+		goto bail;
+	}
+	mapped = B_TRUE;
+
+	err = mac_open_by_linkid(link->l_linkid, &link->l_mh);
+	if (err != 0) {
+		cmn_err(CE_WARN, "viona create mac_open_by_linkid"
+		    " returned %d\n", err);
+		goto bail;
+	}
+
+	snprintf(cli_name, sizeof (cli_name), "%s-%d",
+	    VIONA_CLI_NAME, link->l_linkid);
+	err = mac_client_open(link->l_mh, &link->l_mch, cli_name, 0);
+	if (err != 0) {
+		cmn_err(CE_WARN, "viona create mac_client_open"
+		    " returned %d\n", err);
+		goto bail;
+	}
+
+	link->l_features = VIONA_S_HOSTCAPS;
+	link->l_desb_kmc = kmem_cache_create(cli_name,
+	    sizeof (viona_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	/* One avail-side and one used-side lock per ring. */
+	mutex_init(&link->l_rx_vring.hq_a_mutex, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&link->l_rx_vring.hq_u_mutex, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&link->l_tx_vring.hq_a_mutex, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&link->l_tx_vring.hq_u_mutex, NULL, MUTEX_DRIVER, NULL);
+	if (copy_tx_mblks) {
+		/* Only needed to wait for outstanding tx in delete. */
+		mutex_init(&link->l_tx_mutex, NULL, MUTEX_DRIVER, NULL);
+		cv_init(&link->l_tx_cv, NULL, CV_DRIVER, NULL);
+	}
+	ss->ss_link = link;
+
+	return (0);
+
+bail:
+	if (link->l_mch != NULL) {
+		mac_client_close(link->l_mch, 0);
+	}
+	if (link->l_mh != NULL) {
+		mac_close(link->l_mh);
+	}
+	if (mapped) {
+		/* Undo viona_vm_map() so the mapping is not leaked. */
+		viona_vm_unmap(link);
+	}
+
+	kmem_free(link, sizeof (viona_link_t));
+
+	return (err);
+}
+
+/*
+ * Destroy the link attached to this soft state and release everything
+ * viona_ioc_create() acquired: the MAC client and handle, the guest memory
+ * mapping, the ring locks, the descriptor cache and the link itself.
+ *
+ * Returns ENOSYS when no link exists, otherwise 0.
+ */
+static int
+viona_ioc_delete(viona_soft_state_t *ss)
+{
+	viona_link_t	*link;
+
+	link = ss->ss_link;
+	if (link == NULL) {
+		return (ENOSYS);
+	}
+	if (copy_tx_mblks) {
+		/*
+		 * Block until every outstanding TX chain has been released
+		 * by viona_desb_free(), which broadcasts on l_tx_cv when the
+		 * count drops to zero.
+		 */
+		mutex_enter(&link->l_tx_mutex);
+		while (link->l_tx_outstanding != 0) {
+			cv_wait(&link->l_tx_cv, &link->l_tx_mutex);
+		}
+		mutex_exit(&link->l_tx_mutex);
+	}
+	if (link->l_mch != NULL) {
+		/* Remove the RX callback before closing the client. */
+		mac_rx_clear(link->l_mch);
+		mac_client_close(link->l_mch, 0);
+	}
+	if (link->l_mh != NULL) {
+		mac_close(link->l_mh);
+	}
+
+	viona_vm_unmap(link);
+	mutex_destroy(&link->l_tx_vring.hq_a_mutex);
+	mutex_destroy(&link->l_tx_vring.hq_u_mutex);
+	mutex_destroy(&link->l_rx_vring.hq_a_mutex);
+	mutex_destroy(&link->l_rx_vring.hq_u_mutex);
+	if (copy_tx_mblks) {
+		mutex_destroy(&link->l_tx_mutex);
+		cv_destroy(&link->l_tx_cv);
+	}
+
+	kmem_cache_destroy(link->l_desb_kmc);
+
+	kmem_free(link, sizeof (viona_link_t));
+
+	ss->ss_link = NULL;
+
+	return (0);
+}
+
+/*
+ * Map len bytes of guest physical memory starting at gpa into the kernel
+ * address space and return the kernel virtual address of the mapping.
+ *
+ * Virtual address space is taken from heap_arena and each page is
+ * translated with vm_gpa2hpa() and loaded with a locked read/write
+ * translation.  Returns NULL when len is 0 or the allocation fails.
+ *
+ * NOTE(review): the loop advances in PAGESIZE steps, so len is presumably
+ * expected to be a multiple of PAGESIZE -- confirm against callers.
+ */
+static caddr_t
+viona_mapin_vm_chunk(viona_link_t *link, uint64_t gpa, size_t len)
+{
+	caddr_t		addr;
+	size_t		offset;
+	pfn_t		pfnum;
+
+	if (len == 0)
+		return (NULL);
+
+	addr = vmem_alloc(heap_arena, len, VM_SLEEP);
+	if (addr == NULL)
+		return (NULL);
+
+	for (offset = 0; offset < len; offset += PAGESIZE) {
+		pfnum = btop(vm_gpa2hpa(link->l_vm, gpa + offset, PAGESIZE));
+		/*
+		 * NOTE(review): a failed translation is only caught by this
+		 * assertion; if ASSERT is compiled out the page is mapped
+		 * with whatever frame number was computed -- confirm.
+		 */
+		ASSERT(pfnum);
+		hat_devload(kas.a_hat, addr + offset, PAGESIZE, pfnum,
+		    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
+	}
+
+	return (addr);
+}
+
+/*
+ * Make the guest's memory addressable from the kernel.  Low memory starts
+ * at guest physical address 0 and high memory at 4GB; the resulting kernel
+ * addresses are stored in the link.
+ *
+ * Returns 0 on success, or -1 if low memory could not be mapped or if a
+ * non-empty high memory region could not be mapped.
+ */
+static int
+viona_vm_map(viona_link_t *link)
+{
+	caddr_t	lo_kva, hi_kva;
+
+	lo_kva = viona_mapin_vm_chunk(link, 0, link->l_vm_lomemsize);
+	link->l_vm_lomemaddr = lo_kva;
+	if (lo_kva == NULL) {
+		return (-1);
+	}
+
+	hi_kva = viona_mapin_vm_chunk(link, 4 * (1024 * 1024 * 1024UL),
+	    link->l_vm_himemsize);
+	link->l_vm_himemaddr = hi_kva;
+	if (hi_kva == NULL && link->l_vm_himemsize) {
+		/* An empty high region legitimately maps to NULL. */
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Convert a guest physical address to the kernel virtual address it was
+ * mapped at.  Addresses below the low memory size resolve into the low
+ * mapping; addresses at or above 4GB resolve into the high mapping when
+ * they fall within its size.  Anything else yields NULL.
+ */
+static caddr_t
+viona_gpa2kva(viona_link_t *link, uint64_t gpa)
+{
+	uint64_t	hi_off;
+
+	if (gpa < link->l_vm_lomemsize) {
+		return (link->l_vm_lomemaddr + gpa);
+	}
+
+	/*
+	 * For addresses between the end of low memory and 4GB this wraps
+	 * to a very large offset, which the range check below rejects.
+	 */
+	hi_off = gpa - (4 * GB);
+	if (hi_off >= link->l_vm_himemsize) {
+		return (NULL);
+	}
+
+	return (link->l_vm_himemaddr + hi_off);
+}
+
+/*
+ * Undo viona_vm_map(): for each region that was mapped, drop the locked
+ * translations and give the virtual address range back to heap_arena.
+ * Regions whose address is NULL are skipped.
+ */
+static void
+viona_vm_unmap(viona_link_t *link)
+{
+	if (link->l_vm_lomemaddr != NULL) {
+		hat_unload(kas.a_hat, link->l_vm_lomemaddr,
+		    link->l_vm_lomemsize, HAT_UNLOAD_UNLOCK);
+		vmem_free(heap_arena, link->l_vm_lomemaddr,
+		    link->l_vm_lomemsize);
+	}
+
+	if (link->l_vm_himemaddr != NULL) {
+		hat_unload(kas.a_hat, link->l_vm_himemaddr,
+		    link->l_vm_himemsize, HAT_UNLOAD_UNLOCK);
+		vmem_free(heap_arena, link->l_vm_himemaddr,
+		    link->l_vm_himemsize);
+	}
+}
+
+/*
+ * Common ring setup for the RX and TX queues.
+ *
+ * Copies in the user's vioc_ring_init_t and locates the three parts of the
+ * legacy virtio ring inside the mapped guest memory:
+ *
+ *   descriptor table  at ri_qaddr, qsize entries
+ *   avail ring        directly after the table: flags, idx, ring[qsize]
+ *                     and a trailing used_event word, all 16 bits wide
+ *   used ring         at the next VRING_ALIGN boundary after the avail
+ *                     ring: flags, idx, then qsize virtio_used entries
+ *
+ * The queue size must be a non-zero power of two because ring indexes are
+ * reduced with "& (hq_size - 1)" elsewhere in this file.  The whole ring
+ * must sit inside one contiguous kernel mapping.  hq is modified only
+ * after every check has passed.
+ *
+ * Returns 0 on success, EFAULT if the request cannot be copied in, or
+ * EINVAL if the queue size or placement is invalid.
+ */
+static int
+viona_ioc_ring_init_common(viona_link_t *link, viona_vring_hqueue_t *hq,
+    vioc_ring_init_t *u_ri)
+{
+	vioc_ring_init_t	k_ri;
+	uint64_t		qsize;
+	uint64_t		desc_gpa, avail_gpa, used_gpa, last_gpa;
+	caddr_t			desc_kva, avail_kva, used_kva, last_kva;
+
+	if (copyin(u_ri, &k_ri, sizeof (k_ri)) != 0) {
+		return (EFAULT);
+	}
+
+	/*
+	 * Ring indexes are 16 bits wide, so 32768 is the largest power of
+	 * two a queue can have.
+	 */
+	qsize = k_ri.ri_qsize;
+	if (qsize == 0 || qsize > 32768 || (qsize & (qsize - 1)) != 0) {
+		return (EINVAL);
+	}
+
+	desc_gpa = k_ri.ri_qaddr;
+	avail_gpa = desc_gpa + qsize * sizeof (struct virtio_desc);
+	/* flags + idx + ring[qsize] + used_event precede the used ring. */
+	used_gpa = P2ROUNDUP(avail_gpa + (3 + qsize) * sizeof (uint16_t),
+	    VRING_ALIGN);
+	/* Last byte of the used ring: flags + idx + ring[qsize]. */
+	last_gpa = used_gpa + 2 * sizeof (uint16_t) +
+	    qsize * sizeof (struct virtio_used) - 1;
+
+	desc_kva = viona_gpa2kva(link, desc_gpa);
+	avail_kva = viona_gpa2kva(link, avail_gpa);
+	used_kva = viona_gpa2kva(link, used_gpa);
+	last_kva = viona_gpa2kva(link, last_gpa);
+	if (desc_kva == NULL || avail_kva == NULL || used_kva == NULL ||
+	    last_kva == NULL) {
+		return (EINVAL);
+	}
+	/*
+	 * Low and high guest memory are separate kernel mappings; reject a
+	 * ring whose ends do not lie in the same one.
+	 */
+	if ((uint64_t)(last_kva - desc_kva) != last_gpa - desc_gpa) {
+		return (EINVAL);
+	}
+
+	hq->hq_size = k_ri.ri_qsize;
+	hq->hq_baseaddr = desc_kva;
+
+	hq->hq_avail_flags = (uint16_t *)avail_kva;
+	hq->hq_avail_idx = hq->hq_avail_flags + 1;
+	hq->hq_avail_ring = hq->hq_avail_flags + 2;
+
+	hq->hq_used_flags = (uint16_t *)used_kva;
+	hq->hq_used_idx = hq->hq_used_flags + 1;
+	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
+
+	/*
+	 * Initialize queue indexes
+	 */
+	hq->hq_cur_aidx = 0;
+
+	return (0);
+}
+
+/*
+ * Set up the link's receive ring from a user-supplied vioc_ring_init_t.
+ * Returns whatever viona_ioc_ring_init_common() returns.
+ */
+static int
+viona_ioc_rx_ring_init(viona_link_t *link, vioc_ring_init_t *u_ri)
+{
+	return (viona_ioc_ring_init_common(link, &link->l_rx_vring, u_ri));
+}
+
+/*
+ * Set up the link's transmit ring from a user-supplied vioc_ring_init_t.
+ * Returns whatever viona_ioc_ring_init_common() returns.
+ */
+static int
+viona_ioc_tx_ring_init(viona_link_t *link, vioc_ring_init_t *u_ri)
+{
+	return (viona_ioc_ring_init_common(link, &link->l_tx_vring, u_ri));
+}
+
+/*
+ * Reset the host-side state of a ring.  The only state kept per ring is
+ * the index of the next avail entry to consume, which goes back to 0.
+ * Always returns 0.
+ */
+static int
+viona_ioc_ring_reset_common(viona_vring_hqueue_t *hq)
+{
+	/*
+	 * Reset all soft state
+	 */
+	hq->hq_cur_aidx = 0;
+
+	return (0);
+}
+
+/*
+ * Reset the receive ring.  The MAC receive callback is removed first so
+ * no further packets are delivered into the ring, then the ring's soft
+ * state is cleared.
+ */
+static int
+viona_ioc_rx_ring_reset(viona_link_t *link)
+{
+	mac_rx_clear(link->l_mch);
+
+	return (viona_ioc_ring_reset_common(&link->l_rx_vring));
+}
+
+/*
+ * Reset the transmit ring by clearing its soft state.
+ */
+static int
+viona_ioc_tx_ring_reset(viona_link_t *link)
+{
+	return (viona_ioc_ring_reset_common(&link->l_tx_vring));
+}
+
+/*
+ * Handle a guest notification on the receive ring: set the NO_NOTIFY flag
+ * in the used ring and install viona_rx() as the MAC receive callback for
+ * this link.
+ *
+ * NOTE(review): hq_used_flags is dereferenced without a check, so this
+ * presumably must not be called before the ring has been initialised --
+ * confirm against the ioctl handler.
+ */
+static void
+viona_ioc_rx_ring_kick(viona_link_t *link)
+{
+	viona_vring_hqueue_t	*hq = &link->l_rx_vring;
+
+	atomic_or_16(hq->hq_used_flags, VRING_USED_F_NO_NOTIFY);
+
+	mac_rx_set(link->l_mch, viona_rx, link);
+}
+
+/*
+ * Return the number of available descriptors in the vring taking care
+ * of the 16-bit index wraparound.
+ */
+static inline int
+viona_hq_num_avail(viona_vring_hqueue_t *hq)
+{
+	uint16_t ndesc;
+
+	/*
+	 * We're just computing (a - b) modulo 2^16.
+	 *
+	 * The only glitch here is that in standard C,
+	 * uint16_t promotes to (signed) int when int has
+	 * more than 16 bits (pretty much always now), so
+	 * we have to force it back to unsigned.
+	 */
+	ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
+
+	/* The guest cannot legitimately post more entries than the ring holds. */
+	ASSERT(ndesc <= hq->hq_size);
+
+	return (ndesc);
+}
+
+/*
+ * Handle a guest notification on the transmit ring.
+ *
+ * With guest notifications suppressed (NO_NOTIFY set in the used ring),
+ * every available chain is handed to viona_tx().  When copy_tx_mblks is
+ * set the thread then waits, interruptibly, for a wakeup on l_tx_cv if
+ * chains are still outstanding.  Notifications are re-enabled and the
+ * ring is checked once more to catch entries the guest posted in the
+ * meantime.
+ *
+ * viona_tx() returns nothing, so progress is detected by watching
+ * hq_cur_aidx: if a call leaves it unchanged the head chain could not be
+ * popped (vq_popchain() failed on a malformed chain) and retrying would
+ * loop forever.  In that case processing stops with notifications
+ * re-enabled.
+ */
+static void
+viona_ioc_tx_ring_kick(viona_link_t *link)
+{
+	viona_vring_hqueue_t	*hq = &link->l_tx_vring;
+	boolean_t		stalled = B_FALSE;
+
+	do {
+		atomic_or_16(hq->hq_used_flags, VRING_USED_F_NO_NOTIFY);
+		while (viona_hq_num_avail(hq)) {
+			uint16_t	before = (uint16_t)hq->hq_cur_aidx;
+
+			viona_tx(link, hq);
+			if ((uint16_t)hq->hq_cur_aidx == before) {
+				stalled = B_TRUE;
+				break;
+			}
+		}
+		if (copy_tx_mblks) {
+			mutex_enter(&link->l_tx_mutex);
+			/*
+			 * A single wait, not a loop: the thread may come
+			 * back on a signal with chains still outstanding.
+			 */
+			if (link->l_tx_outstanding != 0) {
+				cv_wait_sig(&link->l_tx_cv, &link->l_tx_mutex);
+			}
+			mutex_exit(&link->l_tx_mutex);
+		}
+		atomic_and_16(hq->hq_used_flags, ~VRING_USED_F_NO_NOTIFY);
+	} while (!stalled && viona_hq_num_avail(hq));
+}
+
+/*
+ * Clear the link's pending receive interrupt flag, which viona_rx() sets
+ * and viona_chpoll() reports as POLLIN.  Always returns 0.
+ */
+static int
+viona_ioc_rx_intr_clear(viona_link_t *link)
+{
+	link->l_rx_intr = 0;
+
+	return (0);
+}
+
+/*
+ * Clear the link's pending transmit interrupt flag, which
+ * viona_desb_free() sets and viona_chpoll() reports as POLLOUT.
+ * Always returns 0.
+ */
+static int
+viona_ioc_tx_intr_clear(viona_link_t *link)
+{
+	link->l_tx_intr = 0;
+
+	return (0);
+}
+/* Upper bound on the descriptors walked for one chain (loop protection). */
+#define	VQ_MAX_DESCRIPTORS	512
+
+/*
+ * Pop the next available descriptor chain from hq and describe its
+ * buffers in iov[], which has room for n_iov entries.  Guest physical
+ * addresses are translated to kernel addresses with viona_gpa2kva().
+ * Indirect descriptor tables are followed one level deep; an indirect
+ * entry inside an indirect table is rejected.
+ *
+ * Returns:
+ *   > 0  number of iov entries filled in; *cookie holds the chain's head
+ *        descriptor index and hq_cur_aidx has been advanced
+ *     0  no chain is available
+ *    -1  the chain is malformed, refers to unmapped guest memory, or needs
+ *        more than n_iov entries; hq_cur_aidx is NOT advanced
+ */
+static int
+vq_popchain(viona_link_t *link, viona_vring_hqueue_t *hq, struct iovec *iov,
+    int n_iov, uint16_t *cookie)
+{
+	int			i;
+	int			ndesc, nindir;
+	int			idx, head, next;
+	struct virtio_desc	*vdir, *vindir, *vp;
+
+	idx = hq->hq_cur_aidx;
+	ndesc = (uint16_t)((unsigned)*hq->hq_avail_idx - (unsigned)idx);
+
+	if (ndesc == 0)
+		return (0);
+	if (ndesc > hq->hq_size) {
+		cmn_err(CE_NOTE, "ndesc (%d) out of range\n", ndesc);
+		return (-1);
+	}
+
+	head = hq->hq_avail_ring[idx & (hq->hq_size - 1)];
+	next = head;
+
+	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
+		if (next >= hq->hq_size) {
+			cmn_err(CE_NOTE, "descriptor index (%d)"
+			    " out of range\n", next);
+			return (-1);
+		}
+
+		vdir = (struct virtio_desc *)(hq->hq_baseaddr +
+		    next * sizeof (struct virtio_desc));
+		if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
+			/* iov[] holds n_iov entries: valid indexes are < n_iov. */
+			if (i >= n_iov)
+				return (-1);
+			iov[i].iov_base = viona_gpa2kva(link, vdir->vd_addr);
+			if (iov[i].iov_base == NULL) {
+				cmn_err(CE_NOTE, "invalid guest physical"
+				    " address 0x%"PRIx64"\n", vdir->vd_addr);
+				return (-1);
+			}
+			iov[i++].iov_len = vdir->vd_len;
+		} else {
+			/*
+			 * vd_len is the byte size of the indirect table; 16
+			 * is presumably sizeof (struct virtio_desc).
+			 */
+			nindir = vdir->vd_len / 16;
+			if ((vdir->vd_len & 0xf) || nindir == 0) {
+				cmn_err(CE_NOTE, "invalid indir len 0x%x\n",
+				    vdir->vd_len);
+				return (-1);
+			}
+			vindir = (struct virtio_desc *)
+			    viona_gpa2kva(link, vdir->vd_addr);
+			if (vindir == NULL) {
+				cmn_err(CE_NOTE, "invalid guest physical"
+				    " address 0x%"PRIx64"\n", vdir->vd_addr);
+				return (-1);
+			}
+			next = 0;
+			for (;;) {
+				vp = &vindir[next];
+				if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
+					cmn_err(CE_NOTE, "indirect desc"
+					    " has INDIR flag\n");
+					return (-1);
+				}
+				if (i >= n_iov)
+					return (-1);
+				iov[i].iov_base =
+				    viona_gpa2kva(link, vp->vd_addr);
+				if (iov[i].iov_base == NULL) {
+					cmn_err(CE_NOTE, "invalid guest"
+					    " physical address 0x%"PRIx64"\n",
+					    vp->vd_addr);
+					return (-1);
+				}
+				iov[i++].iov_len = vp->vd_len;
+
+				if (i > VQ_MAX_DESCRIPTORS)
+					goto loopy;
+				if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
+					break;
+
+				next = vp->vd_next;
+				if (next >= nindir) {
+					cmn_err(CE_NOTE, "invalid next"
+					    " %d > %d\n", next, nindir);
+					return (-1);
+				}
+			}
+		}
+		if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0) {
+			/* End of chain: consume the avail entry. */
+			*cookie = head;
+			hq->hq_cur_aidx++;
+			return (i);
+		}
+	}
+
+loopy:
+	cmn_err(CE_NOTE, "%d > descriptor loop count\n", i);
+
+	return (-1);
+}
+
+/*
+ * Return one descriptor chain to the guest: record its head index
+ * (cookie) and length in the next used-ring slot, then publish the new
+ * used index.  The barrier keeps the slot contents ordered ahead of the
+ * index update.
+ */
+static void
+vq_pushchain(viona_vring_hqueue_t *hq, uint32_t len, uint16_t cookie)
+{
+	int			used_idx = *hq->hq_used_idx;
+	struct virtio_used	*slot;
+
+	slot = &hq->hq_used_ring[used_idx & (hq->hq_size - 1)];
+	slot->vu_idx = cookie;
+	slot->vu_tlen = len;
+
+	membar_producer();
+	*hq->hq_used_idx = used_idx + 1;
+}
+
+/*
+ * Return num_bufs entries to the guest in one step.  elem[k] supplies the
+ * id and length for the k-th consecutive used-ring slot; the used index
+ * is published once, after a barrier, advanced by num_bufs.
+ */
+static void
+vq_pushchain_mrgrx(viona_vring_hqueue_t *hq, int num_bufs, used_elem_t *elem)
+{
+	int	base = *hq->hq_used_idx;
+	int	k;
+
+	for (k = 0; k < num_bufs; k++) {
+		struct virtio_used *slot;
+
+		slot = &hq->hq_used_ring[(base + k) & (hq->hq_size - 1)];
+		slot->vu_idx = elem[k].id;
+		slot->vu_tlen = elem[k].len;
+	}
+
+	membar_producer();
+	*hq->hq_used_idx = base + num_bufs;
+}
+
+/*
+ * Copy packet bytes from the message mp into one guest buffer.
+ *
+ * mp:         message to copy from (the b_cont chain is walked).
+ * copied_buf: number of bytes of mp already copied by earlier calls; that
+ *             many bytes are skipped from the start of the message.
+ * buf:        destination address inside the guest buffer.
+ * iov:        the guest buffer being filled; iov_len bounds the copy.
+ * i:          index of iov in the caller's array.  For i == 0 the first
+ *             rxhdr_size bytes of the buffer are reserved for the virtio
+ *             header, so the room for data is reduced accordingly.
+ * rxhdr_size: size of the virtio header sharing iov[0], or 0 if none.
+ *
+ * Returns the number of bytes copied into buf, which is 0 when the buffer
+ * has no room for data or the message has nothing left to copy.
+ */
+static int
+copy_in_mblk(mblk_t *mp, int copied_buf, caddr_t buf, struct iovec *iov,
+    int i, int rxhdr_size)
+{
+	size_t	skip = (copied_buf > 0) ? (size_t)copied_buf : 0;
+	size_t	room = iov->iov_len;
+	size_t	copied = 0;
+	mblk_t	*ml;
+
+	if (i == 0 && rxhdr_size > 0) {
+		/* A buffer too small for the header has no room for data. */
+		if (room <= (size_t)rxhdr_size)
+			return (0);
+		room -= rxhdr_size;
+	}
+
+	for (ml = mp; ml != NULL && copied < room; ml = ml->b_cont) {
+		size_t	avail = MBLKL(ml);
+		size_t	len;
+
+		/* Step over blocks consumed entirely by earlier calls. */
+		if (skip >= avail) {
+			skip -= avail;
+			continue;
+		}
+		avail -= skip;
+
+		len = room - copied;
+		if (avail < len)
+			len = avail;
+		bcopy(ml->b_rptr + skip, buf + copied, len);
+		copied += len;
+
+		/* Only the first block copied from starts part-way in. */
+		skip = 0;
+	}
+
+	return ((int)copied);
+}
+
+/*
+ * MAC receive callback: copy each packet of the chain mp (linked through
+ * b_next) into guest receive buffers and hand the buffers back through
+ * the used ring.
+ *
+ * With VIRTIO_NET_F_MRG_RXBUF negotiated a packet may span several popped
+ * chains; the header at the start of the first buffer records how many
+ * buffers were used.  Otherwise a packet must fit in a single chain whose
+ * first descriptor starts with the virtio header.
+ *
+ * Processing stops when the ring runs out of chains or the packets run
+ * out.  If a packet cannot be placed, or the guest's buffers are unusable,
+ * the whole remaining mblk chain is freed and the function returns at
+ * once.  The message chain is always freed before returning.
+ *
+ * NOTE(review): on the early-return paths chains that were already popped
+ * are not pushed to the used ring, and no poll wakeup is issued.
+ * NOTE(review): uelem[] is filled per iov entry while the used ring is
+ * advanced by num_pops (one per popped chain); the two only agree when
+ * every chain holds exactly one descriptor -- confirm the intended layout.
+ */
+static void
+viona_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+    boolean_t loopback)
+{
+	viona_link_t		*link = arg;
+	viona_vring_hqueue_t	*hq = &link->l_rx_vring;
+	mblk_t			*mp0 = mp;
+
+	while (viona_hq_num_avail(hq)) {
+		struct iovec		iov[VTNET_MAXSEGS];
+		size_t			mblklen;
+		int			n, i = 0;
+		uint16_t		cookie;
+		struct virtio_net_hdr	*vrx;
+		struct virtio_net_mrgrxhdr *vmrgrx;
+		caddr_t			buf;
+		int			total_len = 0;
+		int			copied_buf = 0;
+		int			num_bufs = 0;
+		int			num_pops = 0;
+		used_elem_t		uelem[VTNET_MAXSEGS];
+
+		if (mp == NULL) {
+			break;
+		}
+		mblklen = msgsize(mp);
+		if (mblklen == 0) {
+			break;
+		}
+
+		mutex_enter(&hq->hq_a_mutex);
+		n = vq_popchain(link, hq, iov, VTNET_MAXSEGS, &cookie);
+		mutex_exit(&hq->hq_a_mutex);
+		if (n <= 0) {
+			break;
+		}
+		num_pops++;
+		if (link->l_features & VIRTIO_NET_F_MRG_RXBUF) {
+			int total_n = n;
+			int mrgrxhdr_size = sizeof (struct virtio_net_mrgrxhdr);
+			/*
+			 * Get a pointer to the rx header, and use the
+			 * data immediately following it for the packet buffer.
+			 */
+			vmrgrx = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
+			if (iov[0].iov_len < mrgrxhdr_size) {
+				/* First buffer cannot hold the header. */
+				freemsgchain(mp0);
+				return;
+			}
+			/*
+			 * The data always starts right after the header in
+			 * iov[0], however many descriptors the chain has.
+			 */
+			buf = iov[0].iov_base + mrgrxhdr_size;
+			while (mblklen > copied_buf) {
+				if (i >= VTNET_MAXSEGS) {
+					/* iov[] and uelem[] are full. */
+					freemsgchain(mp0);
+					return;
+				}
+				if (total_n == i) {
+					/*
+					 * Out of buffers: pop another chain
+					 * into the unused tail of iov[].
+					 */
+					mutex_enter(&hq->hq_a_mutex);
+					n = vq_popchain(link, hq, &iov[i],
+					    VTNET_MAXSEGS - i, &cookie);
+					mutex_exit(&hq->hq_a_mutex);
+					if (n <= 0) {
+						freemsgchain(mp0);
+						return;
+					}
+					num_pops++;
+					total_n += n;
+				}
+				if (total_n > i) {
+					int copied_chunk = 0;
+					if (i != 0) {
+						buf = iov[i].iov_base;
+					}
+					copied_chunk = copy_in_mblk(mp,
+					    copied_buf, buf, &iov[i], i,
+					    mrgrxhdr_size);
+					copied_buf += copied_chunk;
+					uelem[i].id = cookie;
+					uelem[i].len = copied_chunk;
+					if (i == 0) {
+						uelem[i].len += mrgrxhdr_size;
+					}
+				}
+				num_bufs++;
+				i++;
+			}
+		} else {
+			boolean_t virt_hdr_incl_iov = B_FALSE;
+			int rxhdr_size = sizeof (struct virtio_net_hdr);
+			/* First element is header */
+			vrx = (struct virtio_net_hdr *)iov[0].iov_base;
+			if (iov[0].iov_len < rxhdr_size) {
+				/*
+				 * A header split across descriptors is not
+				 * supported.  Drop the packet.
+				 */
+				freemsgchain(mp0);
+				return;
+			}
+			if (n == 1 || iov[0].iov_len > rxhdr_size) {
+				buf = iov[0].iov_base + rxhdr_size;
+				virt_hdr_incl_iov = B_TRUE;
+				total_len += rxhdr_size;
+			} else {
+				total_len = iov[0].iov_len;
+			}
+			/* A header-only first descriptor carries no data. */
+			if (iov[0].iov_len == rxhdr_size)
+				i++;
+			while (mblklen > copied_buf) {
+				if (n > i) {
+					int copied_chunk = 0;
+					if (i != 0) {
+						buf = iov[i].iov_base;
+					}
+					/*
+					 * In case of non-mrgrx buf, first
+					 * descriptor always has header and
+					 * rest of the descriptors have data.
+					 * But it is not guaranteed that first
+					 * descriptor will only have virtio
+					 * header. It might also have data.
+					 */
+					if (virt_hdr_incl_iov) {
+						copied_chunk = copy_in_mblk(mp,
+						    copied_buf, buf, &iov[i],
+						    i, rxhdr_size);
+					} else {
+						copied_chunk = copy_in_mblk(mp,
+						    copied_buf, buf, &iov[i],
+						    i, 0);
+					}
+					copied_buf += copied_chunk;
+					total_len += copied_chunk;
+				} else {
+					/*
+					 * Drop packet as it cant fit
+					 * in buf provided by guest.
+					 */
+					freemsgchain(mp0);
+					return;
+				}
+				i++;
+			}
+		}
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers, which is always 1 without TSO
+		 * support.
+		 */
+		if (link->l_features & VIRTIO_NET_F_MRG_RXBUF) {
+			memset(vmrgrx, 0, sizeof (struct virtio_net_mrgrxhdr));
+			vmrgrx->vrh_bufs = num_bufs;
+			/*
+			 * Make sure iov[0].iov_len >= MIN_BUF_SIZE
+			 * otherwise guest will consider it as invalid frame.
+			 */
+			if (num_bufs == 1 && uelem[0].len < MIN_BUF_SIZE) {
+				uelem[0].len = MIN_BUF_SIZE;
+			}
+			/*
+			 * Release this chain and handle more chains.
+			 */
+			mutex_enter(&hq->hq_u_mutex);
+			vq_pushchain_mrgrx(hq, num_pops, uelem);
+			mutex_exit(&hq->hq_u_mutex);
+		} else {
+			memset(vrx, 0, sizeof (struct virtio_net_hdr));
+			if (total_len < MIN_BUF_SIZE) {
+				total_len = MIN_BUF_SIZE;
+			}
+			/*
+			 * Release this chain and handle more chains.
+			 */
+			mutex_enter(&hq->hq_u_mutex);
+			vq_pushchain(hq, total_len, cookie);
+			mutex_exit(&hq->hq_u_mutex);
+		}
+
+		mp = mp->b_next;
+	}
+
+	/* Raise the RX "interrupt" unless the guest has masked it. */
+	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
+		if (atomic_cas_uint(&link->l_rx_intr, 0, 1) == 0) {
+			pollwakeup(&link->l_pollhead, POLLIN);
+		}
+	}
+
+	freemsgchain(mp0);
+}
+
+/*
+ * Drop one reference on a TX descriptor-chain tracker.  This is installed
+ * as the free routine of the mblks built by viona_tx() and is also called
+ * directly by it.
+ *
+ * When the last reference goes away the chain is returned to the guest
+ * through the TX used ring, the tracker is returned to its cache, a
+ * POLLOUT wakeup is issued unless the guest has masked interrupts, and,
+ * when copy_tx_mblks is set, the outstanding count is decremented with a
+ * broadcast to waiters once it reaches zero.
+ */
+static void
+viona_desb_free(viona_desb_t *dp)
+{
+	viona_link_t		*link;
+	viona_vring_hqueue_t	*hq;
+	struct virtio_used	*vu;
+	int			uidx;
+	uint_t			ref;
+
+	/* Only the caller that drops the count to zero does the release. */
+	ref = atomic_dec_uint_nv(&dp->d_ref);
+	if (ref != 0)
+		return;
+
+	link = dp->d_link;
+	hq = &link->l_tx_vring;
+
+	mutex_enter(&hq->hq_u_mutex);
+	vq_pushchain(hq, dp->d_len, dp->d_cookie);
+	mutex_exit(&hq->hq_u_mutex);
+
+	/* dp must not be touched past this point; link was saved above. */
+	kmem_cache_free(link->l_desb_kmc, dp);
+
+	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
+		if (atomic_cas_uint(&link->l_tx_intr, 0, 1) == 0) {
+			pollwakeup(&link->l_pollhead, POLLOUT);
+		}
+	}
+	if (copy_tx_mblks) {
+		mutex_enter(&link->l_tx_mutex);
+		if (--link->l_tx_outstanding == 0) {
+			cv_broadcast(&link->l_tx_cv);
+		}
+		mutex_exit(&link->l_tx_mutex);
+	}
+}
+
+/*
+ * Transmit one descriptor chain from the guest's TX ring.
+ *
+ * iov[0] of the chain is not transmitted (only its length is counted);
+ * each following entry becomes one mblk of the outgoing message:
+ *
+ *   copy_tx_mblks set:   the mblk is created with desballoc() directly on
+ *                        the guest buffer.  Every such mblk holds one
+ *                        reference on the tracker dp, and the chain goes
+ *                        back to the guest from viona_desb_free() when the
+ *                        last mblk is freed.
+ *   copy_tx_mblks clear: the data is copied into an allocb() buffer, so
+ *                        the chain is returned to the guest right away.
+ *
+ * If no chain can be popped nothing is done; hq_cur_aidx is then left
+ * unchanged, which the caller can observe.  If an mblk cannot be
+ * allocated the packet is dropped and the chain is still returned to the
+ * guest.  A chain with no payload entries is returned without calling
+ * mac_tx().
+ */
+static void
+viona_tx(viona_link_t *link, viona_vring_hqueue_t *hq)
+{
+	struct iovec		iov[VTNET_MAXSEGS];
+	uint16_t		cookie;
+	int			i, n;
+	mblk_t			*mp_head, *mp_tail, *mp;
+	viona_desb_t		*dp;
+	mac_client_handle_t	link_mch = link->l_mch;
+	boolean_t		failed = B_FALSE;
+
+	mp_head = mp_tail = NULL;
+
+	mutex_enter(&hq->hq_a_mutex);
+	n = vq_popchain(link, hq, iov, VTNET_MAXSEGS, &cookie);
+	mutex_exit(&hq->hq_a_mutex);
+	if (n <= 0) {
+		/* Nothing was popped: iov[] and cookie are not valid. */
+		return;
+	}
+
+	dp = kmem_cache_alloc(link->l_desb_kmc, KM_SLEEP);
+	dp->d_frtn.free_func = viona_desb_free;
+	dp->d_frtn.free_arg = (void *)dp;
+	dp->d_link = link;
+	dp->d_cookie = cookie;
+
+	dp->d_ref = 0;
+	/* The length reported to the guest covers the whole chain. */
+	dp->d_len = 0;
+	for (i = 0; i < n; i++) {
+		dp->d_len += iov[i].iov_len;
+	}
+
+	for (i = 1; i < n; i++) {
+		if (copy_tx_mblks) {
+			mp = desballoc((uchar_t *)iov[i].iov_base,
+			    iov[i].iov_len, BPRI_MED, &dp->d_frtn);
+		} else {
+			mp = allocb(iov[i].iov_len, BPRI_MED);
+		}
+		if (mp == NULL) {
+			failed = B_TRUE;
+			break;
+		}
+		if (copy_tx_mblks) {
+			/* One reference per mblk that will call the frtn. */
+			dp->d_ref++;
+		} else {
+			bcopy((uchar_t *)iov[i].iov_base, mp->b_wptr,
+			    iov[i].iov_len);
+		}
+		mp->b_wptr += iov[i].iov_len;
+		if (mp_head == NULL) {
+			ASSERT(mp_tail == NULL);
+			mp_head = mp;
+		} else {
+			ASSERT(mp_tail != NULL);
+			mp_tail->b_cont = mp;
+		}
+		mp_tail = mp;
+	}
+
+	if (copy_tx_mblks) {
+		/* Must precede any path that can reach viona_desb_free(). */
+		mutex_enter(&link->l_tx_mutex);
+		link->l_tx_outstanding++;
+		mutex_exit(&link->l_tx_mutex);
+	}
+
+	if (copy_tx_mblks == B_FALSE || mp_head == NULL) {
+		/*
+		 * No mblk holds a reference on dp, so take the single
+		 * reference here and release it to return the chain.
+		 */
+		dp->d_ref = 1;
+		viona_desb_free(dp);
+	}
+
+	if (failed) {
+		/*
+		 * Drop the partial packet.  Freeing desballoc'd mblks runs
+		 * viona_desb_free() for each, returning the chain.
+		 */
+		if (mp_head != NULL) {
+			freemsg(mp_head);
+		}
+		return;
+	}
+	if (mp_head == NULL) {
+		return;
+	}
+
+	mac_tx(link_mch, mp_head, 0, MAC_DROP_ON_NO_DESC, NULL);
+}
diff --git a/usr/src/uts/i86pc/io/viona/viona.conf b/usr/src/uts/i86pc/io/viona/viona.conf
new file mode 100644
index 0000000000..e66488531a
--- /dev/null
+++ b/usr/src/uts/i86pc/io/viona/viona.conf
@@ -0,0 +1,14 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+name="viona" parent="pseudo";
diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdv.c b/usr/src/uts/i86pc/io/vmm/amd/amdv.c
new file mode 100644
index 0000000000..6b62daae6c
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/amd/amdv.c
@@ -0,0 +1,271 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/amd/amdv.c 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/amd/amdv.c 245678 2013-01-20 03:42:49Z neel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/smp.h>
+
+#include <machine/vmm.h>
+#ifdef	__FreeBSD__
+#include "io/iommu.h"
+#endif
+
+/*
+ * AMD-V backend for the vmm_ops interface.  Nothing is implemented: every
+ * entry point logs "<function name>: not implemented" and returns a
+ * failure value.
+ */
+
+/* Module initialisation stub; always fails with ENXIO. */
+static int
+amdv_init(void)
+{
+
+	printf("amdv_init: not implemented\n");
+	return (ENXIO);
+}
+
+/* Module cleanup stub; always fails with ENXIO. */
+static int
+amdv_cleanup(void)
+{
+
+	printf("amdv_cleanup: not implemented\n");
+	return (ENXIO);
+}
+
+/* Per-VM initialisation stub; always returns NULL. */
+static void *
+amdv_vminit(struct vm *vm)
+{
+
+	printf("amdv_vminit: not implemented\n");
+	return (NULL);
+}
+
+/* vCPU run stub; always fails with ENXIO. */
+static int
+amdv_vmrun(void *arg, int vcpu, register_t rip)
+{
+
+	printf("amdv_vmrun: not implemented\n");
+	return (ENXIO);
+}
+
+/* Per-VM cleanup stub. */
+static void
+amdv_vmcleanup(void *arg)
+{
+
+	printf("amdv_vmcleanup: not implemented\n");
+}
+
+/* Guest memory mapping stub; always fails with EINVAL. */
+static int
+amdv_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+    vm_memattr_t attr, int prot, boolean_t spok)
+{
+
+	printf("amdv_vmmmap_set: not implemented\n");
+	return (EINVAL);
+}
+
+/*
+ * Guest memory lookup stub.
+ *
+ * NOTE(review): the return type is a physical address, yet the errno
+ * value EINVAL is returned; callers cannot tell it from a real address.
+ */
+static vm_paddr_t
+amdv_vmmmap_get(void *arg, vm_paddr_t gpa)
+{
+
+	printf("amdv_vmmmap_get: not implemented\n");
+	return (EINVAL);
+}
+
+/* Register read stub; always fails with EINVAL. */
+static int
+amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
+{
+
+	printf("amdv_getreg: not implemented\n");
+	return (EINVAL);
+}
+
+/* Register write stub; always fails with EINVAL. */
+static int
+amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
+{
+
+	printf("amdv_setreg: not implemented\n");
+	return (EINVAL);
+}
+
+/* Segment descriptor read stub; always fails with EINVAL. */
+static int
+amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
+{
+
+	printf("amdv_getdesc: not implemented\n");
+	return (EINVAL);
+}
+
+/* Segment descriptor write stub; always fails with EINVAL. */
+static int
+amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
+{
+
+	printf("amdv_setdesc: not implemented\n");
+	return (EINVAL);
+}
+
+/* Capability query stub; always fails with EINVAL. */
+static int
+amdv_getcap(void *arg, int vcpu, int type, int *retval)
+{
+
+	printf("amdv_getcap: not implemented\n");
+	return (EINVAL);
+}
+
+/* Capability update stub; always fails with EINVAL. */
+static int
+amdv_setcap(void *arg, int vcpu, int type, int val)
+{
+
+	printf("amdv_setcap: not implemented\n");
+	return (EINVAL);
+}
+
+/*
+ * Operations vector for AMD.  The initialiser is positional, so the
+ * entries must stay in the order of the members of struct vmm_ops.
+ */
+struct vmm_ops vmm_ops_amd = {
+	amdv_init,
+	amdv_cleanup,
+	amdv_vminit,
+	amdv_vmrun,
+	amdv_vmcleanup,
+	amdv_vmmmap_set,
+	amdv_vmmmap_get,
+	amdv_getreg,
+	amdv_setreg,
+	amdv_getdesc,
+	amdv_setdesc,
+	amdv_getcap,
+	amdv_setcap
+};
+
+/*
+ * AMD IOMMU backend.  Nothing is implemented: every entry point logs
+ * "<function name>: not implemented" and, where a value is expected,
+ * returns a failure or empty value.
+ */
+
+/* Initialisation stub; always fails with ENXIO. */
+static int
+amd_iommu_init(void)
+{
+
+	printf("amd_iommu_init: not implemented\n");
+	return (ENXIO);
+}
+
+/* Cleanup stub. */
+static void
+amd_iommu_cleanup(void)
+{
+
+	printf("amd_iommu_cleanup: not implemented\n");
+}
+
+/* Enable stub. */
+static void
+amd_iommu_enable(void)
+{
+
+	printf("amd_iommu_enable: not implemented\n");
+}
+
+/* Disable stub. */
+static void
+amd_iommu_disable(void)
+{
+
+	printf("amd_iommu_disable: not implemented\n");
+}
+
+/* Domain creation stub; always returns NULL. */
+static void *
+amd_iommu_create_domain(vm_paddr_t maxaddr)
+{
+
+	printf("amd_iommu_create_domain: not implemented\n");
+	return (NULL);
+}
+
+/* Domain destruction stub. */
+static void
+amd_iommu_destroy_domain(void *domain)
+{
+
+	printf("amd_iommu_destroy_domain: not implemented\n");
+}
+
+/* Mapping creation stub; always returns 0. */
+static uint64_t
+amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
+			 uint64_t len)
+{
+
+	printf("amd_iommu_create_mapping: not implemented\n");
+	return (0);
+}
+
+/* Mapping removal stub; always returns 0. */
+static uint64_t
+amd_iommu_remove_mapping(void *domain, vm_paddr_t gpa, uint64_t len)
+{
+
+	printf("amd_iommu_remove_mapping: not implemented\n");
+	return (0);
+}
+
+/* Device attach stub. */
+static void
+amd_iommu_add_device(void *domain, int bus, int slot, int func)
+{
+
+	printf("amd_iommu_add_device: not implemented\n");
+}
+
+/* Device detach stub. */
+static void
+amd_iommu_remove_device(void *domain, int bus, int slot, int func)
+{
+
+	printf("amd_iommu_remove_device: not implemented\n");
+}
+
+/* TLB invalidation stub. */
+static void
+amd_iommu_invalidate_tlb(void *domain)
+{
+
+	printf("amd_iommu_invalidate_tlb: not implemented\n");
+}
+
+/*
+ * The operations vector is only built when __FreeBSD__ is defined; it is
+ * the only reference to the static functions above in this part of the
+ * file, so without it they are unreferenced.
+ */
+#ifdef	__FreeBSD__
+struct iommu_ops iommu_ops_amd = {
+	amd_iommu_init,
+	amd_iommu_cleanup,
+	amd_iommu_enable,
+	amd_iommu_disable,
+	amd_iommu_create_domain,
+	amd_iommu_destroy_domain,
+	amd_iommu_create_mapping,
+	amd_iommu_remove_mapping,
+	amd_iommu_add_device,
+	amd_iommu_remove_device,
+	amd_iommu_invalidate_tlb,
+};
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c
new file mode 100644
index 0000000000..5ae9ed2f6a
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c
@@ -0,0 +1,452 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/ept.c 252475 2013-07-01 20:05:43Z grehan $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/ept.c 252475 2013-07-01 20:05:43Z grehan $");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/param.h>
+#include <machine/cpufunc.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+
+#include <machine/vmm.h>
+#include "vmx_cpufunc.h"
+#include "vmx.h"
+#include "ept.h"
+
+/*
+ * Tests applied to the capability value that ept_init() reads from
+ * MSR_VMX_EPT_VPID_CAP.  Each one evaluates to non-zero when the
+ * corresponding bit of 'cap' is set.
+ */
+#define	EPT_PWL4(cap)			((cap) & (1UL << 6))
+#define	EPT_MEMORY_TYPE_WB(cap)		((cap) & (1UL << 14))
+#define	EPT_PDE_SUPERPAGE(cap)		((cap) & (1UL << 16))	/* 2MB pages */
+#define	EPT_PDPTE_SUPERPAGE(cap)	((cap) & (1UL << 17))	/* 1GB pages */
+#define	INVVPID_SUPPORTED(cap)		((cap) & (1UL << 32))
+#define	INVEPT_SUPPORTED(cap)		((cap) & (1UL << 20))
+
+/* True only when every one of bits 40-43 of 'cap' is set. */
+#define	INVVPID_ALL_TYPES_MASK		0xF0000000000UL
+#define	INVVPID_ALL_TYPES_SUPPORTED(cap)	\
+	(((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK)
+
+/* True only when both bit 25 and bit 26 of 'cap' are set. */
+#define	INVEPT_ALL_TYPES_MASK		0x6000000UL
+#define	INVEPT_ALL_TYPES_SUPPORTED(cap)		\
+	(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)
+
+/* Flag bits stored in the EPT page table entries built in this file. */
+#define	EPT_PG_RD			(1 << 0)
+#define	EPT_PG_WR			(1 << 1)
+#define	EPT_PG_EX			(1 << 2)
+#define	EPT_PG_MEMORY_TYPE(x)		((x) << 3)
+#define	EPT_PG_IGNORE_PAT		(1 << 6)
+#define	EPT_PG_SUPERPAGE		(1 << 7)
+
+/* Selects everything above the low 12 bits of a page table entry. */
+#define	EPT_ADDR_MASK			((uint64_t)-1 << 12)
+
+MALLOC_DECLARE(M_VMX);
+
+/*
+ * Bitmask of the usable EPT page sizes, filled in by ept_init(): bit N is
+ * set when pages of 2^N bytes may be used.
+ */
+static uint64_t page_sizes_mask;
+
+/*
+ * Set this to 1 to have the EPT tables respect the guest PAT settings
+ */
+static int ept_pat_passthru;
+
+/*
+ * Check the EPT/VPID capabilities reported by MSR_VMX_EPT_VPID_CAP and
+ * record the usable EPT page sizes in 'page_sizes_mask'.
+ *
+ * Returns 0 on success, or EINVAL when a required capability is missing
+ * (in which case 'page_sizes_mask' is left untouched).
+ */
+int
+ept_init(void)
+{
+	const uint64_t caps = rdmsr(MSR_VMX_EPT_VPID_CAP);
+
+	/*
+	 * Every one of these is mandatory:
+	 * - a page walk length of 4 steps
+	 * - extended page tables laid out in write-back memory
+	 * - the invvpid instruction, with all possible types
+	 * - the invept instruction, with all possible types
+	 */
+	if (!(EPT_PWL4(caps) &&
+	    EPT_MEMORY_TYPE_WB(caps) &&
+	    INVVPID_SUPPORTED(caps) &&
+	    INVVPID_ALL_TYPES_SUPPORTED(caps) &&
+	    INVEPT_SUPPORTED(caps) &&
+	    INVEPT_ALL_TYPES_SUPPORTED(caps)))
+		return (EINVAL);
+
+	/* The base page size is always valid; superpages depend on 'caps'. */
+	page_sizes_mask = 1UL << PAGE_SHIFT;			/* 4KB */
+	if (EPT_PDE_SUPERPAGE(caps))
+		page_sizes_mask |= 1UL << (PAGE_SHIFT + 9);	/* 2MB */
+	if (EPT_PDPTE_SUPERPAGE(caps))
+		page_sizes_mask |= 1UL << (PAGE_SHIFT + 18);	/* 1GB */
+
+	return (0);
+}
+
+#if 0
+/*
+ * Debugging aid, currently compiled out by the surrounding '#if 0'.
+ *
+ * Prints the address of the page table page 'ptp' followed by each of its
+ * 512 entries that is non-zero, then recurses into the table referenced by
+ * every entry that does not have the superpage bit set.  'nlevels' is the
+ * number of levels left to print; the output is indented by one tab per
+ * level already descended.
+ */
+static void
+ept_dump(uint64_t *ptp, int nlevels)
+{
+	int i, t, tabs;
+	uint64_t *ptpnext, ptpval;
+
+	if (--nlevels < 0)
+		return;
+
+	tabs = 3 - nlevels;
+	for (t = 0; t < tabs; t++)
+		printf("\t");
+	printf("PTP = %p\n", ptp);
+
+	for (i = 0; i < 512; i++) {
+		ptpval = ptp[i];
+
+		if (ptpval == 0)
+			continue;
+		
+		for (t = 0; t < tabs; t++)
+			printf("\t");
+		printf("%3d 0x%016lx\n", i, ptpval);
+
+		/* Only entries that point to another table are followed. */
+		if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) {
+			ptpnext = (uint64_t *)
+				  PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
+			ept_dump(ptpnext, nlevels);
+		}
+	}
+}
+#endif
+
+/*
+ * Install, or when 'prot' is VM_PROT_NONE remove, one EPT mapping for the
+ * guest physical address 'gpa' in the page table hierarchy rooted at 'ptp'.
+ *
+ * The mapping uses the largest page size that the processor supports (and
+ * that 'spok' allows), to which both 'gpa' and 'hpa' are aligned, and that
+ * does not exceed 'length'.  Intermediate page table pages are allocated on
+ * demand.  'attr' supplies the memory type and 'prot' the access rights of
+ * the new entry.
+ *
+ * Returns the number of bytes covered by the entry that was written, so
+ * that the caller can advance and call again for the rest of the region.
+ * Panics if no usable page size is found or if 'gpa' is not aligned to the
+ * size that was selected.
+ */
+static size_t
+ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+		   vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
+{
+	int spshift, ptpshift, ptpindex, nlevels;
+	uint64_t pte;
+
+	/*
+	 * Compute the size of the mapping that we can accommodate.
+	 *
+	 * This is based on three factors:
+	 * - super page sizes supported by the processor
+	 * - alignment of the region starting at 'gpa' and 'hpa'
+	 * - length of the region 'length'
+	 */
+	spshift = PAGE_SHIFT;
+	if (spok)
+		spshift += (EPT_PWLEVELS - 1) * 9;
+	while (spshift >= PAGE_SHIFT) {
+		uint64_t spsize = 1UL << spshift;
+		if ((page_sizes_mask & spsize) != 0 &&
+		    (gpa & (spsize - 1)) == 0 &&
+		    (hpa & (spsize - 1)) == 0 &&
+		    length >= spsize) {
+			break;
+		}
+		spshift -= 9;
+	}
+
+	if (spshift < PAGE_SHIFT) {
+		panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
+		      "length 0x%016lx, page_sizes_mask 0x%016lx",
+		      gpa, hpa, length, page_sizes_mask);
+	}
+
+	/* Walk down from the top level until the chosen page size is reached */
+	nlevels = EPT_PWLEVELS;
+	while (--nlevels >= 0) {
+		ptpshift = PAGE_SHIFT + nlevels * 9;
+		ptpindex = (gpa >> ptpshift) & 0x1FF;
+
+		/* We have reached the leaf mapping */
+		if (spshift >= ptpshift)
+			break;
+
+		/*
+		 * We are working on a non-leaf page table page.
+		 *
+		 * Create the next level page table page if necessary and point
+		 * to it from the current page table.
+		 */
+		if (ptp[ptpindex] == 0) {
+#ifdef	__FreeBSD__
+			void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
+#else
+			void *nlp = kmem_zalloc(PAGE_SIZE, KM_SLEEP);
+			ASSERT((((uintptr_t)nlp) & PAGE_MASK) == 0);
+#endif
+			/*
+			 * Publish the finished entry with a single store so
+			 * that the table never holds an address without its
+			 * permission bits.
+			 */
+			pte = vtophys(nlp);
+			pte |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
+			ptp[ptpindex] = pte;
+		}
+
+		/* Work our way down to the next level page table page */
+#ifdef	__FreeBSD__
+		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
+#else
+		ptp = (uint64_t *)hat_kpm_pfn2va(btop(ptp[ptpindex] & EPT_ADDR_MASK));
+#endif
+	}
+
+	if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
+		panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
+		      "mismatch\n", gpa, ptpshift);
+	}
+
+	if (prot != VM_PROT_NONE) {
+		/*
+		 * Assemble the complete entry in a local first and store it
+		 * once, so that the page table never contains a partially
+		 * constructed mapping (e.g. an address with only some of its
+		 * access bits or without its memory type).
+		 */
+		pte = hpa;
+
+		/* Apply the access controls */
+		if (prot & VM_PROT_READ)
+			pte |= EPT_PG_RD;
+		if (prot & VM_PROT_WRITE)
+			pte |= EPT_PG_WR;
+		if (prot & VM_PROT_EXECUTE)
+			pte |= EPT_PG_EX;
+
+		/*
+		 * By default the PAT type is ignored - this appears to
+		 * be how other hypervisors handle EPT. Allow this to be
+		 * overridden.
+		 */
+		pte |= EPT_PG_MEMORY_TYPE(attr);
+		if (!ept_pat_passthru)
+			pte |= EPT_PG_IGNORE_PAT;
+
+		/* A leaf above the lowest level is a superpage */
+		if (nlevels > 0)
+			pte |= EPT_PG_SUPERPAGE;
+
+		/* Do the mapping */
+		ptp[ptpindex] = pte;
+	} else {
+		/* Remove the mapping */
+		ptp[ptpindex] = 0;
+	}
+
+	return (1UL << ptpshift);
+}
+
+/*
+ * Translate the guest physical address 'gpa' by walking the EPT hierarchy
+ * rooted at 'ptp'.
+ *
+ * Returns the host physical address that 'gpa' maps to, or (vm_paddr_t)-1
+ * when the walk reaches an entry with none of the read, write or execute
+ * bits set.
+ */
+static vm_paddr_t
+ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
+{
+	int nlevels, ptpshift, ptpindex;
+	uint64_t ptpval, hpabase, pgmask;
+
+	nlevels = EPT_PWLEVELS;
+	while (--nlevels >= 0) {
+		ptpshift = PAGE_SHIFT + nlevels * 9;
+		ptpindex = (gpa >> ptpshift) & 0x1FF;
+
+		ptpval = ptp[ptpindex];
+
+		/* Cannot make progress beyond this point */
+		if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
+			break;
+
+		/*
+		 * A leaf is either an entry of the lowest level or a superpage
+		 * entry; combine its page frame with the offset bits of 'gpa'.
+		 */
+		if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
+			pgmask = (1UL << ptpshift) - 1;
+			hpabase = ptpval & ~pgmask;
+			return (hpabase | (gpa & pgmask));
+		}
+
+		/* Work our way down to the next level page table page */
+#ifdef	__FreeBSD__
+		ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
+#else
+		ptp = (uint64_t *)hat_kpm_pfn2va(btop(ptpval & EPT_ADDR_MASK));
+#endif
+	}
+
+	return ((vm_paddr_t)-1);
+}
+
+/*
+ * Sanity check an entry of a lowest-level page table while the tables are
+ * being torn down.  Nothing is released here; a non-zero entry that has the
+ * superpage bit set is treated as fatal.
+ */
+static void
+ept_free_pt_entry(pt_entry_t pte)
+{
+	/* Empty entries are fine, populated ones must not be superpages. */
+	if (pte != 0 && (pte & EPT_PG_SUPERPAGE) != 0)
+		panic("ept_free_pt_entry: pte cannot have superpage bit");
+}
+
+/*
+ * Release the page table page referenced by the page directory entry 'pde'.
+ *
+ * Empty entries and superpage entries are left alone.  Otherwise every
+ * entry of the referenced page table is passed to ept_free_pt_entry() and
+ * the page table page itself is freed.
+ */
+static void
+ept_free_pd_entry(pd_entry_t pde)
+{
+	pt_entry_t	*pt;
+	int		i;
+
+	if (pde == 0)
+		return;
+
+	if ((pde & EPT_PG_SUPERPAGE) == 0) {
+#ifdef	__FreeBSD__
+		pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
+		for (i = 0; i < NPTEPG; i++)
+			ept_free_pt_entry(pt[i]);
+		free(pt, M_VMX);	/* free the page table page */
+#else
+		page_t		*pp;
+		pt = (pt_entry_t *)hat_kpm_pfn2va(btop(pde & EPT_ADDR_MASK));
+		for (i = 0; i < NPTEPG; i++)
+			ept_free_pt_entry(pt[i]);
+		/*
+		 * NOTE(review): the address handed to kmem_free() is taken
+		 * from pp->p_offset and 'pp' is dereferenced without a NULL
+		 * check - confirm both hold for pages from kmem_zalloc().
+		 */
+		pp = page_numtopp_nolock(btop(pde & EPT_ADDR_MASK));
+		kmem_free((void *)pp->p_offset, PAGE_SIZE);
+#endif
+	}
+}
+
+/*
+ * Release the page directory page referenced by the page directory pointer
+ * entry 'pdpe'.
+ *
+ * Empty entries and superpage entries are left alone.  Otherwise every
+ * entry of the referenced page directory is passed to ept_free_pd_entry()
+ * and the page directory page itself is freed.
+ */
+static void
+ept_free_pdp_entry(pdp_entry_t pdpe)
+{
+	pd_entry_t 	*pd;
+	int		 i;
+
+	if (pdpe == 0)
+		return;
+
+	if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
+#ifdef	__FreeBSD__
+		pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
+		for (i = 0; i < NPDEPG; i++)
+			ept_free_pd_entry(pd[i]);
+		free(pd, M_VMX);	/* free the page directory page */
+#else
+		page_t		*pp;
+		pd = (pd_entry_t *)hat_kpm_pfn2va(btop(pdpe & EPT_ADDR_MASK));
+		for (i = 0; i < NPDEPG; i++)
+			ept_free_pd_entry(pd[i]);
+		/*
+		 * NOTE(review): the address handed to kmem_free() is taken
+		 * from pp->p_offset and 'pp' is dereferenced without a NULL
+		 * check - confirm both hold for pages from kmem_zalloc().
+		 */
+		pp = page_numtopp_nolock(btop(pdpe & EPT_ADDR_MASK));
+		kmem_free((void *)pp->p_offset, PAGE_SIZE);
+#endif
+	}
+}
+
+/*
+ * Release the page directory pointer page referenced by the PML4 entry
+ * 'pml4e'.
+ *
+ * Empty entries (and entries with the superpage bit set) are left alone.
+ * Otherwise every entry of the referenced page is passed to
+ * ept_free_pdp_entry() and the page itself is freed.
+ */
+static void
+ept_free_pml4_entry(pml4_entry_t pml4e)
+{
+	pdp_entry_t	*pdp;
+	int		i;
+
+	if (pml4e == 0)
+		return;
+
+	if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
+#ifdef	__FreeBSD__
+		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
+		for (i = 0; i < NPDPEPG; i++)
+			ept_free_pdp_entry(pdp[i]);
+		free(pdp, M_VMX);	/* free the page directory ptr page */
+#else
+		page_t		*pp;
+		pdp = (pdp_entry_t *)hat_kpm_pfn2va(btop(pml4e
+		    & EPT_ADDR_MASK));
+		for (i = 0; i < NPDPEPG; i++)
+			ept_free_pdp_entry(pdp[i]);
+		/*
+		 * NOTE(review): the address handed to kmem_free() is taken
+		 * from pp->p_offset and 'pp' is dereferenced without a NULL
+		 * check - confirm both hold for pages from kmem_zalloc().
+		 */
+		pp = page_numtopp_nolock(btop(pml4e & EPT_ADDR_MASK));
+		kmem_free((void *)pp->p_offset, PAGE_SIZE);
+#endif
+	}
+}
+
+/*
+ * Tear down the extended page tables of 'vmx' by releasing whatever each
+ * of the NPML4EPG entries of vmx->pml4ept refers to.  The top-level table
+ * itself is not released here.
+ */
+void
+ept_vmcleanup(struct vmx *vmx)
+{
+	int slot = 0;
+
+	while (slot < NPML4EPG) {
+		ept_free_pml4_entry(vmx->pml4ept[slot]);
+		slot++;
+	}
+}
+
+/*
+ * Map 'len' bytes of guest physical address space starting at 'gpa' to the
+ * host physical range starting at 'hpa' in the EPT of the 'struct vmx'
+ * passed as 'arg'.  'attr', 'prot' and 'spok' are handed through to
+ * ept_create_mapping() unchanged.
+ *
+ * Always returns 0.
+ */
+int
+ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
+		vm_memattr_t attr, int prot, boolean_t spok)
+{
+	struct vmx *vmx = arg;
+	size_t mapped;
+
+	/* Each step covers one page of whatever size could be used. */
+	for (; len > 0; len -= mapped, gpa += mapped, hpa += mapped) {
+		mapped = ept_create_mapping(vmx->pml4ept, gpa, hpa, len,
+		    attr, prot, spok);
+	}
+
+	return (0);
+}
+
+/*
+ * Look up the host physical address that backs the guest physical address
+ * 'gpa' in the EPT of the 'struct vmx' passed as 'arg'.  The result is
+ * whatever ept_lookup_mapping() returns.
+ */
+vm_paddr_t
+ept_vmmmap_get(void *arg, vm_paddr_t gpa)
+{
+	struct vmx *vmx = arg;
+
+	return (ept_lookup_mapping(vmx->pml4ept, gpa));
+}
+
+/*
+ * Callback that issues invept with type INVEPT_TYPE_SINGLE_CONTEXT, using
+ * the 'struct invept_desc' that 'arg' points to.
+ */
+static void
+invept_single_context(void *arg)
+{
+	const struct invept_desc *descp = arg;
+
+	invept(INVEPT_TYPE_SINGLE_CONTEXT, *descp);
+}
+
+/*
+ * Invalidate the cached EPT translations for the page table hierarchy
+ * whose top-level table is at 'pml4ept'.
+ *
+ * A zeroed invept descriptor is filled in with EPTP(pml4ept) and handed to
+ * invept_single_context() through smp_rendezvous() (presumably so that the
+ * invalidation is executed on every CPU - confirm against the
+ * smp_rendezvous() implementation in use).
+ */
+void
+ept_invalidate_mappings(u_long pml4ept)
+{
+	struct invept_desc invept_desc = { 0 };
+
+	invept_desc.eptp = EPTP(pml4ept);
+
+	smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.h b/usr/src/uts/i86pc/io/vmm/intel/ept.h
new file mode 100644
index 0000000000..d0bcce7ec3
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/ept.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/ept.h 245678 2013-01-20 03:42:49Z neel $
+ */
+
+#ifndef	_EPT_H_
+#define	_EPT_H_
+
+struct vmx;
+
+#define	EPT_PWLEVELS	4		/* page walk levels */
+/*
+ * Build an EPT pointer value from the address of the top-level (PML4)
+ * table: the address OR-ed with (EPT_PWLEVELS - 1) << 3 and with
+ * PAT_WRITE_BACK.
+ */
+#define	EPTP(pml4)	((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)
+
+/* Check processor capabilities; returns 0 on success, an errno otherwise. */
+int	ept_init(void);
+/* Map 'length' bytes at 'gpa' to 'hpa'; 'arg' is a 'struct vmx *'. */
+int	ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
+	    vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
+/* Translate 'gpa' to a host physical address; 'arg' is a 'struct vmx *'. */
+vm_paddr_t ept_vmmmap_get(void *arg, vm_paddr_t gpa);
+/* Invalidate cached translations derived from the given PML4 table. */
+void	ept_invalidate_mappings(u_long ept_pml4);
+/* Release the page table pages hanging off the EPT of 'vmx'. */
+void	ept_vmcleanup(struct vmx *vmx);
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
new file mode 100644
index 0000000000..bbd2da2a34
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
@@ -0,0 +1,597 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmcs.c 266550 2014-05-22 17:22:37Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifdef	__FreeBSD__
+#include "opt_ddb.h"
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/vmcs.c 266550 2014-05-22 17:22:37Z neel $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/pcpu.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/segments.h>
+#include <machine/vmm.h>
+#include "vmm_host.h"
+#include "vmx_cpufunc.h"
+#include "vmcs.h"
+#include "ept.h"
+#include "vmx.h"
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+/*
+ * Adjust a value that is about to be written to the VMCS field 'encoding'.
+ * Values destined for the guest CR0 and CR4 fields are passed through
+ * vmx_fix_cr0() and vmx_fix_cr4() respectively; every other value is
+ * returned unchanged.
+ */
+static uint64_t
+vmcs_fix_regval(uint32_t encoding, uint64_t val)
+{
+	uint64_t fixed = val;
+
+	if (encoding == VMCS_GUEST_CR0)
+		fixed = vmx_fix_cr0(val);
+	else if (encoding == VMCS_GUEST_CR4)
+		fixed = vmx_fix_cr4(val);
+
+	return (fixed);
+}
+
+/*
+ * Translate the register identifier 'ident' (a VM_REG_GUEST_* value) into
+ * the encoding of the VMCS field that holds it.  For segment registers this
+ * is the encoding of the selector field.
+ *
+ * Returns (uint32_t)-1 for identifiers without a corresponding field.
+ */
+static uint32_t
+vmcs_field_encoding(int ident)
+{
+	uint32_t encoding;
+
+	switch (ident) {
+	case VM_REG_GUEST_CR0:
+		encoding = VMCS_GUEST_CR0;
+		break;
+	case VM_REG_GUEST_CR3:
+		encoding = VMCS_GUEST_CR3;
+		break;
+	case VM_REG_GUEST_CR4:
+		encoding = VMCS_GUEST_CR4;
+		break;
+	case VM_REG_GUEST_DR7:
+		encoding = VMCS_GUEST_DR7;
+		break;
+	case VM_REG_GUEST_RSP:
+		encoding = VMCS_GUEST_RSP;
+		break;
+	case VM_REG_GUEST_RIP:
+		encoding = VMCS_GUEST_RIP;
+		break;
+	case VM_REG_GUEST_RFLAGS:
+		encoding = VMCS_GUEST_RFLAGS;
+		break;
+	case VM_REG_GUEST_ES:
+		encoding = VMCS_GUEST_ES_SELECTOR;
+		break;
+	case VM_REG_GUEST_CS:
+		encoding = VMCS_GUEST_CS_SELECTOR;
+		break;
+	case VM_REG_GUEST_SS:
+		encoding = VMCS_GUEST_SS_SELECTOR;
+		break;
+	case VM_REG_GUEST_DS:
+		encoding = VMCS_GUEST_DS_SELECTOR;
+		break;
+	case VM_REG_GUEST_FS:
+		encoding = VMCS_GUEST_FS_SELECTOR;
+		break;
+	case VM_REG_GUEST_GS:
+		encoding = VMCS_GUEST_GS_SELECTOR;
+		break;
+	case VM_REG_GUEST_TR:
+		encoding = VMCS_GUEST_TR_SELECTOR;
+		break;
+	case VM_REG_GUEST_LDTR:
+		encoding = VMCS_GUEST_LDTR_SELECTOR;
+		break;
+	case VM_REG_GUEST_EFER:
+		encoding = VMCS_GUEST_IA32_EFER;
+		break;
+	default:
+		/* No VMCS field is associated with this identifier. */
+		encoding = (uint32_t)-1;
+		break;
+	}
+
+	return (encoding);
+}
+
+/*
+ * Look up the VMCS field encodings that describe the segment or descriptor
+ * table register 'seg'.
+ *
+ * On success 0 is returned and '*base', '*lim' and '*acc' receive the
+ * encodings of the base, limit and access rights fields.  IDTR and GDTR
+ * have no access rights field, which is reported as VMCS_INVALID_ENCODING
+ * in '*acc'.  For any other value of 'seg' EINVAL is returned and the
+ * output arguments are not written.
+ */
+static int
+vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
+{
+	uint32_t base_enc, lim_enc, acc_enc;
+
+	switch (seg) {
+	case VM_REG_GUEST_ES:
+		base_enc = VMCS_GUEST_ES_BASE;
+		lim_enc = VMCS_GUEST_ES_LIMIT;
+		acc_enc = VMCS_GUEST_ES_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_CS:
+		base_enc = VMCS_GUEST_CS_BASE;
+		lim_enc = VMCS_GUEST_CS_LIMIT;
+		acc_enc = VMCS_GUEST_CS_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_SS:
+		base_enc = VMCS_GUEST_SS_BASE;
+		lim_enc = VMCS_GUEST_SS_LIMIT;
+		acc_enc = VMCS_GUEST_SS_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_DS:
+		base_enc = VMCS_GUEST_DS_BASE;
+		lim_enc = VMCS_GUEST_DS_LIMIT;
+		acc_enc = VMCS_GUEST_DS_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_FS:
+		base_enc = VMCS_GUEST_FS_BASE;
+		lim_enc = VMCS_GUEST_FS_LIMIT;
+		acc_enc = VMCS_GUEST_FS_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_GS:
+		base_enc = VMCS_GUEST_GS_BASE;
+		lim_enc = VMCS_GUEST_GS_LIMIT;
+		acc_enc = VMCS_GUEST_GS_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_TR:
+		base_enc = VMCS_GUEST_TR_BASE;
+		lim_enc = VMCS_GUEST_TR_LIMIT;
+		acc_enc = VMCS_GUEST_TR_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_LDTR:
+		base_enc = VMCS_GUEST_LDTR_BASE;
+		lim_enc = VMCS_GUEST_LDTR_LIMIT;
+		acc_enc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
+		break;
+	case VM_REG_GUEST_IDTR:
+		base_enc = VMCS_GUEST_IDTR_BASE;
+		lim_enc = VMCS_GUEST_IDTR_LIMIT;
+		acc_enc = VMCS_INVALID_ENCODING;
+		break;
+	case VM_REG_GUEST_GDTR:
+		base_enc = VMCS_GUEST_GDTR_BASE;
+		lim_enc = VMCS_GUEST_GDTR_LIMIT;
+		acc_enc = VMCS_INVALID_ENCODING;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Only hand results back once 'seg' is known to be valid. */
+	*base = base_enc;
+	*lim = lim_enc;
+	*acc = acc_enc;
+
+	return (0);
+}
+
+/*
+ * Read the guest register 'ident' from 'vmcs' into '*retval'.
+ *
+ * 'running' being non-zero means that the VMCS does not have to be loaded
+ * here; otherwise the access is bracketed by VMPTRLD() and VMCLEAR().
+ *
+ * Returns EINVAL if 'ident' has no VMCS field, otherwise the result of
+ * vmread().
+ */
+int
+vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
+{
+	int error;
+	uint32_t encoding;
+
+	/*
+	 * If we need to get at vmx-specific state in the VMCS we can bypass
+	 * the translation of 'ident' to 'encoding' by simply setting the
+	 * sign bit. As it so happens the upper 16 bits are reserved (i.e.
+	 * set to 0) in the encodings for the VMCS so we are free to use the
+	 * sign bit.
+	 */
+	if (ident < 0)
+		encoding = ident & 0x7fffffff;
+	else
+		encoding = vmcs_field_encoding(ident);
+
+	if (encoding == (uint32_t)-1)
+		return (EINVAL);
+
+	if (!running)
+		VMPTRLD(vmcs);
+
+	error = vmread(encoding, retval);
+
+	if (!running)
+		VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Write 'val' to the guest register 'ident' in 'vmcs'.
+ *
+ * A negative 'ident' is taken as a raw VMCS field encoding with the sign
+ * bit set; any other value is translated with vmcs_field_encoding().  The
+ * value is passed through vmcs_fix_regval() before it is written.
+ * 'running' being non-zero means that the VMCS does not have to be loaded
+ * here; otherwise the access is bracketed by VMPTRLD() and VMCLEAR().
+ *
+ * Returns EINVAL if 'ident' has no VMCS field, otherwise the result of
+ * vmwrite().
+ */
+int
+vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
+{
+	int error;
+	uint32_t encoding;
+
+	if (ident < 0)
+		encoding = ident & 0x7fffffff;
+	else
+		encoding = vmcs_field_encoding(ident);
+
+	if (encoding == (uint32_t)-1)
+		return (EINVAL);
+
+	val = vmcs_fix_regval(encoding, val);
+
+	if (!running)
+		VMPTRLD(vmcs);
+
+	error = vmwrite(encoding, val);
+
+	if (!running)
+		VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Write the base, limit and (where the register has one) access rights of
+ * 'desc' to the VMCS fields of the segment register 'seg'.
+ *
+ * 'running' being non-zero means that the VMCS does not have to be loaded
+ * here; otherwise the accesses are bracketed by VMPTRLD() and VMCLEAR().
+ *
+ * Panics if 'seg' is not a known segment register.  Returns 0 on success or
+ * the error of the first vmwrite() that failed; fields following the
+ * failing one are not written.
+ */
+int
+vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
+{
+	int error;
+	uint32_t base, limit, access;
+
+	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+	if (error != 0)
+		panic("vmcs_setdesc: invalid segment register %d", seg);
+
+	if (!running)
+		VMPTRLD(vmcs);
+	if ((error = vmwrite(base, desc->base)) != 0)
+		goto done;
+
+	if ((error = vmwrite(limit, desc->limit)) != 0)
+		goto done;
+
+	/* IDTR and GDTR have no access rights field */
+	if (access != VMCS_INVALID_ENCODING) {
+		if ((error = vmwrite(access, desc->access)) != 0)
+			goto done;
+	}
+done:
+	if (!running)
+		VMCLEAR(vmcs);
+	return (error);
+}
+
+/*
+ * Read the base, limit and (where the register has one) access rights of
+ * the segment register 'seg' from the VMCS into 'desc'.  desc->access is
+ * left untouched for registers without an access rights field.
+ *
+ * 'running' being non-zero means that the VMCS does not have to be loaded
+ * here; otherwise the accesses are bracketed by VMPTRLD() and VMCLEAR().
+ *
+ * Panics if 'seg' is not a known segment register.  Returns 0 on success or
+ * the error of the first vmread() that failed; members of 'desc' following
+ * the failing one are not updated.
+ */
+int
+vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
+{
+	int error;
+	uint32_t base, limit, access;
+	uint64_t u64;
+
+	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+	if (error != 0)
+		panic("vmcs_getdesc: invalid segment register %d", seg);
+
+	if (!running)
+		VMPTRLD(vmcs);
+	if ((error = vmread(base, &u64)) != 0)
+		goto done;
+	desc->base = u64;
+
+	if ((error = vmread(limit, &u64)) != 0)
+		goto done;
+	desc->limit = u64;
+
+	/* IDTR and GDTR have no access rights field */
+	if (access != VMCS_INVALID_ENCODING) {
+		if ((error = vmread(access, &u64)) != 0)
+			goto done;
+		desc->access = u64;
+	}
+done:
+	if (!running)
+		VMCLEAR(vmcs);
+	return (error);
+}
+
+/*
+ * Point both the VM-exit MSR-store area and the VM-entry MSR-load area of
+ * 'vmcs' at 'g_area' and set both counts to 'g_count'.
+ *
+ * The VMCS is loaded with VMPTRLD() for the duration of the call and
+ * cleared with VMCLEAR() before returning.  Returns 0 on success or the
+ * error of the first vmwrite() that failed.
+ */
+int
+vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
+{
+	int error;
+
+	VMPTRLD(vmcs);
+
+	/*
+	 * Guest MSRs are saved in the VM-exit MSR-store area.
+	 * Guest MSRs are loaded from the VM-entry MSR-load area.
+	 * Both areas point to the same location in memory.
+	 */
+	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
+		goto done;
+
+	error = 0;
+done:
+	VMCLEAR(vmcs);
+	return (error);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Point the VM-exit MSR-load area of 'vmcs' at 'h_area' and set its count
+ * to 'h_count'.  Only compiled when __FreeBSD__ is not defined.
+ *
+ * The VMCS is loaded with VMPTRLD() for the duration of the call and
+ * cleared with VMCLEAR() before returning.  Returns 0 on success or the
+ * error of the first vmwrite() that failed.
+ */
+int
+vmcs_set_host_msr_save(struct vmcs *vmcs, u_long h_area, u_int h_count)
+{
+	int error;
+
+	VMPTRLD(vmcs);
+
+	/*
+	 * Host MSRs are loaded from the VM-exit MSR-load area.
+	 */
+	if ((error = vmwrite(VMCS_EXIT_MSR_LOAD, h_area)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, h_count)) != 0)
+		goto done;
+
+	error = 0;
+done:
+	VMCLEAR(vmcs);
+	return (error);
+}
+#endif
+
+/*
+ * Populate 'vmcs' with its initial contents: the VMX control fields passed
+ * in by the caller, the default guest IA32_PAT value, the host state that
+ * is restored on VM exit (PAT, EFER, CR0, CR4, segment selectors, IDTR
+ * base, 'host_rip' and 'host_rsp'), the EPT pointer derived from
+ * 'ept_pml4', 'vpid', 'msr_bitmap', the exception bitmap and the link
+ * pointer.
+ *
+ * The VMCS is loaded with VMPTRLD() for the duration of the call and
+ * cleared with VMCLEAR() before returning.  Returns 0 on success or the
+ * error of the first vmwrite() that failed, in which case the remaining
+ * fields are not written.
+ */
+int
+vmcs_set_defaults(struct vmcs *vmcs,
+		  u_long host_rip, u_long host_rsp, u_long ept_pml4,
+		  uint32_t pinbased_ctls, uint32_t procbased_ctls,
+		  uint32_t procbased_ctls2, uint32_t exit_ctls,
+		  uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
+{
+	int error, codesel, datasel, tsssel;
+	u_long cr0, cr4, efer;
+	/* NOTE(review): 'fsbase' is only used when __FreeBSD__ is defined */
+	uint64_t eptp, pat, fsbase, idtrbase;
+	uint32_t exc_bitmap;
+
+	codesel = vmm_get_host_codesel();
+	datasel = vmm_get_host_datasel();
+	tsssel = vmm_get_host_tsssel();
+
+	/*
+	 * Make sure we have a "current" VMCS to work with.
+	 */
+	VMPTRLD(vmcs);
+
+	/*
+	 * Load the VMX controls
+	 */
+	if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
+		goto done;
+	if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
+		goto done;
+
+	/* Guest state */
+
+	/* Initialize guest IA32_PAT MSR with the default value */
+	pat = PAT_VALUE(0, PAT_WRITE_BACK)	|
+	      PAT_VALUE(1, PAT_WRITE_THROUGH)	|
+	      PAT_VALUE(2, PAT_UNCACHED)	|
+	      PAT_VALUE(3, PAT_UNCACHEABLE)	|
+	      PAT_VALUE(4, PAT_WRITE_BACK)	|
+	      PAT_VALUE(5, PAT_WRITE_THROUGH)	|
+	      PAT_VALUE(6, PAT_UNCACHED)	|
+	      PAT_VALUE(7, PAT_UNCACHEABLE);
+	if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
+		goto done;
+
+	/* Host state */
+
+	/* Initialize host IA32_PAT MSR */
+	pat = vmm_get_host_pat();
+	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
+		goto done;
+
+	/* Load the IA32_EFER MSR */
+	efer = vmm_get_host_efer();
+	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
+		goto done;
+
+	/* Load the control registers */
+
+	cr0 = vmm_get_host_cr0();
+	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
+		goto done;
+	
+	/* CR4_VMXE is always set in the host CR4 value that is recorded */
+	cr4 = vmm_get_host_cr4() | CR4_VMXE;
+	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
+		goto done;
+
+	/* Load the segment selectors */
+	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
+		goto done;
+
+#ifdef	__FreeBSD__
+	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
+		goto done;
+#else
+	/* Without __FreeBSD__ the %fs and %gs selectors have their own getters */
+	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0)
+		goto done;
+
+	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0)
+		goto done;
+#endif
+
+	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
+		goto done;
+
+#ifdef	__FreeBSD__
+	/*
+	 * Load the Base-Address for %fs and idtr.
+	 *
+	 * Note that we exclude %gs, tss and gdtr here because their base
+	 * address is pcpu specific.
+	 */
+	fsbase = vmm_get_host_fsbase();
+	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
+		goto done;
+#endif
+
+	idtrbase = vmm_get_host_idtrbase();
+	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
+		goto done;
+
+	/* instruction pointer */
+	if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
+		goto done;
+
+	/* stack pointer */
+	if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
+		goto done;
+
+	/* eptp */
+	eptp = EPTP(ept_pml4);
+	if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
+		goto done;
+
+	/* vpid */
+	if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
+		goto done;
+
+	/* msr bitmap */
+	if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
+		goto done;
+
+	/* exception bitmap: only the bit for IDT_MC is set */
+	exc_bitmap = 1 << IDT_MC;
+	if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
+		goto done;
+
+	/* link pointer */
+	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
+		goto done;
+done:
+	VMCLEAR(vmcs);
+	return (error);
+}
+
+#ifdef DDB
+extern int vmxon_enabled[];
+
+/*
+ * Debugger command, declared with DB_SHOW_COMMAND and only compiled when
+ * DDB is defined, that prints a summary of the current VMCS of the CPU it
+ * runs on: the VMCS address, VPID, guest activity state, exit (or entry
+ * failure) reason, exit qualification, guest linear address, details that
+ * depend on the exit reason, and the VM-instruction error.
+ *
+ * Nothing but a short message is printed if VMX is not enabled on the
+ * current CPU, if an address argument was supplied, or if there is no
+ * current VMCS.
+ */
+DB_SHOW_COMMAND(vmcs, db_show_vmcs)
+{
+	uint64_t cur_vmcs, val;
+	uint32_t exit;
+
+	if (!vmxon_enabled[curcpu]) {
+		db_printf("VMX not enabled\n");
+		return;
+	}
+
+	if (have_addr) {
+		db_printf("Only current VMCS supported\n");
+		return;
+	}
+
+	vmptrst(&cur_vmcs);
+	if (cur_vmcs == VMCS_INITIAL) {
+		db_printf("No current VM context\n");
+		return;
+	}
+	db_printf("VMCS: %jx\n", cur_vmcs);
+	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
+	db_printf("Activity: ");
+	val = vmcs_read(VMCS_GUEST_ACTIVITY);
+	switch (val) {
+	case 0:
+		db_printf("Active");
+		break;
+	case 1:
+		db_printf("HLT");
+		break;
+	case 2:
+		db_printf("Shutdown");
+		break;
+	case 3:
+		db_printf("Wait for SIPI");
+		break;
+	default:
+		db_printf("Unknown: %#lx", val);
+	}
+	db_printf("\n");
+	exit = vmcs_read(VMCS_EXIT_REASON);
+	/* Bit 31 distinguishes an entry failure; the low 16 bits are the reason */
+	if (exit & 0x80000000)
+		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
+	else
+		db_printf("Exit Reason: %u\n", exit & 0xffff);
+	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
+	db_printf("Guest Linear Address: %#lx\n",
+	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
+	switch (exit & 0x8000ffff) {
+	case EXIT_REASON_EXCEPTION:
+	case EXIT_REASON_EXT_INTR:
+		val = vmcs_read(VMCS_EXIT_INTR_INFO);
+		db_printf("Interrupt Type: ");
+		/* Bits 8-10 of the interruption information hold the type */
+		switch (val >> 8 & 0x7) {
+		case 0:
+			db_printf("external");
+			break;
+		case 2:
+			db_printf("NMI");
+			break;
+		case 3:
+			db_printf("HW exception");
+			break;
+		case 4:
+			db_printf("SW exception");
+			break;
+		default:
+			db_printf("?? %lu", val >> 8 & 0x7);
+			break;
+		}
+		db_printf("  Vector: %lu", val & 0xff);
+		/* The error code is only printed when bit 11 is set */
+		if (val & 0x800)
+			db_printf("  Error Code: %lx",
+			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
+		db_printf("\n");
+		break;
+	case EXIT_REASON_EPT_FAULT:
+	case EXIT_REASON_EPT_MISCONFIG:
+		db_printf("Guest Physical Address: %#lx\n",
+		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
+		break;
+	}
+	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
+}
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
new file mode 100644
index 0000000000..20e99e8184
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
@@ -0,0 +1,410 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmcs.h 276098 2014-12-23 02:14:49Z neel $
+ */
+
+#ifndef _VMCS_H_
+#define	_VMCS_H_
+
+#ifdef _KERNEL
+/*
+ * In-memory layout of a VMCS region: two 32-bit header words followed by
+ * opaque padding that brings the structure to exactly one page.
+ */
+struct vmcs {
+	uint32_t	identifier;	/* first 32-bit header word */
+	uint32_t	abort_code;	/* second 32-bit header word */
+	/* Remainder of the page; its contents are not interpreted here. */
+	char		_impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
+};
+/* Compile-time check that the structure is exactly PAGE_SIZE bytes. */
+CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
+
+/* MSR save region is composed of an array of 'struct msr_entry' */
+struct msr_entry {
+	uint32_t	index;		/* MSR number */
+	uint32_t	reserved;	/* initialized to 0 by users in vmx.c */
+	uint64_t	val;		/* 64-bit value slot for that MSR */
+
+};
+
+/*
+ * VMCS management routines.  Only the prototypes appear in this header; the
+ * definitions live elsewhere.  The 'running' argument of the get/set
+ * routines presumably tells the callee whether the VMCS is current on this
+ * CPU -- TODO confirm against the implementation.
+ */
+int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
+int	vmcs_init(struct vmcs *vmcs);
+#ifndef	__FreeBSD__
+/* Declared only when not building for FreeBSD. */
+int vmcs_set_host_msr_save(struct vmcs *vmcs, u_long h_area, u_int h_count);
+#endif
+int	vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
+			  u_long ept_pml4,
+			  uint32_t pinbased_ctls, uint32_t procbased_ctls,
+			  uint32_t procbased_ctls2, uint32_t exit_ctls,
+			  uint32_t entry_ctls, u_long msr_bitmap,
+			  uint16_t vpid);
+int	vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv);
+int	vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
+int	vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
+		     struct seg_desc *desc);
+int	vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
+		     struct seg_desc *desc);
+
+/*
+ * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
+ */
+#ifdef _VMX_CPUFUNC_H_
+/*
+ * Read the VMCS field selected by 'encoding' with vmread() and return the
+ * value it stored.  A non-zero status from vmread() trips the KASSERT; the
+ * status is not otherwise reported to the caller.
+ */
+static __inline uint64_t
+vmcs_read(uint32_t encoding)
+{
+	int error;
+	uint64_t val;
+
+	error = vmread(encoding, &val);
+	KASSERT(error == 0, ("vmcs_read(%u) error %d", encoding, error));
+	return (val);
+}
+
+/*
+ * Write 'val' to the VMCS field selected by 'encoding' with vmwrite().
+ * A non-zero status from vmwrite() trips the KASSERT; nothing is returned
+ * to the caller.
+ */
+static __inline void
+vmcs_write(uint32_t encoding, uint64_t val)
+{
+	int error;
+
+	error = vmwrite(encoding, val);
+	KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error));
+}
+#endif	/* _VMX_CPUFUNC_H_ */
+
+/*
+ * Shorthand accessors that expand to vmcs_read() of one specific field.
+ * vmcs_read() is only defined when _VMX_CPUFUNC_H_ is defined, so these are
+ * usable only in files where that holds.  Note that vmcs_exit_reason()
+ * keeps just the low 16 bits of the exit reason field.
+ */
+#define	vmexit_instruction_length()	vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
+#define	vmcs_guest_rip()		vmcs_read(VMCS_GUEST_RIP)
+#define	vmcs_instruction_error()	vmcs_read(VMCS_INSTRUCTION_ERROR)
+#define	vmcs_exit_reason()		(vmcs_read(VMCS_EXIT_REASON) & 0xffff)
+#define	vmcs_exit_qualification()	vmcs_read(VMCS_EXIT_QUALIFICATION)
+#define	vmcs_guest_cr3()		vmcs_read(VMCS_GUEST_CR3)
+#define	vmcs_gpa()			vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
+#define	vmcs_gla()			vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
+#define	vmcs_idt_vectoring_info()	vmcs_read(VMCS_IDT_VECTORING_INFO)
+#define	vmcs_idt_vectoring_err()	vmcs_read(VMCS_IDT_VECTORING_ERROR)
+
+#endif	/* _KERNEL */
+
+#define	VMCS_INITIAL			0xffffffffffffffff
+
+#define	VMCS_IDENT(encoding)		((encoding) | 0x80000000)
+/*
+ * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
+ */
+#define	VMCS_INVALID_ENCODING		0xffffffff
+
+/* 16-bit control fields */
+#define	VMCS_VPID			0x00000000
+#define	VMCS_PIR_VECTOR			0x00000002
+
+/* 16-bit guest-state fields */
+#define	VMCS_GUEST_ES_SELECTOR		0x00000800
+#define	VMCS_GUEST_CS_SELECTOR		0x00000802
+#define	VMCS_GUEST_SS_SELECTOR		0x00000804
+#define	VMCS_GUEST_DS_SELECTOR		0x00000806
+#define	VMCS_GUEST_FS_SELECTOR		0x00000808
+#define	VMCS_GUEST_GS_SELECTOR		0x0000080A
+#define	VMCS_GUEST_LDTR_SELECTOR	0x0000080C
+#define	VMCS_GUEST_TR_SELECTOR		0x0000080E
+#define	VMCS_GUEST_INTR_STATUS		0x00000810
+
+/* 16-bit host-state fields */
+#define	VMCS_HOST_ES_SELECTOR		0x00000C00
+#define	VMCS_HOST_CS_SELECTOR		0x00000C02
+#define	VMCS_HOST_SS_SELECTOR		0x00000C04
+#define	VMCS_HOST_DS_SELECTOR		0x00000C06
+#define	VMCS_HOST_FS_SELECTOR		0x00000C08
+#define	VMCS_HOST_GS_SELECTOR		0x00000C0A
+#define	VMCS_HOST_TR_SELECTOR		0x00000C0C
+
+/* 64-bit control fields */
+#define	VMCS_IO_BITMAP_A		0x00002000
+#define	VMCS_IO_BITMAP_B		0x00002002
+#define	VMCS_MSR_BITMAP			0x00002004
+#define	VMCS_EXIT_MSR_STORE		0x00002006
+#define	VMCS_EXIT_MSR_LOAD		0x00002008
+#define	VMCS_ENTRY_MSR_LOAD		0x0000200A
+#define	VMCS_EXECUTIVE_VMCS		0x0000200C
+#define	VMCS_TSC_OFFSET			0x00002010
+#define	VMCS_VIRTUAL_APIC		0x00002012
+#define	VMCS_APIC_ACCESS		0x00002014
+#define	VMCS_PIR_DESC			0x00002016
+#define	VMCS_EPTP			0x0000201A
+#define	VMCS_EOI_EXIT0			0x0000201C
+#define	VMCS_EOI_EXIT1			0x0000201E
+#define	VMCS_EOI_EXIT2			0x00002020
+#define	VMCS_EOI_EXIT3			0x00002022
+#define	VMCS_EOI_EXIT(vector)		(VMCS_EOI_EXIT0 + ((vector) / 64) * 2)
+
+/* 64-bit read-only fields */
+#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
+
+/* 64-bit guest-state fields */
+#define	VMCS_LINK_POINTER		0x00002800
+#define	VMCS_GUEST_IA32_DEBUGCTL	0x00002802
+#define	VMCS_GUEST_IA32_PAT		0x00002804
+#define	VMCS_GUEST_IA32_EFER		0x00002806
+#define	VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
+#define	VMCS_GUEST_PDPTE0		0x0000280A
+#define	VMCS_GUEST_PDPTE1		0x0000280C
+#define	VMCS_GUEST_PDPTE2		0x0000280E
+#define	VMCS_GUEST_PDPTE3		0x00002810
+
+/* 64-bit host-state fields */
+#define	VMCS_HOST_IA32_PAT		0x00002C00
+#define	VMCS_HOST_IA32_EFER		0x00002C02
+#define	VMCS_HOST_IA32_PERF_GLOBAL_CTRL	0x00002C04
+
+/* 32-bit control fields */
+#define	VMCS_PIN_BASED_CTLS		0x00004000
+#define	VMCS_PRI_PROC_BASED_CTLS	0x00004002
+#define	VMCS_EXCEPTION_BITMAP		0x00004004
+#define	VMCS_PF_ERROR_MASK		0x00004006
+#define	VMCS_PF_ERROR_MATCH		0x00004008
+#define	VMCS_CR3_TARGET_COUNT		0x0000400A
+#define	VMCS_EXIT_CTLS			0x0000400C
+#define	VMCS_EXIT_MSR_STORE_COUNT	0x0000400E
+#define	VMCS_EXIT_MSR_LOAD_COUNT	0x00004010
+#define	VMCS_ENTRY_CTLS			0x00004012
+#define	VMCS_ENTRY_MSR_LOAD_COUNT	0x00004014
+#define	VMCS_ENTRY_INTR_INFO		0x00004016
+#define	VMCS_ENTRY_EXCEPTION_ERROR	0x00004018
+#define	VMCS_ENTRY_INST_LENGTH		0x0000401A
+#define	VMCS_TPR_THRESHOLD		0x0000401C
+#define	VMCS_SEC_PROC_BASED_CTLS	0x0000401E
+#define	VMCS_PLE_GAP			0x00004020
+#define	VMCS_PLE_WINDOW			0x00004022
+
+/* 32-bit read-only data fields */
+#define	VMCS_INSTRUCTION_ERROR		0x00004400
+#define	VMCS_EXIT_REASON		0x00004402
+#define	VMCS_EXIT_INTR_INFO		0x00004404
+#define	VMCS_EXIT_INTR_ERRCODE		0x00004406
+#define	VMCS_IDT_VECTORING_INFO		0x00004408
+#define	VMCS_IDT_VECTORING_ERROR	0x0000440A
+#define	VMCS_EXIT_INSTRUCTION_LENGTH	0x0000440C
+#define	VMCS_EXIT_INSTRUCTION_INFO	0x0000440E
+
+/* 32-bit guest-state fields */
+#define	VMCS_GUEST_ES_LIMIT		0x00004800
+#define	VMCS_GUEST_CS_LIMIT		0x00004802
+#define	VMCS_GUEST_SS_LIMIT		0x00004804
+#define	VMCS_GUEST_DS_LIMIT		0x00004806
+#define	VMCS_GUEST_FS_LIMIT		0x00004808
+#define	VMCS_GUEST_GS_LIMIT		0x0000480A
+#define	VMCS_GUEST_LDTR_LIMIT		0x0000480C
+#define	VMCS_GUEST_TR_LIMIT		0x0000480E
+#define	VMCS_GUEST_GDTR_LIMIT		0x00004810
+#define	VMCS_GUEST_IDTR_LIMIT		0x00004812
+#define	VMCS_GUEST_ES_ACCESS_RIGHTS	0x00004814
+#define	VMCS_GUEST_CS_ACCESS_RIGHTS	0x00004816
+#define	VMCS_GUEST_SS_ACCESS_RIGHTS	0x00004818
+#define	VMCS_GUEST_DS_ACCESS_RIGHTS	0x0000481A
+#define	VMCS_GUEST_FS_ACCESS_RIGHTS	0x0000481C
+#define	VMCS_GUEST_GS_ACCESS_RIGHTS	0x0000481E
+#define	VMCS_GUEST_LDTR_ACCESS_RIGHTS	0x00004820
+#define	VMCS_GUEST_TR_ACCESS_RIGHTS	0x00004822
+#define	VMCS_GUEST_INTERRUPTIBILITY	0x00004824
+#define	VMCS_GUEST_ACTIVITY		0x00004826
+#define VMCS_GUEST_SMBASE		0x00004828
+#define	VMCS_GUEST_IA32_SYSENTER_CS	0x0000482A
+#define	VMCS_PREEMPTION_TIMER_VALUE	0x0000482E
+
+/* 32-bit host state fields */
+#define	VMCS_HOST_IA32_SYSENTER_CS	0x00004C00
+
+/* Natural Width control fields */
+#define	VMCS_CR0_MASK			0x00006000
+#define	VMCS_CR4_MASK			0x00006002
+#define	VMCS_CR0_SHADOW			0x00006004
+#define	VMCS_CR4_SHADOW			0x00006006
+#define	VMCS_CR3_TARGET0		0x00006008
+#define	VMCS_CR3_TARGET1		0x0000600A
+#define	VMCS_CR3_TARGET2		0x0000600C
+#define	VMCS_CR3_TARGET3		0x0000600E
+
+/* Natural Width read-only fields */
+#define	VMCS_EXIT_QUALIFICATION		0x00006400
+#define	VMCS_IO_RCX			0x00006402
+#define	VMCS_IO_RSI			0x00006404
+#define	VMCS_IO_RDI			0x00006406
+#define	VMCS_IO_RIP			0x00006408
+#define	VMCS_GUEST_LINEAR_ADDRESS	0x0000640A
+
+/* Natural Width guest-state fields */
+#define	VMCS_GUEST_CR0			0x00006800
+#define	VMCS_GUEST_CR3			0x00006802
+#define	VMCS_GUEST_CR4			0x00006804
+#define	VMCS_GUEST_ES_BASE		0x00006806
+#define	VMCS_GUEST_CS_BASE		0x00006808
+#define	VMCS_GUEST_SS_BASE		0x0000680A
+#define	VMCS_GUEST_DS_BASE		0x0000680C
+#define	VMCS_GUEST_FS_BASE		0x0000680E
+#define	VMCS_GUEST_GS_BASE		0x00006810
+#define	VMCS_GUEST_LDTR_BASE		0x00006812
+#define	VMCS_GUEST_TR_BASE		0x00006814
+#define	VMCS_GUEST_GDTR_BASE		0x00006816
+#define	VMCS_GUEST_IDTR_BASE		0x00006818
+#define	VMCS_GUEST_DR7			0x0000681A
+#define	VMCS_GUEST_RSP			0x0000681C
+#define	VMCS_GUEST_RIP			0x0000681E
+#define	VMCS_GUEST_RFLAGS		0x00006820
+#define	VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
+#define	VMCS_GUEST_IA32_SYSENTER_ESP	0x00006824
+#define	VMCS_GUEST_IA32_SYSENTER_EIP	0x00006826
+
+/* Natural Width host-state fields */
+#define	VMCS_HOST_CR0			0x00006C00
+#define	VMCS_HOST_CR3			0x00006C02
+#define	VMCS_HOST_CR4			0x00006C04
+#define	VMCS_HOST_FS_BASE		0x00006C06
+#define	VMCS_HOST_GS_BASE		0x00006C08
+#define	VMCS_HOST_TR_BASE		0x00006C0A
+#define	VMCS_HOST_GDTR_BASE		0x00006C0C
+#define	VMCS_HOST_IDTR_BASE		0x00006C0E
+#define	VMCS_HOST_IA32_SYSENTER_ESP	0x00006C10
+#define	VMCS_HOST_IA32_SYSENTER_EIP	0x00006C12
+#define	VMCS_HOST_RSP			0x00006C14
+#define	VMCS_HOST_RIP			0x00006c16
+
+/*
+ * VM instruction error numbers
+ */
+#define	VMRESUME_WITH_NON_LAUNCHED_VMCS	5
+
+/*
+ * VMCS exit reasons
+ */
+#define EXIT_REASON_EXCEPTION		0
+#define EXIT_REASON_EXT_INTR		1
+#define EXIT_REASON_TRIPLE_FAULT	2
+#define EXIT_REASON_INIT		3
+#define EXIT_REASON_SIPI		4
+#define EXIT_REASON_IO_SMI		5
+#define EXIT_REASON_SMI			6
+#define EXIT_REASON_INTR_WINDOW		7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH		9
+#define EXIT_REASON_CPUID		10
+#define EXIT_REASON_GETSEC		11
+#define EXIT_REASON_HLT			12
+#define EXIT_REASON_INVD		13
+#define EXIT_REASON_INVLPG		14
+#define EXIT_REASON_RDPMC		15
+#define EXIT_REASON_RDTSC		16
+#define EXIT_REASON_RSM			17
+#define EXIT_REASON_VMCALL		18
+#define EXIT_REASON_VMCLEAR		19
+#define EXIT_REASON_VMLAUNCH		20
+#define EXIT_REASON_VMPTRLD		21
+#define EXIT_REASON_VMPTRST		22
+#define EXIT_REASON_VMREAD		23
+#define EXIT_REASON_VMRESUME		24
+#define EXIT_REASON_VMWRITE		25
+#define EXIT_REASON_VMXOFF		26
+#define EXIT_REASON_VMXON		27
+#define EXIT_REASON_CR_ACCESS		28
+#define EXIT_REASON_DR_ACCESS		29
+#define EXIT_REASON_INOUT		30
+#define EXIT_REASON_RDMSR		31
+#define EXIT_REASON_WRMSR		32
+#define EXIT_REASON_INVAL_VMCS		33
+#define EXIT_REASON_INVAL_MSR		34
+#define EXIT_REASON_MWAIT		36
+#define EXIT_REASON_MTF			37
+#define EXIT_REASON_MONITOR		39
+#define EXIT_REASON_PAUSE		40
+#define EXIT_REASON_MCE_DURING_ENTRY	41
+#define EXIT_REASON_TPR			43
+#define EXIT_REASON_APIC_ACCESS		44
+#define	EXIT_REASON_VIRTUALIZED_EOI	45
+#define EXIT_REASON_GDTR_IDTR		46
+#define EXIT_REASON_LDTR_TR		47
+#define EXIT_REASON_EPT_FAULT		48
+#define EXIT_REASON_EPT_MISCONFIG	49
+#define EXIT_REASON_INVEPT		50
+#define EXIT_REASON_RDTSCP		51
+#define EXIT_REASON_VMX_PREEMPT		52
+#define EXIT_REASON_INVVPID		53
+#define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
+#define	EXIT_REASON_APIC_WRITE		56
+
+/*
+ * NMI unblocking due to IRET.
+ *
+ * Applies to VM-exits due to hardware exception or EPT fault.
+ */
+#define	EXIT_QUAL_NMIUDTI	(1 << 12)
+/*
+ * VMCS interrupt information fields
+ */
+#define	VMCS_INTR_VALID		(1U << 31)
+#define	VMCS_INTR_T_MASK	0x700		/* Interruption-info type */
+#define	VMCS_INTR_T_HWINTR	(0 << 8)
+#define	VMCS_INTR_T_NMI		(2 << 8)
+#define	VMCS_INTR_T_HWEXCEPTION	(3 << 8)
+#define	VMCS_INTR_T_SWINTR	(4 << 8)
+#define	VMCS_INTR_T_PRIV_SWEXCEPTION (5 << 8)
+#define	VMCS_INTR_T_SWEXCEPTION	(6 << 8)
+#define	VMCS_INTR_DEL_ERRCODE	(1 << 11)
+
+/*
+ * VMCS IDT-Vectoring information fields
+ */
+#define	VMCS_IDT_VEC_VALID		(1U << 31)
+#define	VMCS_IDT_VEC_ERRCODE_VALID	(1 << 11)
+
+/*
+ * VMCS Guest interruptibility field
+ */
+#define	VMCS_INTERRUPTIBILITY_STI_BLOCKING	(1 << 0)
+#define	VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING	(1 << 1)
+#define	VMCS_INTERRUPTIBILITY_SMI_BLOCKING	(1 << 2)
+#define	VMCS_INTERRUPTIBILITY_NMI_BLOCKING	(1 << 3)
+
+/*
+ * Exit qualification for EXIT_REASON_INVAL_VMCS
+ */
+#define	EXIT_QUAL_NMI_WHILE_STI_BLOCKING	3
+
+/*
+ * Exit qualification for EPT violation
+ */
+#define	EPT_VIOLATION_DATA_READ		(1UL << 0)
+#define	EPT_VIOLATION_DATA_WRITE	(1UL << 1)
+#define	EPT_VIOLATION_INST_FETCH	(1UL << 2)
+#define	EPT_VIOLATION_GPA_READABLE	(1UL << 3)
+#define	EPT_VIOLATION_GPA_WRITEABLE	(1UL << 4)
+#define	EPT_VIOLATION_GPA_EXECUTABLE	(1UL << 5)
+#define	EPT_VIOLATION_GLA_VALID		(1UL << 7)
+#define	EPT_VIOLATION_XLAT_VALID	(1UL << 8)
+
+/*
+ * Exit qualification for APIC-access VM exit
+ */
+#define	APIC_ACCESS_OFFSET(qual)	((qual) & 0xFFF)
+#define	APIC_ACCESS_TYPE(qual)		(((qual) >> 12) & 0xF)
+
+/*
+ * Exit qualification for APIC-write VM exit
+ */
+#define	APIC_WRITE_OFFSET(qual)		((qual) & 0xFFF)
+
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
new file mode 100644
index 0000000000..7ddf4e2a46
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -0,0 +1,2842 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx.c 284174 2015-06-09 00:14:47Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/vmx.c 284174 2015-06-09 00:14:47Z tychon $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/psl.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+#include <machine/vmparam.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+#include "vmm_lapic.h"
+#include "vmm_host.h"
+#include "vmm_ioport.h"
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "vatpic.h"
+#include "vlapic.h"
+#include "vlapic_priv.h"
+
+#include "ept.h"
+#include "vmx_cpufunc.h"
+#include "vmx.h"
+#include "vmx_msr.h"
+#include "x86.h"
+#include "vmx_controls.h"
+
+#define	PINBASED_CTLS_ONE_SETTING					\
+	(PINBASED_EXTINT_EXITING	|				\
+	 PINBASED_NMI_EXITING		|				\
+	 PINBASED_VIRTUAL_NMI)
+#define	PINBASED_CTLS_ZERO_SETTING	0
+
+#define PROCBASED_CTLS_WINDOW_SETTING					\
+	(PROCBASED_INT_WINDOW_EXITING	|				\
+	 PROCBASED_NMI_WINDOW_EXITING)
+
+#define	PROCBASED_CTLS_ONE_SETTING 					\
+	(PROCBASED_SECONDARY_CONTROLS	|				\
+	 PROCBASED_IO_EXITING		|				\
+	 PROCBASED_MSR_BITMAPS		|				\
+	 PROCBASED_CTLS_WINDOW_SETTING	|				\
+	 PROCBASED_CR8_LOAD_EXITING	|				\
+	 PROCBASED_CR8_STORE_EXITING)
+#define	PROCBASED_CTLS_ZERO_SETTING	\
+	(PROCBASED_CR3_LOAD_EXITING |	\
+	PROCBASED_CR3_STORE_EXITING |	\
+	PROCBASED_IO_BITMAPS)
+
+#define	PROCBASED_CTLS2_ONE_SETTING	PROCBASED2_ENABLE_EPT
+#define	PROCBASED_CTLS2_ZERO_SETTING	0
+
+/*
+ * VM-exit control bits grouped under the "one setting" name.  Each flag is
+ * listed exactly once; the resulting mask is the same as when
+ * VM_EXIT_LOAD_PAT was OR'ed in twice.
+ */
+#define	VM_EXIT_CTLS_ONE_SETTING					\
+	(VM_EXIT_HOST_LMA			|			\
+	VM_EXIT_SAVE_EFER			|			\
+	VM_EXIT_LOAD_EFER			|			\
+	VM_EXIT_SAVE_PAT			|			\
+	VM_EXIT_LOAD_PAT)
+
+#define	VM_EXIT_CTLS_ZERO_SETTING	VM_EXIT_SAVE_DEBUG_CONTROLS
+
+#define	VM_ENTRY_CTLS_ONE_SETTING	(VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
+
+#define	VM_ENTRY_CTLS_ZERO_SETTING					\
+	(VM_ENTRY_LOAD_DEBUG_CONTROLS		|			\
+	VM_ENTRY_INTO_SMM			|			\
+	VM_ENTRY_DEACTIVATE_DUAL_MONITOR)
+
+#define	HANDLED		1
+#define	UNHANDLED	0
+
+static MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL);
+
+int vmxon_enabled[MAXCPU];
+static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
+#ifndef	__FreeBSD__
+static vm_paddr_t vmxon_region_pa[MAXCPU];
+#endif
+
+static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
+static uint32_t exit_ctls, entry_ctls;
+
+static uint64_t cr0_ones_mask, cr0_zeros_mask;
+SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_ones_mask, CTLFLAG_RD,
+	     &cr0_ones_mask, 0, NULL);
+SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_zeros_mask, CTLFLAG_RD,
+	     &cr0_zeros_mask, 0, NULL);
+
+static uint64_t cr4_ones_mask, cr4_zeros_mask;
+SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_ones_mask, CTLFLAG_RD,
+	     &cr4_ones_mask, 0, NULL);
+SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_zeros_mask, CTLFLAG_RD,
+	     &cr4_zeros_mask, 0, NULL);
+
+static int vmx_initialized;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD,
+	   &vmx_initialized, 0, "Intel VMX initialized");
+
+/*
+ * Optional capabilities
+ */
+static int cap_halt_exit;
+static int cap_pause_exit;
+static int cap_unrestricted_guest;
+static int cap_monitor_trap;
+static int cap_invpcid;
+
+static int virtual_interrupt_delivery;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
+    &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support");
+
+static int posted_interrupts;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupts, CTLFLAG_RD,
+    &posted_interrupts, 0, "APICv posted interrupt support");
+
+static int pirvec;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD,
+    &pirvec, 0, "APICv posted interrupt vector");
+
+static struct unrhdr *vpid_unr;
+static u_int vpid_alloc_failed;
+SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
+	    &vpid_alloc_failed, 0, NULL);
+
+/*
+ * Use the last page below 4GB as the APIC access address. This address is
+ * occupied by the boot firmware so it is guaranteed that it will not conflict
+ * with a page in system memory.
+ */
+#define	APIC_ACCESS_ADDRESS	0xFFFFF000
+
+static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
+static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
+static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
+static void vmx_inject_pir(struct vlapic *vlapic);
+
+#ifdef KTR
+/*
+ * Translate a VM-exit reason number into a short name for trace output.
+ *
+ * Known reasons map to string literals.  Any other value is formatted as a
+ * decimal number into a function-static buffer, so the pointer returned for
+ * an unknown reason is overwritten by the next such call.
+ */
+static const char *
+exit_reason_to_str(int reason)
+{
+	static char reasonbuf[32];
+
+	switch (reason) {
+	case EXIT_REASON_EXCEPTION:
+		return "exception";
+	case EXIT_REASON_EXT_INTR:
+		return "extint";
+	case EXIT_REASON_TRIPLE_FAULT:
+		return "triplefault";
+	case EXIT_REASON_INIT:
+		return "init";
+	case EXIT_REASON_SIPI:
+		return "sipi";
+	case EXIT_REASON_IO_SMI:
+		return "iosmi";
+	case EXIT_REASON_SMI:
+		return "smi";
+	case EXIT_REASON_INTR_WINDOW:
+		return "intrwindow";
+	case EXIT_REASON_NMI_WINDOW:
+		return "nmiwindow";
+	case EXIT_REASON_TASK_SWITCH:
+		return "taskswitch";
+	case EXIT_REASON_CPUID:
+		return "cpuid";
+	case EXIT_REASON_GETSEC:
+		return "getsec";
+	case EXIT_REASON_HLT:
+		return "hlt";
+	case EXIT_REASON_INVD:
+		return "invd";
+	case EXIT_REASON_INVLPG:
+		return "invlpg";
+	case EXIT_REASON_RDPMC:
+		return "rdpmc";
+	case EXIT_REASON_RDTSC:
+		return "rdtsc";
+	case EXIT_REASON_RSM:
+		return "rsm";
+	case EXIT_REASON_VMCALL:
+		return "vmcall";
+	case EXIT_REASON_VMCLEAR:
+		return "vmclear";
+	case EXIT_REASON_VMLAUNCH:
+		return "vmlaunch";
+	case EXIT_REASON_VMPTRLD:
+		return "vmptrld";
+	case EXIT_REASON_VMPTRST:
+		return "vmptrst";
+	case EXIT_REASON_VMREAD:
+		return "vmread";
+	case EXIT_REASON_VMRESUME:
+		return "vmresume";
+	case EXIT_REASON_VMWRITE:
+		return "vmwrite";
+	case EXIT_REASON_VMXOFF:
+		return "vmxoff";
+	case EXIT_REASON_VMXON:
+		return "vmxon";
+	case EXIT_REASON_CR_ACCESS:
+		return "craccess";
+	case EXIT_REASON_DR_ACCESS:
+		return "draccess";
+	case EXIT_REASON_INOUT:
+		return "inout";
+	case EXIT_REASON_RDMSR:
+		return "rdmsr";
+	case EXIT_REASON_WRMSR:
+		return "wrmsr";
+	case EXIT_REASON_INVAL_VMCS:
+		return "invalvmcs";
+	case EXIT_REASON_INVAL_MSR:
+		return "invalmsr";
+	case EXIT_REASON_MWAIT:
+		return "mwait";
+	case EXIT_REASON_MTF:
+		return "mtf";
+	case EXIT_REASON_MONITOR:
+		return "monitor";
+	case EXIT_REASON_PAUSE:
+		return "pause";
+	case EXIT_REASON_MCE_DURING_ENTRY:
+		/*
+		 * vmcs.h names reason 41 EXIT_REASON_MCE_DURING_ENTRY; the
+		 * trace string is kept as "mce".
+		 */
+		return "mce";
+	case EXIT_REASON_TPR:
+		return "tpr";
+	case EXIT_REASON_APIC_ACCESS:
+		return "apic-access";
+	case EXIT_REASON_VIRTUALIZED_EOI:
+		return "virtualized-eoi";
+	case EXIT_REASON_GDTR_IDTR:
+		return "gdtridtr";
+	case EXIT_REASON_LDTR_TR:
+		return "ldtrtr";
+	case EXIT_REASON_EPT_FAULT:
+		return "eptfault";
+	case EXIT_REASON_EPT_MISCONFIG:
+		return "eptmisconfig";
+	case EXIT_REASON_INVEPT:
+		return "invept";
+	case EXIT_REASON_RDTSCP:
+		return "rdtscp";
+	case EXIT_REASON_VMX_PREEMPT:
+		return "vmxpreempt";
+	case EXIT_REASON_INVVPID:
+		return "invvpid";
+	case EXIT_REASON_WBINVD:
+		return "wbinvd";
+	case EXIT_REASON_XSETBV:
+		return "xsetbv";
+	case EXIT_REASON_APIC_WRITE:
+		return "apic-write";
+	default:
+		/* Unknown reason: fall back to its decimal value. */
+		snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason);
+		return (reasonbuf);
+	}
+}
+
+#ifdef SETJMP_TRACE
+/*
+ * Name a VMX_RETURN_* code for trace output; any code not listed below is
+ * reported as "unknown".
+ */
+static const char *
+vmx_setjmp_rc2str(int rc)
+{
+	const char *name;
+
+	switch (rc) {
+	case VMX_RETURN_DIRECT:
+		name = "direct";
+		break;
+	case VMX_RETURN_LONGJMP:
+		name = "longjmp";
+		break;
+	case VMX_RETURN_VMRESUME:
+		name = "vmresume";
+		break;
+	case VMX_RETURN_VMLAUNCH:
+		name = "vmlaunch";
+		break;
+	case VMX_RETURN_AST:
+		name = "ast";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+
+	return (name);
+}
+
+/*
+ * Emit one trace record for a single vmxctx member: 'regname' is both
+ * stringified into the message and used to fetch the member's value, which
+ * is printed as a 16-digit hex number.
+ *
+ * NOTE(review): this function-like macro reuses the name tested by the
+ * enclosing "#ifdef SETJMP_TRACE"; confirm that defining it again here does
+ * not clash with however SETJMP_TRACE is defined to enable this code.
+ */
+#define	SETJMP_TRACE(vmx, vcpu, vmxctx, regname)			  \
+	VMM_CTR1((vmx)->vm, (vcpu), "setjmp trace " #regname " 0x%016lx", \
+		 (vmxctx)->regname)
+
+/*
+ * Dump the state relevant to a vmx setjmp return into the trace buffer:
+ * the context pointer, the return code (by name and number), the host
+ * RIP/RSP read from the VMCS, and then every host and guest register saved
+ * in 'vmxctx'.
+ *
+ * Panics if 'vmxctx' is not the context slot belonging to 'vcpu'.
+ */
+static void
+vmx_setjmp_trace(struct vmx *vmx, int vcpu, struct vmxctx *vmxctx, int rc)
+{
+	uint64_t host_rip, host_rsp;
+
+	/* The caller must pass the context that belongs to this vcpu. */
+	if (vmxctx != &vmx->ctx[vcpu])
+		panic("vmx_setjmp_trace: invalid vmxctx %p; should be %p",
+			vmxctx, &vmx->ctx[vcpu]);
+
+	VMM_CTR1((vmx)->vm, (vcpu), "vmxctx = %p", vmxctx);
+	VMM_CTR2((vmx)->vm, (vcpu), "setjmp return code %s(%d)",
+		 vmx_setjmp_rc2str(rc), rc);
+
+	/*
+	 * The vmread() results are ignored; pre-loading all-ones presumably
+	 * makes a failed read stand out in the trace -- TODO confirm.
+	 */
+	host_rsp = host_rip = ~0;
+	vmread(VMCS_HOST_RIP, &host_rip);
+	vmread(VMCS_HOST_RSP, &host_rsp);
+	VMM_CTR2((vmx)->vm, (vcpu), "vmcs host_rip 0x%016lx, host_rsp 0x%016lx",
+		 host_rip, host_rsp);
+
+	/* Host registers saved in the software context. */
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_r15);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_r14);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_r13);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_r12);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_rbp);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_rsp);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_rbx);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, host_rip);
+
+	/* Guest registers saved in the software context. */
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rdi);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rsi);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rdx);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rcx);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r8);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r9);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rax);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rbx);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_rbp);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r10);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r11);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r12);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r13);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r14);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_r15);
+	SETJMP_TRACE(vmx, vcpu, vmxctx, guest_cr2);
+}
+#endif
+#else
+/*
+ * Variant used when KTR is not defined: same signature, no work.
+ */
+static void __inline
+vmx_setjmp_trace(struct vmx *vmx, int vcpu, struct vmxctx *vmxctx, int rc)
+{
+}
+#endif	/* KTR */
+
+/*
+ * Adjust guest access to the x2APIC MSRs handled here.
+ *
+ * Most registers are made read-only for the guest; TPR, EOI and SELF_IPI
+ * are made readable and writable.  The registers are processed in a fixed
+ * order and the return value is the sum of the individual
+ * guest_msr_ro()/guest_msr_rw() results.
+ */
+static int
+vmx_allow_x2apic_msrs(struct vmx *vmx)
+{
+	/* Read-only registers handled before the register banks. */
+	static const u_int ro_first[] = {
+		MSR_APIC_ID,
+		MSR_APIC_VERSION,
+		MSR_APIC_LDR,
+		MSR_APIC_SVR,
+	};
+	/* First MSR of each bank of eight consecutive read-only registers. */
+	static const u_int ro_bank[] = {
+		MSR_APIC_ISR0,
+		MSR_APIC_TMR0,
+		MSR_APIC_IRR0,
+	};
+	/* Read-only registers handled after the register banks. */
+	static const u_int ro_last[] = {
+		MSR_APIC_ESR,
+		MSR_APIC_LVT_TIMER,
+		MSR_APIC_LVT_THERMAL,
+		MSR_APIC_LVT_PCINT,
+		MSR_APIC_LVT_LINT0,
+		MSR_APIC_LVT_LINT1,
+		MSR_APIC_LVT_ERROR,
+		MSR_APIC_ICR_TIMER,
+		MSR_APIC_DCR_TIMER,
+		MSR_APIC_ICR,
+	};
+	/*
+	 * Registers the guest may both read and write.  These get the
+	 * special treatment described in the section "Virtualizing
+	 * MSR-Based APIC Accesses".
+	 */
+	static const u_int rw_regs[] = {
+		MSR_APIC_TPR,
+		MSR_APIC_EOI,
+		MSR_APIC_SELF_IPI,
+	};
+	u_int k, reg;
+	int failures;
+
+	failures = 0;
+
+	for (k = 0; k < sizeof(ro_first) / sizeof(ro_first[0]); k++)
+		failures += guest_msr_ro(vmx, ro_first[k]);
+
+	for (k = 0; k < sizeof(ro_bank) / sizeof(ro_bank[0]); k++) {
+		for (reg = 0; reg < 8; reg++)
+			failures += guest_msr_ro(vmx, ro_bank[k] + reg);
+	}
+
+	for (k = 0; k < sizeof(ro_last) / sizeof(ro_last[0]); k++)
+		failures += guest_msr_ro(vmx, ro_last[k]);
+
+	for (k = 0; k < sizeof(rw_regs) / sizeof(rw_regs[0]); k++)
+		failures += guest_msr_rw(vmx, rw_regs[k]);
+
+	return (failures);
+}
+
+/*
+ * Return 'cr0' with every bit of cr0_ones_mask forced on and then every bit
+ * of cr0_zeros_mask forced off.
+ */
+u_long
+vmx_fix_cr0(u_long cr0)
+{
+	u_long fixed;
+
+	fixed = cr0 | cr0_ones_mask;
+	fixed &= ~cr0_zeros_mask;
+
+	return (fixed);
+}
+
+/*
+ * Return 'cr4' with every bit of cr4_ones_mask forced on and then every bit
+ * of cr4_zeros_mask forced off.
+ */
+u_long
+vmx_fix_cr4(u_long cr4)
+{
+	u_long fixed;
+
+	fixed = cr4 | cr4_ones_mask;
+	fixed &= ~cr4_zeros_mask;
+
+	return (fixed);
+}
+
+/*
+ * Release a VPID.  Values outside [0, 0xffff] are a fatal error.  Only
+ * values above VM_MAXCPU are handed back to the unit number allocator.
+ */
+static void
+vpid_free(int vpid)
+{
+	if (!(vpid >= 0 && vpid <= 0xffff))
+		panic("vpid_free: invalid vpid %d", vpid);
+
+	/*
+	 * The range [0,VM_MAXCPU] is special: those VPIDs never came from
+	 * the unit number allocator, so there is nothing to give back.
+	 */
+	if (vpid <= VM_MAXCPU)
+		return;
+
+	free_unr(vpid_unr, vpid);
+}
+
+/*
+ * Fill vpid[0 .. num-1] with the VPIDs to use for a set of vcpus.
+ *
+ * 'num' must be in [1, VM_MAXCPU]; anything else is a fatal error.
+ */
+static void
+vpid_alloc(uint16_t *vpid, int num)
+{
+	int got, id, k;
+
+	if (num <= 0 || num > VM_MAXCPU)
+		panic("invalid number of vpids requested: %d", num);
+
+	/*
+	 * When the "enable vpid" execution control is off, every vcpu is
+	 * required to use VPID 0.
+	 */
+	if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) {
+		for (k = 0; k < num; k++)
+			vpid[k] = 0;
+		return;
+	}
+
+	/*
+	 * Try to take one unique VPID per vcpu from the unit number
+	 * allocator, stopping at the first failure.
+	 */
+	for (got = 0; got < num; got++) {
+		id = alloc_unr(vpid_unr);
+		if (id == -1)
+			break;
+		vpid[got] = id;
+	}
+
+	if (got == num)
+		return;
+
+	/*
+	 * The allocator ran short, so fall back to the [1,VM_MAXCPU] range.
+	 *
+	 * These VPIDs are not unique across VMs, but that does not affect
+	 * correctness because the combined mappings are also tagged with the
+	 * EP4TA, which is unique for each VM.
+	 *
+	 * It is still sub-optimal because an invvpid will invalidate
+	 * combined mappings for a particular VPID across all EP4TAs.
+	 */
+	atomic_add_int(&vpid_alloc_failed, 1);
+
+	/* Hand back whatever was obtained, most recent first. */
+	for (k = got - 1; k >= 0; k--)
+		vpid_free(vpid[k]);
+
+	for (k = 0; k < num; k++)
+		vpid[k] = k + 1;
+}
+
+/*
+ * Create the unit number allocator that hands out VPIDs in the range
+ * [VM_MAXCPU + 1, 0xffff] and store it in vpid_unr.
+ */
+static void
+vpid_init(void)
+{
+	/*
+	 * VPID 0 is required when the "enable VPID" execution control is
+	 * disabled.
+	 *
+	 * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the
+	 * unit number allocator does not have sufficient unique VPIDs to
+	 * satisfy the allocation.
+	 *
+	 * The remaining VPIDs are managed by the unit number allocator.
+	 */
+	vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL);
+}
+
+#ifndef	__FreeBSD__
+/*
+ * Copy the template list of guest MSR entries (values all zero) into
+ * 'g_area' and report the number of entries through 'g_count'.
+ *
+ * Panics if the template holds more than GUEST_MSR_MAX_ENTRIES entries.
+ */
+static void
+msr_save_area_init(struct msr_entry *g_area, int *g_count)
+{
+	static struct msr_entry guest_msrs[] = {
+		{ MSR_KGSBASE, 0, 0 },
+		{ MSR_LSTAR, 0, 0 },
+		{ MSR_CSTAR, 0, 0 },
+		{ MSR_STAR, 0, 0 },
+		{ MSR_SF_MASK, 0, 0 },
+	};
+	int nentries;
+
+	nentries = sizeof(guest_msrs) / sizeof(guest_msrs[0]);
+	if (nentries > GUEST_MSR_MAX_ENTRIES)
+		panic("guest msr save area overrun");
+
+	bcopy(guest_msrs, g_area, sizeof(guest_msrs));
+	*g_count = nentries;
+}
+
+/*
+ * Read the current value of each MSR in the host template with rdmsr(),
+ * store it in the (function-static) template, then copy the template into
+ * 'h_area' and report the number of entries through 'h_count'.
+ *
+ * Panics if the template holds more than HOST_MSR_MAX_ENTRIES entries.
+ */
+static void
+host_msr_save_area_init(struct msr_entry *h_area, int *h_count)
+{
+	static struct msr_entry host_msrs[] = {
+		{ MSR_LSTAR, 0, 0 },
+		{ MSR_CSTAR, 0, 0 },
+		{ MSR_STAR, 0, 0 },
+		{ MSR_SF_MASK, 0, 0 },
+	};
+	struct msr_entry *ent;
+	int nentries;
+
+	nentries = sizeof(host_msrs) / sizeof(host_msrs[0]);
+	if (nentries > HOST_MSR_MAX_ENTRIES)
+		panic("host msr save area overrun");
+
+	/* Capture the values as seen by the CPU running this code. */
+	for (ent = host_msrs; ent < &host_msrs[nentries]; ent++)
+		ent->val = rdmsr(ent->index);
+
+	bcopy(host_msrs, h_area, sizeof(host_msrs));
+	*h_count = nentries;
+}
+#endif
+
+/*
+ * Per-CPU teardown: if VMXON succeeded on the current CPU, flush all VPID
+ * and EPT contexts and execute VMXOFF; in every case clear CR4.VMXE.
+ */
+static void
+vmx_disable(void *arg __unused)
+{
+	struct invvpid_desc vpid_all = { 0 };
+	struct invept_desc ept_all = { 0 };
+
+	if (vmxon_enabled[curcpu] != 0) {
+		/*
+		 * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b.
+		 *
+		 * Neither VMXON nor VMXOFF is required to invalidate the TLB
+		 * caching structures, so flush explicitly here to keep cached
+		 * information from surviving into a later VMX episode.
+		 */
+		invvpid(INVVPID_TYPE_ALL_CONTEXTS, vpid_all);
+		invept(INVEPT_TYPE_ALL_CONTEXTS, ept_all);
+		vmxoff();
+	}
+
+	load_cr4(rcr4() & ~CR4_VMXE);
+}
+
+/*
+ * Module-level teardown: release the posted-interrupt vector (FreeBSD
+ * builds only), destroy the VPID allocator if one exists, and run
+ * vmx_disable() through smp_rendezvous().  Always returns 0.
+ */
+static int
+vmx_cleanup(void)
+{
+	
+#ifdef	__FreeBSD__
+	if (pirvec != 0)
+		vmm_ipi_free(pirvec);
+#endif
+
+	/* Clear the pointer after deletion so a repeat call is harmless. */
+	if (vpid_unr != NULL) {
+		delete_unrhdr(vpid_unr);
+		vpid_unr = NULL;
+	}
+
+	smp_rendezvous(NULL, vmx_disable, NULL, NULL);
+
+	return (0);
+}
+
+/*
+ * Per-CPU bring-up: make sure the feature-control MSR has both the lock
+ * and VMX-enable bits set, set CR4.VMXE, stamp the VMX revision into this
+ * CPU's VMXON region and execute VMXON.  On success the CPU is recorded in
+ * vmxon_enabled[].
+ */
+static void
+vmx_enable(void *arg __unused)
+{
+	int error;
+	uint64_t feature_control;
+
+	/*
+	 * If either bit is missing, write the MSR back with both set.
+	 */
+	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+	if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 ||
+	    (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
+		wrmsr(MSR_IA32_FEATURE_CONTROL,
+		    feature_control | IA32_FEATURE_CONTROL_VMX_EN |
+		    IA32_FEATURE_CONTROL_LOCK);
+	}
+
+	load_cr4(rcr4() | CR4_VMXE);
+
+	/* The first 32 bits of the VMXON region carry the revision. */
+	*(uint32_t *)vmxon_region[curcpu] = vmx_revision();
+#ifdef	__FreeBSD__
+	error = vmxon(vmxon_region[curcpu]);
+#else
+	/* Non-FreeBSD builds pass the region's stored physical address. */
+	error = vmxon_pa(vmxon_region_pa[curcpu]);
+	ASSERT(error == 0);
+#endif
+	if (error == 0)
+		vmxon_enabled[curcpu] = 1;
+}
+
+/*
+ * One-time initialization of the VMX backend.
+ *
+ * Verifies that the processor supports VMX and that firmware has not locked
+ * it off, computes the pin-based, processor-based, exit and entry control
+ * values, probes the optional capabilities (HLT exiting, monitor trap flag,
+ * PAUSE exiting, unrestricted guest), initializes EPT, records the CR0/CR4
+ * bits that must be fixed to 0 or 1, and finally runs vmx_enable() through
+ * smp_rendezvous().
+ *
+ * Returns 0 on success, ENXIO when VMX is unsupported or disabled, or the
+ * error reported by vmx_set_ctlreg()/ept_init().
+ */
+static int
+vmx_init(void)
+{
+#define	X86FSET_VMX	35
+	extern uchar_t x86_featureset[];
+	extern boolean_t is_x86_feature(void *featureset, uint_t feature);
+	int error;
+	uint64_t fixed0, fixed1, feature_control;
+	uint32_t tmp;
+#ifndef	__FreeBSD__
+	int i;
+#endif
+
+	/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
+#ifdef	__FreeBSD__
+	if (!(cpu_feature2 & CPUID2_VMX)) {
+		printf("vmx_init: processor does not support VMX operation\n");
+		return (ENXIO);
+	}
+#else
+	if (!is_x86_feature(x86_featureset, X86FSET_VMX)) {
+		cmn_err(CE_WARN, "vmx_init: processor does not support VMX operation\n");
+		/* Without VMX nothing below can work; bail out like FreeBSD. */
+		return (ENXIO);
+	}
+#endif
+
+	/*
+	 * Refuse to continue if firmware locked MSR_IA32_FEATURE_CONTROL
+	 * with the VMXON enable bit clear: the MSR can no longer be written,
+	 * so VMX cannot be turned on.
+	 */
+	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+	if ((feature_control & IA32_FEATURE_CONTROL_LOCK) != 0 &&
+	    (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
+		printf("vmx_init: VMX operation disabled by BIOS\n");
+		return (ENXIO);
+	}
+
+	/* Check support for primary processor-based VM-execution controls */
+	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+			       MSR_VMX_TRUE_PROCBASED_CTLS,
+			       PROCBASED_CTLS_ONE_SETTING,
+			       PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls);
+	if (error) {
+		printf("vmx_init: processor does not support desired primary "
+		       "processor-based controls\n");
+		return (error);
+	}
+
+	/* Clear the processor-based ctl bits that are set on demand */
+	procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING;
+
+	/* Check support for secondary processor-based VM-execution controls */
+	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+			       MSR_VMX_PROCBASED_CTLS2,
+			       PROCBASED_CTLS2_ONE_SETTING,
+			       PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2);
+	if (error) {
+		printf("vmx_init: processor does not support desired secondary "
+		       "processor-based controls\n");
+		return (error);
+	}
+
+	/* Check support for VPID */
+	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
+			       PROCBASED2_ENABLE_VPID, 0, &tmp);
+	if (error == 0)
+		procbased_ctls2 |= PROCBASED2_ENABLE_VPID;
+
+	/* Check support for pin-based VM-execution controls */
+	error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
+			       MSR_VMX_TRUE_PINBASED_CTLS,
+			       PINBASED_CTLS_ONE_SETTING,
+			       PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls);
+	if (error) {
+		printf("vmx_init: processor does not support desired "
+		       "pin-based controls\n");
+		return (error);
+	}
+
+	/* Check support for VM-exit controls */
+	error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
+			       VM_EXIT_CTLS_ONE_SETTING,
+			       VM_EXIT_CTLS_ZERO_SETTING,
+			       &exit_ctls);
+	if (error) {
+		printf("vmx_init: processor does not support desired "
+		    "exit controls\n");
+		return (error);
+	}
+
+	/* Check support for VM-entry controls */
+	error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS,
+	    VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING,
+	    &entry_ctls);
+	if (error) {
+		printf("vmx_init: processor does not support desired "
+		    "entry controls\n");
+		return (error);
+	}
+
+	/*
+	 * Check support for optional features by testing them
+	 * as individual bits
+	 */
+	cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+					MSR_VMX_TRUE_PROCBASED_CTLS,
+					PROCBASED_HLT_EXITING, 0,
+					&tmp) == 0);
+
+	cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+					MSR_VMX_PROCBASED_CTLS,
+					PROCBASED_MTF, 0,
+					&tmp) == 0);
+
+	cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+					 MSR_VMX_TRUE_PROCBASED_CTLS,
+					 PROCBASED_PAUSE_EXITING, 0,
+					 &tmp) == 0);
+
+	cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+					MSR_VMX_PROCBASED_CTLS2,
+					PROCBASED2_UNRESTRICTED_GUEST, 0,
+				        &tmp) == 0);
+
+	/* Initialize EPT */
+	error = ept_init();
+	if (error) {
+		printf("vmx_init: ept initialization failed (%d)\n", error);
+		return (error);
+	}
+
+	/*
+	 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
+	 */
+	fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
+	fixed1 = rdmsr(MSR_VMX_CR0_FIXED1);
+	cr0_ones_mask = fixed0 & fixed1;
+	cr0_zeros_mask = ~fixed0 & ~fixed1;
+
+	/*
+	 * CR0_PE and CR0_PG can be set to zero in VMX non-root operation
+	 * if unrestricted guest execution is allowed.
+	 */
+	if (cap_unrestricted_guest)
+		cr0_ones_mask &= ~(CR0_PG | CR0_PE);
+
+	/*
+	 * Do not allow the guest to set CR0_NW or CR0_CD.
+	 */
+	cr0_zeros_mask |= (CR0_NW | CR0_CD);
+
+	fixed0 = rdmsr(MSR_VMX_CR4_FIXED0);
+	fixed1 = rdmsr(MSR_VMX_CR4_FIXED1);
+	cr4_ones_mask = fixed0 & fixed1;
+	cr4_zeros_mask = ~fixed0 & ~fixed1;
+
+#ifndef	__FreeBSD__
+	/* vmx_enable() hands these physical addresses to vmxon_pa(). */
+	for (i = 0; i < MAXCPU; i++) {
+		vmxon_region_pa[i] = vtophys(&vmxon_region[i]);
+	}
+#endif
+
+	vpid_init();
+
+	vmx_msr_init();
+
+	/* enable VMX operation */
+	smp_rendezvous(NULL, vmx_enable, NULL, NULL);
+
+	vmx_initialized = 1;
+
+	return (0);
+}
+
+/*
+ * Program the guest/host mask and the read shadow of %cr0 or %cr4 in 'vmcs'.
+ *
+ * which	control register number; must be 0 or 4 (anything else panics)
+ * vmcs		VMCS to program
+ * initial	value written to the read shadow
+ *
+ * The mask is the union of the "must be one" and "must be zero" bits recorded
+ * by vmx_init().  Returns 0 on success or the first vmcs_setreg() error.
+ */
+static int
+vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial)
+{
+	const int is_cr0 = (which == 0);
+	int error, mask_ident, shadow_ident;
+	uint64_t mask_value;
+
+	if (!is_cr0 && which != 4)
+		panic("vmx_setup_cr_shadow: unknown cr%d", which);
+
+	mask_ident = is_cr0 ? VMCS_CR0_MASK : VMCS_CR4_MASK;
+	shadow_ident = is_cr0 ? VMCS_CR0_SHADOW : VMCS_CR4_SHADOW;
+	mask_value = is_cr0 ? (cr0_ones_mask | cr0_zeros_mask) :
+	    (cr4_ones_mask | cr4_zeros_mask);
+
+	error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value);
+	if (error == 0) {
+		error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident),
+		    initial);
+	}
+
+	return (error);
+}
+/* Convenience wrappers selecting the control register. */
+#define	vmx_setup_cr0_shadow(vmcs,init)	vmx_setup_cr_shadow(0, (vmcs), (init))
+#define	vmx_setup_cr4_shadow(vmcs,init)	vmx_setup_cr_shadow(4, (vmcs), (init))
+
+/*
+ * Allocate and initialize the per-VM VMX state for 'vm'.
+ *
+ * Allocates a zeroed struct vmx (which must come back page aligned),
+ * invalidates stale EPT mappings, sets up the MSR permission bitmap,
+ * allocates one VPID per vcpu and then, for each of the VM_MAXCPU vcpus,
+ * clears and populates its VMCS, installs the MSR save areas (non-FreeBSD
+ * only) and programs the CR0/CR4 shadows.
+ *
+ * Returns the new struct vmx.  Every failure along the way panics.
+ */
+static void *
+vmx_vminit(struct vm *vm)
+{
+	uint16_t vpid[VM_MAXCPU];
+	int i, error;
+#ifndef	__FreeBSD__
+	int guest_msr_count, host_msr_count;
+#endif
+	struct vmx *vmx;
+	struct vmcs *vmcs;
+
+	vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO);
+	if ((uintptr_t)vmx & PAGE_MASK) {
+		panic("malloc of struct vmx not aligned on %d byte boundary",
+		      PAGE_SIZE);
+	}
+	vmx->vm = vm;
+
+	/*
+	 * Clean up EPTP-tagged guest physical and combined mappings
+	 *
+	 * VMX transitions are not required to invalidate any guest physical
+	 * mappings. So, it may be possible for stale guest physical mappings
+	 * to be present in the processor TLBs.
+	 *
+	 * Combined mappings for this EP4TA are also invalidated for all VPIDs.
+	 */
+	ept_invalidate_mappings(vtophys(vmx->pml4ept));
+
+	msr_bitmap_initialize(vmx->msr_bitmap);
+
+	/*
+	 * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE.
+	 * The guest FSBASE and GSBASE are saved and restored during
+	 * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are
+	 * always restored from the vmcs host state area on vm-exit.
+	 *
+	 * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
+	 * how they are saved/restored so can be directly accessed by the
+	 * guest.
+	 *
+	 * MSR_EFER is saved and restored in the guest VMCS area on a
+	 * VM exit and entry respectively. It is also restored from the
+	 * host VMCS area on a VM exit.
+	 *
+	 * MSR_PAT is saved and restored in the guest VMCS area on a VM exit
+	 * and entry respectively. It is also restored from the host VMCS
+	 * area on a VM exit.
+	 *
+	 * The TSC MSR is exposed read-only. Writes are disallowed as
+	 * that will impact the host TSC.  If the guest does a write
+	 * the "use TSC offsetting" execution control is enabled and the
+	 * difference between the host TSC and the guest TSC is written
+	 * into the TSC offset in the VMCS.
+	 */
+	if (guest_msr_rw(vmx, MSR_GSBASE) ||
+	    guest_msr_rw(vmx, MSR_FSBASE) ||
+	    guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
+	    guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
+	    guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
+	    guest_msr_rw(vmx, MSR_EFER) ||
+	    guest_msr_rw(vmx, MSR_PAT) ||
+	    guest_msr_ro(vmx, MSR_TSC))
+		panic("vmx_vminit: error setting guest msr access");
+
+	vpid_alloc(vpid, VM_MAXCPU);
+
+	for (i = 0; i < VM_MAXCPU; i++) {
+		vmcs = &vmx->vmcs[i];
+		vmcs->identifier = vmx_revision();
+		error = vmclear(vmcs);
+		if (error != 0) {
+			panic("vmx_vminit: vmclear error %d on vcpu %d\n",
+			      error, i);
+		}
+
+		vmx_msr_guest_init(vmx, i);
+
+		error = vmcs_set_defaults(vmcs,
+					  (u_long)vmx_longjmp,
+					  (u_long)&vmx->ctx[i],
+					  vtophys(vmx->pml4ept),
+					  pinbased_ctls,
+					  procbased_ctls,
+					  procbased_ctls2,
+					  exit_ctls, entry_ctls,
+					  vtophys(vmx->msr_bitmap),
+					  vpid[i]);
+
+		if (error != 0)
+			panic("vmx_vminit: vmcs_set_defaults error %d", error);
+
+		vmx->cap[i].set = 0;
+		vmx->cap[i].proc_ctls = procbased_ctls;
+
+		/* No host cpu yet; forces vmx_set_pcpu_defaults() to run. */
+		vmx->state[i].lastcpu = -1;
+		vmx->state[i].vpid = vpid[i];
+
+#ifndef	__FreeBSD__
+		msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count);
+
+		error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]),
+		    guest_msr_count);
+		if (error != 0)
+			panic("vmcs_set_msr_save error %d", error);
+
+		host_msr_save_area_init(vmx->host_msrs[i], &host_msr_count);
+
+		error = vmcs_set_host_msr_save(&vmx->vmcs[i],
+					       vtophys(vmx->host_msrs[i]),
+					       host_msr_count);
+		if (error != 0)
+			panic("vmcs_set_host_msr_save error %d", error);
+#endif
+
+		/*
+		 * Set up the CR0/4 shadows, and init the read shadow
+		 * to the power-on register value from the Intel Sys Arch.
+		 *  CR0 - 0x60000010
+		 *  CR4 - 0
+		 */
+		error = vmx_setup_cr0_shadow(vmcs, 0x60000010);
+		if (error != 0)
+			panic("vmx_setup_cr0_shadow %d", error);
+
+		error = vmx_setup_cr4_shadow(vmcs, 0);
+		if (error != 0)
+			panic("vmx_setup_cr4_shadow %d", error);
+	}
+
+	return (vmx);
+}
+
+/*
+ * Emulate a guest CPUID instruction using the register values saved in
+ * 'vmxctx'.
+ *
+ * The rax/rbx/rcx/rdx slots are handed to x86_emulate_cpuid() as uint32_t
+ * pointers, so the emulation reads and updates only the first four bytes of
+ * each slot.
+ *
+ * Returns whatever x86_emulate_cpuid() reports.
+ */
+static int
+vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
+{
+
+	return (x86_emulate_cpuid(vm, vcpu,
+	    (uint32_t *)(&vmxctx->guest_rax),
+	    (uint32_t *)(&vmxctx->guest_rbx),
+	    (uint32_t *)(&vmxctx->guest_rcx),
+	    (uint32_t *)(&vmxctx->guest_rdx)));
+}
+
+/*
+ * Trace hook: with KTR compiled in, log the guest %rip the vcpu is about to
+ * resume at.  Compiles to nothing otherwise.
+ */
+static __inline void
+vmx_run_trace(struct vmx *vmx, int vcpu)
+{
+#if defined(KTR)
+	VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip());
+#endif
+}
+
+/*
+ * Trace hook: with KTR compiled in, log a VM exit together with its reason,
+ * the guest %rip and whether it was handled.  Compiles to nothing otherwise.
+ */
+static __inline void
+vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason,
+    int handled)
+{
+#if defined(KTR)
+	const char *disposition = handled ? "handled" : "unhandled";
+
+	VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", disposition,
+	    exit_reason_to_str(exit_reason), rip);
+#endif
+}
+
+/*
+ * Trace hook: with KTR compiled in, log an exit taken because of a pending
+ * AST at guest address 'rip'.  Compiles to nothing otherwise.
+ */
+static __inline void
+vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip)
+{
+#if defined(KTR)
+	VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip);
+#endif
+}
+
+/*
+ * Refresh the host-state fields of the current VMCS that depend on which
+ * host cpu the vcpu is running on.
+ *
+ * The host FS base is rewritten on every call.  Everything else (TR, GDTR
+ * and GS bases, the SYSENTER MSRs on non-FreeBSD, and the VPID-tagged TLB
+ * flush) is only redone when the vcpu has moved to a different host cpu
+ * since its last run.
+ */
+static void
+vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu)
+{
+	struct vmxstate *vmxstate;
+	struct invvpid_desc invvpid_desc = { 0 };
+
+	vmxstate = &vmx->state[vcpu];
+	vmcs_write(VMCS_HOST_FS_BASE, vmm_get_host_fsbase());
+	if (vmxstate->lastcpu == curcpu)
+		return;
+
+	vmxstate->lastcpu = curcpu;
+
+	vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
+
+	vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
+	vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase());
+	vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase());
+
+#ifndef	__FreeBSD__
+	vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, rdmsr(MSR_SYSENTER_CS_MSR));
+	vmcs_write(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(MSR_SYSENTER_ESP_MSR));
+	vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, rdmsr(MSR_SYSENTER_EIP_MSR));
+#endif
+
+	/*
+	 * If we are using VPIDs then invalidate all mappings tagged with 'vpid'
+	 *
+	 * We do this because this vcpu was executing on a different host
+	 * cpu when it last ran. We do not track whether it invalidated
+	 * mappings associated with its 'vpid' during that run. So we must
+	 * assume that the mappings associated with 'vpid' on 'curcpu' are
+	 * stale and invalidate them.
+	 *
+	 * Note that we incur this penalty only when the scheduler chooses to
+	 * move the thread associated with this vcpu between host cpus.
+	 *
+	 * Note also that this will invalidate mappings tagged with 'vpid'
+	 * for "all" EP4TAs.
+	 */
+	if (vmxstate->vpid != 0) {
+		invvpid_desc.vpid = vmxstate->vpid;
+		invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc);
+	}
+}
+
+/*
+ * Advance the guest %rip past the instruction that caused 'vmexit' by
+ * writing rip + inst_length to the VMCS.  Panics if the VMCS write fails.
+ */
+static void
+vm_exit_update_rip(struct vm_exit *vmexit)
+{
+	int error = vmwrite(VMCS_GUEST_RIP, vmexit->rip + vmexit->inst_length);
+
+	if (error != 0)
+		panic("vmx_run: error %d writing to VMCS_GUEST_RIP", error);
+}
+
+/*
+ * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set.
+ *
+ * The check is made at compile time against the requested "one" settings:
+ * the build fails if PROCBASED_CTLS_ONE_SETTING does not include
+ * PROCBASED_INT_WINDOW_EXITING.
+ */
+CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0);
+
+/*
+ * Turn on "interrupt-window exiting" for 'vcpu' unless it is already on.
+ * Updates both the cached control value and the current VMCS.
+ */
+static void __inline
+vmx_set_int_window_exiting(struct vmx *vmx, int vcpu)
+{
+
+	/* Already armed: skip the redundant VMCS write. */
+	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0)
+		return;
+
+	vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING;
+	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+	VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting");
+}
+
+/*
+ * Turn off "interrupt-window exiting" for 'vcpu', updating both the cached
+ * control value and the current VMCS.  The control is asserted to be
+ * currently on.
+ */
+static void __inline
+vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
+{
+
+	/*
+	 * The two branches differ only in the format flag (%#x vs %x).
+	 * NOTE(review): presumably the non-FreeBSD KASSERT formatter lacks
+	 * the '#' flag -- confirm.
+	 */
+#ifdef	__FreeBSD__
+	KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0,
+	    ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls));
+#else
+	KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0,
+	    ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls));
+#endif
+	vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING;
+	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+	VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting");
+}
+
+/*
+ * Turn on "NMI-window exiting" for 'vcpu' unless it is already on.
+ * Updates both the cached control value and the current VMCS.
+ */
+static void __inline
+vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+
+	/* Already armed: skip the redundant VMCS write. */
+	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0)
+		return;
+
+	vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING;
+	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+	VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting");
+}
+
+/*
+ * Turn off "NMI-window exiting" for 'vcpu', updating both the cached control
+ * value and the current VMCS.  The control is asserted to be currently on.
+ */
+static void __inline
+vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
+{
+
+	/*
+	 * The two branches differ only in the format flag (%#x vs %x).
+	 * NOTE(review): presumably the non-FreeBSD KASSERT formatter lacks
+	 * the '#' flag -- confirm.
+	 */
+#ifdef	__FreeBSD__
+	KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0,
+	    ("nmi_window_exiting not set %#x", vmx->cap[vcpu].proc_ctls));
+#else
+	KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0,
+	    ("nmi_window_exiting not set %x", vmx->cap[vcpu].proc_ctls));
+#endif
+	vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
+	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+	VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
+}
+
+/*
+ * Program the TSC offset of the current VMCS for 'vcpu'.
+ *
+ * "Use TSC offsetting" is switched on first if it is not on already.
+ * Returns the result of the VMCS write of 'offset'.
+ */
+int
+vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
+{
+
+	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) {
+		vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET;
+		vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+		VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting");
+	}
+
+	return (vmwrite(VMCS_TSC_OFFSET, offset));
+}
+
+/*
+ * Guest interruptibility-state bits that block event injection.
+ * NMI_BLOCKING is tested before an NMI is injected; HWINTR_BLOCKING is
+ * tested before a maskable hardware interrupt is injected (and, together
+ * with NMI_BLOCKING, before an NMI in vmx_inject_interrupts()).
+ */
+#define	NMI_BLOCKING	(VMCS_INTERRUPTIBILITY_NMI_BLOCKING |		\
+			 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
+#define	HWINTR_BLOCKING	(VMCS_INTERRUPTIBILITY_STI_BLOCKING |		\
+			 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
+
+/*
+ * Inject a virtual NMI into 'vcpu' through the VM-entry interruption
+ * information field and clear the pending NMI request.
+ *
+ * The caller must already have established that nothing blocks NMI delivery
+ * and that no other event is queued for injection; both are asserted.
+ */
+static void
+vmx_inject_nmi(struct vmx *vmx, int vcpu)
+{
+	uint32_t intr_state, entry_info;
+
+	intr_state = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+#ifdef	__FreeBSD__
+	KASSERT((intr_state & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest "
+	    "interruptibility-state %#x", intr_state));
+#else
+	KASSERT((intr_state & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest "
+	    "interruptibility-state %x", intr_state));
+#endif
+
+	entry_info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+#ifdef	__FreeBSD__
+	KASSERT((entry_info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid "
+	    "VM-entry interruption information %#x", entry_info));
+#else
+	KASSERT((entry_info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid "
+	    "VM-entry interruption information %x", entry_info));
+#endif
+
+	/*
+	 * Queue the virtual NMI.  The vector has to be the NMI IDT entry,
+	 * otherwise the VM-entry checks on this field fail.
+	 */
+	entry_info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID;
+	vmcs_write(VMCS_ENTRY_INTR_INFO, entry_info);
+
+	VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI");
+
+	/* The request has been consumed. */
+	vm_nmi_clear(vmx->vm, vcpu);
+}
+
+/*
+ * Queue at most one event for injection on the next VM entry of 'vcpu'.
+ *
+ * Candidates are considered in this order:
+ *  1. an event reported by vm_entry_intinfo() (#BP and #OF are re-typed as
+ *     software exceptions),
+ *  2. a pending NMI, injected directly when nothing blocks it, otherwise
+ *     deferred by enabling NMI-window exiting,
+ *  3. a maskable interrupt: an ExtINT vector from the legacy PIC when one is
+ *     pending, otherwise a vector from the local APIC.
+ *
+ * A maskable interrupt that cannot be injected right now (RFLAGS.IF clear,
+ * STI/MOV-SS blocking, or an event already queued) is deferred by enabling
+ * interrupt-window exiting.
+ */
+static void
+vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
+{
+	int vector, need_nmi_exiting, extint_pending;
+	uint64_t rflags, entryinfo;
+	uint32_t gi, info;
+
+	if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
+#ifdef	__FreeBSD__
+		KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
+		    "intinfo is not valid: %#lx", __func__, entryinfo));
+#else
+		KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
+		    "intinfo is not valid: %lx", __func__, entryinfo));
+#endif
+
+		info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+#ifdef	__FreeBSD__
+		KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
+		     "pending exception: %#lx/%#x", __func__, entryinfo, info));
+#else
+		KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject "
+		     "pending exception: %lx/%x", __func__, entryinfo, info));
+#endif
+
+		/* Low 32 bits: interruption info; high 32 bits: error code. */
+		info = entryinfo;
+		vector = info & 0xff;
+		if (vector == IDT_BP || vector == IDT_OF) {
+			/*
+			 * VT-x requires #BP and #OF to be injected as software
+			 * exceptions.
+			 */
+			info &= ~VMCS_INTR_T_MASK;
+			info |= VMCS_INTR_T_SWEXCEPTION;
+		}
+
+		if (info & VMCS_INTR_DEL_ERRCODE)
+			vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32);
+
+		vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+	}
+
+	if (vm_nmi_pending(vmx->vm, vcpu)) {
+		/*
+		 * If there are no conditions blocking NMI injection then
+		 * inject it directly here otherwise enable "NMI window
+		 * exiting" to inject it as soon as we can.
+		 *
+		 * We also check for STI_BLOCKING because some implementations
+		 * don't allow NMI injection in this case. If we are running
+		 * on a processor that doesn't have this restriction it will
+		 * immediately exit and the NMI will be injected in the
+		 * "NMI window exiting" handler.
+		 */
+		need_nmi_exiting = 1;
+		gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+		if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
+			info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+			if ((info & VMCS_INTR_VALID) == 0) {
+				vmx_inject_nmi(vmx, vcpu);
+				need_nmi_exiting = 0;
+			} else {
+				VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI "
+				    "due to VM-entry intr info %#x", info);
+			}
+		} else {
+			VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to "
+			    "Guest Interruptibility-state %#x", gi);
+		}
+
+		if (need_nmi_exiting)
+			vmx_set_nmi_window_exiting(vmx, vcpu);
+	}
+
+	extint_pending = vm_extint_pending(vmx->vm, vcpu);
+
+#ifdef	__FreeBSD__
+	if (!extint_pending && virtual_interrupt_delivery) {
+		vmx_inject_pir(vlapic);
+		return;
+	}
+#endif
+
+	/*
+	 * If interrupt-window exiting is already in effect then don't bother
+	 * checking for pending interrupts. This is just an optimization and
+	 * not needed for correctness.
+	 */
+	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) {
+		VCPU_CTR0(vmx->vm, vcpu, "Skip interrupt injection due to "
+		    "pending int_window_exiting");
+		return;
+	}
+
+	if (!extint_pending) {
+		/* Ask the local apic for a vector to inject */
+		if (!vlapic_pending_intr(vlapic, &vector))
+			return;
+
+		/*
+		 * From the Intel SDM, Volume 3, Section "Maskable
+		 * Hardware Interrupts":
+		 * - maskable interrupt vectors [16,255] can be delivered
+		 *   through the local APIC.
+		 */
+		KASSERT(vector >= 16 && vector <= 255,
+		    ("invalid vector %d from local APIC", vector));
+	} else {
+		/* Ask the legacy pic for a vector to inject */
+		vatpic_pending_intr(vmx->vm, &vector);
+
+		/*
+		 * From the Intel SDM, Volume 3, Section "Maskable
+		 * Hardware Interrupts":
+		 * - maskable interrupt vectors [0,255] can be delivered
+		 *   through the INTR pin.
+		 */
+		KASSERT(vector >= 0 && vector <= 255,
+		    ("invalid vector %d from INTR", vector));
+	}
+
+	/* Check RFLAGS.IF and the interruptibility state of the guest */
+	rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+	if ((rflags & PSL_I) == 0) {
+		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
+		    "rflags %#lx", vector, rflags);
+		goto cantinject;
+	}
+
+	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+	if (gi & HWINTR_BLOCKING) {
+		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
+		    "Guest Interruptibility-state %#x", vector, gi);
+		goto cantinject;
+	}
+
+	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+	if (info & VMCS_INTR_VALID) {
+		/*
+		 * This is expected and could happen for multiple reasons:
+		 * - A vectoring VM-entry was aborted due to astpending
+		 * - A VM-exit happened during event injection.
+		 * - An exception was injected above.
+		 * - An NMI was injected above or after "NMI window exiting"
+		 */
+		VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to "
+		    "VM-entry intr info %#x", vector, info);
+		goto cantinject;
+	}
+
+	/* Inject the interrupt */
+	info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID;
+	info |= vector;
+	vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+
+	if (!extint_pending) {
+		/* Update the Local APIC ISR */
+		vlapic_intr_accepted(vlapic, vector);
+	} else {
+		vm_extint_clear(vmx->vm, vcpu);
+		vatpic_intr_accepted(vmx->vm, vector);
+
+		/*
+		 * After we accepted the current ExtINT the PIC may
+		 * have posted another one.  If that is the case, set
+		 * the Interrupt Window Exiting execution control so
+		 * we can inject that one too.
+		 *
+		 * Also, interrupt window exiting allows us to inject any
+		 * pending APIC vector that was preempted by the ExtINT
+		 * as soon as possible. This applies both for the software
+		 * emulated vlapic and the hardware assisted virtual APIC.
+		 */
+		vmx_set_int_window_exiting(vmx, vcpu);
+	}
+
+	VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
+
+	return;
+
+cantinject:
+	/*
+	 * Set the Interrupt Window Exiting execution control so we can inject
+	 * the interrupt as soon as blocking condition goes away.
+	 */
+	vmx_set_int_window_exiting(vmx, vcpu);
+}
+
+/*
+ * Re-assert virtual-NMI blocking in the guest interruptibility state.
+ *
+ * With the "virtual NMIs" execution control set to 1 the logical processor
+ * tracks virtual-NMI blocking in the Guest Interruptibility-state field of
+ * the VMCS, and an IRET executed in VMX non-root operation removes that
+ * blocking.
+ *
+ * The unblocking takes place even when the IRET itself faults, in which case
+ * the hypervisor has to put the blocking back before resuming the guest.
+ */
+static void
+vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid)
+{
+	uint32_t intr_state;
+
+	VCPU_CTR0(vmx->vm, vcpuid, "Restore Virtual-NMI blocking");
+
+	intr_state = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+	intr_state |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
+	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, intr_state);
+}
+
+/*
+ * Drop virtual-NMI blocking from the guest interruptibility state of the
+ * current VMCS.
+ */
+static void
+vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid)
+{
+	uint32_t intr_state;
+
+	VCPU_CTR0(vmx->vm, vcpuid, "Clear Virtual-NMI blocking");
+
+	intr_state = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+	intr_state &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
+	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, intr_state);
+}
+
+/*
+ * Fetch a guest general-purpose register by number.
+ *
+ * ident	0..15, in the order rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
+ *		r8..r15; any other value panics
+ *
+ * %rsp (4) lives in the VMCS; every other register comes from the saved
+ * software context of 'vcpu'.
+ */
+static uint64_t
+vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident)
+{
+	const struct vmxctx *ctx = &vmx->ctx[vcpu];
+
+	/* The stack pointer is the one register not kept in the context. */
+	if (ident == 4)
+		return (vmcs_read(VMCS_GUEST_RSP));
+
+	switch (ident) {
+	case 0:
+		return (ctx->guest_rax);
+	case 1:
+		return (ctx->guest_rcx);
+	case 2:
+		return (ctx->guest_rdx);
+	case 3:
+		return (ctx->guest_rbx);
+	case 5:
+		return (ctx->guest_rbp);
+	case 6:
+		return (ctx->guest_rsi);
+	case 7:
+		return (ctx->guest_rdi);
+	case 8:
+		return (ctx->guest_r8);
+	case 9:
+		return (ctx->guest_r9);
+	case 10:
+		return (ctx->guest_r10);
+	case 11:
+		return (ctx->guest_r11);
+	case 12:
+		return (ctx->guest_r12);
+	case 13:
+		return (ctx->guest_r13);
+	case 14:
+		return (ctx->guest_r14);
+	case 15:
+		return (ctx->guest_r15);
+	default:
+		panic("invalid vmx register %d", ident);
+	}
+}
+
+/*
+ * Store 'regval' into a guest general-purpose register selected by number.
+ *
+ * ident	0..15, in the order rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
+ *		r8..r15; any other value panics
+ *
+ * %rsp (4) is written to the VMCS; every other register is written to the
+ * saved software context of 'vcpu'.
+ */
+static void
+vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval)
+{
+	struct vmxctx *ctx = &vmx->ctx[vcpu];
+
+	/* The stack pointer is the one register not kept in the context. */
+	if (ident == 4) {
+		vmcs_write(VMCS_GUEST_RSP, regval);
+		return;
+	}
+
+	switch (ident) {
+	case 0:
+		ctx->guest_rax = regval;
+		return;
+	case 1:
+		ctx->guest_rcx = regval;
+		return;
+	case 2:
+		ctx->guest_rdx = regval;
+		return;
+	case 3:
+		ctx->guest_rbx = regval;
+		return;
+	case 5:
+		ctx->guest_rbp = regval;
+		return;
+	case 6:
+		ctx->guest_rsi = regval;
+		return;
+	case 7:
+		ctx->guest_rdi = regval;
+		return;
+	case 8:
+		ctx->guest_r8 = regval;
+		return;
+	case 9:
+		ctx->guest_r9 = regval;
+		return;
+	case 10:
+		ctx->guest_r10 = regval;
+		return;
+	case 11:
+		ctx->guest_r11 = regval;
+		return;
+	case 12:
+		ctx->guest_r12 = regval;
+		return;
+	case 13:
+		ctx->guest_r13 = regval;
+		return;
+	case 14:
+		ctx->guest_r14 = regval;
+		return;
+	case 15:
+		ctx->guest_r15 = regval;
+		return;
+	default:
+		panic("invalid vmx register %d", ident);
+	}
+}
+
+/*
+ * Emulate a guest "mov to %cr0" described by exit qualification 'exitqual'.
+ *
+ * The value the guest wrote goes to the CR0 read shadow unchanged; the value
+ * given to the hardware has the must-be-one bits forced on and the
+ * must-be-zero bits forced off.  If the guest is turning paging on with
+ * EFER.LME set, EFER.LMA and the "IA-32e mode guest" entry control are set
+ * as well.
+ *
+ * Returns HANDLED, or UNHANDLED for any access other than a mov to %cr0.
+ */
+static int
+vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
+{
+	uint64_t guest_val, hw_val, efer, ctls;
+
+	/* We only handle mov to %cr0 at this time */
+	if ((exitqual & 0xf0) != 0x00)
+		return (UNHANDLED);
+
+	guest_val = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf);
+	vmcs_write(VMCS_CR0_SHADOW, guest_val);
+
+	hw_val = (guest_val | cr0_ones_mask) & ~cr0_zeros_mask;
+	vmcs_write(VMCS_GUEST_CR0, hw_val);
+
+	/* Nothing more to do unless the guest asked for paging. */
+	if ((guest_val & CR0_PG) == 0)
+		return (HANDLED);
+
+	/*
+	 * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and
+	 * the "IA-32e mode guest" bit in VM-entry control must be
+	 * equal.
+	 */
+	efer = vmcs_read(VMCS_GUEST_IA32_EFER);
+	if ((efer & EFER_LME) == 0)
+		return (HANDLED);
+
+	vmcs_write(VMCS_GUEST_IA32_EFER, efer | EFER_LMA);
+	ctls = vmcs_read(VMCS_ENTRY_CTLS);
+	ctls |= VM_ENTRY_GUEST_LMA;
+	vmcs_write(VMCS_ENTRY_CTLS, ctls);
+
+	return (HANDLED);
+}
+
+/*
+ * Emulate a guest "mov to %cr4" described by exit qualification 'exitqual'.
+ *
+ * The value the guest wrote goes to the CR4 read shadow unchanged; the value
+ * given to the hardware has the must-be-one bits forced on and the
+ * must-be-zero bits forced off.
+ *
+ * Returns HANDLED, or UNHANDLED for any access other than a mov to %cr4.
+ */
+static int
+vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
+{
+	uint64_t guest_val, hw_val;
+
+	/* We only handle mov to %cr4 at this time */
+	if ((exitqual & 0xf0) != 0x00)
+		return (UNHANDLED);
+
+	guest_val = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf);
+	vmcs_write(VMCS_CR4_SHADOW, guest_val);
+
+	hw_val = (guest_val | cr4_ones_mask) & ~cr4_zeros_mask;
+	vmcs_write(VMCS_GUEST_CR4, hw_val);
+
+	return (HANDLED);
+}
+
+/*
+ * Emulate a guest move to or from %cr8 by forwarding it to the virtual
+ * local APIC of 'vcpu'.
+ *
+ * Bits 11:8 of 'exitqual' select the general-purpose register and bit 4
+ * selects the direction (set: read %cr8 into the register).
+ *
+ * Returns HANDLED, or UNHANDLED for any other kind of access.
+ */
+static int
+vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
+{
+	struct vlapic *vlapic;
+	const int regnum = (exitqual >> 8) & 0xf;
+	const int from_cr8 = (exitqual & 0x10) != 0;
+
+	/* We only handle mov %cr8 to/from a register at this time. */
+	if ((exitqual & 0xe0) != 0x00)
+		return (UNHANDLED);
+
+	vlapic = vm_lapic(vmx->vm, vcpu);
+	if (from_cr8)
+		vmx_set_guest_reg(vmx, vcpu, regnum, vlapic_get_cr8(vlapic));
+	else
+		vlapic_set_cr8(vlapic, vmx_get_guest_reg(vmx, vcpu, regnum));
+
+	return (HANDLED);
+}
+
+/*
+ * Current privilege level of the guest.
+ *
+ * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL, which
+ * is taken here from bits 6:5 of the SS access-rights field.
+ */
+static int
+vmx_cpl(void)
+{
+	const uint32_t ss_ar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS);
+
+	return ((ss_ar >> 5) & 0x3);
+}
+
+/*
+ * Classify the guest's current operating mode from the VMCS:
+ *   EFER.LMA set and CS.L set	-> CPU_MODE_64BIT
+ *   EFER.LMA set, CS.L clear	-> CPU_MODE_COMPATIBILITY
+ *   EFER.LMA clear, CR0.PE set	-> CPU_MODE_PROTECTED
+ *   otherwise			-> CPU_MODE_REAL
+ */
+static enum vm_cpu_mode
+vmx_cpu_mode(void)
+{
+	uint32_t cs_ar;
+
+	if ((vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) == 0) {
+		if (vmcs_read(VMCS_GUEST_CR0) & CR0_PE)
+			return (CPU_MODE_PROTECTED);
+		return (CPU_MODE_REAL);
+	}
+
+	/* Long mode is active: CS.L (bit 13) picks 64-bit vs compatibility. */
+	cs_ar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
+	if ((cs_ar & 0x2000) != 0)
+		return (CPU_MODE_64BIT);
+	return (CPU_MODE_COMPATIBILITY);
+}
+
+/*
+ * Classify the guest's current paging mode from the VMCS:
+ *   CR0.PG clear			-> PAGING_MODE_FLAT
+ *   CR0.PG set, CR4.PAE clear		-> PAGING_MODE_32
+ *   CR0.PG and CR4.PAE set, EFER.LME	-> PAGING_MODE_64
+ *   CR0.PG and CR4.PAE set, no LME	-> PAGING_MODE_PAE
+ */
+static enum vm_paging_mode
+vmx_paging_mode(void)
+{
+
+	if ((vmcs_read(VMCS_GUEST_CR0) & CR0_PG) == 0)
+		return (PAGING_MODE_FLAT);
+
+	if ((vmcs_read(VMCS_GUEST_CR4) & CR4_PAE) == 0)
+		return (PAGING_MODE_32);
+
+	if ((vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME) != 0)
+		return (PAGING_MODE_64);
+
+	return (PAGING_MODE_PAE);
+}
+
+/*
+ * Return the index register used by a string I/O instruction: %rdi when
+ * 'in' is non-zero, %rsi otherwise.
+ */
+static uint64_t
+inout_str_index(struct vmx *vmx, int vcpuid, int in)
+{
+	const enum vm_reg_name reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+	uint64_t val;
+	int error;
+
+	error = vmx_getreg(vmx, vcpuid, reg, &val);
+	KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error));
+
+	return (val);
+}
+
+/*
+ * Return the iteration count of a string I/O instruction: the guest %rcx
+ * when 'rep' is non-zero, otherwise 1.
+ */
+static uint64_t
+inout_str_count(struct vmx *vmx, int vcpuid, int rep)
+{
+	uint64_t val;
+	int error;
+
+	/* Without a REP prefix the instruction runs exactly once. */
+	if (!rep)
+		return (1);
+
+	error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val);
+	KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error));
+
+	return (val);
+}
+
+/*
+ * Decode the address size, in bytes, of a string I/O instruction from
+ * bits 9:7 of 'inst_info': 0 -> 2 (16 bit), 1 -> 4 (32 bit), 2 -> 8 (64 bit).
+ * Any other encoding panics.
+ */
+static int
+inout_str_addrsize(uint32_t inst_info)
+{
+	const uint32_t size = (inst_info >> 7) & 0x7;
+
+	/* Valid encodings map to 2, 4 and 8 bytes, i.e. 2 << encoding. */
+	if (size <= 2)
+		return (2 << size);
+
+	panic("%s: invalid size encoding %d", __func__, size);
+}
+
+/*
+ * Fill in the segment name and descriptor used by a string I/O instruction.
+ *
+ * For "in" the segment is always %es; for "out" it is decoded from
+ * bits 17:15 of 'inst_info'.  The descriptor is then fetched into
+ * vis->seg_desc.
+ */
+static void
+inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
+    struct vm_inout_str *vis)
+{
+	int error, segnum;
+
+	if (!in) {
+		segnum = (inst_info >> 15) & 0x7;
+		vis->seg_name = vm_segment_name(segnum);
+	} else {
+		vis->seg_name = VM_REG_GUEST_ES;
+	}
+
+	error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc);
+	KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error));
+}
+
+/*
+ * Snapshot the guest's paging context (%cr3, CPL, cpu mode and paging mode)
+ * into 'paging'.
+ */
+static void
+vmx_paging_info(struct vm_guest_paging *paging)
+{
+
+	paging->cr3 = vmcs_guest_cr3();
+	paging->cpl = vmx_cpl();
+	paging->cpu_mode = vmx_cpu_mode();
+	paging->paging_mode = vmx_paging_mode();
+}
+
+/*
+ * Turn 'vmexit' into an instruction-emulation exit for the access at
+ * guest-physical address 'gpa' / guest-linear address 'gla'.
+ *
+ * Records the guest paging context and the code-segment base and default
+ * operand size for the current cpu mode, then resets the instruction
+ * decoder state with vie_init().
+ */
+static void
+vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
+{
+	struct vm_guest_paging *paging = &vmexit->u.inst_emul.paging;
+	uint32_t cs_ar;
+
+	vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+	vmexit->u.inst_emul.gpa = gpa;
+	vmexit->u.inst_emul.gla = gla;
+	vmx_paging_info(paging);
+
+	if (paging->cpu_mode == CPU_MODE_REAL) {
+		vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+		vmexit->u.inst_emul.cs_d = 0;
+	} else if (paging->cpu_mode == CPU_MODE_PROTECTED ||
+	    paging->cpu_mode == CPU_MODE_COMPATIBILITY) {
+		vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+		cs_ar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
+		vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(cs_ar);
+	} else {
+		/* Remaining modes: base and default size are both zero. */
+		vmexit->u.inst_emul.cs_base = 0;
+		vmexit->u.inst_emul.cs_d = 0;
+	}
+
+	vie_init(&vmexit->u.inst_emul.vie, NULL, 0);
+}
+
+/*
+ * Map an EPT-violation exit qualification to a VM_PROT_* fault type.
+ * A data write takes precedence over an instruction fetch; anything else is
+ * reported as a read.
+ */
+static int
+ept_fault_type(uint64_t ept_qual)
+{
+
+	if (ept_qual & EPT_VIOLATION_DATA_WRITE)
+		return (VM_PROT_WRITE);
+
+	if (ept_qual & EPT_VIOLATION_INST_FETCH)
+		return (VM_PROT_EXECUTE);
+
+	return (VM_PROT_READ);
+}
+
+/*
+ * Decide whether an EPT violation should be handled by instruction
+ * emulation.
+ *
+ * Returns TRUE only for a data read or write (never an instruction fetch)
+ * whose exit qualification has both the "guest-linear address valid" and
+ * "translation valid" bits set; FALSE otherwise.
+ */
+static boolean_t
+ept_emulation_fault(uint64_t ept_qual)
+{
+
+	/* EPT fault on an instruction fetch doesn't make sense here */
+	if ((ept_qual & EPT_VIOLATION_INST_FETCH) != 0)
+		return (FALSE);
+
+	/* EPT fault must be a read fault or a write fault */
+	if ((ept_qual & EPT_VIOLATION_DATA_READ) == 0 &&
+	    (ept_qual & EPT_VIOLATION_DATA_WRITE) == 0)
+		return (FALSE);
+
+	/*
+	 * The faulting guest-physical address has to be the translation of
+	 * a guest-linear address, so both validity bits are required.
+	 */
+	if ((ept_qual & EPT_VIOLATION_GLA_VALID) != 0 &&
+	    (ept_qual & EPT_VIOLATION_XLAT_VALID) != 0)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+/*
+ * Emulate a guest WRMSR of 'val' to MSR 'num'.  MSRs claimed by the local
+ * APIC are routed to lapic_wrmsr(); everything else goes to vmx_wrmsr().
+ * Returns the error reported by whichever handler was used.
+ */
+static int
+emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+
+	if (lapic_msr(num))
+		return (lapic_wrmsr(vmx->vm, vcpuid, num, val, retu));
+
+	return (vmx_wrmsr(vmx, vcpuid, num, val, retu));
+}
+
+/*
+ * Emulate a guest RDMSR of MSR 'num'.  The value is fetched from the local
+ * APIC emulation or from vmx_rdmsr() and, on success, split across the
+ * guest's %rax (low 32 bits) and %rdx (high 32 bits).
+ *
+ * Returns the handler's error if the read failed, otherwise the result of
+ * the final register update.
+ */
+static int
+emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
+{
+	struct vmxctx *ctx;
+	uint64_t msrval;
+	uint32_t lo, hi;
+	int error;
+
+	error = lapic_msr(num) ?
+	    lapic_rdmsr(vmx->vm, vcpuid, num, &msrval, retu) :
+	    vmx_rdmsr(vmx, vcpuid, num, &msrval, retu);
+	if (error != 0)
+		return (error);
+
+	ctx = &vmx->ctx[vcpuid];
+
+	/* The upper halves of %rax and %rdx are cleared by the 32-bit store */
+	lo = msrval;
+	error = vmxctx_setreg(ctx, VM_REG_GUEST_RAX, lo);
+	KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error));
+
+	hi = msrval >> 32;
+	error = vmxctx_setreg(ctx, VM_REG_GUEST_RDX, hi);
+	KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error));
+
+	return (error);
+}
+
+/*
+ * Decode a VM exit and either handle it in the kernel or populate 'vmexit'
+ * so that it can be completed in userland.
+ *
+ * On entry 'vmexit' already carries the exit reason and exit qualification.
+ * The return value is non-zero if the exit was consumed here and the guest
+ * may be resumed, or zero if control must go back to userland with
+ * 'vmexit->exitcode' describing the exit.
+ *
+ * Handlers that set 'handled' fall through to the common tail, which moves
+ * the guest %rip past the exiting instruction.  Exits that must resume at
+ * the same %rip (interrupt/NMI windows, external interrupts, NMIs) return 1
+ * directly and bypass that tail.
+ */
+static int
+vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
+{
+	int error, handled, in;
+	struct vmcs *vmcs;
+	struct vmxctx *vmxctx;
+	struct vm_inout_str *vis;
+	uint32_t eax, ecx, edx, idtvec_info, intr_info, inst_info;
+	uint64_t qual, gpa;
+	bool retu;
+
+	CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0);
+	CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0);
+
+	handled = UNHANDLED;
+	vmcs = &vmx->vmcs[vcpu];
+	vmxctx = &vmx->ctx[vcpu];
+	qual = vmexit->u.vmx.exit_qualification;
+	vmexit->exitcode = VM_EXITCODE_BOGUS;
+
+	vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
+
+	switch (vmexit->u.vmx.exit_reason) {
+	case EXIT_REASON_CR_ACCESS:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
+		/* The low 4 bits of the qualification name the control reg */
+		switch (qual & 0xf) {
+		case 0:
+			handled = vmx_emulate_cr0_access(vmx, vcpu, qual);
+			break;
+		case 4:
+			handled = vmx_emulate_cr4_access(vmx, vcpu, qual);
+			break;
+		case 8:
+			handled = vmx_emulate_cr8_access(vmx, vcpu, qual);
+			break;
+		}
+		break;
+	case EXIT_REASON_RDMSR:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
+		retu = false;
+		ecx = vmxctx->guest_rcx;
+		VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
+		error = emulate_rdmsr(vmx, vcpu, ecx, &retu);
+		if (error) {
+			vmexit->exitcode = VM_EXITCODE_RDMSR;
+			vmexit->u.msr.code = ecx;
+		} else if (!retu) {
+			handled = HANDLED;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_rdmsr retu with bogus exitcode"));
+		}
+		break;
+	case EXIT_REASON_WRMSR:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
+		retu = false;
+		eax = vmxctx->guest_rax;
+		ecx = vmxctx->guest_rcx;
+		edx = vmxctx->guest_rdx;
+		VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
+		    ecx, (uint64_t)edx << 32 | eax);
+		error = emulate_wrmsr(vmx, vcpu, ecx,
+		    (uint64_t)edx << 32 | eax, &retu);
+		if (error) {
+			vmexit->exitcode = VM_EXITCODE_WRMSR;
+			vmexit->u.msr.code = ecx;
+			vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
+		} else if (!retu) {
+			handled = HANDLED;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_wrmsr retu with bogus exitcode"));
+		}
+		break;
+	case EXIT_REASON_HLT:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+		vmexit->exitcode = VM_EXITCODE_HLT;
+		vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+		break;
+	case EXIT_REASON_MTF:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
+		vmexit->exitcode = VM_EXITCODE_MTRAP;
+		break;
+	case EXIT_REASON_PAUSE:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
+		vmexit->exitcode = VM_EXITCODE_PAUSE;
+		break;
+	case EXIT_REASON_INTR_WINDOW:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1);
+		vmx_clear_int_window_exiting(vmx, vcpu);
+		return (1);
+	case EXIT_REASON_EXT_INTR:
+		/*
+		 * External interrupts serve only to cause VM exits and allow
+		 * the host interrupt handler to run.
+		 *
+		 * If this external interrupt triggers a virtual interrupt
+		 * to a VM, then that state will be recorded by the
+		 * host interrupt handler in the VM's softc. We will inject
+		 * this virtual interrupt during the subsequent VM enter.
+		 */
+		intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
+
+		/*
+		 * XXX: Ignore this exit if VMCS_INTR_VALID is not set.
+		 * This appears to be a bug in VMware Fusion?
+		 */
+		if (!(intr_info & VMCS_INTR_VALID))
+			return (1);
+#ifdef	__FreeBSD__
+		KASSERT((intr_info & VMCS_INTR_VALID) != 0 &&
+		    (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR,
+		    ("VM exit interruption info invalid: %#x", intr_info));
+#else
+		KASSERT((intr_info & VMCS_INTR_VALID) != 0 &&
+		    (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR,
+		    ("VM exit interruption info invalid: %x", intr_info));
+#endif
+#if 0	/* XXX */
+		vmx_trigger_hostintr(intr_info & 0xff);
+#endif
+
+		/*
+		 * This is special. We want to treat this as a 'handled'
+		 * VM-exit but not increment the instruction pointer.
+		 */
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1);
+		return (1);
+	case EXIT_REASON_NMI_WINDOW:
+		/* Exit to allow the pending virtual NMI to be injected */
+		if (vm_nmi_pending(vmx->vm, vcpu))
+			vmx_inject_nmi(vmx, vcpu);
+		vmx_clear_nmi_window_exiting(vmx, vcpu);
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1);
+		return (1);
+	case EXIT_REASON_INOUT:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
+		vmexit->exitcode = VM_EXITCODE_INOUT;
+		/* Unpack the exit qualification bit fields */
+		vmexit->u.inout.bytes = (qual & 0x7) + 1;
+		vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
+		vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
+		vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
+		vmexit->u.inout.port = (uint16_t)(qual >> 16);
+		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
+		if (vmexit->u.inout.string) {
+			/* ins/outs need extra state to be emulated */
+			inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
+			vmexit->exitcode = VM_EXITCODE_INOUT_STR;
+			vis = &vmexit->u.inout_str;
+			vmx_paging_info(&vis->paging);
+			vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+			vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
+			vis->index = inout_str_index(vmx, vcpu, in);
+			vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
+			vis->addrsize = inout_str_addrsize(inst_info);
+			inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
+		}
+		break;
+	case EXIT_REASON_CPUID:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1);
+		handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
+		break;
+	case EXIT_REASON_EXCEPTION:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1);
+		intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
+#ifdef	__FreeBSD__
+		KASSERT((intr_info & VMCS_INTR_VALID) != 0,
+		    ("VM exit interruption info invalid: %#x", intr_info));
+#else
+		KASSERT((intr_info & VMCS_INTR_VALID) != 0,
+		    ("VM exit interruption info invalid: %x", intr_info));
+#endif
+
+		/*
+		 * The IDT-vectoring information tells us whether this exit
+		 * happened while an event was being delivered.  It must be
+		 * fetched from the VMCS before it is tested below.
+		 */
+		idtvec_info = vmcs_read(VMCS_IDT_VECTORING_INFO);
+
+		/*
+		 * If Virtual NMIs control is 1 and the VM-exit is due to a
+		 * fault encountered during the execution of IRET then we must
+		 * restore the state of "virtual-NMI blocking" before resuming
+		 * the guest.
+		 *
+		 * See "Resuming Guest Software after Handling an Exception".
+		 */
+		if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
+		    (intr_info & 0xff) != IDT_DF &&
+		    (intr_info & EXIT_QUAL_NMIUDTI) != 0)
+			vmx_restore_nmi_blocking(vmx, vcpu);
+
+		/*
+		 * The NMI has already been handled in vmx_exit_handle_nmi().
+		 */
+		if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI)
+			return (1);
+		break;
+	case EXIT_REASON_EPT_FAULT:
+		gpa = vmcs_gpa();
+		if (ept_emulation_fault(qual)) {
+			vmexit_inst_emul(vmexit, gpa, vmcs_gla());
+			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
+		}
+		break;
+	default:
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
+		break;
+	}
+
+	if (handled) {
+		/*
+		 * It is possible that control is returned to userland
+		 * even though we were able to handle the VM exit in the
+		 * kernel.
+		 *
+		 * In such a case we want to make sure that the userland
+		 * restarts guest execution at the instruction *after*
+		 * the one we just processed. Therefore we update the
+		 * guest rip in the VMCS and in 'vmexit'.
+		 */
+		vm_exit_update_rip(vmexit);
+		vmexit->rip += vmexit->inst_length;
+		vmexit->inst_length = 0;
+
+		/*
+		 * Special case for spinning up an AP - exit to userspace to
+		 * give the controlling process a chance to intercept and
+		 * spin up a thread for the AP.
+		 */
+		if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP)
+			handled = 0;
+	} else {
+		if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
+			/*
+			 * If this VM exit was not claimed by anybody then
+			 * treat it as a generic VMX exit.
+			 */
+			vmexit->exitcode = VM_EXITCODE_VMX;
+			vmexit->u.vmx.status = VM_SUCCESS;
+			vmexit->u.vmx.inst_type = 0;
+			vmexit->u.vmx.inst_error = 0;
+		} else {
+			/*
+			 * The exitcode and collateral have been populated.
+			 * The VM exit will be processed further in userland.
+			 */
+		}
+	}
+	return (handled);
+}
+
+/*
+ * Run the guest on virtual cpu 'vcpu' of the VM described by 'arg' (a
+ * 'struct vmx *'), starting at guest instruction pointer 'rip'.
+ *
+ * The guest is entered repeatedly until a VM exit occurs that cannot be
+ * handled in the kernel or until an AST is pending.  The details of the
+ * final exit are left in the vcpu's 'struct vm_exit'.
+ *
+ * Returns 0 on a normal return towards userland and ENOEXEC if the guest
+ * could not be launched or resumed.  In both cases the VMCS is cleared and
+ * vmx_msr_guest_exit() is called before returning.
+ *
+ * NOTE(review): the local 'error' is declared but never used here.
+ */
+static int
+vmx_run(void *arg, int vcpu, register_t rip)
+{
+	int error, vie, rc, handled, astpending;
+	uint32_t exit_reason;
+	struct vmx *vmx;
+	struct vm *vm;
+	struct vmxctx *vmxctx;
+	struct vmcs *vmcs;
+	struct vm_exit *vmexit;
+	struct vlapic *vlapic;
+	
+	vmx = arg;
+	vm = vmx->vm;
+	vmcs = &vmx->vmcs[vcpu];
+	vmxctx = &vmx->ctx[vcpu];
+	vlapic = vm_lapic(vm, vcpu);
+	/* Force the first entry below to use vmx_launch(), not vmx_resume() */
+	vmxctx->launched = 0;
+
+	astpending = 0;
+	vmexit = vm_exitinfo(vmx->vm, vcpu);
+
+	vmx_msr_guest_enter(vmx, vcpu);
+
+	VMPTRLD(vmcs);
+
+	/*
+	 * XXX
+	 * We do this every time because we may setup the virtual machine
+	 * from a different process than the one that actually runs it.
+	 *
+	 * If the life of a virtual machine was spent entirely in the context
+	 * of a single process we could do this once in vmcs_set_defaults().
+	 */
+	vmcs_write(VMCS_HOST_CR3, rcr3());
+
+	vmcs_write(VMCS_GUEST_RIP, rip);
+	vmx_set_pcpu_defaults(vmx, vcpu);
+	do {
+		vmx_inject_interrupts(vmx, vcpu, vlapic);
+		vmx_run_trace(vmx, vcpu);
+		/*
+		 * vmx_setjmp() returns VMX_RETURN_DIRECT when called and one
+		 * of the other VMX_RETURN_* codes when control comes back
+		 * from the guest entry path.
+		 */
+		rc = vmx_setjmp(vmxctx);
+#ifdef SETJMP_TRACE
+		vmx_setjmp_trace(vmx, vcpu, vmxctx, rc);
+#endif
+		switch (rc) {
+		case VMX_RETURN_DIRECT:
+			if (vmxctx->launched == 0) {
+				vmxctx->launched = 1;
+				vmx_launch(vmxctx);
+			} else
+				vmx_resume(vmxctx);
+			panic("vmx_launch/resume should not return");
+			break;
+		case VMX_RETURN_LONGJMP:
+			break;			/* vm exit */
+		case VMX_RETURN_AST:
+			astpending = 1;
+			break;
+		case VMX_RETURN_VMRESUME:
+			/*
+			 * The resume failed.  The only recoverable cause is
+			 * a VMCS that has not been launched yet, in which
+			 * case a launch is attempted instead.
+			 */
+			vie = vmcs_instruction_error();
+			if (vmxctx->launch_error == VM_FAIL_INVALID ||
+			    vie != VMRESUME_WITH_NON_LAUNCHED_VMCS) {
+				printf("vmresume error %d vmcs inst error %d\n",
+					vmxctx->launch_error, vie);
+				goto err_exit;
+			}
+			vmx_launch(vmxctx);	/* try to launch the guest */
+			panic("vmx_launch should not return");
+			break;
+		case VMX_RETURN_VMLAUNCH:
+			/* A failed launch is never retried */
+			vie = vmcs_instruction_error();
+#if 1
+			printf("vmlaunch error %d vmcs inst error %d\n",
+				vmxctx->launch_error, vie);
+#endif
+			goto err_exit;
+		default:
+			panic("vmx_setjmp returned %d", rc);
+		}
+		
+		/* collect some basic information for VM exit processing */
+		vmexit->rip = rip = vmcs_guest_rip();
+		vmexit->inst_length = vmexit_instruction_length();
+		vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
+		vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();
+
+		/* Update 'nextrip' */
+		vmx->state[vcpu].nextrip = rip;
+
+		/* enable interrupts */
+		enable_intr();
+
+		if (astpending) {
+			/*
+			 * Leave the loop with 'handled' set and a BOGUS
+			 * exitcode, the combination the check after the
+			 * loop accepts for an exit that needs no further
+			 * processing.
+			 */
+			handled = 1;
+			vmexit->inst_length = 0;
+			vmexit->exitcode = VM_EXITCODE_BOGUS;
+			vmx_astpending_trace(vmx, vcpu, rip);
+			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_ASTPENDING, 1);
+			break;
+		}
+
+		handled = vmx_exit_process(vmx, vcpu, vmexit);
+		vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
+
+	} while (handled);
+
+	/*
+	 * If a VM exit has been handled then the exitcode must be BOGUS
+	 * If a VM exit is not handled then the exitcode must not be BOGUS
+	 */
+	if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) ||
+	    (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) {
+		panic("Mismatch between handled (%d) and exitcode (%d)",
+		      handled, vmexit->exitcode);
+	}
+
+	if (!handled)
+		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_USERSPACE, 1);
+
+	VCPU_CTR1(vmx->vm, vcpu, "goto userland: exitcode %d",
+	    vmexit->exitcode);
+
+	VMCLEAR(vmcs);
+	vmx_msr_guest_exit(vmx, vcpu);
+
+	return (0);
+
+err_exit:
+	/* Report the failed entry as a generic VMX exit with all-ones info */
+	vmexit->exitcode = VM_EXITCODE_VMX;
+	vmexit->u.vmx.exit_reason = (uint32_t)-1;
+	vmexit->u.vmx.exit_qualification = (uint32_t)-1;
+	vmexit->u.vmx.status = ~0;
+	VMCLEAR(vmcs);
+	vmx_msr_guest_exit(vmx, vcpu);
+
+	return (ENOEXEC);
+}
+
+/*
+ * Tear down the per-VM VMX state in 'arg' (a 'struct vmx *'): release every
+ * vcpu's VPID, clear the VMCS of vcpu 0, destroy the EPT state and free the
+ * softc itself.  Panics if the VMCS cannot be cleared.
+ */
+static void
+vmx_vmcleanup(void *arg)
+{
+	struct vmx *vmx = arg;
+	int error, vcpu;
+
+	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+		vpid_free(vmx->state[vcpu].vpid);
+	}
+
+	/*
+	 * XXXSMP we also need to clear the VMCS active on the other vcpus.
+	 */
+	if ((error = vmclear(&vmx->vmcs[0])) != 0)
+		panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error);
+
+	ept_vmcleanup(vmx);
+	free(vmx, M_VMX);
+}
+
+/*
+ * Map a VM_REG_GUEST_* identifier onto the slot in 'vmxctx' that holds the
+ * register's saved value.  Only the general purpose registers kept in the
+ * software context and %cr2 are covered; NULL is returned for any other
+ * register.
+ */
+static register_t *
+vmxctx_regptr(struct vmxctx *vmxctx, int reg)
+{
+	register_t *regp = NULL;
+
+	switch (reg) {
+	case VM_REG_GUEST_RAX:
+		regp = &vmxctx->guest_rax;
+		break;
+	case VM_REG_GUEST_RBX:
+		regp = &vmxctx->guest_rbx;
+		break;
+	case VM_REG_GUEST_RCX:
+		regp = &vmxctx->guest_rcx;
+		break;
+	case VM_REG_GUEST_RDX:
+		regp = &vmxctx->guest_rdx;
+		break;
+	case VM_REG_GUEST_RSI:
+		regp = &vmxctx->guest_rsi;
+		break;
+	case VM_REG_GUEST_RDI:
+		regp = &vmxctx->guest_rdi;
+		break;
+	case VM_REG_GUEST_RBP:
+		regp = &vmxctx->guest_rbp;
+		break;
+	case VM_REG_GUEST_R8:
+		regp = &vmxctx->guest_r8;
+		break;
+	case VM_REG_GUEST_R9:
+		regp = &vmxctx->guest_r9;
+		break;
+	case VM_REG_GUEST_R10:
+		regp = &vmxctx->guest_r10;
+		break;
+	case VM_REG_GUEST_R11:
+		regp = &vmxctx->guest_r11;
+		break;
+	case VM_REG_GUEST_R12:
+		regp = &vmxctx->guest_r12;
+		break;
+	case VM_REG_GUEST_R13:
+		regp = &vmxctx->guest_r13;
+		break;
+	case VM_REG_GUEST_R14:
+		regp = &vmxctx->guest_r14;
+		break;
+	case VM_REG_GUEST_R15:
+		regp = &vmxctx->guest_r15;
+		break;
+	case VM_REG_GUEST_CR2:
+		regp = &vmxctx->guest_cr2;
+		break;
+	default:
+		/* Not kept in the software context */
+		break;
+	}
+
+	return (regp);
+}
+
+/*
+ * Read register 'reg' from the software context into '*retval'.
+ * Returns 0 on success or EINVAL if the register is not kept in 'vmxctx'.
+ */
+static int
+vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval)
+{
+	register_t *regp = vmxctx_regptr(vmxctx, reg);
+
+	if (regp == NULL)
+		return (EINVAL);
+
+	*retval = *regp;
+	return (0);
+}
+
+/*
+ * Store 'val' into register 'reg' of the software context.
+ * Returns 0 on success or EINVAL if the register is not kept in 'vmxctx'.
+ */
+static int
+vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
+{
+	register_t *regp = vmxctx_regptr(vmxctx, reg);
+
+	if (regp == NULL)
+		return (EINVAL);
+
+	*regp = val;
+	return (0);
+}
+
+/*
+ * Return the VMCS field that shadows guest register 'reg', or -1 if the
+ * register has no shadow.  Only %cr0 and %cr4 are shadowed.
+ */
+static int
+vmx_shadow_reg(int reg)
+{
+
+	switch (reg) {
+	case VM_REG_GUEST_CR0:
+		return (VMCS_CR0_SHADOW);
+	case VM_REG_GUEST_CR4:
+		return (VMCS_CR4_SHADOW);
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Fetch guest register 'reg' of 'vcpu' into '*retval'.  Registers kept in
+ * the software context are served from there, everything else is read from
+ * the VMCS.  Panics if the vcpu is running on a different host cpu.
+ */
+static int
+vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+	struct vmx *vmx = arg;
+	int hostcpu, running;
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+	}
+
+	/* Fall back to the VMCS when the software context has no such reg */
+	if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) != 0)
+		return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval));
+
+	return (0);
+}
+
+/*
+ * Set guest register 'reg' of 'vcpu' to 'val'.  Registers kept in the
+ * software context are updated there; all others are written to the VMCS.
+ *
+ * Two VMCS side effects follow a successful write:
+ *  - a write to EFER keeps the "IA-32e mode guest" VM-entry control in sync
+ *    with EFER.LMA when the "load EFER" VM-entry control is in use, and
+ *  - a write to a shadowed register (see vmx_shadow_reg()) also stores the
+ *    unmodified value in the shadow field.
+ *
+ * Panics if the vcpu is running on a different host cpu.  Returns 0 or the
+ * error from the VMCS write.
+ */
+static int
+vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+	struct vmx *vmx = arg;
+	struct vmcs *vmcs;
+	uint64_t ctls;
+	int error, hostcpu, running, shadow;
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu)
+		panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+
+	if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
+		return (0);
+
+	vmcs = &vmx->vmcs[vcpu];
+	error = vmcs_setreg(vmcs, running, reg, val);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * If the "load EFER" VM-entry control is 1 then the
+	 * value of EFER.LMA must be identical to "IA-32e mode guest"
+	 * bit in the VM-entry control.
+	 */
+	if (reg == VM_REG_GUEST_EFER &&
+	    (entry_ctls & VM_ENTRY_LOAD_EFER) != 0) {
+		vmcs_getreg(vmcs, running, VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls);
+		if ((val & EFER_LMA) == 0)
+			ctls &= ~VM_ENTRY_GUEST_LMA;
+		else
+			ctls |= VM_ENTRY_GUEST_LMA;
+		vmcs_setreg(vmcs, running, VMCS_IDENT(VMCS_ENTRY_CTLS), ctls);
+	}
+
+	/* Store the unmodified value in the shadow, if there is one */
+	shadow = vmx_shadow_reg(reg);
+	if (shadow > 0)
+		error = vmcs_setreg(vmcs, running, VMCS_IDENT(shadow), val);
+
+	return (error);
+}
+
+/*
+ * Read the segment descriptor 'reg' of 'vcpu' from its VMCS into 'desc'.
+ * Panics if the vcpu is running on a different host cpu.
+ */
+static int
+vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+	struct vmx *vmx = arg;
+	struct vmcs *vmcs = &vmx->vmcs[vcpu];
+	int hostcpu, running;
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu);
+	}
+
+	return (vmcs_getdesc(vmcs, running, reg, desc));
+}
+
+/*
+ * Write the segment descriptor 'desc' into field 'reg' of the VMCS that
+ * belongs to 'vcpu'.  Panics if the vcpu is running on a different host cpu.
+ */
+static int
+vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+	struct vmx *vmx = arg;
+	struct vmcs *vmcs = &vmx->vmcs[vcpu];
+	int hostcpu, running;
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
+	}
+
+	return (vmcs_setdesc(vmcs, running, reg, desc));
+}
+
+/*
+ * Report whether capability 'type' is currently enabled on 'vcpu'.
+ *
+ * Returns ENOENT if the capability is unknown or not supported by the
+ * hardware.  Otherwise returns 0 and sets '*retval' to 1 or 0 depending on
+ * whether the capability's bit is set in the vcpu's capability mask.
+ */
+static int
+vmx_getcap(void *arg, int vcpu, int type, int *retval)
+{
+	struct vmx *vmx = arg;
+	int enabled, supported;
+
+	enabled = vmx->cap[vcpu].set;
+	supported = 0;
+
+	switch (type) {
+	case VM_CAP_HALT_EXIT:
+		if (cap_halt_exit)
+			supported = 1;
+		break;
+	case VM_CAP_PAUSE_EXIT:
+		if (cap_pause_exit)
+			supported = 1;
+		break;
+	case VM_CAP_MTRAP_EXIT:
+		if (cap_monitor_trap)
+			supported = 1;
+		break;
+	case VM_CAP_UNRESTRICTED_GUEST:
+		if (cap_unrestricted_guest)
+			supported = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (!supported)
+		return (ENOENT);
+
+	*retval = (enabled & (1 << type)) != 0;
+	return (0);
+}
+
+/*
+ * Enable ('val' != 0) or disable ('val' == 0) capability 'type' on 'vcpu'.
+ *
+ * Each supported capability maps to one bit in a processor-based
+ * VM-execution control field.  The new control value is written to the
+ * vcpu's VMCS, which is loaded and cleared around the write.  On success the
+ * cached copy of the control (where one is kept) and the vcpu's capability
+ * mask are updated.
+ *
+ * Returns ENOENT if the capability is unknown or unsupported, the vmwrite
+ * error if the VMCS update failed, and 0 otherwise.
+ */
+static int
+vmx_setcap(void *arg, int vcpu, int type, int val)
+{
+	struct vmx *vmx = arg;
+	struct vmcs *vmcs = &vmx->vmcs[vcpu];
+	uint32_t *cached = NULL;
+	uint32_t ctls;
+	int error, flag, reg;
+	int supported = 0;
+
+	switch (type) {
+	case VM_CAP_HALT_EXIT:
+		if (cap_halt_exit) {
+			supported = 1;
+			cached = &vmx->cap[vcpu].proc_ctls;
+			ctls = *cached;
+			flag = PROCBASED_HLT_EXITING;
+			reg = VMCS_PRI_PROC_BASED_CTLS;
+		}
+		break;
+	case VM_CAP_MTRAP_EXIT:
+		if (cap_monitor_trap) {
+			supported = 1;
+			cached = &vmx->cap[vcpu].proc_ctls;
+			ctls = *cached;
+			flag = PROCBASED_MTF;
+			reg = VMCS_PRI_PROC_BASED_CTLS;
+		}
+		break;
+	case VM_CAP_PAUSE_EXIT:
+		if (cap_pause_exit) {
+			supported = 1;
+			cached = &vmx->cap[vcpu].proc_ctls;
+			ctls = *cached;
+			flag = PROCBASED_PAUSE_EXITING;
+			reg = VMCS_PRI_PROC_BASED_CTLS;
+		}
+		break;
+	case VM_CAP_UNRESTRICTED_GUEST:
+		if (cap_unrestricted_guest) {
+			/* Based on the global value; no per-vcpu copy kept */
+			supported = 1;
+			ctls = procbased_ctls2;
+			flag = PROCBASED2_UNRESTRICTED_GUEST;
+			reg = VMCS_SEC_PROC_BASED_CTLS;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (!supported)
+		return (ENOENT);
+
+	if (val)
+		ctls |= flag;
+	else
+		ctls &= ~flag;
+
+	VMPTRLD(vmcs);
+	error = vmwrite(reg, ctls);
+	VMCLEAR(vmcs);
+	if (error)
+		return (error);
+
+	/*
+	 * Update optional stored flags, and record
+	 * setting
+	 */
+	if (cached != NULL)
+		*cached = ctls;
+
+	if (val)
+		vmx->cap[vcpu].set |= (1 << type);
+	else
+		vmx->cap[vcpu].set &= ~(1 << type);
+
+	return (0);
+}
+
+/*
+ * VT-x flavour of the virtual local APIC.  The generic 'struct vlapic' is
+ * the first member, which lets the handlers in this file cast between a
+ * 'struct vlapic *' and a 'struct vlapic_vtx *'.
+ */
+struct vlapic_vtx {
+	struct vlapic	vlapic;		/* must remain the first member */
+	struct pir_desc	*pir_desc;	/* this vcpu's posted intr descriptor */
+	struct vmx	*vmx;		/* owning VM softc */
+};
+
+/*
+ * Trace helper for posted interrupt requests: records the vector and its
+ * trigger mode, the four PIR words of 'pir_desc' and whether a notification
+ * is needed, each prefixed with the string literal 'msg'.
+ */
+#define	VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg)	\
+do {									\
+	VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d",	\
+	    level ? "level" : "edge", vector);				\
+	VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]);	\
+	VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]);	\
+	VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]);	\
+	VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]);	\
+	VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
+} while (0)
+
+/*
+ * vlapic->ops handlers that utilize the APICv hardware assist described in
+ * Chapter 29 of the Intel SDM.
+ */
+/*
+ * Record 'vector' as requested in the vcpu's posted interrupt descriptor.
+ *
+ * Returns non-zero if this call changed the descriptor's 'pending' flag
+ * from 0 to 1, and zero if the flag was already set.
+ */
+static int
+vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
+{
+	struct pir_desc *desc;
+	uint64_t bit;
+	int word, notify;
+
+	desc = ((struct vlapic_vtx *)vlapic)->pir_desc;
+
+	/*
+	 * Keep track of interrupt requests in the PIR descriptor. This is
+	 * because the virtual APIC page pointed to by the VMCS cannot be
+	 * modified if the vcpu is running.
+	 */
+	word = vector / 64;
+	bit = 1UL << (vector % 64);
+	atomic_set_long(&desc->pir[word], bit);
+	notify = atomic_cmpset_long(&desc->pending, 0, 1);
+
+	VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, desc, notify, vector,
+	    level, "vmx_set_intr_ready");
+
+	return (notify);
+}
+
+/*
+ * Report whether a posted interrupt is pending that the guest would
+ * currently recognize.  'vecptr' must be NULL: the pending vector itself is
+ * never returned.
+ *
+ * Returns 1 if such an interrupt is pending and 0 otherwise.
+ */
+static int
+vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
+{
+	struct pir_desc *desc;
+	struct LAPIC *lapic;
+	uint64_t bits;
+	uint32_t ppr, vpr;
+	int word;
+
+	/*
+	 * This function is only expected to be called from the 'HLT' exit
+	 * handler which does not care about the vector that is pending.
+	 */
+	KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));
+
+	desc = ((struct vlapic_vtx *)vlapic)->pir_desc;
+
+	if (atomic_load_acq_long(&desc->pending) == 0)
+		return (0);	/* common case */
+
+	/*
+	 * If there is an interrupt pending then it will be recognized only
+	 * if its priority is greater than the processor priority.
+	 *
+	 * Special case: if the processor priority is zero then any pending
+	 * interrupt will be recognized.
+	 */
+	lapic = vlapic->apic_page;
+	ppr = lapic->ppr & 0xf0;
+	if (ppr == 0)
+		return (1);
+
+	VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
+	    lapic->ppr);
+
+	/* Scan from the highest PIR word down; the first hit decides */
+	for (word = 3; word >= 0; word--) {
+		bits = desc->pir[word];
+		if (bits == 0)
+			continue;
+
+		vpr = (word * 64 + flsl(bits) - 1) & 0xf0;
+		return (vpr > ppr);
+	}
+
+	return (0);
+}
+
+/*
+ * 'intr_accepted' handler installed when virtual interrupt delivery is in
+ * use.  It is never expected to run and panics unconditionally if called.
+ */
+static void
+vmx_intr_accepted(struct vlapic *vlapic, int vector)
+{
+
+	panic("vmx_intr_accepted: not expected to be called");
+}
+
+/*
+ * Set (level-triggered) or clear (edge-triggered) the bit for 'vector' in
+ * the matching EOI-exit VMCS field of the vcpu.  The vcpu must not be
+ * running; its VMCS is loaded and cleared around the update.
+ */
+static void
+vmx_set_tmr(struct vlapic *vlapic, int vector, bool level)
+{
+	struct vmx *vmx;
+	struct vmcs *vmcs;
+	uint64_t bit, eoi_exit;
+
+	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
+	KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL),
+	    ("vmx_set_tmr: vcpu cannot be running"));
+
+	vmx = ((struct vlapic_vtx *)vlapic)->vmx;
+	vmcs = &vmx->vmcs[vlapic->vcpuid];
+	bit = 1UL << (vector % 64);
+
+	VMPTRLD(vmcs);
+	eoi_exit = vmcs_read(VMCS_EOI_EXIT(vector));
+	eoi_exit = level ? (eoi_exit | bit) : (eoi_exit & ~bit);
+	vmcs_write(VMCS_EOI_EXIT(vector), eoi_exit);
+	VMCLEAR(vmcs);
+}
+
+/*
+ * 'post_intr' handler: send an IPI with vector 'pirvec' to host cpu
+ * 'hostcpu'.  The 'vlapic' argument is not used.
+ */
+static void
+vmx_post_intr(struct vlapic *vlapic, int hostcpu)
+{
+
+	ipi_cpu(hostcpu, pirvec);
+}
+
+/*
+ * Transfer the pending interrupts in the PIR descriptor to the IRR
+ * in the virtual APIC page.
+ */
+static void
+vmx_inject_pir(struct vlapic *vlapic)
+{
+	struct pir_desc *desc;
+	struct LAPIC *lapic;
+	uint64_t bits, topbits;
+	int rvi, topbase;
+	uint16_t status_old, status_new;
+
+	desc = ((struct vlapic_vtx *)vlapic)->pir_desc;
+
+	/* Claim the pending flag; nothing to do if it was not set */
+	if (atomic_cmpset_long(&desc->pending, 1, 0) == 0) {
+		VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
+		    "no posted interrupt pending");
+		return;
+	}
+
+	/*
+	 * 'topbits' and 'topbase' track the highest non-empty PIR word and
+	 * the vector number of its bit 0.
+	 */
+	topbits = 0;
+	topbase = -1;
+	lapic = vlapic->apic_page;
+
+	/* Each 64-bit PIR word feeds a pair of 32-bit IRR registers */
+	bits = atomic_readandclear_long(&desc->pir[0]);
+	if (bits != 0) {
+		lapic->irr0 |= bits;
+		lapic->irr1 |= bits >> 32;
+		topbase = 0;
+		topbits = bits;
+	}
+
+	bits = atomic_readandclear_long(&desc->pir[1]);
+	if (bits != 0) {
+		lapic->irr2 |= bits;
+		lapic->irr3 |= bits >> 32;
+		topbase = 64;
+		topbits = bits;
+	}
+
+	bits = atomic_readandclear_long(&desc->pir[2]);
+	if (bits != 0) {
+		lapic->irr4 |= bits;
+		lapic->irr5 |= bits >> 32;
+		topbase = 128;
+		topbits = bits;
+	}
+
+	bits = atomic_readandclear_long(&desc->pir[3]);
+	if (bits != 0) {
+		lapic->irr6 |= bits;
+		lapic->irr7 |= bits >> 32;
+		topbase = 192;
+		topbits = bits;
+	}
+
+	VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");
+
+	/*
+	 * Update RVI so the processor can evaluate pending virtual
+	 * interrupts on VM-entry.
+	 *
+	 * It is possible for no PIR bit to be set here, even though the
+	 * pending bit has been set. The scenario is:
+	 * CPU-Y is sending a posted interrupt to CPU-X, which
+	 * is running a guest and processing posted interrupts in h/w.
+	 * CPU-X will eventually exit and the state seen in s/w is
+	 * the pending bit set, but no PIR bits set.
+	 *
+	 *      CPU-X                      CPU-Y
+	 *   (vm running)                (host running)
+	 *   rx posted interrupt
+	 *   CLEAR pending bit
+	 *				 SET PIR bit
+	 *   READ/CLEAR PIR bits
+	 *				 SET pending bit
+	 *   (vm exit)
+	 *   pending bit set, PIR 0
+	 */
+	if (topbits == 0)
+		return;
+
+	rvi = topbase + flsl(topbits) - 1;
+	status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
+	status_new = (status_old & 0xFF00) | rvi;
+
+	/* Only ever raise the low byte (RVI), never lower it */
+	if (status_new > status_old) {
+		vmcs_write(VMCS_GUEST_INTR_STATUS, status_new);
+		VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
+		    "guest_intr_status changed from 0x%04x to 0x%04x",
+		    status_old, status_new);
+	}
+}
+
+/*
+ * Allocate and initialize the virtual local APIC for 'vcpuid' of the VM
+ * described by 'arg' (a 'struct vmx *').
+ *
+ * The APICv handlers are installed only when virtual interrupt delivery is
+ * enabled, and the posted interrupt handler only when posted interrupts are
+ * enabled.  Returns the generic vlapic embedded in the new allocation.
+ */
+static struct vlapic *
+vmx_vlapic_init(void *arg, int vcpuid)
+{
+	struct vmx *vmx = arg;
+	struct vlapic_vtx *vlapic_vtx;
+	struct vlapic *vlapic;
+
+	vlapic_vtx = malloc(sizeof(struct vlapic_vtx), M_VLAPIC,
+	    M_WAITOK | M_ZERO);
+	vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid];
+	vlapic_vtx->vmx = vmx;
+
+	/* The generic vlapic is the first member of the VT-x wrapper */
+	vlapic = &vlapic_vtx->vlapic;
+	vlapic->vm = vmx->vm;
+	vlapic->vcpuid = vcpuid;
+	vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];
+
+	if (virtual_interrupt_delivery) {
+		vlapic->ops.set_intr_ready = vmx_set_intr_ready;
+		vlapic->ops.pending_intr = vmx_pending_intr;
+		vlapic->ops.intr_accepted = vmx_intr_accepted;
+		vlapic->ops.set_tmr = vmx_set_tmr;
+#ifdef	__FreeBSD__
+		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode;
+#endif
+	}
+
+	if (posted_interrupts) {
+		vlapic->ops.post_intr = vmx_post_intr;
+	}
+
+	vlapic_init(vlapic);
+
+	return (vlapic);
+}
+
+/*
+ * Release a vlapic: run the generic vlapic cleanup and then free the
+ * allocation.  The 'arg' parameter is not used.
+ */
+static void
+vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
+{
+
+	vlapic_cleanup(vlapic);
+	free(vlapic, M_VLAPIC);
+}
+
+/*
+ * Intel VT-x implementation of the vmm backend operations.
+ *
+ * The initializer is positional, so the order of the entries below has to
+ * match the member order of 'struct vmm_ops'.
+ * NOTE(review): 'struct vmm_ops' is declared elsewhere -- verify the order
+ * against that declaration whenever either side changes.
+ */
+struct vmm_ops vmm_ops_intel = {
+	vmx_init,
+	vmx_cleanup,
+	vmx_vminit,
+	vmx_run,
+	vmx_vmcleanup,
+	ept_vmmmap_set,
+	ept_vmmmap_get,
+	vmx_getreg,
+	vmx_setreg,
+	vmx_getdesc,
+	vmx_setdesc,
+	vmx_getcap,
+	vmx_setcap,
+	vmx_vlapic_init,
+	vmx_vlapic_cleanup,
+};
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
new file mode 100644
index 0000000000..50ca62b371
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -0,0 +1,156 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx.h 284174 2015-06-09 00:14:47Z tychon $
+ */
+
+#ifndef _VMX_H_
+#define	_VMX_H_
+
+#include "vmcs.h"
+
+/*
+ * Sizes of the per-vcpu 'guest_msrs' and 'host_msrs' arrays in 'struct vmx'
+ * on non-FreeBSD builds.
+ */
+#ifndef	__FreeBSD__
+#define	GUEST_MSR_MAX_ENTRIES	64		/* arbitrary */
+#define	HOST_MSR_MAX_ENTRIES	64		/* arbitrary */
+#endif
+
+/*
+ * Per-vcpu software context: guest registers that are kept outside the
+ * VMCS, the host registers preserved across guest execution, and the
+ * launch state of the vcpu's VMCS.
+ *
+ * NOTE(review): the low-level entry/exit code presumably accesses these
+ * members by fixed offset, so the layout should not be changed without
+ * checking that code -- confirm.
+ */
+struct vmxctx {
+	register_t	tmpstk[32];		/* vmx_return() stack */
+	register_t	tmpstktop;
+
+	register_t	guest_rdi;		/* Guest state */
+	register_t	guest_rsi;
+	register_t	guest_rdx;
+	register_t	guest_rcx;
+	register_t	guest_r8;
+	register_t	guest_r9;
+	register_t	guest_rax;
+	register_t	guest_rbx;
+	register_t	guest_rbp;
+	register_t	guest_r10;
+	register_t	guest_r11;
+	register_t	guest_r12;
+	register_t	guest_r13;
+	register_t	guest_r14;
+	register_t	guest_r15;
+	register_t	guest_cr2;
+
+	register_t	host_r15;		/* Host state */
+	register_t	host_r14;
+	register_t	host_r13;
+	register_t	host_r12;
+	register_t	host_rbp;
+	register_t	host_rsp;
+	register_t	host_rbx;
+	register_t	host_rip;
+	/*
+	 * XXX todo debug registers and fpu state
+	 */
+	
+	int		launched;		/* vmcs launch state */
+	int		launch_error;		/* status of a failed launch/resume */
+};
+
+/* Per-vcpu capability state */
+struct vmxcap {
+	int	set;		/* enabled capabilities, bit (1 << VM_CAP_*) */
+	uint32_t proc_ctls;	/* cached primary processor-based controls */
+	uint32_t proc_ctls2;	/* presumably the secondary controls -- TODO confirm */
+};
+
+/* Per-vcpu run state */
+struct vmxstate {
+	uint64_t nextrip;	/* next instruction to be executed by guest */
+	int	lastcpu;	/* host cpu that this 'vcpu' last ran on */
+	uint16_t vpid;		/* VPID owned by this vcpu */
+};
+
+/*
+ * Backing storage for one vcpu's virtual APIC page, viewed as an array of
+ * 32-bit registers.  It must be exactly one page in size.
+ */
+struct apic_page {
+	uint32_t reg[PAGE_SIZE / 4];
+};
+CTASSERT(sizeof(struct apic_page) == PAGE_SIZE);
+
+/*
+ * Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM).
+ * The descriptor is 64 bytes in size and 64-byte aligned.
+ */
+struct pir_desc {
+	uint64_t	pir[4];		/* one request bit per vector (0-255) */
+	uint64_t	pending;	/* non-zero while requests are outstanding */
+	uint64_t	unused[3];	/* pads the descriptor to 64 bytes */
+} __aligned(64);
+CTASSERT(sizeof(struct pir_desc) == 64);
+
+/*
+ * Index into the 'guest_msrs[]' array.  GUEST_MSR_NUM is the number of
+ * indices and sizes the FreeBSD flavour of that array.
+ */
+enum {
+	IDX_MSR_LSTAR,
+	IDX_MSR_CSTAR,
+	IDX_MSR_STAR,
+	IDX_MSR_SF_MASK,
+	IDX_MSR_KGSBASE,
+	GUEST_MSR_NUM		/* must be the last enumeration */
+};
+
+/*
+ * virtual machine softc
+ *
+ * The CTASSERTs that follow the structure require 'pml4ept', 'vmcs' and
+ * 'msr_bitmap' to sit at page-aligned offsets, so members must not be
+ * reordered or inserted ahead of them without preserving that alignment.
+ */
+struct vmx {
+	pml4_entry_t	pml4ept[NPML4EPG];
+	struct vmcs	vmcs[VM_MAXCPU];	/* one vmcs per virtual cpu */
+	struct apic_page apic_page[VM_MAXCPU];	/* one apic page per vcpu */
+	char		msr_bitmap[PAGE_SIZE];
+	struct pir_desc	pir_desc[VM_MAXCPU];	/* one PIR descriptor per vcpu */
+#ifdef	__FreeBSD__
+	uint64_t	guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
+#else
+	struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
+	struct msr_entry host_msrs[VM_MAXCPU][HOST_MSR_MAX_ENTRIES];
+#endif
+	struct vmxctx	ctx[VM_MAXCPU];		/* per-vcpu register context */
+	struct vmxcap	cap[VM_MAXCPU];		/* per-vcpu capability state */
+	struct vmxstate	state[VM_MAXCPU];	/* per-vcpu run state */
+	struct vm	*vm;			/* owning virtual machine */
+};
+CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
+CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
+CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
+
+/* Return codes of vmx_setjmp(); the comment below describes each of them */
+#define	VMX_RETURN_DIRECT	0
+#define	VMX_RETURN_LONGJMP	1
+#define	VMX_RETURN_VMRESUME	2
+#define	VMX_RETURN_VMLAUNCH	3
+#define	VMX_RETURN_AST		4
+/*
+ * vmx_setjmp() returns:
+ * - 0 when it returns directly
+ * - 1 when it returns from vmx_longjmp
+ * - 2 when it returns from vmx_resume (which would only be in the error case)
+ * - 3 when it returns from vmx_launch (which would only be in the error case)
+ * - 4 when it returns from vmx_resume or vmx_launch because of AST pending
+ */
+int	vmx_setjmp(struct vmxctx *ctx);
+void	vmx_longjmp(void);			/* returns via vmx_setjmp */
+void	vmx_launch(struct vmxctx *ctx) __dead2;	/* may return via vmx_setjmp */
+void	vmx_resume(struct vmxctx *ctx) __dead2;	/* may return via vmx_setjmp */
+
+/*
+ * NOTE(review): presumably these adjust a %cr0/%cr4 value to satisfy the
+ * fixed-bit requirements of VMX operation -- confirm against vmx.c.
+ */
+u_long	vmx_fix_cr0(u_long cr0);
+u_long	vmx_fix_cr4(u_long cr4);
+
+/* Set the TSC offset of 'vcpu'; presumably returns 0 or an errno value */
+int	vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset);
+
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_controls.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_controls.h
new file mode 100644
index 0000000000..08b1469f19
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_controls.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx_controls.h 260410 2014-01-07 21:04:49Z neel $
+ */
+
+#ifndef _VMX_CONTROLS_H_
+#define	_VMX_CONTROLS_H_
+
+/* Pin-Based VM-Execution Controls */
+#define	PINBASED_EXTINT_EXITING		(1 << 0)
+#define	PINBASED_NMI_EXITING		(1 << 3)
+#define	PINBASED_VIRTUAL_NMI		(1 << 5)
+#define	PINBASED_PREMPTION_TIMER	(1 << 6)
+#define	PINBASED_POSTED_INTERRUPT	(1 << 7)
+
+/* Primary Processor-Based VM-Execution Controls */
+#define	PROCBASED_INT_WINDOW_EXITING	(1 << 2)
+#define	PROCBASED_TSC_OFFSET		(1 << 3)
+#define	PROCBASED_HLT_EXITING		(1 << 7)
+#define	PROCBASED_INVLPG_EXITING	(1 << 9)
+#define	PROCBASED_MWAIT_EXITING		(1 << 10)
+#define	PROCBASED_RDPMC_EXITING		(1 << 11)
+#define	PROCBASED_RDTSC_EXITING		(1 << 12)
+#define	PROCBASED_CR3_LOAD_EXITING	(1 << 15)
+#define	PROCBASED_CR3_STORE_EXITING	(1 << 16)
+#define	PROCBASED_CR8_LOAD_EXITING	(1 << 19)
+#define	PROCBASED_CR8_STORE_EXITING	(1 << 20)
+#define	PROCBASED_USE_TPR_SHADOW	(1 << 21)
+#define	PROCBASED_NMI_WINDOW_EXITING	(1 << 22)
+#define	PROCBASED_MOV_DR_EXITING	(1 << 23)
+#define	PROCBASED_IO_EXITING		(1 << 24)
+#define	PROCBASED_IO_BITMAPS		(1 << 25)
+#define	PROCBASED_MTF			(1 << 27)
+#define	PROCBASED_MSR_BITMAPS		(1 << 28)
+#define	PROCBASED_MONITOR_EXITING	(1 << 29)
+#define	PROCBASED_PAUSE_EXITING		(1 << 30)
+#define	PROCBASED_SECONDARY_CONTROLS	(1U << 31)	/* unsigned: bit 31 */
+
+/* Secondary Processor-Based VM-Execution Controls */
+#define	PROCBASED2_VIRTUALIZE_APIC_ACCESSES	(1 << 0)
+#define	PROCBASED2_ENABLE_EPT			(1 << 1)
+#define	PROCBASED2_DESC_TABLE_EXITING		(1 << 2)
+#define	PROCBASED2_ENABLE_RDTSCP		(1 << 3)
+#define	PROCBASED2_VIRTUALIZE_X2APIC_MODE	(1 << 4)
+#define	PROCBASED2_ENABLE_VPID			(1 << 5)
+#define	PROCBASED2_WBINVD_EXITING		(1 << 6)
+#define	PROCBASED2_UNRESTRICTED_GUEST		(1 << 7)
+#define	PROCBASED2_APIC_REGISTER_VIRTUALIZATION	(1 << 8)
+#define	PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY	(1 << 9)
+#define	PROCBASED2_PAUSE_LOOP_EXITING		(1 << 10)
+#define	PROCBASED2_ENABLE_INVPCID		(1 << 12)
+
+/* VM Exit Controls */
+#define	VM_EXIT_SAVE_DEBUG_CONTROLS	(1 << 2)
+#define	VM_EXIT_HOST_LMA		(1 << 9)
+#define	VM_EXIT_LOAD_PERF_GLOBAL_CTRL	(1 << 12)
+#define	VM_EXIT_ACKNOWLEDGE_INTERRUPT	(1 << 15)
+#define	VM_EXIT_SAVE_PAT		(1 << 18)
+#define	VM_EXIT_LOAD_PAT		(1 << 19)
+#define	VM_EXIT_SAVE_EFER		(1 << 20)
+#define	VM_EXIT_LOAD_EFER		(1 << 21)
+#define	VM_EXIT_SAVE_PREEMPTION_TIMER	(1 << 22)
+
+/* VM Entry Controls */
+#define	VM_ENTRY_LOAD_DEBUG_CONTROLS	(1 << 2)
+#define	VM_ENTRY_GUEST_LMA		(1 << 9)
+#define	VM_ENTRY_INTO_SMM		(1 << 10)
+#define	VM_ENTRY_DEACTIVATE_DUAL_MONITOR (1 << 11)
+#define	VM_ENTRY_LOAD_PERF_GLOBAL_CTRL	(1 << 13)
+#define	VM_ENTRY_LOAD_PAT		(1 << 14)
+#define	VM_ENTRY_LOAD_EFER		(1 << 15)
+
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
new file mode 100644
index 0000000000..9513f6c70b
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
@@ -0,0 +1,245 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx_cpufunc.h 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMX_CPUFUNC_H_
+#define	_VMX_CPUFUNC_H_
+
+struct vmcs;
+
+/*
+ * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
+ *
+ *			error
+ * VMsucceed		  0
+ * VMFailInvalid	  1
+ * VMFailValid		  2	see also VMCS VM-Instruction Error Field
+ */
+#define	VM_SUCCESS		0
+#define	VM_FAIL_INVALID		1
+#define	VM_FAIL_VALID		2
+#define	VMX_SET_ERROR_CODE \
+	"	jnc 1f;"						\
+	"	mov $1, %[error];"	/* CF: error = 1 */		\
+	"	jmp 3f;"						\
+	"1:	jnz 2f;"						\
+	"	mov $2, %[error];"	/* ZF: error = 2 */		\
+	"	jmp 3f;"						\
+	"2:	mov $0, %[error];"					\
+	"3:"
+
+/* returns 0 on success and non-zero on failure */
+static __inline int
+vmxon(char *region)
+{
+	int error;
+	uint64_t addr;
+
+	addr = vtophys(region);
+	__asm __volatile("vmxon %[addr];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [addr] "m" (*(uint64_t *)&addr)
+			 : "memory");
+
+	return (error);
+}
+
+/* returns 0 on success and non-zero on failure */
+static __inline int
+vmxon_pa(vm_paddr_t addr)
+{
+	int error;
+
+	__asm __volatile("vmxon %[addr];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [addr] "m" (*(uint64_t *)&addr)
+			 : "memory");
+
+	return (error);
+}
+
+/* returns 0 on success and non-zero on failure */
+static __inline int
+vmclear(struct vmcs *vmcs)
+{
+	int error;
+	uint64_t addr;
+
+	addr = vtophys(vmcs);
+	__asm __volatile("vmclear %[addr];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [addr] "m" (*(uint64_t *)&addr)
+			 : "memory");
+	return (error);
+}
+
+static __inline void
+vmxoff(void)
+{
+
+	__asm __volatile("vmxoff");
+}
+
+static __inline void
+vmptrst(uint64_t *addr)
+{
+
+	__asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory");
+}
+
+static __inline int
+vmptrld(struct vmcs *vmcs)
+{
+	int error;
+	uint64_t addr;
+
+	addr = vtophys(vmcs);
+	__asm __volatile("vmptrld %[addr];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [addr] "m" (*(uint64_t *)&addr)
+			 : "memory");
+	return (error);
+}
+
+static __inline int
+vmwrite(uint64_t reg, uint64_t val)
+{
+	int error;
+
+	__asm __volatile("vmwrite %[val], %[reg];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [val] "r" (val), [reg] "r" (reg)
+			 : "memory");
+
+	return (error);
+}
+
+static __inline int
+vmread(uint64_t r, uint64_t *addr)
+{
+	int error;
+
+	__asm __volatile("vmread %[r], %[addr];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [r] "r" (r), [addr] "m" (*addr)
+			 : "memory");
+
+	return (error);
+}
+
+static void __inline
+VMCLEAR(struct vmcs *vmcs)
+{
+	int err;
+
+	err = vmclear(vmcs);
+	if (err != 0)
+		panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
+	/* Exits the critical section that VMPTRLD() presumably entered. */
+	critical_exit();
+}
+
+static void __inline
+VMPTRLD(struct vmcs *vmcs)
+{
+	int err;
+
+	critical_enter();
+	/* Left open on return; VMCLEAR() is what calls critical_exit(). */
+	err = vmptrld(vmcs);
+	if (err != 0)
+		panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
+}
+
+#define	INVVPID_TYPE_ADDRESS		0UL
+#define	INVVPID_TYPE_SINGLE_CONTEXT	1UL
+#define	INVVPID_TYPE_ALL_CONTEXTS	2UL
+
+struct invvpid_desc {
+	uint16_t	vpid;
+	uint16_t	_res1;
+	uint32_t	_res2;
+	uint64_t	linear_addr;
+};
+CTASSERT(sizeof(struct invvpid_desc) == 16);
+
+static void __inline
+invvpid(uint64_t type, struct invvpid_desc desc)
+{
+	int error;
+
+	__asm __volatile("invvpid %[desc], %[type];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [desc] "m" (desc), [type] "r" (type)
+			 : "memory");
+
+	if (error)
+		panic("invvpid error %d", error);
+}
+
+#define	INVEPT_TYPE_SINGLE_CONTEXT	1UL
+#define	INVEPT_TYPE_ALL_CONTEXTS	2UL
+struct invept_desc {
+	uint64_t	eptp;
+	uint64_t	_res;
+};
+CTASSERT(sizeof(struct invept_desc) == 16);
+
+static void __inline
+invept(uint64_t type, struct invept_desc desc)
+{
+	int error;
+
+	__asm __volatile("invept %[desc], %[type];"
+			 VMX_SET_ERROR_CODE
+			 : [error] "=r" (error)
+			 : [desc] "m" (desc), [type] "r" (type)
+			 : "memory");
+
+	if (error)
+		panic("invept error %d", error);
+}
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
new file mode 100644
index 0000000000..1ced311ca8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -0,0 +1,445 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx_msr.c 284174 2015-06-09 00:14:47Z tychon $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/vmx_msr.c 284174 2015-06-09 00:14:47Z tychon $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+
+#include <machine/clock.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+
+#ifndef	__FreeBSD__
+#include <vm/pmap.h>
+#endif
+
+#include "vmx.h"
+#include "vmx_msr.h"
+
+static boolean_t
+vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
+{
+
+	if (msr_val & (1UL << (bitpos + 32)))
+		return (TRUE);
+	else
+		return (FALSE);
+}
+
+static boolean_t
+vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
+{
+
+	if ((msr_val & (1UL << bitpos)) == 0)
+		return (TRUE);
+	else
+		return (FALSE);
+}
+
+uint32_t
+vmx_revision(void)
+{
+
+	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
+}
+
+/*
+ * Generate a bitmask to be used for the VMCS execution control fields.
+ *
+ * Bits in 'ones_mask' end up set and bits in 'zeros_mask' end up clear; the
+ * don't-care bits get the default value from "Algorithm 3" in Section 27.5.1
+ * "Algorithms for Determining VMX Capabilities". Every bit of '*retval' is
+ * decided here, using unsigned masks so bit 31 does not overflow an int.
+ *
+ * Returns zero on success, or EINVAL if the masks overlap or the CPU does
+ * not allow a requested setting ('*retval' may then be partially updated).
+ */
+int
+vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+	       uint32_t zeros_mask, uint32_t *retval)
+{
+	int i;
+	uint64_t val, trueval;
+	boolean_t true_ctls_avail, one_allowed, zero_allowed;
+
+	/* We cannot ask the same bit to be set to both '1' and '0' */
+	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
+		return (EINVAL);
+
+	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
+		true_ctls_avail = TRUE;
+	else
+		true_ctls_avail = FALSE;
+
+	val = rdmsr(ctl_reg);
+	if (true_ctls_avail)
+		trueval = rdmsr(true_ctl_reg);		/* step c */
+	else
+		trueval = val;				/* step a */
+
+	for (i = 0; i < 32; i++) {
+		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
+		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
+
+		KASSERT(one_allowed || zero_allowed,
+			("invalid zero/one setting for bit %d of ctl 0x%0x, "
+			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
+
+		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
+			if (ones_mask & (1U << i))
+				return (EINVAL);
+			*retval &= ~(1U << i);
+		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
+			if (zeros_mask & (1U << i))
+				return (EINVAL);
+			*retval |= 1U << i;
+		} else {
+			if (zeros_mask & (1U << i))	/* b(ii),c(ii) */
+				*retval &= ~(1U << i);
+			else if (ones_mask & (1U << i)) /* b(ii), c(ii) */
+				*retval |= 1U << i;
+			else if (!true_ctls_avail)
+				*retval &= ~(1U << i);	/* b(iii) */
+			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
+				*retval &= ~(1U << i);
+			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
+				*retval |= 1U << i;
+			else {
+				panic("vmx_set_ctlreg: unable to determine "
+				      "correct value of ctl bit %d for msr "
+				      "0x%0x and true msr 0x%0x", i, ctl_reg,
+				      true_ctl_reg);
+			}
+		}
+	}
+
+	return (0);
+}
+
+void
+msr_bitmap_initialize(char *bitmap)
+{
+
+	memset(bitmap, 0xff, PAGE_SIZE);
+}
+
+int
+msr_bitmap_change_access(char *bitmap, u_int msr, int access)
+{
+	int rd_idx, wr_idx, mask;
+
+	/* Find the byte holding this MSR's read bit; reject other MSRs. */
+	if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+		rd_idx = 1024 + (msr - 0xC0000000) / 8;
+	else if (msr <= 0x00001FFF)
+		rd_idx = msr / 8;
+	else
+		return (EINVAL);
+	wr_idx = rd_idx + 2048;		/* write bit is 2048 bytes further on */
+	mask = 1 << (msr & 0x7);
+
+	/* Clear the bit when the access is requested, set it otherwise. */
+	if ((access & MSR_BITMAP_ACCESS_READ) == 0)
+		bitmap[rd_idx] |= mask;
+	else
+		bitmap[rd_idx] &= ~mask;
+	if ((access & MSR_BITMAP_ACCESS_WRITE) == 0)
+		bitmap[wr_idx] |= mask;
+	else
+		bitmap[wr_idx] &= ~mask;
+
+	return (0);
+}
+
+static uint64_t misc_enable;
+static uint64_t platform_info;
+static uint64_t turbo_ratio_limit;
+static uint64_t host_msrs[GUEST_MSR_NUM];
+
+static bool
+nehalem_cpu(void)
+{
+	u_int family, model;
+
+	/*
+	 * The family:model numbers belonging to the Nehalem microarchitecture
+	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
+	 */
+	family = CPUID_TO_FAMILY(cpu_id);
+	model = CPUID_TO_MODEL(cpu_id);
+	if (family == 0x6) {
+		switch (model) {
+		case 0x1A:
+		case 0x1E:
+		case 0x1F:
+		case 0x2E:
+			return (true);
+		default:
+			break;
+		}
+	}
+	return (false);
+}
+
+static bool
+westmere_cpu(void)
+{
+	u_int family, model;
+
+	/*
+	 * The family:model numbers belonging to the Westmere microarchitecture
+	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
+	 */
+	family = CPUID_TO_FAMILY(cpu_id);
+	model = CPUID_TO_MODEL(cpu_id);
+	if (family == 0x6) {
+		switch (model) {
+		case 0x25:
+		case 0x2C:
+			return (true);
+		default:
+			break;
+		}
+	}
+	return (false);
+}
+
+void
+vmx_msr_init(void)
+{
+	uint64_t bus_freq, ratio;
+	int i;
+
+#ifdef	__FreeBSD__
+	/*
+	 * It is safe to cache the values of the following MSRs because
+	 * they don't change based on curcpu, curproc or curthread.
+	 */
+	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+#endif
+
+	/*
+	 * Initialize emulated MSRs
+	 */
+	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
+	/*
+	 * Set mandatory bits
+	 *  11:   branch trace disabled
+	 *  12:   PEBS unavailable
+	 * Clear unsupported features
+	 *  16:   SpeedStep enable
+	 *  18:   enable MONITOR FSM
+	 */
+	misc_enable |= (1 << 12) | (1 << 11);
+	misc_enable &= ~((1 << 18) | (1 << 16));
+
+	if (nehalem_cpu() || westmere_cpu())
+		bus_freq = 133330000;		/* 133Mhz */
+	else
+		bus_freq = 100000000;		/* 100Mhz */
+
+	/*
+	 * XXXtime
+	 * The ratio should really be based on the virtual TSC frequency as
+	 * opposed to the host TSC.
+	 */
+	ratio = (tsc_freq / bus_freq) & 0xff;
+
+	/*
+	 * The register definition is based on the micro-architecture
+	 * but the following bits are always the same:
+	 * [15:8]  Maximum Non-Turbo Ratio
+	 * [28]    Programmable Ratio Limit for Turbo Mode
+	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
+	 * [47:40] Maximum Efficiency Ratio
+	 *
+	 * The other bits can be safely set to 0 on all
+	 * micro-architectures up to Haswell.
+	 */
+	platform_info = (ratio << 8) | (ratio << 40);
+
+	/*
+	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
+	 * dependent on the maximum cores per package supported by the micro-
+	 * architecture. For example, Westmere supports 6 cores per package
+	 * and uses the low 48 bits. Sandy Bridge supports 8 cores per package
+	 * and uses up all 64 bits.
+	 *
+	 * However, the unused bits are reserved so we pretend that all bits
+	 * in this MSR are valid.
+	 */
+	for (i = 0; i < 8; i++)
+		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
+}
+
+void
+vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
+{
+	/*
+	 * The permissions bitmap is shared between all vcpus so initialize it
+	 * once when initializing the vBSP.
+	 */
+	if (vcpuid == 0) {
+		guest_msr_rw(vmx, MSR_LSTAR);
+		guest_msr_rw(vmx, MSR_CSTAR);
+		guest_msr_rw(vmx, MSR_STAR);
+		guest_msr_rw(vmx, MSR_SF_MASK);
+		guest_msr_rw(vmx, MSR_KGSBASE);
+	}
+	return;
+}
+
+void
+vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
+{
+#ifdef	__FreeBSD__
+	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+	/* Save host MSRs (if any) and restore guest MSRs */
+	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
+	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
+	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
+	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
+	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
+#endif
+}
+
+void
+vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
+{
+#ifdef	__FreeBSD__
+	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+	/* Save guest MSRs */
+	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
+
+	/* Restore host MSRs */
+	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
+	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
+	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
+	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
+
+	/* MSR_KGSBASE will be restored on the way back to userspace */
+#endif
+}
+
+int
+vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
+{
+	int error = 0;
+	/* Emulated MSR reads; 'vmx', 'vcpuid' and 'retu' are not used. */
+	switch (num) {
+	case MSR_MCG_CAP:
+	case MSR_MCG_STATUS:
+		*val = 0;		/* always reads as zero */
+		break;
+	case MSR_MTRRcap:
+	case MSR_MTRRdefType:
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:	/* 8 fixed 4K MTRRs */
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+	case MSR_MTRR64kBase:
+		*val = 0;		/* always reads as zero */
+		break;
+	case MSR_IA32_MISC_ENABLE:
+		*val = misc_enable;	/* computed in vmx_msr_init() */
+		break;
+	case MSR_PLATFORM_INFO:
+		*val = platform_info;
+		break;
+	case MSR_TURBO_RATIO_LIMIT:
+	case MSR_TURBO_RATIO_LIMIT1:
+		*val = turbo_ratio_limit;
+		break;
+	default:
+		error = EINVAL;		/* MSR is not emulated here */
+		break;
+	}
+	return (error);
+}
+
+int
+vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+	uint64_t changed;
+	int error;
+	/* Emulated MSR writes; 'retu' is not used. EINVAL: not handled. */
+	error = 0;
+	switch (num) {
+	case MSR_MCG_CAP:
+	case MSR_MCG_STATUS:
+		break;		/* ignore writes */
+	case MSR_MTRRcap:
+		vm_inject_gp(vmx->vm, vcpuid);	/* writes fault with #GP */
+		break;
+	case MSR_MTRRdefType:
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:	/* 8 fixed 4K MTRRs */
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+	case MSR_MTRR64kBase:
+		break;		/* Ignore writes */
+	case MSR_IA32_MISC_ENABLE:
+		changed = val ^ misc_enable;
+		/*
+		 * If the host has disabled the NX feature then the guest
+		 * also cannot use it. However, a Linux guest will try to
+		 * enable the NX feature by writing to the MISC_ENABLE MSR.
+		 *
+		 * This can be safely ignored because the memory management
+		 * code looks at CPUID.80000001H:EDX.NX to check if the
+		 * functionality is actually enabled.
+		 */
+		changed &= ~(1UL << 34);
+
+		/*
+		 * Punt to userspace if any other bits are being modified.
+		 */
+		if (changed)
+			error = EINVAL;
+
+		break;
+	case MSR_TSC:
+		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
new file mode 100644
index 0000000000..5300d14d9b
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx_msr.h 271888 2014-09-20 02:35:21Z neel $
+ */
+
+#ifndef _VMX_MSR_H_
+#define	_VMX_MSR_H_
+
+struct vmx;
+
+void vmx_msr_init(void);
+void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
+int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
+
+uint32_t vmx_revision(void);
+
+int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
+		   uint32_t zeros_mask, uint32_t *retval);
+
+/*
+ * According to Section 21.10.4 "Software Access to Related Structures",
+ * changes to data structures pointed to by the VMCS must be made only when
+ * there is no logical processor with a current VMCS that points to the
+ * data structure.
+ *
+ * This pretty much limits us to configuring the MSR bitmap before VMCS
+ * initialization for SMP VMs. Unless of course we do it the hard way - which
+ * would involve some form of synchronization between the vcpus to vmclear
+ * all VMCSs that point to the bitmap.
+ */
+#define	MSR_BITMAP_ACCESS_NONE	0x0
+#define	MSR_BITMAP_ACCESS_READ	0x1
+#define	MSR_BITMAP_ACCESS_WRITE	0x2
+#define	MSR_BITMAP_ACCESS_RW	(MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
+void	msr_bitmap_initialize(char *bitmap);
+int	msr_bitmap_change_access(char *bitmap, u_int msr, int access);
+
+#define	guest_msr_rw(vmx, msr) \
+    msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+
+#define	guest_msr_ro(vmx, msr) \
+    msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s
new file mode 100644
index 0000000000..d57dde1093
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_support.s
@@ -0,0 +1,271 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/intel/vmx_support.S 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <machine/asmacros.h>
+
+#include "vmx_assym.s"
+
+/*
+ * Disable interrupts before updating %rsp in VMX_CHECK_AST or
+ * VMX_GUEST_RESTORE.
+ *
+ * The location that %rsp points to is a 'vmxctx' and not a
+ * real stack so we don't want an interrupt handler to trash it
+ */
+#define	VMX_DISABLE_INTERRUPTS		cli
+
+/*
+ * If the thread hosting the vcpu has an ast pending then take care of it
+ * by returning from vmx_setjmp() with a return value of VMX_RETURN_AST.
+ *
+ * Assumes that %rdi holds a pointer to the 'vmxctx' and that interrupts
+ * are disabled.
+ */
+#ifdef	__FreeBSD__
+#define	VMX_CHECK_AST							\
+	movq	PCPU(CURTHREAD),%rax;					\
+	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax);	\
+	je	9f;							\
+	movq	$VMX_RETURN_AST,%rsi;					\
+	movq	%rdi,%rsp;						\
+	addq	$VMXCTX_TMPSTKTOP,%rsp;					\
+	callq	vmx_return;						\
+9:
+#else
+#define	VMX_CHECK_AST							\
+	movq	%gs:CPU_THREAD,%rax;					\
+	movl	T_ASTFLAG(%rax),%eax;					\
+	test	%al,%al;						\
+	je	9f;							\
+	movq	$VMX_RETURN_AST,%rsi;					\
+	movq	%rdi,%rsp;						\
+	addq	$VMXCTX_TMPSTKTOP,%rsp;					\
+	callq	vmx_return;						\
+9:
+#endif
+
+/*
+ * Assumes that %rdi holds a pointer to the 'vmxctx'.
+ *
+ * On "return" all registers are updated to reflect guest state. The two
+ * exceptions are %rip and %rsp. These registers are atomically switched
+ * by hardware from the guest area of the vmcs.
+ *
+ * We modify %rsp to point to the 'vmxctx' so we can use it to restore
+ * host context in case of an error with 'vmlaunch' or 'vmresume'.
+ */
+#define	VMX_GUEST_RESTORE						\
+	movq	%rdi,%rsp;						\
+	movq	VMXCTX_GUEST_CR2(%rdi),%rsi;				\
+	movq	%rsi,%cr2;						\
+	movq	VMXCTX_GUEST_RSI(%rdi),%rsi;				\
+	movq	VMXCTX_GUEST_RDX(%rdi),%rdx;				\
+	movq	VMXCTX_GUEST_RCX(%rdi),%rcx;				\
+	movq	VMXCTX_GUEST_R8(%rdi),%r8;				\
+	movq	VMXCTX_GUEST_R9(%rdi),%r9;				\
+	movq	VMXCTX_GUEST_RAX(%rdi),%rax;				\
+	movq	VMXCTX_GUEST_RBX(%rdi),%rbx;				\
+	movq	VMXCTX_GUEST_RBP(%rdi),%rbp;				\
+	movq	VMXCTX_GUEST_R10(%rdi),%r10;				\
+	movq	VMXCTX_GUEST_R11(%rdi),%r11;				\
+	movq	VMXCTX_GUEST_R12(%rdi),%r12;				\
+	movq	VMXCTX_GUEST_R13(%rdi),%r13;				\
+	movq	VMXCTX_GUEST_R14(%rdi),%r14;				\
+	movq	VMXCTX_GUEST_R15(%rdi),%r15;				\
+	movq	VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */
+
+#define	VM_INSTRUCTION_ERROR(reg)					\
+	jnc 	1f;							\
+	movl 	$VM_FAIL_INVALID,reg;		/* CF is set */		\
+	jmp 	3f;							\
+1:	jnz 	2f;							\
+	movl 	$VM_FAIL_VALID,reg;		/* ZF is set */		\
+	jmp 	3f;							\
+2:	movl 	$VM_SUCCESS,reg;					\
+3:	movl	reg,VMXCTX_LAUNCH_ERROR(%rsp)
+
+	.text
+/*
+ * int vmx_setjmp(ctxp)
+ * %rdi = ctxp
+ *
+ * Returns VMX_RETURN_DIRECT (0) when it returns directly from here, or the
+ * 'retval' handed to vmx_return() (e.g. by vmx_longjmp) when resumed later.
+ */
+ENTRY(vmx_setjmp)
+	movq	(%rsp),%rax			/* return address */
+	movq    %r15,VMXCTX_HOST_R15(%rdi)
+	movq    %r14,VMXCTX_HOST_R14(%rdi)
+	movq    %r13,VMXCTX_HOST_R13(%rdi)
+	movq    %r12,VMXCTX_HOST_R12(%rdi)
+	movq    %rbp,VMXCTX_HOST_RBP(%rdi)
+	movq    %rsp,VMXCTX_HOST_RSP(%rdi)
+	movq    %rbx,VMXCTX_HOST_RBX(%rdi)
+	movq    %rax,VMXCTX_HOST_RIP(%rdi)
+
+	/*
+	 * XXX save host debug registers
+	 */
+	movl	$VMX_RETURN_DIRECT,%eax
+	ret
+END(vmx_setjmp)
+
+/*
+ * void vmx_return(struct vmxctx *ctxp, int retval)
+ * %rdi = ctxp
+ * %rsi = retval
+ * Return to vmm context through vmx_setjmp() with a value of 'retval'.
+ */
+ENTRY(vmx_return)
+	/* Restore host context. */
+	movq	VMXCTX_HOST_R15(%rdi),%r15
+	movq	VMXCTX_HOST_R14(%rdi),%r14
+	movq	VMXCTX_HOST_R13(%rdi),%r13
+	movq	VMXCTX_HOST_R12(%rdi),%r12
+	movq	VMXCTX_HOST_RBP(%rdi),%rbp
+	movq	VMXCTX_HOST_RSP(%rdi),%rsp
+	movq	VMXCTX_HOST_RBX(%rdi),%rbx
+	movq	VMXCTX_HOST_RIP(%rdi),%rax
+	movq	%rax,(%rsp)			/* return address */
+
+	/*
+	 * XXX restore host debug registers
+	 */
+	movl	%esi,%eax
+	ret
+END(vmx_return)
+
+/*
+ * void vmx_longjmp(void)
+ * %rsp points to the struct vmxctx
+ */
+ENTRY(vmx_longjmp)
+	/*
+	 * Save guest state that is not automatically saved in the vmcs.
+	 */
+	movq	%rdi,VMXCTX_GUEST_RDI(%rsp)
+	movq	%rsi,VMXCTX_GUEST_RSI(%rsp)
+	movq	%rdx,VMXCTX_GUEST_RDX(%rsp)
+	movq	%rcx,VMXCTX_GUEST_RCX(%rsp)
+	movq	%r8,VMXCTX_GUEST_R8(%rsp)
+	movq	%r9,VMXCTX_GUEST_R9(%rsp)
+	movq	%rax,VMXCTX_GUEST_RAX(%rsp)
+	movq	%rbx,VMXCTX_GUEST_RBX(%rsp)
+	movq	%rbp,VMXCTX_GUEST_RBP(%rsp)
+	movq	%r10,VMXCTX_GUEST_R10(%rsp)
+	movq	%r11,VMXCTX_GUEST_R11(%rsp)
+	movq	%r12,VMXCTX_GUEST_R12(%rsp)
+	movq	%r13,VMXCTX_GUEST_R13(%rsp)
+	movq	%r14,VMXCTX_GUEST_R14(%rsp)
+	movq	%r15,VMXCTX_GUEST_R15(%rsp)
+
+	movq	%cr2,%rdi
+	movq	%rdi,VMXCTX_GUEST_CR2(%rsp)
+
+	movq	%rsp,%rdi
+	movq	$VMX_RETURN_LONGJMP,%rsi
+
+	addq	$VMXCTX_TMPSTKTOP,%rsp
+	callq	vmx_return
+END(vmx_longjmp)
+
+/*
+ * void vmx_resume(struct vmxctx *ctxp)
+ * %rdi = ctxp
+ *
+ * Although the return type is a 'void' this function may return indirectly
+ * through vmx_setjmp() with a return value of 2.
+ */
+ENTRY(vmx_resume)
+	VMX_DISABLE_INTERRUPTS
+
+	VMX_CHECK_AST
+
+	/*
+	 * Restore guest state that is not automatically loaded from the vmcs.
+	 */
+	VMX_GUEST_RESTORE
+
+	vmresume
+
+	/*
+	 * Capture the reason why vmresume failed.
+	 */
+	VM_INSTRUCTION_ERROR(%eax)
+
+	/* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
+	movq	%rsp,%rdi
+	movq	$VMX_RETURN_VMRESUME,%rsi
+
+	addq	$VMXCTX_TMPSTKTOP,%rsp
+	callq	vmx_return
+END(vmx_resume)
+
+/*
+ * void vmx_launch(struct vmxctx *ctxp)
+ * %rdi = ctxp
+ *
+ * Although the return type is a 'void' this function may return indirectly
+ * through vmx_setjmp() with a return value of 3.
+ */
+ENTRY(vmx_launch)
+	VMX_DISABLE_INTERRUPTS
+
+	VMX_CHECK_AST
+
+	/*
+	 * Restore guest state that is not automatically loaded from the vmcs.
+	 */
+	VMX_GUEST_RESTORE
+
+	vmlaunch
+
+	/*
+	 * Capture the reason why vmlaunch failed.
+	 */
+	VM_INSTRUCTION_ERROR(%eax)
+
+	/* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
+	movq	%rsp,%rdi
+	movq	$VMX_RETURN_VMLAUNCH,%rsi
+
+	addq	$VMXCTX_TMPSTKTOP,%rsp
+	callq	vmx_return
+END(vmx_launch)
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.c b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
new file mode 100644
index 0000000000..a93b252c91
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
@@ -0,0 +1,809 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vatpic.c 279683 2015-03-06 02:05:45Z tychon $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <x86/apicreg.h>
+#include <dev/ic/i8259.h>
+
+#include <machine/vmm.h>
+
+#include "vmm_ktr.h"
+#include "vmm_lapic.h"
+#include "vioapic.h"
+#include "vatpic.h"
+
+static MALLOC_DEFINE(M_VATPIC, "atpic", "bhyve virtual atpic (8259)");
+
+/*
+ * Locking helpers for the vatpic state.  All three operate on the spin
+ * mutex embedded in 'struct vatpic'; VATPIC_LOCKED() is used in assertions.
+ */
+#define	VATPIC_LOCK(vatpic)		mtx_lock_spin(&((vatpic)->mtx))
+#define	VATPIC_UNLOCK(vatpic)		mtx_unlock_spin(&((vatpic)->mtx))
+#define	VATPIC_LOCKED(vatpic)		mtx_owned(&((vatpic)->mtx))
+
+/*
+ * Pin transition requested through vatpic_set_irqstate().
+ */
+enum irqstate {
+	IRQSTATE_ASSERT,	/* raise the pin */
+	IRQSTATE_DEASSERT,	/* lower the pin */
+	IRQSTATE_PULSE		/* raise, then immediately lower the pin */
+};
+
+/*
+ * State of a single emulated 8259 interrupt controller.
+ */
+struct atpic {
+	bool		ready;		/* false from ICW1 until ICW4 accepted */
+	int		icw_num;	/* next ICW expected; 0 once initialized */
+	int		rd_cmd_reg;	/* OCW3_RIS selects ISR reads, else IRR */
+
+	bool		aeoi;		/* automatic EOI requested in ICW4 */
+	bool		poll;		/* next read is a poll command read */
+	bool		rotate;		/* rotate priority on EOI / accept */
+	bool		sfn;		/* special fully-nested mode */
+
+	int		irq_base;	/* vector base from ICW2 (low 3 bits 0) */
+	uint8_t		request;	/* Interrupt Request Register (IRR) */
+	uint8_t		service;	/* Interrupt Service (ISR) */
+	uint8_t		mask;		/* Interrupt Mask Register (IMR) */
+	uint8_t		smm;		/* special mask mode */
+
+	int		acnt[8];	/* sum of pin asserts and deasserts */
+	int		lowprio;	/* lowest priority irq */
+
+	bool		intr_raised;	/* notified, not yet accepted */
+};
+
+/*
+ * Per-VM pair of cascaded 8259 controllers.
+ */
+struct vatpic {
+	struct vm	*vm;		/* owning virtual machine */
+	struct mtx	mtx;		/* spin mutex, see VATPIC_LOCK() */
+	struct atpic	atpic[2];	/* [0] is the master, [1] the slave */
+	uint8_t		elc[2];		/* per-chip bitmask; set bit = level */
+};
+
+/*
+ * Tracing wrappers: forward to the VM_CTRn() macro with the matching
+ * argument count, using the VM that owns the vatpic.
+ */
+#define	VATPIC_CTR0(vatpic, fmt)					\
+	VM_CTR0((vatpic)->vm, fmt)
+
+#define	VATPIC_CTR1(vatpic, fmt, a1)					\
+	VM_CTR1((vatpic)->vm, fmt, a1)
+
+#define	VATPIC_CTR2(vatpic, fmt, a1, a2)				\
+	VM_CTR2((vatpic)->vm, fmt, a1, a2)
+
+#define	VATPIC_CTR3(vatpic, fmt, a1, a2, a3)				\
+	VM_CTR3((vatpic)->vm, fmt, a1, a2, a3)
+
+#define	VATPIC_CTR4(vatpic, fmt, a1, a2, a3, a4)			\
+	VM_CTR4((vatpic)->vm, fmt, a1, a2, a3, a4)
+
+/*
+ * Loop over all the pins in priority order from highest to lowest.
+ *
+ * The pin following 'lowprio' has the highest priority; 'pinvar' then
+ * advances modulo 8 while 'tmpvar' counts the eight iterations.  Every
+ * macro argument is parenthesized so that an arbitrary pointer expression
+ * (e.g. '&vatpic->atpic[1]') can be passed as 'atpic'.
+ */
+#define	ATPIC_PIN_FOREACH(pinvar, atpic, tmpvar)			\
+	for ((tmpvar) = 0, (pinvar) = ((atpic)->lowprio + 1) & 0x7;	\
+	    (tmpvar) < 8;						\
+	    (tmpvar)++, (pinvar) = ((pinvar) + 1) & 0x7)
+
+static void vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate);
+
+/*
+ * Report whether 'atpic' is the master controller, i.e. the first element
+ * of the vatpic's controller pair.
+ */
+static __inline bool
+master_atpic(struct vatpic *vatpic, struct atpic *atpic)
+{
+
+	return (atpic == &vatpic->atpic[0]);
+}
+
+/*
+ * Return the highest-priority pin whose in-service bit is set, or -1 if
+ * there is none that a non-specific EOI may clear.
+ */
+static __inline int
+vatpic_get_highest_isrpin(struct atpic *atpic)
+{
+	int pinbit, pin;
+	int iter;
+
+	ATPIC_PIN_FOREACH(pin, atpic, iter) {
+		pinbit = (1 << pin);
+
+		/* Not in service: keep looking. */
+		if ((atpic->service & pinbit) == 0)
+			continue;
+
+		/*
+		 * An IS bit that is masked by an IMR bit will not be
+		 * cleared by a non-specific EOI in Special Mask Mode.
+		 */
+		if (atpic->smm && (atpic->mask & pinbit) != 0)
+			continue;
+
+		return (pin);
+	}
+
+	return (-1);
+}
+
+/*
+ * Return the highest-priority pin that is requested, unmasked and not
+ * blocked by an in-service interrupt of equal or higher priority, or -1
+ * if no pin qualifies.
+ */
+static __inline int
+vatpic_get_highest_irrpin(struct atpic *atpic)
+{
+	int blocking;
+	int pinbit, pin, iter;
+
+	if (atpic->smm) {
+		/*
+		 * In 'Special Mask Mode', when a mask bit is set in OCW1 it
+		 * inhibits further interrupts at that level and enables
+		 * interrupts from all other levels that are not masked. In
+		 * other words the ISR has no bearing on the levels that can
+		 * generate interrupts.
+		 */
+		blocking = 0;
+	} else if (atpic->sfn) {
+		/*
+		 * In 'Special Fully-Nested Mode' when an interrupt request
+		 * from a slave is in service, the slave is not locked out
+		 * from the master's priority logic.
+		 */
+		blocking = atpic->service & ~(1 << 2);
+	} else {
+		blocking = atpic->service;
+	}
+
+	ATPIC_PIN_FOREACH(pin, atpic, iter) {
+		pinbit = 1 << pin;
+
+		/*
+		 * Stop at the first pin that is already being serviced:
+		 * everything from here on has the same or lower priority.
+		 */
+		if ((blocking & pinbit) != 0)
+			break;
+
+		/* Skip pins that are idle or masked. */
+		if ((atpic->request & pinbit) == 0 ||
+		    (atpic->mask & pinbit) != 0)
+			continue;
+
+		return (pin);
+	}
+
+	return (-1);
+}
+
+/*
+ * Re-evaluate both controllers and signal any newly deliverable interrupt.
+ * The slave is checked first and cascades into master pin 2; the master is
+ * then checked and, if it has an eligible pin, the request is forwarded to
+ * the local APIC (LINT0) and to I/O APIC pin 0.
+ *
+ * Must be called with the vatpic lock held.
+ */
+static void
+vatpic_notify_intr(struct vatpic *vatpic)
+{
+	struct atpic *atpic;
+	int pin;
+
+	KASSERT(VATPIC_LOCKED(vatpic), ("vatpic_notify_intr not locked"));
+
+	/*
+	 * First check the slave.
+	 */
+	atpic = &vatpic->atpic[1];
+	if (!atpic->intr_raised &&
+	    (pin = vatpic_get_highest_irrpin(atpic)) != -1) {
+		VATPIC_CTR4(vatpic, "atpic slave notify pin = %d "
+		    "(imr 0x%x irr 0x%x isr 0x%x)", pin,
+		    atpic->mask, atpic->request, atpic->service);
+
+		/*
+		 * Cascade the request from the slave to the master.
+		 *
+		 * vatpic_set_pinstate() calls back into this function, so
+		 * 'intr_raised' is set first to keep the nested invocations
+		 * from cascading the same request again.
+		 */
+		atpic->intr_raised = true;
+		vatpic_set_pinstate(vatpic, 2, true);
+		vatpic_set_pinstate(vatpic, 2, false);
+	} else {
+		VATPIC_CTR3(vatpic, "atpic slave no eligible interrupts "
+		    "(imr 0x%x irr 0x%x isr 0x%x)",
+		    atpic->mask, atpic->request, atpic->service);
+	}
+
+	/*
+	 * Then check the master.
+	 */
+	atpic = &vatpic->atpic[0];
+	if (!atpic->intr_raised &&
+	    (pin = vatpic_get_highest_irrpin(atpic)) != -1) {
+		VATPIC_CTR4(vatpic, "atpic master notify pin = %d "
+		    "(imr 0x%x irr 0x%x isr 0x%x)", pin,
+		    atpic->mask, atpic->request, atpic->service);
+
+		/*
+		 * From Section 3.6.2, "Interrupt Modes", in the
+		 * MPtable Specification, Version 1.4
+		 *
+		 * PIC interrupts are routed to both the Local APIC
+		 * and the I/O APIC to support operation in 1 of 3
+		 * modes.
+		 *
+		 * 1. Legacy PIC Mode: the PIC effectively bypasses
+		 * all APIC components.  In this mode the local APIC is
+		 * disabled and LINT0 is reconfigured as INTR to
+		 * deliver the PIC interrupt directly to the CPU.
+		 *
+		 * 2. Virtual Wire Mode: the APIC is treated as a
+		 * virtual wire which delivers interrupts from the PIC
+		 * to the CPU.  In this mode LINT0 is programmed as
+		 * ExtINT to indicate that the PIC is the source of
+		 * the interrupt.
+		 *
+		 * 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
+		 * fielded by the I/O APIC and delivered to the appropriate
+		 * CPU.  In this mode the I/O APIC input 0 is programmed
+		 * as ExtINT to indicate that the PIC is the source of the
+		 * interrupt.
+		 */
+		atpic->intr_raised = true;
+		lapic_set_local_intr(vatpic->vm, -1, APIC_LVT_LINT0);
+		vioapic_pulse_irq(vatpic->vm, 0);
+	} else {
+		VATPIC_CTR3(vatpic, "atpic master no eligible interrupts "
+		    "(imr 0x%x irr 0x%x isr 0x%x)",
+		    atpic->mask, atpic->request, atpic->service);
+	}
+}
+
+/*
+ * Handle ICW1, which begins (re)initialization of a controller.  The chip
+ * is marked not ready and its request/mask/priority/read-select state is
+ * reset.  Only cascade mode with ICW4 is supported.
+ *
+ * Returns 0 on success, or -1 if 'val' requests an unsupported mode, in
+ * which case 'icw_num' is left at 1.
+ */
+static int
+vatpic_icw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic icw1 0x%x", val);
+
+	atpic->ready = false;
+
+	atpic->icw_num = 1;
+	atpic->request = 0;
+	atpic->mask = 0;
+	atpic->lowprio = 7;
+	atpic->rd_cmd_reg = 0;
+	atpic->poll = 0;
+	atpic->smm = 0;
+
+	if ((val & ICW1_SNGL) != 0) {
+		VATPIC_CTR0(vatpic, "vatpic cascade mode required");
+		return (-1);
+	}
+
+	if ((val & ICW1_IC4) == 0) {
+		VATPIC_CTR0(vatpic, "vatpic icw4 required");
+		return (-1);
+	}
+
+	/* ICW2 is expected next. */
+	atpic->icw_num++;
+
+	return (0);
+}
+
+/*
+ * Handle ICW2: record the interrupt vector base.  The low three bits of
+ * 'val' are discarded because they are supplied by the pin number.
+ * Always returns 0.
+ */
+static int
+vatpic_icw2(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic icw2 0x%x", val);
+
+	atpic->irq_base = val & 0xf8;
+
+	atpic->icw_num++;
+
+	return (0);
+}
+
+/*
+ * Handle ICW3.  The value is only traced, not stored; the function just
+ * advances the initialization sequence.  Always returns 0.
+ */
+static int
+vatpic_icw3(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic icw3 0x%x", val);
+
+	atpic->icw_num++;
+
+	return (0);
+}
+
+/*
+ * Handle ICW4, the final initialization word.  8086 mode is mandatory.
+ * The automatic-EOI and special-fully-nested settings are taken from 'val'
+ * on every initialization, so re-initializing a controller without those
+ * bits turns the corresponding mode off again instead of leaving a stale
+ * setting from an earlier initialization behind.
+ *
+ * Special fully-nested mode is only honoured on the master; a request for
+ * it on the slave is traced and ignored.
+ *
+ * Returns 0 and marks the controller ready on success, -1 if 8086 mode was
+ * not requested.
+ */
+static int
+vatpic_icw4(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic icw4 0x%x", val);
+
+	if ((val & ICW4_8086) == 0) {
+		VATPIC_CTR0(vatpic, "vatpic microprocessor mode required");
+		return (-1);
+	}
+
+	atpic->aeoi = ((val & ICW4_AEOI) != 0);
+
+	if (master_atpic(vatpic, atpic)) {
+		atpic->sfn = ((val & ICW4_SFNM) != 0);
+	} else if ((val & ICW4_SFNM) != 0) {
+		VATPIC_CTR1(vatpic, "Ignoring special fully nested "
+		    "mode on slave atpic: %#x", val);
+	}
+
+	/* Initialization sequence complete. */
+	atpic->icw_num = 0;
+	atpic->ready = true;
+
+	return (0);
+}
+
+/*
+ * Handle OCW1: load the interrupt mask register.  Always returns 0.
+ */
+static int
+vatpic_ocw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic ocw1 0x%x", val);
+
+	/* 'val' is already eight bits wide, so it is stored unchanged. */
+	atpic->mask = val;
+
+	return (0);
+}
+
+/*
+ * Handle OCW2: end-of-interrupt and priority rotation commands.
+ * Always returns 0.
+ */
+static int
+vatpic_ocw2(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	const bool eoi = ((val & OCW2_EOI) != 0);
+	const bool specific = ((val & OCW2_SL) != 0);
+	int level;
+
+	VATPIC_CTR1(vatpic, "atpic ocw2 0x%x", val);
+
+	atpic->rotate = ((val & OCW2_R) != 0);
+
+	if (!eoi) {
+		/* specific priority */
+		if (specific && atpic->rotate)
+			atpic->lowprio = val & 0x7;
+		return (0);
+	}
+
+	/*
+	 * A specific EOI names the level in the low three bits; a
+	 * non-specific EOI applies to the highest-priority in-service pin.
+	 */
+	if (specific)
+		level = val & 0x7;
+	else
+		level = vatpic_get_highest_isrpin(atpic);
+
+	if (level == -1)
+		return (0);
+
+	atpic->service &= ~(1 << level);
+	if (atpic->rotate)
+		atpic->lowprio = level;
+
+	return (0);
+}
+
+/*
+ * Handle OCW3: special mask mode control, register read selection and
+ * poll command.  Always returns 0.
+ */
+static int
+vatpic_ocw3(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
+{
+	VATPIC_CTR1(vatpic, "atpic ocw3 0x%x", val);
+
+	/* The SMM bit is only honoured when ESMM is set alongside it. */
+	if ((val & OCW3_ESMM) != 0) {
+		if ((val & OCW3_SMM) != 0)
+			atpic->smm = 1;
+		else
+			atpic->smm = 0;
+		VATPIC_CTR2(vatpic, "%s atpic special mask mode %s",
+		    master_atpic(vatpic, atpic) ? "master" : "slave",
+		    atpic->smm ?  "enabled" : "disabled");
+	}
+
+	if ((val & OCW3_RR) != 0) {
+		/* Remember which register a command-port read returns. */
+		atpic->rd_cmd_reg = val & OCW3_RIS;
+
+		/* Polling mode */
+		atpic->poll = ((val & OCW3_P) != 0);
+	}
+
+	return (0);
+}
+
+/*
+ * Apply one assert (newstate == true) or deassert (newstate == false) to
+ * input 'pin' (0-15; 0-7 belong to the master, 8-15 to the slave), update
+ * the request register accordingly and re-evaluate pending interrupts.
+ *
+ * Each pin keeps a running count of asserts minus deasserts in 'acnt'.
+ * Must be called with the vatpic lock held.
+ */
+static void
+vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate)
+{
+	struct atpic *atpic;
+	int oldcnt, newcnt;
+	bool level;
+
+	KASSERT(pin >= 0 && pin < 16,
+	    ("vatpic_set_pinstate: invalid pin number %d", pin));
+	KASSERT(VATPIC_LOCKED(vatpic),
+	    ("vatpic_set_pinstate: vatpic is not locked"));
+
+	atpic = &vatpic->atpic[pin >> 3];
+
+	oldcnt = atpic->acnt[pin & 0x7];
+	if (newstate)
+		atpic->acnt[pin & 0x7]++;
+	else
+		atpic->acnt[pin & 0x7]--;
+	newcnt = atpic->acnt[pin & 0x7];
+
+	/* More deasserts than asserts; only traced, not corrected. */
+	if (newcnt < 0) {
+		VATPIC_CTR2(vatpic, "atpic pin%d: bad acnt %d", pin, newcnt);
+	}
+
+	/* A set bit in the edge/level control register means level. */
+	level = ((vatpic->elc[pin >> 3] & (1 << (pin & 0x7))) != 0);
+
+	if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) {
+		/* rising edge or level */
+		VATPIC_CTR1(vatpic, "atpic pin%d: asserted", pin);
+		atpic->request |= (1 << (pin & 0x7));
+	} else if (oldcnt == 1 && newcnt == 0) {
+		/*
+		 * falling edge: only a level-triggered pin drops its
+		 * request here; an edge-triggered request stays latched.
+		 */
+		VATPIC_CTR1(vatpic, "atpic pin%d: deasserted", pin);
+		if (level)
+			atpic->request &= ~(1 << (pin & 0x7));
+	} else {
+		VATPIC_CTR3(vatpic, "atpic pin%d: %s, ignored, acnt %d",
+		    pin, newstate ? "asserted" : "deasserted", newcnt);
+	}
+
+	vatpic_notify_intr(vatpic);
+}
+
+/*
+ * Common implementation behind vatpic_assert_irq(), vatpic_deassert_irq()
+ * and vatpic_pulse_irq().
+ *
+ * Returns EINVAL if 'irq' is outside 0-15, otherwise 0.  The request is
+ * silently dropped (still returning 0) while the controller that owns the
+ * irq has not completed initialization.
+ */
+static int
+vatpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
+{
+	struct vatpic *vatpic;
+	struct atpic *atpic;
+
+	if (irq < 0 || irq > 15)
+		return (EINVAL);
+
+	vatpic = vm_atpic(vm);
+	atpic = &vatpic->atpic[irq >> 3];
+
+	/*
+	 * NOTE(review): 'ready' is read before the lock is taken -- confirm
+	 * that racing with a guest re-initialization is acceptable here.
+	 */
+	if (atpic->ready == false)
+		return (0);
+
+	VATPIC_LOCK(vatpic);
+	switch (irqstate) {
+	case IRQSTATE_ASSERT:
+		vatpic_set_pinstate(vatpic, irq, true);
+		break;
+	case IRQSTATE_DEASSERT:
+		vatpic_set_pinstate(vatpic, irq, false);
+		break;
+	case IRQSTATE_PULSE:
+		vatpic_set_pinstate(vatpic, irq, true);
+		vatpic_set_pinstate(vatpic, irq, false);
+		break;
+	default:
+		panic("vatpic_set_irqstate: invalid irqstate %d", irqstate);
+	}
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Assert 'irq' (0-15).  Returns 0, or EINVAL for an out-of-range irq.
+ */
+int
+vatpic_assert_irq(struct vm *vm, int irq)
+{
+	return (vatpic_set_irqstate(vm, irq, IRQSTATE_ASSERT));
+}
+
+/*
+ * Deassert 'irq' (0-15).  Returns 0, or EINVAL for an out-of-range irq.
+ */
+int
+vatpic_deassert_irq(struct vm *vm, int irq)
+{
+	return (vatpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT));
+}
+
+/*
+ * Assert and then immediately deassert 'irq' (0-15).  Returns 0, or EINVAL
+ * for an out-of-range irq.
+ */
+int
+vatpic_pulse_irq(struct vm *vm, int irq)
+{
+	return (vatpic_set_irqstate(vm, irq, IRQSTATE_PULSE));
+}
+
+/*
+ * Select edge or level triggering for 'irq' (0-15) by updating the
+ * edge/level control bit of the owning controller.
+ *
+ * Returns 0 on success, or EINVAL when 'irq' is out of range or when level
+ * triggering is requested for an irq that must stay edge triggered.
+ */
+int
+vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger)
+{
+	struct vatpic *vatpic;
+	int chip, pinbit;
+
+	if (irq < 0 || irq > 15)
+		return (EINVAL);
+
+	/*
+	 * See comment in vatpic_elc_handler.  These IRQs must be
+	 * edge triggered.
+	 */
+	if (trigger == LEVEL_TRIGGER &&
+	    (irq == 0 || irq == 1 || irq == 2 || irq == 8 || irq == 13))
+		return (EINVAL);
+
+	chip = irq >> 3;
+	pinbit = 1 << (irq & 0x7);
+
+	vatpic = vm_atpic(vm);
+
+	VATPIC_LOCK(vatpic);
+	if (trigger == LEVEL_TRIGGER)
+		vatpic->elc[chip] |= pinbit;
+	else
+		vatpic->elc[chip] &= ~pinbit;
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Store in '*vecptr' the vector of the highest-priority pending interrupt.
+ * If the master's winning pin is the cascade input (2), the slave is
+ * consulted instead and its vector base is used.  When no pin is eligible
+ * the vector for pin 7 of the controller last examined is returned.
+ */
+void
+vatpic_pending_intr(struct vm *vm, int *vecptr)
+{
+	struct vatpic *vatpic;
+	struct atpic *atpic;
+	int pin;
+
+	vatpic = vm_atpic(vm);
+
+	atpic = &vatpic->atpic[0];
+
+	VATPIC_LOCK(vatpic);
+
+	pin = vatpic_get_highest_irrpin(atpic);
+	if (pin == 2) {
+		/* Cascade input: the real source is on the slave. */
+		atpic = &vatpic->atpic[1];
+		pin = vatpic_get_highest_irrpin(atpic);
+	}
+
+	/*
+	 * If there are no pins active at this moment then return the spurious
+	 * interrupt vector instead.
+	 */
+	if (pin == -1)
+		pin = 7;
+
+	KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin));
+	*vecptr = atpic->irq_base + pin;
+
+	VATPIC_UNLOCK(vatpic);
+}
+
+/*
+ * Record that the interrupt on 'pin' of this controller has been accepted:
+ * drop the raised flag, clear the request bit if the pin is no longer
+ * asserted, and either mark the pin in service or, in automatic-EOI mode,
+ * apply priority rotation straight away.
+ */
+static void
+vatpic_pin_accepted(struct atpic *atpic, int pin)
+{
+	const int pinbit = (1 << pin);
+
+	atpic->intr_raised = false;
+
+	if (atpic->acnt[pin] == 0)
+		atpic->request &= ~pinbit;
+
+	if (!atpic->aeoi) {
+		atpic->service |= pinbit;
+		return;
+	}
+
+	/* Automatic EOI: nothing stays in service. */
+	if (atpic->rotate)
+		atpic->lowprio = pin;
+}
+
+/*
+ * Notify the vatpic that 'vector' has been accepted.  The low three bits
+ * of the vector give the pin; the remaining bits are compared against the
+ * slave's vector base to decide which controller the interrupt came from.
+ * Pending interrupts are re-evaluated afterwards.
+ */
+void
+vatpic_intr_accepted(struct vm *vm, int vector)
+{
+	struct vatpic *vatpic;
+	int pin;
+
+	vatpic = vm_atpic(vm);
+
+	VATPIC_LOCK(vatpic);
+
+	pin = vector & 0x7;
+
+	/*
+	 * NOTE(review): any vector that does not match the slave's base is
+	 * attributed to the master without checking the master's base --
+	 * confirm callers only pass vectors obtained from
+	 * vatpic_pending_intr().
+	 */
+	if ((vector & ~0x7) == vatpic->atpic[1].irq_base) {
+		vatpic_pin_accepted(&vatpic->atpic[1], pin);
+		/*
+		 * If this vector originated from the slave,
+		 * accept the cascaded interrupt too.
+		 */
+		vatpic_pin_accepted(&vatpic->atpic[0], 2);
+	} else {
+		vatpic_pin_accepted(&vatpic->atpic[0], pin);
+	}
+
+	vatpic_notify_intr(vatpic);
+
+	VATPIC_UNLOCK(vatpic);
+}
+
+/*
+ * Service a guest read from one of a controller's two I/O ports and store
+ * the result in '*eax'.  A pending poll command takes precedence over the
+ * normal register reads and is consumed by the read.  Always returns 0.
+ */
+static int
+vatpic_read(struct vatpic *vatpic, struct atpic *atpic, bool in, int port,
+	    int bytes, uint32_t *eax)
+{
+	int pin;
+
+	VATPIC_LOCK(vatpic);
+
+	if (atpic->poll) {
+		/* Poll command: accept and report the winning pin, if any. */
+		atpic->poll = 0;
+		pin = vatpic_get_highest_irrpin(atpic);
+		if (pin >= 0) {
+			vatpic_pin_accepted(atpic, pin);
+			*eax = 0x80 | pin;
+		} else {
+			*eax = 0;
+		}
+	} else if (port & ICU_IMR_OFFSET) {
+		/* read interrupt mask register */
+		*eax = atpic->mask;
+	} else if (atpic->rd_cmd_reg == OCW3_RIS) {
+		/* read interrupt service register */
+		*eax = atpic->service;
+	} else {
+		/* read interrupt request register */
+		*eax = atpic->request;
+	}
+
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Service a guest write of the low byte of '*eax' to one of a controller's
+ * two I/O ports, dispatching to the matching ICW/OCW handler.
+ *
+ * Returns the handler's result: 0 on success, -1 if the guest requested an
+ * unsupported configuration.
+ */
+static int
+vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port,
+    int bytes, uint32_t *eax)
+{
+	int error;
+	uint8_t val;
+
+	error = 0;
+	val = *eax;
+
+	VATPIC_LOCK(vatpic);
+
+	if (port & ICU_IMR_OFFSET) {
+		/*
+		 * Data port: during initialization the write is the next
+		 * ICW in sequence, otherwise it is OCW1 (the mask).
+		 */
+		switch (atpic->icw_num) {
+		case 2:
+			error = vatpic_icw2(vatpic, atpic, val);
+			break;
+		case 3:
+			error = vatpic_icw3(vatpic, atpic, val);
+			break;
+		case 4:
+			error = vatpic_icw4(vatpic, atpic, val);
+			break;
+		default:
+			error = vatpic_ocw1(vatpic, atpic, val);
+			break;
+		}
+	} else {
+		/* Command port: bit 4 set marks the write as ICW1. */
+		if (val & (1 << 4))
+			error = vatpic_icw1(vatpic, atpic, val);
+
+		/*
+		 * vatpic_icw1() clears 'ready', so an ICW1 write is never
+		 * also interpreted as an OCW.  Bit 3 separates OCW3 from
+		 * OCW2.
+		 */
+		if (atpic->ready) {
+			if (val & (1 << 3))
+				error = vatpic_ocw3(vatpic, atpic, val);
+			else
+				error = vatpic_ocw2(vatpic, atpic, val);
+		}
+	}
+
+	/* The write may have unmasked or re-prioritized a pending irq. */
+	if (atpic->ready)
+		vatpic_notify_intr(vatpic);
+
+	VATPIC_UNLOCK(vatpic);
+
+	return (error);
+}
+
+/*
+ * I/O port handler for the master controller.  Only single-byte accesses
+ * are handled; anything else returns -1.  Otherwise the access is passed
+ * to vatpic_read() or vatpic_write() and that result is returned.
+ */
+int
+vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax)
+{
+	struct vatpic *vatpic = vm_atpic(vm);
+	struct atpic *master = &vatpic->atpic[0];
+
+	if (bytes != 1)
+		return (-1);
+
+	return (in ? vatpic_read(vatpic, master, in, port, bytes, eax) :
+	    vatpic_write(vatpic, master, in, port, bytes, eax));
+}
+
+/*
+ * I/O port handler for the slave controller.  Only single-byte accesses
+ * are handled; anything else returns -1.  Otherwise the access is passed
+ * to vatpic_read() or vatpic_write() and that result is returned.
+ */
+int
+vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax)
+{
+	struct vatpic *vatpic = vm_atpic(vm);
+	struct atpic *slave = &vatpic->atpic[1];
+
+	if (bytes != 1)
+		return (-1);
+
+	return (in ? vatpic_read(vatpic, slave, in, port, bytes, eax) :
+	    vatpic_write(vatpic, slave, in, port, bytes, eax));
+}
+
+/*
+ * I/O port handler for the two edge/level control registers.  IO_ELCR1
+ * selects the master's register; any other port handled here selects the
+ * slave's.  Reads return the stored value; writes store '*eax' with the
+ * bits of the edge-only irqs forced to zero.
+ *
+ * Returns -1 for accesses that are not one byte wide, otherwise 0.
+ */
+int
+vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax)
+{
+	struct vatpic *vatpic;
+	uint8_t *elcr, writable;
+	bool is_master;
+
+	vatpic = vm_atpic(vm);
+	is_master = (port == IO_ELCR1);
+
+	if (bytes != 1)
+		return (-1);
+
+	/*
+	 * For the master PIC the cascade channel (IRQ2), the
+	 * heart beat timer (IRQ0), and the keyboard
+	 * controller (IRQ1) cannot be programmed for level
+	 * mode.
+	 *
+	 * For the slave PIC the real time clock (IRQ8) and
+	 * the floating point error interrupt (IRQ13) cannot
+	 * be programmed for level mode.
+	 */
+	if (is_master) {
+		elcr = &vatpic->elc[0];
+		writable = 0xf8;
+	} else {
+		elcr = &vatpic->elc[1];
+		writable = 0xde;
+	}
+
+	VATPIC_LOCK(vatpic);
+
+	if (in)
+		*eax = *elcr;
+	else
+		*elcr = (*eax & writable);
+
+	VATPIC_UNLOCK(vatpic);
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the vatpic state for 'vm'.  The allocation is
+ * requested zero-filled (M_ZERO), so both controllers start out with
+ * 'ready' false.  Returns the new vatpic.
+ */
+struct vatpic *
+vatpic_init(struct vm *vm)
+{
+	struct vatpic *vatpic;
+
+	vatpic = malloc(sizeof(struct vatpic), M_VATPIC, M_WAITOK | M_ZERO);
+	vatpic->vm = vm;
+
+	mtx_init(&vatpic->mtx, "vatpic lock", NULL, MTX_SPIN);
+
+	return (vatpic);
+}
+
+/*
+ * Release the state allocated by vatpic_init().
+ *
+ * NOTE(review): the spin mutex set up with mtx_init() in vatpic_init() is
+ * not torn down here -- confirm whether mtx_destroy() should be called
+ * before the memory is freed.
+ */
+void
+vatpic_cleanup(struct vatpic *vatpic)
+{
+	free(vatpic, M_VATPIC);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.h b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
new file mode 100644
index 0000000000..ef5e51b158
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vatpic.h 273706 2014-10-26 19:03:06Z neel $
+ */
+
+#ifndef _VATPIC_H_
+#define	_VATPIC_H_
+
+#include <isa/isareg.h>
+
+/* Port bit that selects a controller's data (mask/ICW2-4) port. */
+#define	ICU_IMR_OFFSET	1
+
+/*
+ * Edge/level control register ports.  IO_ELCR1 is the master's register;
+ * IO_ELCR2 is presumably the slave's (vatpic_elc_handler treats every
+ * port other than IO_ELCR1 as the slave).
+ */
+#define	IO_ELCR1	0x4d0
+#define	IO_ELCR2	0x4d1
+
+/* Lifecycle. */
+struct vatpic *vatpic_init(struct vm *vm);
+void vatpic_cleanup(struct vatpic *vatpic);
+
+/* I/O port handlers; each returns -1 for accesses that are not 1 byte. */
+int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port,
+    int bytes, uint32_t *eax);
+int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port,
+    int bytes, uint32_t *eax);
+int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax);
+
+/* Pin control; each returns EINVAL when 'irq' is outside 0-15. */
+int vatpic_assert_irq(struct vm *vm, int irq);
+int vatpic_deassert_irq(struct vm *vm, int irq);
+int vatpic_pulse_irq(struct vm *vm, int irq);
+int vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger);
+
+/* Interrupt delivery: query the pending vector, then acknowledge it. */
+void vatpic_pending_intr(struct vm *vm, int *vecptr);
+void vatpic_intr_accepted(struct vm *vm, int vector);
+
+#endif	/* _VATPIC_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
new file mode 100644
index 0000000000..ce17bdc92c
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -0,0 +1,458 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vatpit.c 273706 2014-10-26 19:03:06Z neel $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+#include <machine/vmm.h>
+
+#include "vmm_ktr.h"
+#include "vatpic.h"
+#include "vioapic.h"
+#include "vatpit.h"
+
+static MALLOC_DEFINE(M_VATPIT, "atpit", "bhyve virtual atpit (8254)");
+
+/*
+ * Locking helpers for the vatpit state.  All three operate on the spin
+ * mutex embedded in 'struct vatpit'.
+ */
+#define	VATPIT_LOCK(vatpit)		mtx_lock_spin(&((vatpit)->mtx))
+#define	VATPIT_UNLOCK(vatpit)		mtx_unlock_spin(&((vatpit)->mtx))
+#define	VATPIT_LOCKED(vatpit)		mtx_owned(&((vatpit)->mtx))
+
+/* Fields of the mode/command byte, as decoded by vatpit_update_mode(). */
+#define	TIMER_SEL_MASK		0xc0
+#define	TIMER_RW_MASK		0x30
+#define	TIMER_MODE_MASK		0x0f
+#define	TIMER_SEL_READBACK	0xc0
+
+/* Bits of a channel's status byte. */
+#define	TIMER_STS_OUT		0x80
+#define	TIMER_STS_NULLCNT	0x40
+
+/*
+ * Bits of the read-back command: the two latch-select bits (tested as
+ * active-low in pit_readback1()) and the per-channel select bits.
+ */
+#define	TIMER_RB_LCTR		0x20
+#define	TIMER_RB_LSTATUS	0x10
+#define	TIMER_RB_CTR_2		0x08
+#define	TIMER_RB_CTR_1		0x04
+#define	TIMER_RB_CTR_0		0x02
+
+/* Value reported by vatpit_nmisc_handler() when channel 2's output is set. */
+#define	TMR2_OUT_STS		0x20
+
+/* Counter frequency handed to FREQ2BT() in vatpit_init(). */
+#define	PIT_8254_FREQ		1193182
+/* Divide 'freq' by 'hz', rounding to the nearest integer. */
+#define	TIMER_DIV(freq, hz)	(((freq) + (hz) / 2) / (hz))
+
+/*
+ * Argument passed to vatpit_callout_handler(); identifies the channel whose
+ * callout fired.
+ */
+struct vatpit_callout_arg {
+	struct vatpit	*vatpit;	/* owning timer */
+	int		channel_num;	/* index into vatpit->channel[] */
+};
+
+
+/*
+ * State of one of the three counter channels.
+ */
+struct channel {
+	int		mode;		/* mode bits from the control word */
+	uint16_t	initial;	/* initial counter value */
+	sbintime_t	now_sbt;	/* uptime when counter was loaded */
+	uint8_t		cr[2];		/* count written by guest: [0] low, [1] high */
+	uint8_t		ol[2];		/* latched count: [1] LSB, [0] MSB */
+	bool		slatched;	/* status latched */
+	uint8_t		status;		/* status byte returned when latched */
+	int		crbyte;		/* index of next cr[] byte to be written */
+	int		olbyte;		/* latched bytes still to be read */
+	int		frbyte;		/* unlatched read returns MSB when set */
+	struct callout	callout;	/* interval timer (armed for channel 0) */
+	sbintime_t	callout_sbt;	/* target time */
+	struct vatpit_callout_arg callout_arg;
+};
+
+/*
+ * Per-VM emulated interval timer with three channels.
+ */
+struct vatpit {
+	struct vm	*vm;		/* owning virtual machine */
+	struct mtx	mtx;		/* spin mutex, see VATPIT_LOCK() */
+
+	sbintime_t	freq_sbt;	/* length of one counter tick */
+
+	struct channel	channel[3];
+};
+
+static void pit_timer_start_cntr0(struct vatpit *vatpit);
+
+/*
+ * Compute the state of a channel's output.  Only mode TIMER_INTTC is
+ * modelled: the output reads 1 once at least 'initial' ticks have elapsed
+ * since the counter was loaded.  Every other mode reports 0.
+ */
+static int
+vatpit_get_out(struct vatpit *vatpit, int channel)
+{
+	struct channel *ch = &vatpit->channel[channel];
+	sbintime_t elapsed;
+
+	if (ch->mode != TIMER_INTTC)
+		return (0);
+
+	elapsed = (sbinuptime() - ch->now_sbt) / vatpit->freq_sbt;
+
+	return ((ch->initial - elapsed) <= 0);
+}
+
+/*
+ * Callout handler for a channel's interval timer.  Unless the callout was
+ * reset or stopped in the meantime, it re-arms the channel 0 timer when the
+ * channel is in rate generator mode and pulses the timer interrupt on both
+ * the PIC (irq 0) and the I/O APIC (pin 2).
+ */
+static void
+vatpit_callout_handler(void *a)
+{
+	struct vatpit_callout_arg *arg = a;
+	struct vatpit *vatpit;
+	struct callout *callout;
+	struct channel *c;
+
+	vatpit = arg->vatpit;
+	c = &vatpit->channel[arg->channel_num];
+	callout = &c->callout;
+
+	VM_CTR1(vatpit->vm, "atpit t%d fired", arg->channel_num);
+
+	VATPIT_LOCK(vatpit);
+
+	if (callout_pending(callout))		/* callout was reset */
+		goto done;
+
+	if (!callout_active(callout))		/* callout was stopped */
+		goto done;
+
+	callout_deactivate(callout);
+
+	/* Periodic mode: schedule the next expiry before signalling. */
+	if (c->mode == TIMER_RATEGEN) {
+		pit_timer_start_cntr0(vatpit);
+	}
+
+	vatpic_pulse_irq(vatpit->vm, 0);
+	vioapic_pulse_irq(vatpit->vm, 2);
+
+done:
+	VATPIT_UNLOCK(vatpit);
+	return;
+}
+
+/*
+ * Arm channel 0's callout for its next expiry, one counter period after
+ * the previous target time.  Does nothing while the channel's initial
+ * count is zero.
+ */
+static void
+pit_timer_start_cntr0(struct vatpit *vatpit)
+{
+	struct channel *ch0 = &vatpit->channel[0];
+	sbintime_t curtime, period, slop;
+
+	if (ch0->initial == 0)
+		return;
+
+	period = ch0->initial * vatpit->freq_sbt;
+	slop = period >> tc_precexp;
+	ch0->callout_sbt += period;
+
+	/*
+	 * If the new target already lies in the past, i.e. the previous
+	 * target was more than 'initial' ticks ago, start over from the
+	 * current time.
+	 */
+	curtime = sbinuptime();
+	if (ch0->callout_sbt < curtime)
+		ch0->callout_sbt = curtime + period;
+
+	callout_reset_sbt(&ch0->callout, ch0->callout_sbt, slop,
+	    vatpit_callout_handler, &ch0->callout_arg, C_ABSOLUTE);
+}
+
+/*
+ * Compute the current value of a channel's counter from the time elapsed
+ * since it was loaded and, if 'latch' is true, store it in the output
+ * latch.
+ *
+ * Returns the counter value, or 0 without doing anything when a latch is
+ * requested while a previously latched value has not been fully read.
+ */
+static uint16_t
+pit_update_counter(struct vatpit *vatpit, struct channel *c, bool latch)
+{
+	uint16_t lval;
+	sbintime_t delta_ticks;
+
+	/* cannot latch a new value until the old one has been consumed */
+	if (latch && c->olbyte != 0)
+		return (0);
+
+	if (c->initial == 0) {
+		/*
+		 * This is possibly an o/s bug - reading the value of
+		 * the timer without having set up the initial value.
+		 *
+		 * The original user-space version of this code set
+		 * the timer to 100hz in this condition; do the same
+		 * here.
+		 */
+		c->initial = TIMER_DIV(PIT_8254_FREQ, 100);
+		c->now_sbt = sbinuptime();
+		c->status &= ~TIMER_STS_NULLCNT;
+	}
+
+	delta_ticks = (sbinuptime() - c->now_sbt) / vatpit->freq_sbt;
+
+	/* The counter counts down from 'initial' and wraps around. */
+	lval = c->initial - delta_ticks % c->initial;
+
+	if (latch) {
+		/*
+		 * Readers consume ol[--olbyte], so the LSB stored in ol[1]
+		 * is returned first and the MSB in ol[0] second.
+		 */
+		c->olbyte = 2;
+		c->ol[1] = lval;		/* LSB */
+		c->ol[0] = lval >> 8;		/* MSB */
+	}
+
+	return (lval);
+}
+
+/*
+ * Apply a read-back command to a single channel: latch its count and/or
+ * its status as selected by 'cmd', unless already latched.
+ * Always returns 0.
+ */
+static int
+pit_readback1(struct vatpit *vatpit, int channel, uint8_t cmd)
+{
+	struct channel *c;
+
+	c = &vatpit->channel[channel];
+
+	/*
+	 * Latch the count/status of the timer if not already latched.
+	 * N.B. that the count/status latch-select bits are active-low.
+	 */
+	if (!(cmd & TIMER_RB_LCTR) && !c->olbyte) {
+		(void) pit_update_counter(vatpit, c, true);
+	}
+
+	if (!(cmd & TIMER_RB_LSTATUS) && !c->slatched) {
+		c->slatched = true;
+		/*
+		 * For mode 0, see if the elapsed time is greater
+		 * than the initial value - this results in the
+		 * output pin being set to 1 in the status byte.
+		 */
+		if (c->mode == TIMER_INTTC && vatpit_get_out(vatpit, channel))
+			c->status |= TIMER_STS_OUT;
+		else
+			c->status &= ~TIMER_STS_OUT;
+	}
+
+	return (0);
+}
+
+/*
+ * Apply a read-back command to every channel selected in 'cmd', in channel
+ * order, stopping at the first failure.  Returns 0 or the first non-zero
+ * result from pit_readback1().
+ */
+static int
+pit_readback(struct vatpit *vatpit, uint8_t cmd)
+{
+	static const uint8_t select[3] = {
+		TIMER_RB_CTR_0, TIMER_RB_CTR_1, TIMER_RB_CTR_2
+	};
+	int chan, error;
+
+	/*
+	 * The readback command can apply to all timers.
+	 */
+	error = 0;
+	for (chan = 0; chan < 3 && error == 0; chan++) {
+		if ((cmd & select[chan]) != 0)
+			error = pit_readback1(vatpit, chan, cmd);
+	}
+
+	return (error);
+}
+
+
+/*
+ * Handle a write to the mode/command port.  The byte is either a read-back
+ * command, a counter latch command for one channel, or a new operating
+ * mode for one channel.
+ *
+ * Returns 0 on success (or the read-back result), -1 for an unsupported
+ * access type or counter mode.
+ */
+static int
+vatpit_update_mode(struct vatpit *vatpit, uint8_t val)
+{
+	struct channel *c;
+	int sel, rw, mode;
+
+	sel = val & TIMER_SEL_MASK;
+	rw = val & TIMER_RW_MASK;
+	mode = val & TIMER_MODE_MASK;
+
+	if (sel == TIMER_SEL_READBACK)
+		return (pit_readback(vatpit, val));
+
+	if (rw == TIMER_LATCH) {
+		/*
+		 * Counter mode is not affected when issuing a
+		 * latch command.
+		 */
+		c = &vatpit->channel[sel >> 6];
+		(void) pit_update_counter(vatpit, c, true);
+		return (0);
+	}
+
+	/* Apart from latch commands only 16-bit access is supported. */
+	if (rw != TIMER_16BIT)
+		return (-1);
+
+	switch (mode) {
+	case TIMER_INTTC:
+	case TIMER_RATEGEN:
+	case TIMER_SQWAVE:
+	case TIMER_SWSTROBE:
+		break;
+	default:
+		return (-1);
+	}
+
+	c = &vatpit->channel[sel >> 6];
+	c->mode = mode;
+	c->olbyte = 0;	/* reset latch after reprogramming */
+	c->status |= TIMER_STS_NULLCNT;
+
+	return (0);
+}
+
+/*
+ * I/O port handler for the timer's mode port and its three counter ports.
+ *
+ * Returns -1 for accesses that are not one byte wide and for reads of the
+ * mode port; mode port writes return the result of vatpit_update_mode();
+ * counter port accesses return 0.
+ */
+int
+vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax)
+{
+	struct vatpit *vatpit;
+	struct channel *c;
+	uint8_t val;
+	int error;
+
+	vatpit = vm_atpit(vm);
+
+	if (bytes != 1)
+		return (-1);
+
+	val = *eax;
+
+	if (port == TIMER_MODE) {
+		if (in) {
+			VM_CTR0(vatpit->vm, "vatpit attempt to read mode");
+			return (-1);
+		}
+
+		VATPIT_LOCK(vatpit);
+		error = vatpit_update_mode(vatpit, val);
+		VATPIT_UNLOCK(vatpit);
+
+		return (error);
+	}
+
+	/* counter ports */
+	KASSERT(port >= TIMER_CNTR0 && port <= TIMER_CNTR2,
+	    ("invalid port 0x%x", port));
+	c = &vatpit->channel[port - TIMER_CNTR0];
+
+	VATPIT_LOCK(vatpit);
+	if (in && c->slatched) {
+		/*
+		 * Return the status byte if latched
+		 */
+		*eax = c->status;
+		c->slatched = false;
+		c->status = 0;
+	} else if (in) {
+		/*
+		 * The spec says that once the output latch is completely
+		 * read it should revert to "following" the counter. Use
+		 * the free running counter for this case (i.e. Linux
+		 * TSC calibration). Assuming the access mode is 16-bit,
+		 * toggle the MSB/LSB bit on each read.
+		 */
+		if (c->olbyte == 0) {
+			uint16_t tmp;
+
+			tmp = pit_update_counter(vatpit, c, false);
+			if (c->frbyte)
+				tmp >>= 8;
+			tmp &= 0xff;
+			*eax = tmp;
+			c->frbyte ^= 1;
+		}  else
+			*eax = c->ol[--c->olbyte];
+	} else {
+		/*
+		 * Counter write: collect the low byte, then the high byte;
+		 * the new count takes effect once both have arrived.
+		 */
+		c->cr[c->crbyte++] = *eax;
+		if (c->crbyte == 2) {
+			c->status &= ~TIMER_STS_NULLCNT;
+			c->frbyte = 0;
+			c->crbyte = 0;
+			c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8;
+			c->now_sbt = sbinuptime();
+			/* Start an interval timer for channel 0 */
+			if (port == TIMER_CNTR0) {
+				c->callout_sbt = c->now_sbt;
+				pit_timer_start_cntr0(vatpit);
+			}
+			/*
+			 * NOTE(review): a count of 0 is replaced with 0xffff
+			 * only after pit_timer_start_cntr0() has run, and
+			 * that function does nothing when 'initial' is 0, so
+			 * loading 0 into channel 0 arms no callout --
+			 * confirm that this is intended.
+			 */
+			if (c->initial == 0)
+				c->initial = 0xffff;
+		}
+	}
+	VATPIT_UNLOCK(vatpit);
+
+	return (0);
+}
+
+/*
+ * I/O port handler for the NMI status and control port.  A read reports
+ * channel 2's output state as TMR2_OUT_STS or 0; writes are ignored.
+ * Always returns 0.
+ */
+int
+vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax)
+{
+	struct vatpit *vatpit = vm_atpit(vm);
+
+	if (!in)
+		return (0);
+
+	VATPIT_LOCK(vatpit);
+	*eax = vatpit_get_out(vatpit, 2) ? TMR2_OUT_STS : 0;
+	VATPIT_UNLOCK(vatpit);
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the vatpit state for 'vm': the lock, the tick
+ * length derived from PIT_8254_FREQ, and one callout plus callout argument
+ * per channel.  Returns the new vatpit.
+ */
+struct vatpit *
+vatpit_init(struct vm *vm)
+{
+	struct vatpit *vatpit;
+	struct bintime bt;
+	struct vatpit_callout_arg *arg;
+	int i;
+
+	vatpit = malloc(sizeof(struct vatpit), M_VATPIT, M_WAITOK | M_ZERO);
+	vatpit->vm = vm;
+
+	mtx_init(&vatpit->mtx, "vatpit lock", NULL, MTX_SPIN);
+
+	FREQ2BT(PIT_8254_FREQ, &bt);
+	vatpit->freq_sbt = bttosbt(bt);
+
+	for (i = 0; i < 3; i++) {
+		callout_init(&vatpit->channel[i].callout, true);
+		arg = &vatpit->channel[i].callout_arg;
+		arg->vatpit = vatpit;
+		arg->channel_num = i;
+	}
+
+	return (vatpit);
+}
+
+/*
+ * Drain every channel's callout and release the state allocated by
+ * vatpit_init().
+ *
+ * NOTE(review): the spin mutex set up with mtx_init() in vatpit_init() is
+ * not torn down here -- confirm whether mtx_destroy() should be called
+ * before the memory is freed.
+ */
+void
+vatpit_cleanup(struct vatpit *vatpit)
+{
+	int i;
+
+	for (i = 0; i < 3; i++)
+		callout_drain(&vatpit->channel[i].callout);
+
+	free(vatpit, M_VATPIT);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.h b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
new file mode 100644
index 0000000000..f20ad73e47
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vatpit.h 273706 2014-10-26 19:03:06Z neel $
+ */
+
+#ifndef _VATPIT_H_
+#define	_VATPIT_H_
+
+#include <machine/timerreg.h>
+
+/* I/O port number serviced by vatpit_nmisc_handler(). */
+#define	NMISC_PORT	0x61
+
+/* Create / destroy the per-VM virtual PIT state. */
+struct vatpit *vatpit_init(struct vm *vm);
+void vatpit_cleanup(struct vatpit *vatpit);
+
+/*
+ * Port I/O handlers.  'in' selects read (true) or write (false) and the
+ * value is passed through '*eax'.
+ */
+int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *eax);
+int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
+    int bytes, uint32_t *eax);
+
+#endif	/* _VATPIT_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vdev.c b/usr/src/uts/i86pc/io/vmm/io/vdev.c
new file mode 100644
index 0000000000..0f835625f3
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vdev.c
@@ -0,0 +1,282 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vdev.c 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vdev.c 245678 2013-01-20 03:42:49Z neel $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include "vdev.h"
+
+/*
+ * One registered virtual device: the callback table supplied at
+ * registration time plus the opaque device cookie handed back to every
+ * callback.
+ */
+struct vdev {
+	SLIST_ENTRY(vdev) 	 entry;
+	struct vdev_ops 	*ops;
+	void			*dev;
+};
+/* All registered devices, and how many there are. */
+static SLIST_HEAD(, vdev)	vdev_head;
+static int 		  	vdev_count;
+
+/*
+ * One registered I/O region.  'io' is the caller's io_region pointer as
+ * passed to vdev_register_region(); it is stored, not copied.
+ */
+struct vdev_region {
+	SLIST_ENTRY(vdev_region) 	 entry;
+	struct vdev_ops 		*ops;
+	void				*dev;
+	struct io_region		*io;
+};
+/* All registered regions, and how many there are. */
+static SLIST_HEAD(, vdev_region)	 region_head;
+static int 		  		 region_count;
+
+static MALLOC_DEFINE(M_VDEV, "vdev", "vdev");
+
+/* Event codes accepted by vdev_system_event(). */
+#define VDEV_INIT 	(0)
+#define VDEV_RESET	(1)
+#define VDEV_HALT	(2)
+
+// static const char* vdev_event_str[] = {"VDEV_INIT", "VDEV_RESET", "VDEV_HALT"};
+
+/*
+ * Deliver a lifecycle event (VDEV_INIT, VDEV_RESET or VDEV_HALT) to every
+ * registered device by invoking the matching vdev_ops callback.
+ *
+ * Iteration stops at the first callback that returns non-zero and that value
+ * is returned.  Returns 0 when every callback succeeded or no devices are
+ * registered.  An unrecognized event invokes no callback and is treated as
+ * success ('rc' used to be read uninitialized in that case).
+ */
+static int
+vdev_system_event(int event)
+{
+	/* Indexed by event code; only consulted after a callback ran. */
+	static const char *event_names[] = { "init", "reset", "halt" };
+	struct vdev 	*vd;
+	int		 rc;
+
+	// TODO: locking
+	SLIST_FOREACH(vd, &vdev_head, entry) {
+		rc = 0;
+		switch (event) {
+			case VDEV_INIT:
+				rc = vd->ops->init(vd->dev);
+				break;
+			case VDEV_RESET:
+				rc = vd->ops->reset(vd->dev);
+				break;
+			case VDEV_HALT:
+				rc = vd->ops->halt(vd->dev);
+				break;
+			default:
+				break;
+		}
+		if (rc) {
+			/* rc != 0 implies 'event' was one of the three cases */
+			printf("vdev %s %s failed rc=%d\n",
+			    vd->ops->name, event_names[event], rc);
+			return rc;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Run the init callback of every registered device.
+ * Returns 0 on success or the first non-zero callback result.
+ */
+int
+vdev_init(void)
+{
+	int rc = vdev_system_event(VDEV_INIT);
+
+	return rc;
+}
+
+/*
+ * Run the reset callback of every registered device.
+ * Returns 0 on success or the first non-zero callback result.
+ */
+int
+vdev_reset(void)
+{
+	int rc = vdev_system_event(VDEV_RESET);
+
+	return rc;
+}
+
+/*
+ * Run the halt callback of every registered device.
+ * Returns 0 on success or the first non-zero callback result.
+ */
+int
+vdev_halt(void)
+{
+	int rc = vdev_system_event(VDEV_HALT);
+
+	return rc;
+}
+
+/*
+ * Reset the global device and region bookkeeping to the empty state.
+ * Existing list entries, if any, are not freed here.
+ */
+void
+vdev_vm_init(void)
+{
+	vdev_count = 0;
+	region_count = 0;
+
+	SLIST_INIT(&vdev_head);
+	SLIST_INIT(&region_head);
+}
+/*
+ * Release every bookkeeping node allocated by vdev_register() and
+ * vdev_register_region().
+ *
+ * Both lists set up by vdev_vm_init() are drained; previously only the
+ * device list was emptied, so region nodes that had not been explicitly
+ * unregistered were leaked.  Only the list nodes are freed: the 'dev' cookie
+ * and the io_region referenced by a region node were supplied by the
+ * registrant and remain its responsibility.
+ */
+void
+vdev_vm_cleanup(void)
+{
+	struct vdev *vd;
+	struct vdev_region *region;
+
+	// TODO: locking
+	while (!SLIST_EMPTY(&vdev_head)) {
+		vd = SLIST_FIRST(&vdev_head);
+		SLIST_REMOVE_HEAD(&vdev_head, entry);
+		free(vd, M_VDEV);
+		vdev_count--;
+	}
+
+	while (!SLIST_EMPTY(&region_head)) {
+		region = SLIST_FIRST(&region_head);
+		SLIST_REMOVE_HEAD(&region_head, entry);
+		free(region, M_VDEV);
+		region_count--;
+	}
+}
+
+/*
+ * Add a device to the global device list.
+ *
+ * 'ops' is the callback table used for later events and 'dev' is the opaque
+ * cookie passed back to those callbacks.  The node is allocated with
+ * M_WAITOK.  Always returns 0.
+ */
+int
+vdev_register(struct vdev_ops *ops, void *dev)
+{
+	struct vdev *node;
+
+	node = malloc(sizeof(struct vdev), M_VDEV, M_WAITOK | M_ZERO);
+	node->dev = dev;
+	node->ops = ops;
+
+	// TODO: locking
+	SLIST_INSERT_HEAD(&vdev_head, node, entry);
+	vdev_count++;
+
+	return 0;
+}
+
+/*
+ * Remove the device registered with cookie 'dev' from the global device
+ * list and free its node.  Does nothing when no such device is registered.
+ *
+ * The whole list is scanned, so if the same cookie was registered more than
+ * once the match closest to the tail is the one removed (unchanged
+ * behaviour).  'vdev_count' is now decremented to stay in step with the
+ * list, matching vdev_vm_cleanup() and vdev_unregister_region().
+ */
+void
+vdev_unregister(void *dev)
+{
+	struct vdev 	*vd, *found;
+
+	found = NULL;
+	// TODO: locking
+	SLIST_FOREACH(vd, &vdev_head, entry) {
+		if (vd->dev == dev) {
+			found = vd;
+		}
+	}
+
+	if (found) {
+		SLIST_REMOVE(&vdev_head, found, vdev, entry);
+		free(found, M_VDEV);
+		vdev_count--;
+	}
+}
+
+/* True when 'val' lies in the half-open interval [start, end). */
+#define IN_RANGE(val, start, end)	\
+    (((val) >= (start)) && ((val) < (end)))
+
+/*
+ * Find the first registered region that fully contains the range described
+ * by 'io' ([io->base, io->base + io->len)).
+ *
+ * When 'dev' is non-NULL only regions registered by that device are
+ * considered.  A NULL 'dev' matches regions of any device: vdev_memrw()
+ * only has a guest physical address to look up and passes NULL, and the
+ * previous "(dev && dev == region->dev)" test rejected every region in that
+ * case, so such lookups could never succeed.
+ *
+ * Returns the matching region or NULL.
+ */
+static struct vdev_region*
+vdev_find_region(struct io_region *io, void *dev)
+{
+	struct 		vdev_region *region;
+	uint64_t	region_base;
+	uint64_t	region_end;
+
+	// TODO: locking
+	// FIXME: we should verify we are in the context the current
+	// 	  vcpu here as well.
+	SLIST_FOREACH(region, &region_head, entry) {
+		if (dev != NULL && dev != region->dev)
+			continue;
+
+		region_base = region->io->base;
+		region_end = region_base + region->io->len;
+		/* the end of the request may coincide with region_end */
+		if (IN_RANGE(io->base, region_base, region_end) &&
+		    IN_RANGE(io->base+io->len, region_base, region_end+1))
+			return region;
+	}
+	return NULL;
+}
+
+/*
+ * Record that device 'dev' (with callbacks 'ops') services the range
+ * described by 'io'.
+ *
+ * The 'io' pointer itself is stored, so it must stay valid for as long as
+ * the region is registered.  Returns -EEXIST when vdev_find_region() already
+ * reports a region for this device covering the range, 0 otherwise.
+ */
+int
+vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io)
+{
+	struct vdev_region *node;
+
+	if (vdev_find_region(io, dev) != NULL)
+		return -EEXIST;
+
+	node = malloc(sizeof(struct vdev_region), M_VDEV, M_WAITOK | M_ZERO);
+	node->dev = dev;
+	node->ops = ops;
+	node->io = io;
+
+	// TODO: locking
+	SLIST_INSERT_HEAD(&region_head, node, entry);
+	region_count++;
+
+	return 0;
+}
+
+/*
+ * Drop the region that vdev_find_region() reports for ('io', 'dev') and
+ * free its list node.  Does nothing when no region matches.
+ */
+void
+vdev_unregister_region(void *dev, struct io_region *io)
+{
+	struct vdev_region *match = vdev_find_region(io, dev);
+
+	if (match == NULL)
+		return;
+
+	SLIST_REMOVE(&region_head, match, vdev_region, entry);
+	free(match, M_VDEV);
+	region_count--;
+}
+
+/*
+ * Common implementation of vdev_memread() and vdev_memwrite().
+ *
+ * Looks up the region covering [gpa, gpa + size), checks that the region's
+ * attributes permit the requested direction and forwards the access to the
+ * owning device's memread/memwrite callback.
+ *
+ * Returns -EINVAL when no region is found, -EPERM when the region does not
+ * allow the access, otherwise whatever the callback returns.  For a write
+ * the value is taken from '*data'; for a read the callback receives 'data'.
+ */
+static int
+vdev_memrw(uint64_t gpa, opsize_t size, uint64_t *data, int read)
+{
+	struct vdev_region 	*owner;
+	struct io_region	 span;
+	region_attr_t		 needed;
+
+	span.base = gpa;
+	span.len = size;
+
+	owner = vdev_find_region(&span, NULL);
+	if (owner == NULL)
+		return -EINVAL;
+
+	if (read)
+		needed = MMIO_READ;
+	else
+		needed = MMIO_WRITE;
+
+	if ((owner->io->attr & needed) == 0)
+		return -EPERM;
+
+	if (read)
+		return owner->ops->memread(owner->dev, gpa, size, data);
+
+	return owner->ops->memwrite(owner->dev, gpa, size, *data);
+}
+
+/*
+ * Read 'size' bytes at guest physical address 'gpa' through the owning
+ * device; see vdev_memrw() for the return values.
+ */
+int
+vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data)
+{
+	const int is_read = 1;
+
+	return vdev_memrw(gpa, size, data, is_read);
+}
+
+/*
+ * Write 'data' ('size' bytes) at guest physical address 'gpa' through the
+ * owning device; see vdev_memrw() for the return values.
+ */
+int
+vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data)
+{
+	const int is_read = 0;
+
+	return vdev_memrw(gpa, size, &data, is_read);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vdev.h b/usr/src/uts/i86pc/io/vmm/io/vdev.h
new file mode 100644
index 0000000000..dd2df75ad8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vdev.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vdev.h 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef _VDEV_H_
+#define	_VDEV_H_
+
+/* Width of a memory access; each enumerator's value is the size in bytes. */
+typedef enum {
+	BYTE	= 1,
+	WORD	= 2,
+	DWORD	= 4,
+	QWORD	= 8,
+} opsize_t;
+
+/* Access-permission bits for an io_region; may be OR'ed together. */
+typedef enum {
+	MMIO_READ = 1,
+	MMIO_WRITE = 2,
+} region_attr_t;
+
+/* Description of an address range serviced by a virtual device. */
+struct io_region {
+	uint64_t	base;	/* first address of the range */
+	uint64_t	len;	/* length of the range in bytes */
+	region_attr_t	attr;	/* permitted accesses (MMIO_READ/MMIO_WRITE) */
+	int		vcpu;	/* NOTE(review): not referenced by vdev.c - confirm use */
+};
+
+/*
+ * Device callback signatures.  'dev' is the opaque cookie supplied at
+ * registration.  A non-zero return from init/reset/halt is treated as a
+ * failure by the event dispatcher in vdev.c.
+ */
+typedef int (*vdev_init_t)(void* dev);
+typedef int (*vdev_reset_t)(void* dev);
+typedef int (*vdev_halt_t)(void* dev);
+typedef int (*vdev_memread_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t *data);
+typedef int (*vdev_memwrite_t)(void* dev, uint64_t gpa, opsize_t size, uint64_t data);
+
+
+/* Callback table a virtual device supplies when it registers. */
+struct vdev_ops {
+	const char	*name;		/* printed in failure messages */
+	vdev_init_t	init;
+	vdev_reset_t	reset;
+	vdev_halt_t	halt;
+	vdev_memread_t	memread;
+	vdev_memwrite_t	memwrite;
+};
+
+
+/* Set up / tear down the global device and region lists. */
+void vdev_vm_init(void);
+void vdev_vm_cleanup(void);
+
+/* Add or remove a device, identified by its 'dev' cookie. */
+int  vdev_register(struct vdev_ops *ops, void *dev);
+void vdev_unregister(void *dev);
+
+/* Add or remove an address range serviced by a device. */
+int  vdev_register_region(struct vdev_ops *ops, void *dev, struct io_region *io);
+void vdev_unregister_region(void *dev, struct io_region *io);
+
+/* Broadcast a lifecycle event to every registered device. */
+int vdev_init(void);
+int vdev_reset(void);
+int vdev_halt(void);
+/* Route a memory access at 'gpa' to the device owning that range. */
+int vdev_memread(uint64_t gpa, opsize_t size, uint64_t *data);
+int vdev_memwrite(uint64_t gpa, opsize_t size, uint64_t data);
+
+#endif	/* _VDEV_H_ */
+
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
new file mode 100644
index 0000000000..25f6013da0
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
@@ -0,0 +1,821 @@
+/*-
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vhpet.c 263035 2014-03-11 16:56:00Z tychon $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vhpet.c 263035 2014-03-11 16:56:00Z tychon $");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+
+#include <dev/acpica/acpi_hpet.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_lapic.h"
+#include "vatpic.h"
+#include "vioapic.h"
+#include "vhpet.h"
+
+#include "vmm_ktr.h"
+
+static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
+
+/* Frequency of the emulated main counter. */
+#define	HPET_FREQ	10000000		/* 10.0 Mhz */
+/* Femtoseconds per second; used to derive the tick period in fs. */
+#define	FS_PER_S	1000000000000000ul
+
+/* Timer N Configuration and Capabilities Register */
+/* Bits preserved from the old value on a guest write to cap_config. */
+#define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
+				 HPET_TCAP_FSB_INT_DEL	|		\
+				 HPET_TCAP_SIZE		|		\
+				 HPET_TCAP_PER_INT)
+/*
+ * HPET requires at least 3 timers and up to 32 timers per block.
+ */
+#define	VHPET_NUM_TIMERS	8
+CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
+
+/* Argument handed to vhpet_handler(): identifies which timer's callout fired. */
+struct vhpet_callout_arg {
+	struct vhpet *vhpet;
+	int timer_num;
+};
+
+/* Per-VM virtual HPET state; mutable fields are accessed under 'mtx'. */
+struct vhpet {
+	struct vm	*vm;
+	struct mtx	mtx;
+	sbintime_t	freq_sbt;	/* duration of one HPET_FREQ tick */
+
+	uint64_t	config;		/* Configuration */
+	uint64_t	isr;		/* Interrupt Status */
+	uint32_t	countbase;	/* HPET counter base value */
+	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */
+
+	struct {
+		uint64_t	cap_config;	/* Configuration */
+		uint64_t	msireg;		/* FSB interrupt routing */
+		uint32_t	compval;	/* Comparator */
+		uint32_t	comprate;	/* period in ticks; 0 if one-shot */
+		struct callout	callout;
+		sbintime_t	callout_sbt;	/* time when counter==compval */
+		struct vhpet_callout_arg arg;
+	} timer[VHPET_NUM_TIMERS];
+};
+
+/* Acquire / release the mutex embedded in a struct vhpet. */
+#define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
+#define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))
+
+/* Forward declaration: used by vhpet_handler() before its definition. */
+static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
+    sbintime_t now);
+
+/*
+ * Build the value of the HPET General Capabilities and ID register.
+ *
+ * The low 32 bits carry the vendor id, legacy-routing capability, number of
+ * timers and revision; the high 32 bits carry the main counter tick period
+ * in femtoseconds.
+ *
+ * The fields are assembled from unsigned 64-bit operands.  The original
+ * "0x8086 << 16" shifted a signed int into its sign bit, which is undefined
+ * behaviour and only produced the right value because the sign-extended
+ * result was masked afterwards.
+ */
+static uint64_t
+vhpet_capabilities(void)
+{
+	uint64_t cap = 0;
+
+	cap |= (uint64_t)0x8086 << 16;		/* vendor id */
+	cap |= HPET_CAP_LEG_RT;			/* legacy routing capable */
+	cap |= (uint64_t)(VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
+	cap |= 1;				/* revision */
+	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
+
+	/* keep only the low word before adding the period in the high word */
+	cap &= 0xffffffff;
+	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
+
+	return (cap);
+}
+
+/* True when HPET_CNF_ENABLE is set in the general configuration register. */
+static __inline bool
+vhpet_counter_enabled(struct vhpet *vhpet)
+{
+
+	return ((vhpet->config & HPET_CNF_ENABLE) != 0);
+}
+
+/*
+ * True when timer 'n' delivers its interrupt as an MSI, i.e. both
+ * HPET_TCAP_FSB_INT_DEL and HPET_TCNF_FSB_EN are set in its cap_config.
+ */
+static __inline bool
+vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
+{
+	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
+
+	/*
+	 * LegacyReplacement Route configuration takes precedence over MSI
+	 * for timers 0 and 1.
+	 */
+	if ((n == 0 || n == 1) && (vhpet->config & HPET_CNF_LEG_RT) != 0)
+		return (false);
+
+	return ((vhpet->timer[n].cap_config & msi_enable) == msi_enable);
+}
+
+/*
+ * Return the ioapic pin that timer 'n' is routed to, or 0 when the timer is
+ * not connected to the ioapic.
+ */
+static __inline int
+vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
+{
+	/*
+	 * If the timer is configured to use MSI then treat it as if the
+	 * timer is not connected to the ioapic.
+	 */
+	if (vhpet_timer_msi_enabled(vhpet, n))
+		return (0);
+
+	/*
+	 * In "legacy routing" timers 0 and 1 are connected to
+	 * ioapic pins 2 and 8 respectively.
+	 */
+	if ((vhpet->config & HPET_CNF_LEG_RT) != 0) {
+		if (n == 0)
+			return (2);
+		if (n == 1)
+			return (8);
+	}
+
+	/* Otherwise use the route programmed into the timer's config. */
+	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
+}
+
+/*
+ * Return the atpic (8259) pin used by timer 'n' under legacy routing, or -1
+ * when the timer has no atpic connection.
+ */
+static __inline int
+vhpet_timer_atpic_pin(struct vhpet *vhpet, int n)
+{
+	/* Only legacy routing wires timers to the 8259. */
+	if ((vhpet->config & HPET_CNF_LEG_RT) == 0)
+		return (-1);
+
+	/*
+	 * In "legacy routing" timers 0 and 1 are connected to
+	 * 8259 master pin 0 and slave pin 0 respectively.
+	 */
+	if (n == 0)
+		return (0);
+	if (n == 1)
+		return (8);
+
+	return (-1);
+}
+
+/*
+ * Return the current value of the 32-bit main counter.
+ *
+ * While the counter is enabled the value is 'countbase' plus the number of
+ * 'freq_sbt' ticks elapsed since 'countbase_sbt'; if 'nowptr' is non-NULL
+ * the uptime sample used for that calculation is stored through it.  While
+ * the counter is disabled 'countbase' is returned unchanged and 'nowptr'
+ * must be NULL.
+ */
+static uint32_t
+vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
+{
+	uint32_t val;
+	sbintime_t now, delta;
+
+	val = vhpet->countbase;
+	if (vhpet_counter_enabled(vhpet)) {
+		now = sbinuptime();
+		delta = now - vhpet->countbase_sbt;
+#ifdef	__FreeBSD__
+		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
+		    "%#lx to %#lx", vhpet->countbase_sbt, now));
+#else
+		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
+		    "%lx to %lx", vhpet->countbase_sbt, now));
+#endif
+		/* convert elapsed time to counter ticks */
+		val += delta / vhpet->freq_sbt;
+		if (nowptr != NULL)
+			*nowptr = now;
+	} else {
+		/*
+		 * The sbinuptime corresponding to the 'countbase' is
+		 * meaningless when the counter is disabled. Make sure
+		 * that the caller doesn't want to use it.
+		 */
+		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
+	}
+	return (val);
+}
+
+/*
+ * Clear timer 'n's bit in the interrupt status register, deasserting the
+ * ioapic pin (and the atpic pin, if one is in use) that it was driving.
+ * Does nothing when the bit is not set.
+ */
+static void
+vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
+{
+	int ioapic_pin, atpic_pin;
+
+	if ((vhpet->isr & (1 << n)) == 0)
+		return;
+
+	ioapic_pin = vhpet_timer_ioapic_pin(vhpet, n);
+	KASSERT(ioapic_pin != 0, ("vhpet timer %d irq incorrectly routed", n));
+	vioapic_deassert_irq(vhpet->vm, ioapic_pin);
+
+	atpic_pin = vhpet_timer_atpic_pin(vhpet, n);
+	if (atpic_pin != -1)
+		vatpic_deassert_irq(vhpet->vm, atpic_pin);
+
+	vhpet->isr &= ~(1 << n);
+}
+
+/* True when HPET_TCNF_TYPE is set for timer 'n' (periodic mode). */
+static __inline bool
+vhpet_periodic_timer(struct vhpet *vhpet, int n)
+{
+	const uint64_t cfg = vhpet->timer[n].cap_config;
+
+	return ((cfg & HPET_TCNF_TYPE) != 0);
+}
+
+/* True when HPET_TCNF_INT_ENB is set for timer 'n'. */
+static __inline bool
+vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
+{
+	const uint64_t cfg = vhpet->timer[n].cap_config;
+
+	return ((cfg & HPET_TCNF_INT_ENB) != 0);
+}
+
+/*
+ * True when timer 'n' delivers an edge triggered interrupt.  Must not be
+ * called for a timer that is using MSI.
+ */
+static __inline bool
+vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
+{
+
+	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
+	    "timer %d is using MSI", n));
+
+	/* The legacy replacement interrupts are always edge triggered */
+	if ((vhpet->config & HPET_CNF_LEG_RT) != 0 && (n == 0 || n == 1))
+		return (true);
+
+	/* Otherwise a clear HPET_TCNF_INT_TYPE bit selects edge mode. */
+	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0);
+}
+
+/*
+ * Deliver timer 'n's interrupt to the guest.
+ *
+ * Nothing happens when the timer's interrupt is disabled, when its level
+ * triggered interrupt is already pending in 'isr', or when it is neither
+ * using MSI nor routed to an ioapic pin.  MSI timers go through
+ * lapic_intr_msi(); all others are pulsed (edge) or asserted (level) on the
+ * ioapic and, when a legacy pin applies, on the atpic too.
+ */
+static void
+vhpet_timer_interrupt(struct vhpet *vhpet, int n)
+{
+	int ioapic_pin, atpic_pin;
+	uint64_t msi;
+
+	/* If interrupts are not enabled for this timer then just return. */
+	if (!vhpet_timer_interrupt_enabled(vhpet, n))
+		return;
+
+	/*
+	 * If a level triggered interrupt is already asserted then just return.
+	 */
+	if ((vhpet->isr & (1 << n)) != 0) {
+		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
+		return;
+	}
+
+	/* msireg holds the address in the upper and data in the lower half */
+	if (vhpet_timer_msi_enabled(vhpet, n)) {
+		msi = vhpet->timer[n].msireg;
+		lapic_intr_msi(vhpet->vm, msi >> 32, msi & 0xffffffff);
+		return;
+	}
+
+	ioapic_pin = vhpet_timer_ioapic_pin(vhpet, n);
+	if (ioapic_pin == 0) {
+		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
+		return;
+	}
+
+	atpic_pin = vhpet_timer_atpic_pin(vhpet, n);
+
+	if (!vhpet_timer_edge_trig(vhpet, n)) {
+		/* Level triggered: remember it until the guest clears it. */
+		vhpet->isr |= 1 << n;
+		vioapic_assert_irq(vhpet->vm, ioapic_pin);
+		if (atpic_pin != -1)
+			vatpic_assert_irq(vhpet->vm, atpic_pin);
+		return;
+	}
+
+	vioapic_pulse_irq(vhpet->vm, ioapic_pin);
+	if (atpic_pin != -1)
+		vatpic_pulse_irq(vhpet->vm, atpic_pin);
+}
+
+/*
+ * Advance the comparator of periodic timer 'n' to the first multiple of its
+ * period (counted from the current comparator value) that lies beyond
+ * 'counter'.  The timer must have a non-zero 'comprate'.
+ */
+static void
+vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
+{
+	uint32_t base, period, periods_elapsed;
+
+	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
+
+	base = vhpet->timer[n].compval;
+	period = vhpet->timer[n].comprate;
+
+	/*
+	 * This function is commonly called from the callout handler, at
+	 * which point 'counter' is ahead of the comparator.  Count how many
+	 * whole periods fit between the two, then step one period further so
+	 * that the new comparator value is "ahead" of 'counter'.  All
+	 * arithmetic is 32-bit unsigned.
+	 */
+	periods_elapsed = (counter - base) / period;
+
+	vhpet->timer[n].compval = base + (periods_elapsed + 1) * period;
+}
+
+/*
+ * Callout handler for an HPET timer.  'a' is the timer's
+ * struct vhpet_callout_arg, identifying the vhpet and the timer number.
+ *
+ * Under the vhpet lock the callout state is re-checked; if the callout was
+ * neither reset nor stopped in the meantime the timer is re-armed and its
+ * interrupt is delivered.
+ */
+static void
+vhpet_handler(void *a)
+{
+	int n;
+	uint32_t counter;
+	sbintime_t now;
+	struct vhpet *vhpet;
+	struct callout *callout;
+	struct vhpet_callout_arg *arg;
+
+	arg = a;
+	vhpet = arg->vhpet;
+	n = arg->timer_num;
+	callout = &vhpet->timer[n].callout;
+
+	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
+
+	VHPET_LOCK(vhpet);
+
+	if (callout_pending(callout))		/* callout was reset */
+		goto done;
+
+	if (!callout_active(callout))		/* callout was stopped */
+		goto done;
+
+	callout_deactivate(callout);
+
+	/* timers are stopped when the counter is disabled, so this is fatal */
+	if (!vhpet_counter_enabled(vhpet))
+		panic("vhpet(%p) callout with counter disabled", vhpet);
+
+	/* re-arm first, then deliver the interrupt for this expiry */
+	counter = vhpet_counter(vhpet, &now);
+	vhpet_start_timer(vhpet, n, counter, now);
+	vhpet_timer_interrupt(vhpet, n);
+done:
+	VHPET_UNLOCK(vhpet);
+	return;
+}
+
+/*
+ * Stop the callout of timer 'n'.  'now' is the uptime at which the main
+ * counter was sampled by the caller.
+ */
+static void
+vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
+{
+
+	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
+	callout_stop(&vhpet->timer[n].callout);
+
+	/* Expiry is not yet due: nothing was missed. */
+	if (vhpet->timer[n].callout_sbt >= now)
+		return;
+
+	/*
+	 * If the callout was scheduled to expire in the past but hasn't
+	 * had a chance to execute yet then trigger the timer interrupt
+	 * here. Failing to do so will result in a missed timer interrupt
+	 * in the guest. This is especially bad in one-shot mode because
+	 * the next interrupt has to wait for the counter to wrap around.
+	 */
+	VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
+	    "stopping timer", n);
+	vhpet_timer_interrupt(vhpet, n);
+}
+
+/*
+ * Arm the callout of timer 'n' so that it fires when the main counter
+ * reaches the timer's comparator value.
+ *
+ * 'counter' is the current main counter value and 'now' the uptime it
+ * corresponds to.  Nothing is scheduled when the timer's interrupt is
+ * disabled.
+ */
+static void
+vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
+{
+	sbintime_t delta, precision, expiry;
+
+	/* If interrupts are not enabled for this timer then just return. */
+	if (!vhpet_timer_interrupt_enabled(vhpet, n))
+		return;
+
+	/*
+	 * A periodic timer gets its comparator moved ahead of 'counter'.
+	 *
+	 * In one-shot mode it is the guest's responsibility to make
+	 * sure that the comparator value is not in the "past". The
+	 * hardware doesn't have any belt-and-suspenders to deal with
+	 * this so we don't either.
+	 */
+	if (vhpet->timer[n].comprate != 0)
+		vhpet_adjust_compval(vhpet, n, counter);
+
+	/* ticks until expiry (32-bit, wraps) scaled to sbintime units */
+	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
+	precision = delta >> tc_precexp;
+	expiry = now + delta;
+
+	vhpet->timer[n].callout_sbt = expiry;
+	callout_reset_sbt(&vhpet->timer[n].callout, expiry, precision,
+	    vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
+}
+
+/*
+ * Start the main counter running from 'countbase': record the current
+ * uptime as the base timestamp and (re)arm every timer against it.
+ */
+static void
+vhpet_start_counting(struct vhpet *vhpet)
+{
+	sbintime_t start;
+	int t;
+
+	start = sbinuptime();
+	vhpet->countbase_sbt = start;
+
+	/*
+	 * Restart the timers based on the value of the main counter
+	 * when it stopped counting.
+	 */
+	for (t = 0; t < VHPET_NUM_TIMERS; t++)
+		vhpet_start_timer(vhpet, t, vhpet->countbase, start);
+}
+
+/*
+ * Freeze the main counter at 'counter' and stop every timer.  'now' is the
+ * uptime at which 'counter' was sampled.
+ */
+static void
+vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
+{
+	int t = 0;
+
+	vhpet->countbase = counter;
+
+	while (t < VHPET_NUM_TIMERS) {
+		vhpet_stop_timer(vhpet, t, now);
+		t++;
+	}
+}
+
+/*
+ * Replace the bits of '*regptr' selected by 'mask' with the corresponding
+ * bits of 'data'; bits outside 'mask' are left untouched.
+ */
+static __inline void
+update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
+{
+	const uint64_t kept = *regptr & ~mask;
+	const uint64_t updated = data & mask;
+
+	*regptr = kept | updated;
+}
+
+/*
+ * Apply a guest write to timer 'n's Configuration and Capabilities register.
+ *
+ * 'data' and 'mask' describe the written bits, already positioned within
+ * the 64-bit register.  Bits in HPET_TCAP_RO_MASK keep their old value and
+ * HPET_TCNF_32MODE is always cleared.  If the resulting value differs from
+ * the old one the interrupt route is validated, 'comprate' is reset for a
+ * non-periodic timer, and a pending level triggered interrupt is withdrawn
+ * when the new configuration no longer matches the one it was raised under.
+ */
+static void
+vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
+    uint64_t mask)
+{
+	bool clear_isr;
+	int old_pin, new_pin;
+	uint32_t allowed_irqs;
+	uint64_t oldval, newval;
+
+	/* only a level triggered, non-MSI timer may have its isr bit set */
+	if (vhpet_timer_msi_enabled(vhpet, n) ||
+	    vhpet_timer_edge_trig(vhpet, n)) {
+		if (vhpet->isr & (1 << n))
+			panic("vhpet timer %d isr should not be asserted", n);
+	}
+	/* remember the routing in effect before the update */
+	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
+	oldval = vhpet->timer[n].cap_config;
+
+	newval = oldval;
+	update_register(&newval, data, mask);
+	/* read-only bits come from the old value; 32MODE stays clear */
+	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
+	newval |= oldval & HPET_TCAP_RO_MASK;
+
+	if (newval == oldval)
+		return;
+
+	vhpet->timer[n].cap_config = newval;
+	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
+
+	/*
+	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
+	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
+	 * it to the default value of 0.
+	 */
+	allowed_irqs = vhpet->timer[n].cap_config >> 32;
+	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
+	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
+		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
+		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
+		new_pin = 0;
+		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
+	}
+
+	/* a one-shot timer has no period */
+	if (!vhpet_periodic_timer(vhpet, n))
+		vhpet->timer[n].comprate = 0;
+
+	/*
+	 * If the timer's ISR bit is set then clear it in the following cases:
+	 * - interrupt is disabled
+	 * - interrupt type is changed from level to edge or fsb.
+	 * - interrupt routing is changed
+	 *
+	 * This is to ensure that this timer's level triggered interrupt does
+	 * not remain asserted forever.
+	 */
+	if (vhpet->isr & (1 << n)) {
+		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
+		    n, old_pin));
+		if (!vhpet_timer_interrupt_enabled(vhpet, n))
+			clear_isr = true;
+		else if (vhpet_timer_msi_enabled(vhpet, n))
+			clear_isr = true;
+		else if (vhpet_timer_edge_trig(vhpet, n))
+			clear_isr = true;
+		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
+			clear_isr = true;
+		else
+			clear_isr = false;
+
+		if (clear_isr) {
+			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
+			    "configuration change", n);
+			/* deassert on the pin the interrupt was raised on */
+			vioapic_deassert_irq(vhpet->vm, old_pin);
+			vhpet->isr &= ~(1 << n);
+		}
+	}
+}
+
+/*
+ * MMIO write handler for the virtual HPET.
+ *
+ * 'gpa' is the guest physical address written, 'val' the value and 'size'
+ * the access width in bytes.  Only naturally aligned 4- and 8-byte accesses
+ * are honoured; anything else is traced and ignored.  The register is
+ * selected from the offset of 'gpa' relative to VHPET_BASE.  'vcpuid' and
+ * 'arg' are not used.
+ *
+ * Always returns 0.
+ */
+int
+vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
+    void *arg)
+{
+	struct vhpet *vhpet;
+	uint64_t data, mask, oldval, val64;
+	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
+	sbintime_t now, *nowptr;
+	int i, offset;
+
+	vhpet = vm_hpet(vm);
+	offset = gpa - VHPET_BASE;
+
+	VHPET_LOCK(vhpet);
+
+	/* Accesses to the HPET should be 4 or 8 bytes wide */
+	switch (size) {
+	case 8:
+		mask = 0xffffffffffffffff;
+		data = val;
+		break;
+	case 4:
+		mask = 0xffffffff;
+		data = val;
+		/* a write to the upper half targets bits 63:32 */
+		if ((offset & 0x4) != 0) {
+			mask <<= 32;
+			data <<= 32;
+		} 
+		break;
+	default:
+		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
+		    "offset 0x%08x, size %d", offset, size);
+		goto done;
+	}
+
+	/* Access to the HPET should be naturally aligned to its width */
+	if (offset & (size - 1)) {
+		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
+		    "offset 0x%08x, size %d", offset, size);
+		goto done;
+	}
+
+	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
+		/*
+		 * Get the most recent value of the counter before updating
+		 * the 'config' register. If the HPET is going to be disabled
+		 * then we need to update 'countbase' with the value right
+		 * before it is disabled.
+		 */
+		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
+		counter = vhpet_counter(vhpet, nowptr);
+		oldval = vhpet->config;
+		update_register(&vhpet->config, data, mask);
+		/* act only when the enable bit actually toggled */
+		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
+			if (vhpet_counter_enabled(vhpet)) {
+				vhpet_start_counting(vhpet);
+				VM_CTR0(vhpet->vm, "hpet enabled");
+			} else {
+				/* counter was enabled above, so 'now' is set */
+				vhpet_stop_counting(vhpet, counter, now);
+				VM_CTR0(vhpet->vm, "hpet disabled");
+			}
+		}
+		goto done;
+	}
+
+	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
+		/* only bits that are both set and written as 1 are cleared */
+		isr_clear_mask = vhpet->isr & data;
+		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
+			if ((isr_clear_mask & (1 << i)) != 0) {
+				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
+				vhpet_timer_clear_isr(vhpet, i);
+			}
+		}
+		goto done;
+	}
+
+	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
+		/* Zero-extend the counter to 64-bits before updating it */
+		val64 = vhpet_counter(vhpet, NULL);
+		update_register(&val64, data, mask);
+		/* countbase is 32 bits wide; the upper half is dropped */
+		vhpet->countbase = val64;
+		if (vhpet_counter_enabled(vhpet))
+			vhpet_start_counting(vhpet);
+		goto done;
+	}
+
+	/* remaining registers are per-timer; find the one addressed */
+	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
+		if (offset == HPET_TIMER_CAP_CNF(i) ||
+		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
+			vhpet_timer_update_config(vhpet, i, data, mask);
+			break;
+		}
+
+		if (offset == HPET_TIMER_COMPARATOR(i) ||
+		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
+			old_compval = vhpet->timer[i].compval;
+			old_comprate = vhpet->timer[i].comprate;
+			if (vhpet_periodic_timer(vhpet, i)) {
+				/*
+				 * In periodic mode writes to the comparator
+				 * change the 'compval' register only if the
+				 * HPET_TCNF_VAL_SET bit is set in the config
+				 * register.
+				 */
+				val64 = vhpet->timer[i].comprate;
+				update_register(&val64, data, mask);
+				vhpet->timer[i].comprate = val64;
+				if ((vhpet->timer[i].cap_config &
+				    HPET_TCNF_VAL_SET) != 0) {
+					vhpet->timer[i].compval = val64;
+				}
+			} else {
+				KASSERT(vhpet->timer[i].comprate == 0,
+				    ("vhpet one-shot timer %d has invalid "
+				    "rate %u", i, vhpet->timer[i].comprate));
+				val64 = vhpet->timer[i].compval;
+				update_register(&val64, data, mask);
+				vhpet->timer[i].compval = val64;
+			}
+			/* VAL_SET applies to a single comparator write */
+			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
+
+			/* re-arm only if something changed and we're counting */
+			if (vhpet->timer[i].compval != old_compval ||
+			    vhpet->timer[i].comprate != old_comprate) {
+				if (vhpet_counter_enabled(vhpet)) {
+					counter = vhpet_counter(vhpet, &now);
+					vhpet_start_timer(vhpet, i, counter,
+					    now);
+				}
+			}
+			break;
+		}
+
+		if (offset == HPET_TIMER_FSB_VAL(i) ||
+		    offset == HPET_TIMER_FSB_ADDR(i)) {
+			update_register(&vhpet->timer[i].msireg, data, mask);
+			break;
+		}
+	}
+done:
+	VHPET_UNLOCK(vhpet);
+	return (0);
+}
+
+/*
+ * MMIO read handler for the virtual HPET.
+ *
+ * 'gpa' is the guest physical address read and 'size' the access width in
+ * bytes.  Only naturally aligned 4- and 8-byte accesses are honoured;
+ * anything else is traced and reads as 0, as does any offset that does not
+ * correspond to an emulated register.  For a 4-byte read of the upper half
+ * of a register the value is shifted down into the low 32 bits.  'vcpuid'
+ * and 'arg' are not used.
+ *
+ * The result is stored in '*rval'.  Always returns 0.
+ */
+int
+vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
+    void *arg)
+{
+	struct vhpet *vhpet;
+	uint64_t data;
+	int offset, t;
+
+	vhpet = vm_hpet(vm);
+	offset = gpa - VHPET_BASE;
+	data = 0;
+
+	VHPET_LOCK(vhpet);
+
+	if ((size != 4 && size != 8) || (offset & (size - 1)) != 0) {
+		/* wrong width, or not naturally aligned to its width */
+		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
+		    "offset 0x%08x, size %d", offset, size);
+	} else if (offset == HPET_CAPABILITIES ||
+	    offset == HPET_CAPABILITIES + 4) {
+		data = vhpet_capabilities();
+	} else if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
+		data = vhpet->config;
+	} else if (offset == HPET_ISR || offset == HPET_ISR + 4) {
+		data = vhpet->isr;
+	} else if (offset == HPET_MAIN_COUNTER ||
+	    offset == HPET_MAIN_COUNTER + 4) {
+		data = vhpet_counter(vhpet, NULL);
+	} else {
+		/* per-timer registers; 'data' stays 0 if nothing matches */
+		for (t = 0; t < VHPET_NUM_TIMERS; t++) {
+			if (offset == HPET_TIMER_CAP_CNF(t) ||
+			    offset == HPET_TIMER_CAP_CNF(t) + 4) {
+				data = vhpet->timer[t].cap_config;
+				break;
+			}
+
+			if (offset == HPET_TIMER_COMPARATOR(t) ||
+			    offset == HPET_TIMER_COMPARATOR(t) + 4) {
+				data = vhpet->timer[t].compval;
+				break;
+			}
+
+			if (offset == HPET_TIMER_FSB_VAL(t) ||
+			    offset == HPET_TIMER_FSB_ADDR(t)) {
+				data = vhpet->timer[t].msireg;
+				break;
+			}
+		}
+	}
+
+	VHPET_UNLOCK(vhpet);
+
+	/* a 32-bit read of the upper half returns bits 63:32 */
+	if (size == 4 && (offset & 0x4) != 0)
+		data >>= 32;
+
+	*rval = data;
+	return (0);
+}
+
+/*
+ * Allocate and initialize the virtual HPET state for 'vm'.
+ *
+ * Every timer starts with its comparator at 0xffffffff, periodic and FSB
+ * delivery advertised as capabilities, and - when the virtual ioapic has at
+ * least 24 pins - irqs 20 through 23 advertised as valid interrupt routes.
+ *
+ * Returns the new vhpet; the allocation uses M_WAITOK.
+ */
+struct vhpet *
+vhpet_init(struct vm *vm)
+{
+	struct vhpet *hpet;
+	struct bintime tick;
+	uint64_t allowed_irqs;
+	int t;
+
+	hpet = malloc(sizeof(*hpet), M_VHPET, M_WAITOK | M_ZERO);
+	hpet->vm = vm;
+	mtx_init(&hpet->mtx, "vhpet lock", NULL, MTX_DEF);
+
+	FREQ2BT(HPET_FREQ, &tick);
+	hpet->freq_sbt = bttosbt(tick);
+
+	/* irqs 20, 21, 22 and 23, if the ioapic has that many pins */
+	allowed_irqs = (vioapic_pincount(vm) >= 24) ? 0x00f00000 : 0;
+
+	/*
+	 * Initialize HPET timer hardware state.
+	 */
+	for (t = 0; t < VHPET_NUM_TIMERS; t++) {
+		hpet->timer[t].cap_config = (allowed_irqs << 32) |
+		    HPET_TCAP_PER_INT | HPET_TCAP_FSB_INT_DEL;
+
+		hpet->timer[t].compval = 0xffffffff;
+		callout_init(&hpet->timer[t].callout, 1);
+
+		hpet->timer[t].arg.vhpet = hpet;
+		hpet->timer[t].arg.timer_num = t;
+	}
+
+	return (hpet);
+}
+
+/*
+ * Tear down a vhpet created by vhpet_init().
+ *
+ * Each timer's callout is drained first so that no handler can still be
+ * running (or about to run) against the structure.  The lock created by
+ * mtx_init() is then destroyed before the memory is released; previously it
+ * was simply abandoned along with the allocation.
+ */
+void
+vhpet_cleanup(struct vhpet *vhpet)
+{
+	int i;
+
+	for (i = 0; i < VHPET_NUM_TIMERS; i++)
+		callout_drain(&vhpet->timer[i].callout);
+
+	mtx_destroy(&vhpet->mtx);
+	free(vhpet, M_VHPET);
+}
+
+/*
+ * Report the HPET capabilities register value through 'cap'.
+ * Always returns 0.
+ */
+int
+vhpet_getcap(struct vm_hpet_cap *cap)
+{
+	const uint64_t caps = vhpet_capabilities();
+
+	cap->capabilities = caps;
+	return (0);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.h b/usr/src/uts/i86pc/io/vmm/io/vhpet.h
new file mode 100644
index 0000000000..868809d166
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vhpet.h 258579 2013-11-25 19:04:51Z neel $
+ */
+
+#ifndef _VHPET_H_
+#define	_VHPET_H_
+
+/*
+ * Base address and size of the virtual HPET register window.
+ * NOTE(review): presumably a guest-physical MMIO range; confirm against
+ * the code that registers the mmio handlers.
+ */
+#define	VHPET_BASE	0xfed00000
+#define	VHPET_SIZE	1024
+
+/* Lifecycle: allocate/initialize and drain/free a virtual HPET. */
+struct vhpet *vhpet_init(struct vm *vm);
+void 	vhpet_cleanup(struct vhpet *vhpet);
+/* MMIO access handlers; 'gpa' is the accessed address, 'size' its width. */
+int	vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
+	    int size, void *arg);
+int	vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
+	    int size, void *arg);
+/* Report the virtual HPET capabilities in 'cap'. */
+int	vhpet_getcap(struct vm_hpet_cap *cap);
+
+#endif	/* _VHPET_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vioapic.c b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
new file mode 100644
index 0000000000..5adf5de16d
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vioapic.c
@@ -0,0 +1,514 @@
+/*-
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vioapic.c 262139 2014-02-17 22:57:51Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vioapic.c 262139 2014-02-17 22:57:51Z neel $");
+
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <x86/apicreg.h>
+#include <machine/vmm.h>
+
+#include "vmm_ktr.h"
+#include "vmm_lapic.h"
+#include "vlapic.h"
+#include "vioapic.h"
+
+/* Offsets of the two directly accessible registers in the MMIO window. */
+#define	IOREGSEL	0x00
+#define	IOWIN		0x10
+
+/* Number of redirection table entries (pins) in the virtual ioapic. */
+#define	REDIR_ENTRIES	24
+/* Redirection entry bits that guest writes are not allowed to change. */
+#define	RTBL_RO_BITS	((uint64_t)(IOART_REM_IRR | IOART_DELIVS))
+
+/*
+ * State of the virtual I/O APIC of one virtual machine.
+ */
+struct vioapic {
+	struct vm	*vm;		/* owning virtual machine */
+	struct mtx	mtx;		/* spin mutex, see VIOAPIC_LOCK() */
+	uint32_t	id;		/* value of the IOAPIC_ID register */
+	uint32_t	ioregsel;	/* last value written to IOREGSEL */
+	struct {
+		uint64_t reg;	/* 64-bit redirection table entry */
+		int	 acnt;	/* sum of pin asserts (+1) and deasserts (-1) */
+	} rtbl[REDIR_ENTRIES];
+};
+
+/* Acquire, release and test ownership of the per-vioapic spin mutex. */
+#define	VIOAPIC_LOCK(vioapic)		mtx_lock_spin(&((vioapic)->mtx))
+#define	VIOAPIC_UNLOCK(vioapic)		mtx_unlock_spin(&((vioapic)->mtx))
+#define	VIOAPIC_LOCKED(vioapic)		mtx_owned(&((vioapic)->mtx))
+
+static MALLOC_DEFINE(M_VIOAPIC, "vioapic", "bhyve virtual ioapic");
+
+/*
+ * Trace helpers: forward to VM_CTRn() using the vm that owns the vioapic.
+ * The digit is the number of format arguments.
+ */
+#define	VIOAPIC_CTR1(vioapic, fmt, a1)					\
+	VM_CTR1((vioapic)->vm, fmt, a1)
+
+#define	VIOAPIC_CTR2(vioapic, fmt, a1, a2)				\
+	VM_CTR2((vioapic)->vm, fmt, a1, a2)
+
+#define	VIOAPIC_CTR3(vioapic, fmt, a1, a2, a3)				\
+	VM_CTR3((vioapic)->vm, fmt, a1, a2, a3)
+
+#define	VIOAPIC_CTR4(vioapic, fmt, a1, a2, a3, a4)			\
+	VM_CTR4((vioapic)->vm, fmt, a1, a2, a3, a4)
+
+#ifdef KTR
+/* Return the word used in trace messages for the given pin state. */
+static const char *
+pinstate_str(bool asserted)
+{
+
+	return (asserted ? "asserted" : "deasserted");
+}
+#endif
+
+/*
+ * Deliver the interrupt programmed into redirection table entry 'pin'.
+ *
+ * Nothing is delivered while the entry is masked.  For a level-triggered
+ * entry the Remote IRR bit is set in the entry before the interrupt is
+ * handed to vlapic_deliver_intr().
+ *
+ * The caller must hold the vioapic lock and 'pin' must be a valid
+ * redirection table index; both conditions are checked with KASSERT.
+ */
+static void
+vioapic_send_intr(struct vioapic *vioapic, int pin)
+{
+	int vector, delmode;
+	uint32_t low, high, dest;
+	bool level, phys;
+
+	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
+	    ("vioapic_send_intr: invalid pin number %d", pin));
+
+	KASSERT(VIOAPIC_LOCKED(vioapic),
+	    ("vioapic_send_intr: vioapic is not locked"));
+
+	/* Split the 64-bit entry into its low and high 32-bit halves. */
+	low = vioapic->rtbl[pin].reg;
+	high = vioapic->rtbl[pin].reg >> 32;
+
+	if ((low & IOART_INTMASK) == IOART_INTMSET) {
+		VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin);
+		return;
+	}
+
+	phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
+	delmode = low & IOART_DELMOD;
+	level = low & IOART_TRGRLVL ? true : false;
+	/* Remote IRR stays set until vioapic_process_eoi() clears it. */
+	if (level)
+		vioapic->rtbl[pin].reg |= IOART_REM_IRR;
+
+	vector = low & IOART_INTVEC;
+	dest = high >> APIC_ID_SHIFT;
+	vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
+}
+
+/*
+ * Record an assertion (newstate true) or deassertion (newstate false) of
+ * 'pin' in its assert count and send an interrupt when the count goes
+ * from 0 to 1.  Every other transition is only traced.
+ *
+ * The caller must hold the vioapic lock.
+ */
+static void
+vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate)
+{
+	int before, after;
+
+	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
+	    ("vioapic_set_pinstate: invalid pin number %d", pin));
+
+	KASSERT(VIOAPIC_LOCKED(vioapic),
+	    ("vioapic_set_pinstate: vioapic is not locked"));
+
+	before = vioapic->rtbl[pin].acnt;
+	after = newstate ? before + 1 : before - 1;
+	vioapic->rtbl[pin].acnt = after;
+
+	/* A negative count is traced but otherwise tolerated. */
+	if (after < 0) {
+		VIOAPIC_CTR2(vioapic, "ioapic pin%d: bad acnt %d",
+		    pin, after);
+	}
+
+	if (before == 0 && after == 1) {
+		VIOAPIC_CTR1(vioapic, "ioapic pin%d: asserted", pin);
+		vioapic_send_intr(vioapic, pin);
+		return;
+	}
+
+	if (before == 1 && after == 0) {
+		VIOAPIC_CTR1(vioapic, "ioapic pin%d: deasserted", pin);
+	} else {
+		VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s, ignored, acnt %d",
+		    pin, pinstate_str(newstate), after);
+	}
+}
+
+/* Pin operations understood by vioapic_set_irqstate(). */
+enum irqstate {
+	IRQSTATE_ASSERT,	/* assert the pin */
+	IRQSTATE_DEASSERT,	/* deassert the pin */
+	IRQSTATE_PULSE		/* assert, then immediately deassert */
+};
+
+/*
+ * Apply 'irqstate' to pin 'irq' of the vm's ioapic under the vioapic lock.
+ *
+ * Returns EINVAL when 'irq' is not a valid pin number and 0 otherwise.
+ * An unknown 'irqstate' panics.
+ */
+static int
+vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate)
+{
+	struct vioapic *vioapic;
+
+	if (!(irq >= 0 && irq < REDIR_ENTRIES))
+		return (EINVAL);
+
+	vioapic = vm_ioapic(vm);
+
+	VIOAPIC_LOCK(vioapic);
+	if (irqstate != IRQSTATE_ASSERT && irqstate != IRQSTATE_DEASSERT &&
+	    irqstate != IRQSTATE_PULSE)
+		panic("vioapic_set_irqstate: invalid irqstate %d", irqstate);
+
+	/* ASSERT and PULSE raise the pin; DEASSERT and PULSE lower it. */
+	if (irqstate != IRQSTATE_DEASSERT)
+		vioapic_set_pinstate(vioapic, irq, true);
+	if (irqstate != IRQSTATE_ASSERT)
+		vioapic_set_pinstate(vioapic, irq, false);
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+/* Assert ioapic pin 'irq' of 'vm'; returns 0 or EINVAL for a bad pin. */
+int
+vioapic_assert_irq(struct vm *vm, int irq)
+{
+	int error;
+
+	error = vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT);
+	return (error);
+}
+
+/* Deassert ioapic pin 'irq' of 'vm'; returns 0 or EINVAL for a bad pin. */
+int
+vioapic_deassert_irq(struct vm *vm, int irq)
+{
+	int error;
+
+	error = vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT);
+	return (error);
+}
+
+/*
+ * Assert and immediately deassert ioapic pin 'irq' of 'vm'; returns 0 or
+ * EINVAL for a bad pin.
+ */
+int
+vioapic_pulse_irq(struct vm *vm, int irq)
+{
+	int error;
+
+	error = vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE);
+	return (error);
+}
+
+/*
+ * Rebuild the trigger-mode register of the vlapic belonging to 'vcpuid'
+ * from the current ioapic pin configuration.  'arg' is unused.
+ */
+static void
+vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg)
+{
+	struct vlapic *vlapic;
+	struct vioapic *vioapic;
+	uint32_t lo, hi, dest;
+	int pin, delmode, vector;
+	bool phys;
+
+	vlapic = vm_lapic(vm, vcpuid);
+	vioapic = vm_ioapic(vm);
+
+	VIOAPIC_LOCK(vioapic);
+
+	/* Start from a state where every vector is edge-triggered. */
+	vlapic_reset_tmr(vlapic);
+
+	for (pin = 0; pin < REDIR_ENTRIES; pin++) {
+		lo = vioapic->rtbl[pin].reg;
+		hi = vioapic->rtbl[pin].reg >> 32;
+
+		/*
+		 * Only level-triggered pins matter.  For those the vlapic
+		 * decides whether an assertion of the pin would be delivered
+		 * to it and, if so, marks the vector level-triggered in its
+		 * TMR.
+		 */
+		if ((lo & IOART_TRGRLVL) != 0) {
+			phys = ((lo & IOART_DESTMOD) == IOART_DESTPHY);
+			delmode = lo & IOART_DELMOD;
+			vector = lo & IOART_INTVEC;
+			dest = hi >> APIC_ID_SHIFT;
+			vlapic_set_tmr_level(vlapic, dest, phys, delmode,
+			    vector);
+		}
+	}
+
+	VIOAPIC_UNLOCK(vioapic);
+}
+
+/*
+ * Return the value of the indirect ioapic register selected by the low
+ * byte of 'addr'.  Registers that are not implemented read as 0.
+ * 'vcpuid' is unused.
+ */
+static uint32_t
+vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr)
+{
+	int regnum, slot;
+
+	regnum = addr & 0xff;
+
+	/* The ID and arbitration registers both report the ioapic id. */
+	if (regnum == IOAPIC_ID || regnum == IOAPIC_ARB)
+		return (vioapic->id);
+
+	if (regnum == IOAPIC_VER)
+		return (((REDIR_ENTRIES - 1) << MAXREDIRSHIFT) | 0x11);
+
+	/* redirection table entries */
+	slot = regnum - IOAPIC_REDTBL;
+	if (slot < 0 || slot >= REDIR_ENTRIES * 2)
+		return (0);
+
+	/* Even slots are the low 32 bits of an entry, odd slots the high. */
+	return (vioapic->rtbl[slot / 2].reg >> ((slot % 2) ? 32 : 0));
+}
+
+/*
+ * Write 'data' to the indirect ioapic register selected by the low byte
+ * of 'addr'.
+ *
+ * Only the ID register and the redirection table are writable.  A write
+ * to a redirection table entry updates one 32-bit half of the entry while
+ * preserving the RTBL_RO_BITS, and may immediately send an interrupt for
+ * the pin.
+ *
+ * Called with the vioapic lock held (see vioapic_mmio_rw()); the lock is
+ * dropped and re-acquired around the trigger-mode update.
+ */
+static void
+vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data)
+{
+	uint64_t data64, mask64;
+	uint64_t last, changed;
+	int regnum, pin, lshift;
+	/* NOTE(review): only referenced inside the '#if 0' region below. */
+	cpuset_t allvcpus;
+
+	regnum = addr & 0xff;
+	switch (regnum) {
+	case IOAPIC_ID:
+		vioapic->id = data & APIC_ID_MASK;
+		break;
+	case IOAPIC_VER:
+	case IOAPIC_ARB:
+		/* readonly */
+		break;
+	default:
+		break;
+	}
+
+	/* redirection table entries */
+	if (regnum >= IOAPIC_REDTBL &&
+	    regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) {
+		/* Two 32-bit registers per pin: even is low, odd is high. */
+		pin = (regnum - IOAPIC_REDTBL) / 2;
+		if ((regnum - IOAPIC_REDTBL) % 2)
+			lshift = 32;
+		else
+			lshift = 0;
+
+		last = vioapic->rtbl[pin].reg;
+
+		/*
+		 * Clear the half being written except for the read-only
+		 * bits, then merge in the new data with the read-only bits
+		 * stripped.
+		 */
+		data64 = (uint64_t)data << lshift;
+		mask64 = (uint64_t)0xffffffff << lshift;
+		vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS;
+		vioapic->rtbl[pin].reg |= data64 & ~RTBL_RO_BITS;
+
+		VIOAPIC_CTR2(vioapic, "ioapic pin%d: redir table entry %#lx",
+		    pin, vioapic->rtbl[pin].reg);
+
+		/*
+		 * If any fields in the redirection table entry (except mask
+		 * or polarity) have changed then rendezvous all the vcpus
+		 * to update their vlapic trigger-mode registers.
+		 */
+		changed = last ^ vioapic->rtbl[pin].reg;
+		if (changed & ~(IOART_INTMASK | IOART_INTPOL)) {
+			VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate "
+			    "vlapic trigger-mode register", pin);
+			VIOAPIC_UNLOCK(vioapic);
+			/*
+			 * NOTE(review): the rendezvous is compiled out, so
+			 * the trigger-mode registers are not refreshed here.
+			 */
+#if 0	/* XXX */
+			allvcpus = vm_active_cpus(vioapic->vm);
+			vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus,
+			    vioapic_update_tmr, NULL);
+#endif
+			VIOAPIC_LOCK(vioapic);
+		}
+
+		/*
+		 * Generate an interrupt if the following conditions are met:
+		 * - pin is not masked
+		 * - previous interrupt has been EOIed
+		 * - pin level is asserted
+		 */
+		if ((vioapic->rtbl[pin].reg & IOART_INTMASK) == IOART_INTMCLR &&
+		    (vioapic->rtbl[pin].reg & IOART_REM_IRR) == 0 &&
+		    (vioapic->rtbl[pin].acnt > 0)) {
+			VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted at rtbl "
+			    "write, acnt %d", pin, vioapic->rtbl[pin].acnt);
+			vioapic_send_intr(vioapic, pin);
+		}
+	}
+}
+
+/*
+ * Common handler for guest MMIO reads and writes of the ioapic window.
+ *
+ * 'gpa' is the accessed address, 'size' the access width in bytes and
+ * 'data' points at the value to store (write) or to fill in (read).
+ * Unsupported accesses are ignored; an ignored read yields 0.
+ * Always returns 0.
+ */
+static int
+vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa,
+    uint64_t *data, int size, bool doread)
+{
+	uint64_t offset;
+	bool supported;
+
+	offset = gpa - VIOAPIC_BASE;
+
+	/*
+	 * The IOAPIC specification allows 32-bit wide accesses to the
+	 * IOREGSEL (offset 0) and IOWIN (offset 16) registers.
+	 */
+	supported = (size == 4) && (offset == IOREGSEL || offset == IOWIN);
+	if (!supported) {
+		if (doread)
+			*data = 0;
+		return (0);
+	}
+
+	VIOAPIC_LOCK(vioapic);
+	if (doread) {
+		if (offset == IOREGSEL) {
+			*data = vioapic->ioregsel;
+		} else {
+			*data = vioapic_read(vioapic, vcpuid,
+			    vioapic->ioregsel);
+		}
+	} else {
+		if (offset == IOREGSEL) {
+			vioapic->ioregsel = *data;
+		} else {
+			vioapic_write(vioapic, vcpuid, vioapic->ioregsel,
+			    *data);
+		}
+	}
+	VIOAPIC_UNLOCK(vioapic);
+
+	return (0);
+}
+
+/*
+ * MMIO read handler for the ioapic window: looks up the vm's vioapic and
+ * forwards to vioapic_mmio_rw().  'arg' is unused.
+ */
+int
+vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval,
+    int size, void *arg)
+{
+	struct vioapic *vioapic = vm_ioapic(vm);
+
+	return (vioapic_mmio_rw(vioapic, vcpuid, gpa, rval, size, true));
+}
+
+/*
+ * MMIO write handler for the ioapic window: looks up the vm's vioapic and
+ * forwards 'wval' to vioapic_mmio_rw().  'arg' is unused.
+ */
+int
+vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval,
+    int size, void *arg)
+{
+	struct vioapic *vioapic = vm_ioapic(vm);
+
+	return (vioapic_mmio_rw(vioapic, vcpuid, gpa, &wval, size, false));
+}
+
+/*
+ * Handle an EOI for 'vector': clear Remote IRR on every pin that has it
+ * set and is programmed with that vector, and send the interrupt again
+ * for any such pin that is still asserted.
+ */
+void
+vioapic_process_eoi(struct vm *vm, int vcpuid, int vector)
+{
+	struct vioapic *vioapic;
+	uint64_t *regp;
+	int pin;
+
+	KASSERT(vector >= 0 && vector < 256,
+	    ("vioapic_process_eoi: invalid vector %d", vector));
+
+	vioapic = vm_ioapic(vm);
+	VIOAPIC_CTR1(vioapic, "ioapic processing eoi for vector %d", vector);
+
+	/*
+	 * XXX keep track of the pins associated with this vector instead
+	 * of iterating on every single pin each time.
+	 */
+	VIOAPIC_LOCK(vioapic);
+	for (pin = 0; pin < REDIR_ENTRIES; pin++) {
+		regp = &vioapic->rtbl[pin].reg;
+		if ((*regp & IOART_REM_IRR) != 0 &&
+		    (*regp & IOART_INTVEC) == vector) {
+			*regp &= ~IOART_REM_IRR;
+			if (vioapic->rtbl[pin].acnt > 0) {
+				VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted "
+				    "at eoi, acnt %d", pin,
+				    vioapic->rtbl[pin].acnt);
+				vioapic_send_intr(vioapic, pin);
+			}
+		}
+	}
+	VIOAPIC_UNLOCK(vioapic);
+}
+
+/*
+ * Allocate the virtual ioapic for 'vm'.  The structure is zero-filled,
+ * its spin mutex is initialized and every redirection table entry is
+ * given the same initial value.
+ */
+struct vioapic *
+vioapic_init(struct vm *vm)
+{
+	struct vioapic *vioapic;
+	int pin;
+
+	vioapic = malloc(sizeof (*vioapic), M_VIOAPIC, M_WAITOK | M_ZERO);
+	mtx_init(&vioapic->mtx, "vioapic lock", NULL, MTX_SPIN);
+	vioapic->vm = vm;
+
+	/* Initialize all redirection entries to mask all interrupts */
+	pin = 0;
+	while (pin < REDIR_ENTRIES) {
+		vioapic->rtbl[pin].reg = 0x0001000000010000UL;
+		pin++;
+	}
+
+	return (vioapic);
+}
+
+/* Free a virtual ioapic allocated by vioapic_init(). */
+void
+vioapic_cleanup(struct vioapic *vioapic)
+{
+
+	free(vioapic, M_VIOAPIC);
+}
+
+/*
+ * Return the number of ioapic pins.  The count is the compile-time
+ * constant REDIR_ENTRIES; 'vm' is not consulted.
+ */
+int
+vioapic_pincount(struct vm *vm)
+{
+
+	return (REDIR_ENTRIES);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vioapic.h b/usr/src/uts/i86pc/io/vmm/io/vioapic.h
new file mode 100644
index 0000000000..9479ebb10e
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vioapic.h
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vioapic.h 258699 2013-11-27 22:18:08Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _VIOAPIC_H_
+#define	_VIOAPIC_H_
+
+/*
+ * Base guest-physical address and size of the virtual ioapic register
+ * window.
+ */
+#define	VIOAPIC_BASE	0xFEC00000
+#define	VIOAPIC_SIZE	4096
+
+#include "vdev.h"
+
+struct vm;
+
+/* Lifecycle: allocate/initialize and free a virtual ioapic. */
+struct vioapic *vioapic_init(struct vm *vm);
+void	vioapic_cleanup(struct vioapic *vioapic);
+
+/* Pin manipulation; each returns 0, or EINVAL for an invalid 'irq'. */
+int	vioapic_assert_irq(struct vm *vm, int irq);
+int	vioapic_deassert_irq(struct vm *vm, int irq);
+int	vioapic_pulse_irq(struct vm *vm, int irq);
+
+/* MMIO access handlers for the register window. */
+int	vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa,
+	    uint64_t wval, int size, void *arg);
+int	vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa,
+	    uint64_t *rval, int size, void *arg);
+
+/* Number of pins, and EOI notification for 'vector'. */
+int	vioapic_pincount(struct vm *vm);
+void	vioapic_process_eoi(struct vm *vm, int vcpuid, int vector);
+#endif	/* _VIOAPIC_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
new file mode 100644
index 0000000000..9a0a3058ea
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -0,0 +1,1687 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vlapic.c 273375 2014-10-21 07:10:43Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/vlapic.c 273375 2014-10-21 07:10:43Z neel $");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+
+#include <x86/specialreg.h>
+#include <x86/apicreg.h>
+
+#include <machine/clock.h>
+#include <machine/smp.h>
+
+#include <machine/vmm.h>
+
+#include "vmm_ipi.h"
+#include "vmm_lapic.h"
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+
+#include "vlapic.h"
+#include "vlapic_priv.h"
+#include "vioapic.h"
+
+/* Priority class of a vector or priority register: its upper four bits. */
+#define	PRIO(x)			((x) >> 4)
+
+#define VLAPIC_VERSION		(16)
+
+/* Evaluates to 1 when APICBASE_X2APIC is set in msr_apicbase, else 0. */
+#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
+
+/*
+ * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
+ * vlapic_callout_handler() and vcpu accesses to:
+ * - timer_freq_bt, timer_period_bt, timer_fire_bt
+ * - timer LVT register
+ */
+#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))
+
+/* Frequency from which the virtual timer frequency is derived. */
+#define VLAPIC_BUS_FREQ	tsc_freq
+
+/*
+ * Return the APIC ID register value for this vlapic: the vcpu id itself
+ * in x2apic mode, otherwise the vcpu id shifted into bits 24 and up.
+ */
+static __inline uint32_t
+vlapic_get_id(struct vlapic *vlapic)
+{
+
+	return (x2apic(vlapic) ? vlapic->vcpuid : vlapic->vcpuid << 24);
+}
+
+/*
+ * Derive the logical destination register value from the APIC id: a
+ * single bit selected by the low four id bits, combined with the
+ * remaining id bits shifted left by twelve.
+ */
+static uint32_t
+x2apic_ldr(struct vlapic *vlapic)
+{
+	int apicid;
+	uint32_t member, cluster;
+
+	apicid = vlapic_get_id(vlapic);
+	member = 1 << (apicid & 0xf);
+	cluster = (apicid & 0xffff0) << 12;
+	return (member | cluster);
+}
+
+/*
+ * Post-process a guest write to the destination format register.
+ *
+ * In x2apic mode the write is discarded and the register is forced to 0.
+ * Otherwise only the model field is kept, the reserved bits are set, and
+ * the resulting model is traced.
+ */
+void
+vlapic_dfr_write_handler(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+
+	lapic = vlapic->apic_page;
+	if (x2apic(vlapic)) {
+		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
+		    lapic->dfr);
+		lapic->dfr = 0;
+		return;
+	}
+
+	/* Keep the model bits and force the reserved bits on. */
+	lapic->dfr &= APIC_DFR_MODEL_MASK;
+	lapic->dfr |= APIC_DFR_RESERVED;
+
+	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
+	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
+	else
+		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
+}
+
+/*
+ * Post-process a guest write to the logical destination register: clear
+ * the reserved bits in xAPIC mode, or restore the derived value in
+ * x2apic mode.
+ */
+void
+vlapic_ldr_write_handler(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	if (!x2apic(vlapic)) {
+		lapic->ldr &= ~APIC_LDR_RESERVED;
+		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
+		return;
+	}
+
+	/* LDR is read-only in x2apic mode */
+	VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
+	    lapic->ldr);
+	lapic->ldr = x2apic_ldr(vlapic);
+}
+
+/*
+ * Post-process a guest write to the ID register.  The register may not
+ * be modified, so it is simply reset to its default value.
+ */
+void
+vlapic_id_write_handler(struct vlapic *vlapic)
+{
+
+	vlapic->apic_page->id = vlapic_get_id(vlapic);
+}
+
+/*
+ * Translate the divide configuration register value 'dcr' into the timer
+ * divisor it selects (1, 2, 4, ..., 128).
+ *
+ * Only bits 0, 1 and 3 of 'dcr' are examined.  A value matching none of
+ * the APIC_TDCR_* encodings panics.
+ */
+static int
+vlapic_timer_divisor(uint32_t dcr)
+{
+	switch (dcr & 0xB) {
+	case APIC_TDCR_1:
+		return (1);
+	case APIC_TDCR_2:
+		return (2);
+	case APIC_TDCR_4:
+		return (4);
+	case APIC_TDCR_8:
+		return (8);
+	case APIC_TDCR_16:
+		return (16);
+	case APIC_TDCR_32:
+		return (32);
+	case APIC_TDCR_64:
+		return (64);
+	case APIC_TDCR_128:
+		return (128);
+	default:
+		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
+	}
+}
+
+#if 0
+/*
+ * Debug helper (currently compiled out): print the LVT register at
+ * 'offset' together with its vector, delivery status and mask bits.
+ */
+static inline void
+vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
+{
+	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
+	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
+	    *lvt & APIC_LVTT_M);
+}
+#endif
+
+/*
+ * Compute the current count register of the virtual timer.
+ *
+ * The value is 0 unless the timer callout is active and its fire time
+ * lies in the future, in which case it is derived from the time that
+ * remains until 'timer_fire_bt'.  Runs under the timer lock.
+ */
+static uint32_t
+vlapic_get_ccr(struct vlapic *vlapic)
+{
+	struct bintime bt_now, bt_rem;
+	struct LAPIC *lapic;
+	uint32_t ccr;
+	
+	ccr = 0;
+	lapic = vlapic->apic_page;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+	if (callout_active(&vlapic->callout)) {
+		/*
+		 * If the timer is scheduled to expire in the future then
+		 * compute the value of 'ccr' based on the remaining time.
+		 */
+		binuptime(&bt_now);
+		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
+			bt_rem = vlapic->timer_fire_bt;
+			bintime_sub(&bt_rem, &bt_now);
+			/* Whole seconds first, then the fractional part. */
+			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
+			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+		}
+	}
+	/* The two variants differ only in the printf format flags used. */
+#ifdef	__FreeBSD__
+	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
+	    "icr_timer is %#x", ccr, lapic->icr_timer));
+#else
+	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
+	    "icr_timer is %x", ccr, lapic->icr_timer));
+#endif
+	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
+	    ccr, lapic->icr_timer);
+	VLAPIC_TIMER_UNLOCK(vlapic);
+	return (ccr);
+}
+
+/*
+ * Post-process a guest write to the timer divide configuration register:
+ * recompute 'timer_freq_bt' from the new divisor and 'timer_period_bt'
+ * from the initial count register, under the timer lock.
+ */
+void
+vlapic_dcr_write_handler(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+	int divisor;
+	
+	lapic = vlapic->apic_page;
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	divisor = vlapic_timer_divisor(lapic->dcr_timer);
+	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
+	    lapic->dcr_timer, divisor);
+
+	/*
+	 * Update the timer frequency and the timer period.
+	 *
+	 * XXX changes to the frequency divider will not take effect until
+	 * the timer is reloaded.
+	 */
+	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
+	vlapic->timer_period_bt = vlapic->timer_freq_bt;
+	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+
+/*
+ * Post-process a guest write to the error status register: publish the
+ * pending error bits in the register and clear the pending set.
+ */
+void
+vlapic_esr_write_handler(struct vlapic *vlapic)
+{
+
+	vlapic->apic_page->esr = vlapic->esr_pending;
+	vlapic->esr_pending = 0;
+}
+
+/*
+ * Mark 'vector' as pending in the vlapic.
+ *
+ * Returns 0 when the vlapic is software disabled and the interrupt is
+ * dropped.  A vector below 16 is rejected by recording a
+ * receive-illegal-vector error, and 1 is returned.  When the backend
+ * supplies a 'set_intr_ready' operation its return value is passed
+ * through.  Otherwise the IRR bit is set atomically and 1 is returned.
+ *
+ * 'level' states whether the interrupt is level-triggered; it is only
+ * compared against the TMR for tracing.
+ */
+int
+vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
+{
+	struct LAPIC *lapic;
+	uint32_t *irrptr, *tmrptr, mask;
+	int idx;
+
+	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
+
+	lapic = vlapic->apic_page;
+	if (!(lapic->svr & APIC_SVR_ENABLE)) {
+		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
+		    "interrupt %d", vector);
+		return (0);
+	}
+
+	if (vector < 16) {
+		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
+		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
+		    vector);
+		return (1);
+	}
+
+	if (vlapic->ops.set_intr_ready)
+		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
+
+	/*
+	 * Each register covers 32 vectors and the index advances by four
+	 * uint32_t per register.  The mask is built from an unsigned
+	 * constant so that bit 31 can be selected without shifting into
+	 * the sign bit of an int.
+	 */
+	idx = (vector / 32) * 4;
+	mask = 1U << (vector % 32);
+
+	irrptr = &lapic->irr0;
+	atomic_set_int(&irrptr[idx], mask);
+
+	/*
+	 * Verify that the trigger-mode of the interrupt matches with
+	 * the vlapic TMR registers.
+	 */
+	tmrptr = &lapic->tmr0;
+	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
+		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
+		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
+		    level ? "level" : "edge");
+	}
+
+	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
+	return (1);
+}
+
+/*
+ * Return a pointer to the LVT register in the apic page that corresponds
+ * to register offset 'offset'.  An offset that is not an LVT register
+ * panics.
+ *
+ * Note that the 'case low ... high' range is a compiler extension.
+ */
+static __inline uint32_t *
+vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	int 		 i;
+
+	switch (offset) {
+	case APIC_OFFSET_CMCI_LVT:
+		return (&lapic->lvt_cmci);
+	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+		/*
+		 * The remaining LVT registers are addressed relative to
+		 * lvt_timer; the offset difference is scaled down by four
+		 * to index in units of uint32_t.
+		 */
+		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
+		return ((&lapic->lvt_timer) + i);
+	default:
+		panic("vlapic_get_lvtptr: invalid LVT\n");
+	}
+}
+
+/*
+ * Map an LVT register offset to its APIC_LVT_* index, which is used to
+ * index 'vlapic->lvt_last'.  An offset that is not an LVT register yields
+ * -1, which trips the KASSERT below.
+ */
+static __inline int
+lvt_off_to_idx(uint32_t offset)
+{
+	int index;
+
+	switch (offset) {
+	case APIC_OFFSET_CMCI_LVT:
+		index = APIC_LVT_CMCI;
+		break;
+	case APIC_OFFSET_TIMER_LVT:
+		index = APIC_LVT_TIMER;
+		break;
+	case APIC_OFFSET_THERM_LVT:
+		index = APIC_LVT_THERMAL;
+		break;
+	case APIC_OFFSET_PERF_LVT:
+		index = APIC_LVT_PMC;
+		break;
+	case APIC_OFFSET_LINT0_LVT:
+		index = APIC_LVT_LINT0;
+		break;
+	case APIC_OFFSET_LINT1_LVT:
+		index = APIC_LVT_LINT1;
+		break;
+	case APIC_OFFSET_ERROR_LVT:
+		index = APIC_LVT_ERROR;
+		break;
+	default:
+		index = -1;
+		break;
+	}
+	/* The two variants differ only in the printf format flags used. */
+#ifdef	__FreeBSD__
+	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
+	    "invalid lvt index %d for offset %#x", index, offset));
+#else
+	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
+	    "invalid lvt index %d for offset %x", index, offset));
+#endif
+
+	return (index);
+}
+
+/*
+ * Return the most recently stored value of the LVT register at 'offset',
+ * read from 'lvt_last' with acquire semantics.
+ */
+static __inline uint32_t
+vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+{
+	const int slot = lvt_off_to_idx(offset);
+
+	return (atomic_load_acq_32(&vlapic->lvt_last[slot]));
+}
+
+/*
+ * Post-process a guest write to the LVT register at 'offset'.
+ *
+ * The value is forced to masked while the vlapic is software disabled,
+ * stripped of every bit that is not writable for that particular LVT,
+ * written back to the apic page and published in 'lvt_last' with release
+ * semantics.
+ */
+void
+vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
+{
+	uint32_t *lvtptr, mask, val;
+	struct LAPIC *lapic;
+	int idx;
+	
+	lapic = vlapic->apic_page;
+	lvtptr = vlapic_get_lvtptr(vlapic, offset);	
+	val = *lvtptr;
+	idx = lvt_off_to_idx(offset);
+
+	if (!(lapic->svr & APIC_SVR_ENABLE))
+		val |= APIC_LVT_M;
+	/* Bits writable in every LVT; the switch adds per-register bits. */
+	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
+	switch (offset) {
+	case APIC_OFFSET_TIMER_LVT:
+		mask |= APIC_LVTT_TM;
+		break;
+	case APIC_OFFSET_ERROR_LVT:
+		break;
+	case APIC_OFFSET_LINT0_LVT:
+	case APIC_OFFSET_LINT1_LVT:
+		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
+		/* FALLTHROUGH */
+	default:
+		mask |= APIC_LVT_DM;
+		break;
+	}
+	val &= mask;
+	*lvtptr = val;
+	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
+}
+
+/*
+ * Set the mask bit in every LVT register and run the LVT write handler
+ * on each of them so that the cached copies are updated as well.
+ */
+static void
+vlapic_mask_lvts(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	lapic->lvt_cmci |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
+
+	lapic->lvt_timer |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
+
+	lapic->lvt_thermal |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
+
+	lapic->lvt_pcint |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
+
+	lapic->lvt_lint0 |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
+
+	lapic->lvt_lint1 |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
+
+	lapic->lvt_error |= APIC_LVT_M;
+	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
+}
+
+/*
+ * Trigger the local interrupt described by the LVT value 'lvt'.
+ *
+ * Returns 0 when nothing was triggered: the LVT is masked, a fixed-mode
+ * vector is below 16 (a send-illegal-vector error is recorded), or the
+ * delivery mode is not handled.  Returns 1 otherwise.
+ */
+static int
+vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
+{
+	uint32_t vec, mode;
+
+	if (lvt & APIC_LVT_M)
+		return (0);
+
+	vec = lvt & APIC_LVT_VECTOR;
+	mode = lvt & APIC_LVT_DM;
+
+	switch (mode) {
+	case APIC_LVT_DM_FIXED:
+		if (vec < 16) {
+			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
+			return (0);
+		}
+		/* Local interrupts are raised as edge-triggered. */
+		if (vlapic_set_intr_ready(vlapic, vec, false))
+			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
+		break;
+	case APIC_LVT_DM_NMI:
+		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
+		break;
+	case APIC_LVT_DM_EXTINT:
+		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
+		break;
+	default:
+		/* Other delivery modes are ignored. */
+		return (0);
+	}
+	return (1);
+}
+
+#if 1
+/*
+ * Debug helper: print the eight ISR registers followed by the contents
+ * of the in-service vector stack up to and including its top.
+ */
+static void
+dump_isrvec_stk(struct vlapic *vlapic)
+{
+	int i;
+	uint32_t *isrptr;
+
+	isrptr = &vlapic->apic_page->isr0;
+	/* The index advances by four uint32_t per ISR register. */
+	for (i = 0; i < 8; i++)
+		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
+
+	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
+		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
+}
+#endif
+
+/*
+ * Recompute the processor priority register from the task priority
+ * register and the vector on top of the in-service vector stack.
+ *
+ * Algorithm adopted from section "Interrupt, Task and Processor Priority"
+ * in Intel Architecture Manual Vol 3a.
+ *
+ * The '#if 1' block cross-checks the in-service vector stack against the
+ * ISR registers and panics on any inconsistency.
+ */
+static void
+vlapic_update_ppr(struct vlapic *vlapic)
+{
+	int isrvec, tpr, ppr;
+
+	/*
+	 * Note that the value on the stack at index 0 is always 0.
+	 *
+	 * This is a placeholder for the value of ISRV when none of the
+	 * bits is set in the ISRx registers.
+	 */
+	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
+	tpr = vlapic->apic_page->tpr;
+
+#if 1
+	{
+		int i, lastprio, curprio, vector, idx;
+		uint32_t *isrptr;
+
+		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
+			panic("isrvec_stk is corrupted: %d", isrvec);
+
+		/*
+		 * Make sure that the priority of the nested interrupts is
+		 * always increasing.
+		 */
+		lastprio = -1;
+		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
+			curprio = PRIO(vlapic->isrvec_stk[i]);
+			if (curprio <= lastprio) {
+				dump_isrvec_stk(vlapic);
+				panic("isrvec_stk does not satisfy invariant");
+			}
+			lastprio = curprio;
+		}
+
+		/*
+		 * Make sure that each bit set in the ISRx registers has a
+		 * corresponding entry on the isrvec stack.
+		 *
+		 * The bit mask uses an unsigned constant so that testing
+		 * bit 31 does not shift into the sign bit of an int.
+		 */
+		i = 1;
+		isrptr = &vlapic->apic_page->isr0;
+		for (vector = 0; vector < 256; vector++) {
+			idx = (vector / 32) * 4;
+			if (isrptr[idx] & (1U << (vector % 32))) {
+				if (i > vlapic->isrvec_stk_top ||
+				    vlapic->isrvec_stk[i] != vector) {
+					dump_isrvec_stk(vlapic);
+					panic("ISR and isrvec_stk out of sync");
+				}
+				i++;
+			}
+		}
+	}
+#endif
+
+	/*
+	 * The PPR is the TPR when its priority class is at least that of
+	 * the in-service vector, otherwise the in-service vector's class
+	 * with the low nibble cleared.
+	 */
+	if (PRIO(tpr) >= PRIO(isrvec))
+		ppr = tpr;
+	else
+		ppr = isrvec & 0xf0;
+
+	vlapic->apic_page->ppr = ppr;
+	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
+}
+
+/*
+ * Handle an end-of-interrupt: retire the highest vector that is currently
+ * in service.
+ *
+ * The in-service bit is cleared, the in-service vector stack is popped and
+ * the PPR is recomputed.  If the matching trigger-mode (TMR) bit is set the
+ * EOI is also forwarded to the virtual I/O APIC.  Nothing happens when no
+ * vector in the scanned range is in service.
+ */
+static void
+vlapic_process_eoi(struct vlapic *vlapic)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	uint32_t	*isrptr, *tmrptr;
+	int		i, idx, bitpos, vector;
+
+	isrptr = &lapic->isr0;
+	tmrptr = &lapic->tmr0;
+
+	/*
+	 * The x86 architecture reserves the first 32 vectors for use
+	 * by the processor, so ISR0 is never scanned.
+	 */
+	for (i = 7; i > 0; i--) {
+		idx = i * 4;
+		/* fls() is 1-based; 0 means no bit is set in this word. */
+		bitpos = fls(isrptr[idx]);
+		if (bitpos-- != 0) {
+			if (vlapic->isrvec_stk_top <= 0) {
+				panic("invalid vlapic isrvec_stk_top %d",
+				      vlapic->isrvec_stk_top);
+			}
+			/*
+			 * 'bitpos' may be 31, so the mask must be built from
+			 * an unsigned constant to avoid undefined behavior.
+			 */
+			isrptr[idx] &= ~(1U << bitpos);
+			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
+			vlapic->isrvec_stk_top--;
+			vlapic_update_ppr(vlapic);
+			if ((tmrptr[idx] & (1U << bitpos)) != 0) {
+				vector = i * 32 + bitpos;
+				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
+				    vector);
+			}
+			return;
+		}
+	}
+}
+
+/*
+ * Return the bits of the LVT value 'lvt' that are selected by 'mask'.
+ */
+static __inline int
+vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
+{
+	uint32_t field = lvt & mask;
+
+	return (field);
+}
+
+/*
+ * Return non-zero if the timer LVT entry has the periodic-mode bit set.
+ */
+static __inline int
+vlapic_periodic_timer(struct vlapic *vlapic)
+{
+
+	return (vlapic_get_lvt_field(
+	    vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT),
+	    APIC_LVTT_TM_PERIODIC));
+}
+
+static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
+
+/*
+ * Record the error bits in 'mask' as pending and fire the error LVT entry.
+ *
+ * 'esr_firing' guards against re-entry: firing the error LVT can itself
+ * report an error through vlapic_fire_lvt(), in which case only the pending
+ * bits are accumulated.
+ */
+void
+vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
+{
+	uint32_t error_lvt;
+
+	vlapic->esr_pending |= mask;
+
+	if (!vlapic->esr_firing) {
+		vlapic->esr_firing = 1;
+
+		/* The error LVT always uses the fixed delivery mode. */
+		error_lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
+		if (vlapic_fire_lvt(vlapic, error_lvt | APIC_LVT_DM_FIXED)) {
+			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
+			    VLAPIC_INTR_ERROR, 1);
+		}
+
+		vlapic->esr_firing = 0;
+	}
+}
+
+static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
+
+/*
+ * Fire the timer LVT entry and count the interrupt if it was acted upon.
+ * The caller must hold the vlapic timer lock.
+ */
+static void
+vlapic_fire_timer(struct vlapic *vlapic)
+{
+	uint32_t timer_lvt;
+
+	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
+
+	/* The timer LVT always uses the fixed delivery mode. */
+	timer_lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
+	timer_lvt |= APIC_LVT_DM_FIXED;
+
+	if (vlapic_fire_lvt(vlapic, timer_lvt) != 0) {
+		VLAPIC_CTR0(vlapic, "vlapic timer fired");
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
+	}
+}
+
+static VMM_STAT(VLAPIC_INTR_CMC,
+    "corrected machine check interrupts generated by vlapic");
+
+/*
+ * Fire the corrected machine check (CMCI) LVT entry and count the interrupt
+ * if it was acted upon.
+ */
+void
+vlapic_fire_cmci(struct vlapic *vlapic)
+{
+	uint32_t cmci_lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
+
+	if (vlapic_fire_lvt(vlapic, cmci_lvt) != 0)
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
+}
+
+static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
+    "lvts triggered");
+
+/*
+ * Fire the LVT entry identified by 'vector', which is one of the APIC_LVT_*
+ * indices rather than an interrupt vector number.
+ *
+ * While the local APIC is disabled only LINT0 and LINT1 have an effect and
+ * the function always returns 0.  Otherwise it returns EINVAL for an
+ * unrecognized index and 0 in every other case.
+ */
+int
+vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
+{
+	uint32_t lvt, offset;
+
+	if (!vlapic_enabled(vlapic)) {
+		/*
+		 * When the local APIC is global/hardware disabled, LINT0
+		 * acts as the INTR pin and LINT1 as the NMI pin.
+		 */
+		if (vector == APIC_LVT_LINT0)
+			vm_inject_extint(vlapic->vm, vlapic->vcpuid);
+		else if (vector == APIC_LVT_LINT1)
+			vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
+		return (0);
+	}
+
+	/* Map the LVT index onto the register that holds the entry. */
+	switch (vector) {
+	case APIC_LVT_LINT0:
+		offset = APIC_OFFSET_LINT0_LVT;
+		break;
+	case APIC_LVT_LINT1:
+		offset = APIC_OFFSET_LINT1_LVT;
+		break;
+	case APIC_LVT_TIMER:
+		offset = APIC_OFFSET_TIMER_LVT;
+		break;
+	case APIC_LVT_ERROR:
+		offset = APIC_OFFSET_ERROR_LVT;
+		break;
+	case APIC_LVT_PMC:
+		offset = APIC_OFFSET_PERF_LVT;
+		break;
+	case APIC_LVT_THERMAL:
+		offset = APIC_OFFSET_THERM_LVT;
+		break;
+	case APIC_LVT_CMCI:
+		offset = APIC_OFFSET_CMCI_LVT;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	lvt = vlapic_get_lvt(vlapic, offset);
+
+	/* The timer and error entries always use the fixed delivery mode. */
+	if (vector == APIC_LVT_TIMER || vector == APIC_LVT_ERROR)
+		lvt |= APIC_LVT_DM_FIXED;
+
+	if (vlapic_fire_lvt(vlapic, lvt) != 0) {
+		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+		    LVTS_TRIGGERRED, vector, 1);
+	}
+	return (0);
+}
+
+/*
+ * Callout handler for the virtual APIC timer.  'arg' is the vlapic whose
+ * timer expired.
+ *
+ * Fires the timer LVT entry and, when the timer is in periodic mode,
+ * re-arms the callout for the next period while compensating for the time
+ * by which this invocation ran late.  Runs with the vlapic timer lock held.
+ */
+static void
+vlapic_callout_handler(void *arg)
+{
+	struct vlapic *vlapic;
+	struct bintime bt, btnow;
+	sbintime_t rem_sbt;
+
+	vlapic = arg;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+	if (callout_pending(&vlapic->callout))	/* callout was reset */
+		goto done;
+
+	if (!callout_active(&vlapic->callout))	/* callout was stopped */
+		goto done;
+
+	callout_deactivate(&vlapic->callout);
+
+	vlapic_fire_timer(vlapic);
+
+	if (vlapic_periodic_timer(vlapic)) {
+		binuptime(&btnow);
+		/* The callout must not run before its scheduled fire time. */
+#ifdef	__FreeBSD__
+		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
+		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
+		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
+		    vlapic->timer_fire_bt.frac));
+#else
+		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
+		    ("vlapic callout at %lx.%lx, expected at %lx.%lx",
+		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
+		    vlapic->timer_fire_bt.frac));
+#endif
+
+		/*
+		 * Compute the delta between when the timer was supposed to
+		 * fire and the present time.
+		 */
+		bt = btnow;
+		bintime_sub(&bt, &vlapic->timer_fire_bt);
+
+		rem_sbt = bttosbt(vlapic->timer_period_bt);
+		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
+			/*
+			 * Adjust the time until the next countdown downward
+			 * to account for the lost time.
+			 */
+			rem_sbt -= bttosbt(bt);
+		} else {
+			/*
+			 * If the delta is greater than the timer period then
+			 * just reset our time base instead of trying to catch
+			 * up.
+			 */
+			vlapic->timer_fire_bt = btnow;
+			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
+			    "usecs, period is %lu usecs - resetting time base",
+			    bttosbt(bt) / SBT_1US,
+			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
+		}
+
+		/* Advance the expected fire time by one period and re-arm. */
+		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
+		    vlapic_callout_handler, vlapic, 0);
+	}
+done:
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * React to a new value in the timer initial-count register.
+ *
+ * The timer period becomes 'timer_freq_bt' multiplied by the initial count.
+ * A non-zero count (re)arms the callout one period from now; a zero count
+ * stops it.  The whole update runs under the vlapic timer lock.
+ */
+void
+vlapic_icrtmr_write_handler(struct vlapic *vlapic)
+{
+	uint32_t initial_count;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	initial_count = vlapic->apic_page->icr_timer;
+
+	vlapic->timer_period_bt = vlapic->timer_freq_bt;
+	bintime_mul(&vlapic->timer_period_bt, initial_count);
+
+	if (initial_count == 0) {
+		callout_stop(&vlapic->callout);
+	} else {
+		/* Expected fire time is "now" plus one full period. */
+		binuptime(&vlapic->timer_fire_bt);
+		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+
+		callout_reset_sbt(&vlapic->callout,
+		    bttosbt(vlapic->timer_period_bt), 0,
+		    vlapic_callout_handler, vlapic, 0);
+	}
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * This function populates 'dmask' with the set of vcpus that match the
+ * addressing specified by the (dest, phys, lowprio) tuple.
+ *
+ * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
+ * or xAPIC (8-bit) destination field.
+ *
+ * When 'lowprio' is set in logical mode, the search stops at the first
+ * matching vcpu so that 'dmask' holds at most one vcpu.
+ */
+static void
+vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+    bool lowprio, bool x2apic_dest)
+{
+	struct vlapic *vlapic;
+	uint32_t dfr, ldr, ldest, cluster;
+	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
+	cpuset_t amask;
+	int vcpuid;
+
+	if ((x2apic_dest && dest == 0xffffffff) ||
+	    (!x2apic_dest && dest == 0xff)) {
+		/*
+		 * Broadcast in both logical and physical modes.
+		 */
+		*dmask = vm_active_cpus(vm);
+		return;
+	}
+
+	if (phys) {
+		/*
+		 * Physical mode: destination is APIC ID.
+		 */
+		CPU_ZERO(dmask);
+		vcpuid = vm_apicid2vcpuid(vm, dest);
+		/*
+		 * 'dest' is guest-controlled and may be a full 32-bit value
+		 * in x2APIC mode.  Reject a negative vcpuid as well as one
+		 * that is too large so that CPU_SET() is never handed an
+		 * out-of-range index.
+		 * NOTE(review): assumes vm_apicid2vcpuid() can yield a
+		 * negative int for large APIC IDs - confirm against its
+		 * definition.
+		 */
+		if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
+			CPU_SET(vcpuid, dmask);
+	} else {
+		/*
+		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
+		 * bitmask. This model is only available in the xAPIC mode.
+		 */
+		mda_flat_ldest = dest & 0xff;
+
+		/*
+		 * In the "Cluster Model" the MDA is used to identify a
+		 * specific cluster and a set of APICs in that cluster.
+		 */
+		if (x2apic_dest) {
+			mda_cluster_id = dest >> 16;
+			mda_cluster_ldest = dest & 0xffff;
+		} else {
+			mda_cluster_id = (dest >> 4) & 0xf;
+			mda_cluster_ldest = dest & 0xf;
+		}
+
+		/*
+		 * Logical mode: match each APIC that has a bit set
+		 * in its LDR that matches a bit in the ldest.
+		 */
+		CPU_ZERO(dmask);
+		amask = vm_active_cpus(vm);
+		/* CPU_FFS() is 1-based; 0 means the set is empty. */
+		while ((vcpuid = CPU_FFS(&amask)) != 0) {
+			vcpuid--;
+			CPU_CLR(vcpuid, &amask);
+
+			vlapic = vm_lapic(vm, vcpuid);
+			dfr = vlapic->apic_page->dfr;
+			ldr = vlapic->apic_page->ldr;
+
+			if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_FLAT) {
+				ldest = ldr >> 24;
+				mda_ldest = mda_flat_ldest;
+			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_CLUSTER) {
+				if (x2apic(vlapic)) {
+					cluster = ldr >> 16;
+					ldest = ldr & 0xffff;
+				} else {
+					cluster = ldr >> 28;
+					ldest = (ldr >> 24) & 0xf;
+				}
+				if (cluster != mda_cluster_id)
+					continue;
+				mda_ldest = mda_cluster_ldest;
+			} else {
+				/*
+				 * Guest has configured a bad logical
+				 * model for this vcpu - skip it.
+				 */
+				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
+				    "model %x - cannot deliver interrupt", dfr);
+				continue;
+			}
+
+			if ((mda_ldest & ldest) != 0) {
+				CPU_SET(vcpuid, dmask);
+				if (lowprio)
+					break;
+			}
+		}
+	}
+}
+
+static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
+
+/*
+ * Store 'val' in the Task Priority Register and recompute the PPR.
+ * Nothing is done when the value is unchanged.
+ */
+static void
+vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
+{
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	if (lapic->tpr == val)
+		return;
+
+	VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vlapic TPR changed "
+	    "from %#x to %#x", lapic->tpr, val);
+	lapic->tpr = val;
+	vlapic_update_ppr(vlapic);
+}
+
+/*
+ * Return the current Task Priority Register value, truncated to 8 bits.
+ */
+static uint8_t
+vlapic_get_tpr(struct vlapic *vlapic)
+{
+
+	return (vlapic->apic_page->tpr);
+}
+
+/*
+ * Emulate a guest write of 'val' to CR8.
+ *
+ * Only the low four bits may be set; any other bit injects #GP into the
+ * vcpu and leaves the TPR untouched.  A valid value becomes bits 7:4 of
+ * the TPR.
+ */
+void
+vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
+{
+
+	if ((val & ~0xf) != 0) {
+		vm_inject_gp(vlapic->vm, vlapic->vcpuid);
+		return;
+	}
+
+	vlapic_set_tpr(vlapic, (uint8_t)(val << 4));
+}
+
+/*
+ * Return the value a guest read of CR8 observes: bits 7:4 of the TPR.
+ */
+uint64_t
+vlapic_get_cr8(struct vlapic *vlapic)
+{
+
+	return (vlapic_get_tpr(vlapic) >> 4);
+}
+
+/*
+ * Process a write to the low half of the Interrupt Command Register, i.e.
+ * a request by the guest to send an inter-processor interrupt.
+ *
+ * Fixed and NMI IPIs are delivered here.  INIT and STARTUP IPIs are only
+ * handled when sent by vcpu 0 to a non-zero destination below VM_MAXCPU,
+ * and drive the destination's 'boot_state'.  An accepted STARTUP IPI sets
+ * '*retu' and fills in a VM_EXITCODE_SPINUP_AP exit for the sending vcpu.
+ *
+ * Returns 0 when the write was handled here and 1 when it was not, which
+ * causes a return to userland.
+ */
+int
+vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
+{
+	int i;
+	bool phys;
+	cpuset_t dmask;
+	uint64_t icrval;
+	uint32_t dest, vec, mode;
+	struct vlapic *vlapic2;
+	struct vm_exit *vmexit;
+	struct LAPIC *lapic;
+
+	lapic = vlapic->apic_page;
+	/* The delivery-status bit is always cleared in the stored value. */
+	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
+	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
+
+	/*
+	 * The destination is all of ICR_HI in x2APIC mode and only its top
+	 * byte otherwise.
+	 */
+	if (x2apic(vlapic))
+		dest = icrval >> 32;
+	else
+		dest = icrval >> (32 + 24);
+	vec = icrval & APIC_VECTOR_MASK;
+	mode = icrval & APIC_DELMODE_MASK;
+
+	if (mode == APIC_DELMODE_FIXED && vec < 16) {
+		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
+		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
+		return (0);
+	}
+
+	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
+
+	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
+		/* Resolve the destination shorthand into a set of vcpus. */
+		switch (icrval & APIC_DEST_MASK) {
+		case APIC_DEST_DESTFLD:
+			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
+			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
+			    x2apic(vlapic));
+			break;
+		case APIC_DEST_SELF:
+			CPU_SETOF(vlapic->vcpuid, &dmask);
+			break;
+		case APIC_DEST_ALLISELF:
+			dmask = vm_active_cpus(vlapic->vm);
+			break;
+		case APIC_DEST_ALLESELF:
+			dmask = vm_active_cpus(vlapic->vm);
+			CPU_CLR(vlapic->vcpuid, &dmask);
+			break;
+		default:
+			CPU_ZERO(&dmask);	/* satisfy gcc */
+			break;
+		}
+
+		/* CPU_FFS() is 1-based; 0 means the set is empty. */
+		while ((i = CPU_FFS(&dmask)) != 0) {
+			i--;
+			CPU_CLR(i, &dmask);
+			if (mode == APIC_DELMODE_FIXED) {
+				lapic_intr_edge(vlapic->vm, i, vec);
+				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+						    IPIS_SENT, i, 1);
+				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
+				    "to vcpuid %d", vec, i);
+			} else {
+				vm_inject_nmi(vlapic->vm, i);
+				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
+				    "to vcpuid %d", i);
+			}
+		}
+
+		return (0);	/* handled completely in the kernel */
+	}
+
+	if (mode == APIC_DELMODE_INIT) {
+		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
+			return (0);
+
+		/*
+		 * An INIT that does not satisfy this condition falls through
+		 * to the end of the function and returns 1.
+		 */
+		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
+			vlapic2 = vm_lapic(vlapic->vm, dest);
+
+			/* move from INIT to waiting-for-SIPI state */
+			if (vlapic2->boot_state == BS_INIT) {
+				vlapic2->boot_state = BS_SIPI;
+			}
+
+			return (0);
+		}
+	}
+
+	if (mode == APIC_DELMODE_STARTUP) {
+		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
+			vlapic2 = vm_lapic(vlapic->vm, dest);
+
+			/*
+			 * Ignore SIPIs in any state other than wait-for-SIPI
+			 */
+			if (vlapic2->boot_state != BS_SIPI)
+				return (0);
+
+			vlapic2->boot_state = BS_RUNNING;
+
+			/*
+			 * Report the AP to spin up and its start address,
+			 * which is the SIPI vector shifted by PAGE_SHIFT.
+			 */
+			*retu = true;
+			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+			vmexit->u.spinup_ap.vcpu = dest;
+			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+
+			return (0);
+		}
+	}
+
+	/*
+	 * This will cause a return to userland.
+	 */
+	return (1);
+}
+
+/*
+ * Handle a write to the x2APIC SELF IPI register: deliver the vector held
+ * in the low byte of 'val' to this vcpu as an edge-triggered interrupt.
+ */
+void
+vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
+{
+	const int vector = val & 0xff;
+
+	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));
+
+	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vector);
+	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
+	    vlapic->vcpuid, 1);
+	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vector);
+}
+
+/*
+ * Look for a pending vector that can be delivered to the guest now.
+ *
+ * Defers to the 'pending_intr' op when one is installed.  Otherwise the
+ * IRR words are scanned from the highest vector down; the highest pending
+ * vector is eligible only if its priority class is above that of the PPR.
+ *
+ * Returns 1 and stores the vector through 'vecptr' (if not NULL) when an
+ * eligible vector exists, and 0 otherwise.
+ */
+int
+vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	uint32_t	*irr, bits;
+	int		word, msb, vector;
+
+	if (vlapic->ops.pending_intr)
+		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
+
+	irr = &lapic->irr0;
+
+	/*
+	 * The x86 architecture reserves the first 32 vectors for use
+	 * by the processor, so IRR0 is never scanned.
+	 */
+	for (word = 7; word > 0; word--) {
+		bits = atomic_load_acq_int(&irr[word * 4]);
+		msb = fls(bits);
+		if (msb == 0)
+			continue;
+
+		/* Highest pending vector; lower ones cannot do better. */
+		vector = word * 32 + (msb - 1);
+		if (PRIO(vector) <= PRIO(lapic->ppr))
+			break;
+
+		VLAPIC_CTR1(vlapic, "pending intr %d", vector);
+		if (vecptr != NULL)
+			*vecptr = vector;
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Transition 'vector' from pending (IRR) to in-service (ISR).
+ *
+ * Defers to the 'intr_accepted' op when one is installed.  Otherwise the
+ * IRR bit is cleared, the ISR bit is set, the vector is pushed on the
+ * in-service vector stack and the PPR is recomputed.  Panics if the stack
+ * would overflow.
+ */
+void
+vlapic_intr_accepted(struct vlapic *vlapic, int vector)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	uint32_t	*irrptr, *isrptr;
+	int		idx, stk_top;
+
+	/*
+	 * A void function must not return an expression, so call the op
+	 * and return separately.
+	 */
+	if (vlapic->ops.intr_accepted) {
+		(*vlapic->ops.intr_accepted)(vlapic, vector);
+		return;
+	}
+
+	/*
+	 * clear the ready bit for vector being accepted in irr
+	 * and set the vector as in service in isr.
+	 *
+	 * The bit masks use an unsigned constant because shifting a signed
+	 * 1 into bit 31 is undefined behavior.
+	 */
+	idx = (vector / 32) * 4;
+
+	irrptr = &lapic->irr0;
+	atomic_clear_int(&irrptr[idx], 1U << (vector % 32));
+	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
+
+	isrptr = &lapic->isr0;
+	isrptr[idx] |= 1U << (vector % 32);
+	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
+
+	/*
+	 * Update the PPR
+	 */
+	vlapic->isrvec_stk_top++;
+
+	stk_top = vlapic->isrvec_stk_top;
+	if (stk_top >= ISRVEC_STK_SIZE)
+		panic("isrvec_stk_top overflow %d", stk_top);
+
+	vlapic->isrvec_stk[stk_top] = vector;
+	vlapic_update_ppr(vlapic);
+}
+
+/*
+ * React to a new value in the Spurious Interrupt Vector Register.
+ *
+ * Only a change of the software-enable bit has an effect.  Disabling the
+ * APIC stops the timer callout and masks every LVT entry; enabling it
+ * restarts the timer if the timer LVT is in periodic mode.
+ */
+void
+vlapic_svr_write_handler(struct vlapic *vlapic)
+{
+	uint32_t cur, prev;
+
+	cur = vlapic->apic_page->svr;
+	prev = vlapic->svr_last;
+	vlapic->svr_last = cur;
+
+	/* Nothing to do unless the enable bit was toggled. */
+	if (((prev ^ cur) & APIC_SVR_ENABLE) == 0)
+		return;
+
+	if ((cur & APIC_SVR_ENABLE) != 0) {
+		/*
+		 * The apic is now enabled so restart the apic timer
+		 * if it is configured in periodic mode.
+		 */
+		VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
+		if (vlapic_periodic_timer(vlapic))
+			vlapic_icrtmr_write_handler(vlapic);
+		return;
+	}
+
+	/*
+	 * The apic is now disabled so stop the apic timer
+	 * and mask all the LVT entries.
+	 */
+	VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
+	VLAPIC_TIMER_LOCK(vlapic);
+	callout_stop(&vlapic->callout);
+	VLAPIC_TIMER_UNLOCK(vlapic);
+	vlapic_mask_lvts(vlapic);
+}
+
+/*
+ * Emulate a guest read of the local APIC register at 'offset' and store
+ * the result in '*data'.
+ *
+ * 'mmio_access' is non-zero for a memory-mapped (xAPIC) access and zero
+ * for an MSR (x2APIC) access.  An access that does not match the current
+ * APIC mode, or whose offset is beyond the register page, reads as zero.
+ * 'retu' is not used by this function.
+ *
+ * Always returns 0.
+ */
+int
+vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t *data, bool *retu)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	uint32_t	*reg;
+	int		 i;
+
+	/* Ignore MMIO accesses in x2APIC mode */
+	if (x2apic(vlapic) && mmio_access) {
+		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
+		    offset);
+		*data = 0;
+		goto done;
+	}
+
+	if (!x2apic(vlapic) && !mmio_access) {
+		/*
+		 * XXX Generate GP fault for MSR accesses in xAPIC mode
+		 */
+		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
+		    "xAPIC mode", offset);
+		*data = 0;
+		goto done;
+	}
+
+	if (offset > sizeof(*lapic)) {
+		*data = 0;
+		goto done;
+	}
+
+	/* Registers are read as aligned 32-bit quantities. */
+	offset &= ~3;
+	switch(offset)
+	{
+		case APIC_OFFSET_ID:
+			*data = lapic->id;
+			break;
+		case APIC_OFFSET_VER:
+			*data = lapic->version;
+			break;
+		case APIC_OFFSET_TPR:
+			*data = vlapic_get_tpr(vlapic);
+			break;
+		case APIC_OFFSET_APR:
+			*data = lapic->apr;
+			break;
+		case APIC_OFFSET_PPR:
+			*data = lapic->ppr;
+			break;
+		case APIC_OFFSET_EOI:
+			*data = lapic->eoi;
+			break;
+		case APIC_OFFSET_LDR:
+			*data = lapic->ldr;
+			break;
+		case APIC_OFFSET_DFR:
+			*data = lapic->dfr;
+			break;
+		case APIC_OFFSET_SVR:
+			*data = lapic->svr;
+			break;
+		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+			/* Byte offset to index in 32-bit words. */
+			i = (offset - APIC_OFFSET_ISR0) >> 2;
+			reg = &lapic->isr0;
+			*data = *(reg + i);
+			break;
+		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+			i = (offset - APIC_OFFSET_TMR0) >> 2;
+			reg = &lapic->tmr0;
+			*data = *(reg + i);
+			break;
+		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+			i = (offset - APIC_OFFSET_IRR0) >> 2;
+			reg = &lapic->irr0;
+			*data = atomic_load_acq_int(reg + i);
+			break;
+		case APIC_OFFSET_ESR:
+			*data = lapic->esr;
+			break;
+		case APIC_OFFSET_ICR_LOW:
+			*data = lapic->icr_lo;
+			/* In x2APIC mode the ICR is one 64-bit register. */
+			if (x2apic(vlapic))
+				*data |= (uint64_t)lapic->icr_hi << 32;
+			break;
+		case APIC_OFFSET_ICR_HI:
+			*data = lapic->icr_hi;
+			break;
+		case APIC_OFFSET_CMCI_LVT:
+		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+			*data = vlapic_get_lvt(vlapic, offset);
+#ifdef INVARIANTS
+			reg = vlapic_get_lvtptr(vlapic, offset);
+			KASSERT(*data == *reg, ("inconsistent lvt value at "
+			    "offset %#lx: %#lx/%#x", offset, *data, *reg));
+#endif
+			break;
+		case APIC_OFFSET_TIMER_ICR:
+			*data = lapic->icr_timer;
+			break;
+		case APIC_OFFSET_TIMER_CCR:
+			*data = vlapic_get_ccr(vlapic);
+			break;
+		case APIC_OFFSET_TIMER_DCR:
+			*data = lapic->dcr_timer;
+			break;
+		case APIC_OFFSET_SELF_IPI:
+			/*
+			 * XXX generate a GP fault if vlapic is in x2apic mode
+			 */
+			*data = 0;
+			break;
+		case APIC_OFFSET_RRR:
+		default:
+			*data = 0;
+			break;
+	}
+done:
+	/* 'offset' is 64 bits wide, so it needs a long format specifier. */
+	VLAPIC_CTR2(vlapic, "vlapic read offset %#lx, data %#lx", offset,
+	    *data);
+	return (0);
+}
+
+/*
+ * Emulate a guest write of 'data' to the local APIC register at 'offset'.
+ *
+ * 'mmio_access' is non-zero for a memory-mapped (xAPIC) access and zero
+ * for an MSR (x2APIC) access.  A write that does not match the current
+ * APIC mode, targets an offset beyond the register page, or targets a
+ * read-only register is silently ignored.  'retu' is only passed on to
+ * the ICR handler.
+ *
+ * Returns the result of vlapic_icrlo_write_handler() for a write to the
+ * low half of the ICR and 0 for everything else.
+ */
+int
+vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t data, bool *retu)
+{
+	struct LAPIC	*lapic = vlapic->apic_page;
+	uint32_t	*regptr;
+	int		retval;
+
+#ifdef	__FreeBSD__
+	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
+	    ("vlapic_write: invalid offset %#lx", offset));
+#else
+	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
+	    ("vlapic_write: invalid offset %lx", offset));
+#endif
+
+	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
+	    offset, data);
+
+	if (offset > sizeof(*lapic))
+		return (0);
+
+	/* Ignore MMIO accesses in x2APIC mode */
+	if (x2apic(vlapic) && mmio_access) {
+		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
+		    "in x2APIC mode", data, offset);
+		return (0);
+	}
+
+	/*
+	 * XXX Generate GP fault for MSR accesses in xAPIC mode
+	 */
+	if (!x2apic(vlapic) && !mmio_access) {
+		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
+		    "in xAPIC mode", data, offset);
+		return (0);
+	}
+
+	/*
+	 * Writable registers are stored into the APIC page first and then
+	 * the matching write handler acts on the stored value.
+	 */
+	retval = 0;
+	switch(offset)
+	{
+		case APIC_OFFSET_ID:
+			lapic->id = data;
+			vlapic_id_write_handler(vlapic);
+			break;
+		case APIC_OFFSET_TPR:
+			vlapic_set_tpr(vlapic, data & 0xff);
+			break;
+		case APIC_OFFSET_EOI:
+			/* The value written to EOI is not used. */
+			vlapic_process_eoi(vlapic);
+			break;
+		case APIC_OFFSET_LDR:
+			lapic->ldr = data;
+			vlapic_ldr_write_handler(vlapic);
+			break;
+		case APIC_OFFSET_DFR:
+			lapic->dfr = data;
+			vlapic_dfr_write_handler(vlapic);
+			break;
+		case APIC_OFFSET_SVR:
+			lapic->svr = data;
+			vlapic_svr_write_handler(vlapic);
+			break;
+		case APIC_OFFSET_ICR_LOW: 
+			lapic->icr_lo = data;
+			/* In x2APIC mode the ICR is one 64-bit register. */
+			if (x2apic(vlapic))
+				lapic->icr_hi = data >> 32;
+			retval = vlapic_icrlo_write_handler(vlapic, retu);
+			break;
+		case APIC_OFFSET_ICR_HI:
+			lapic->icr_hi = data;
+			break;
+		case APIC_OFFSET_CMCI_LVT:
+		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+			regptr = vlapic_get_lvtptr(vlapic, offset);
+			*regptr = data;
+			vlapic_lvt_write_handler(vlapic, offset);
+			break;
+		case APIC_OFFSET_TIMER_ICR:
+			lapic->icr_timer = data;
+			vlapic_icrtmr_write_handler(vlapic);
+			break;
+
+		case APIC_OFFSET_TIMER_DCR:
+			lapic->dcr_timer = data;
+			vlapic_dcr_write_handler(vlapic);
+			break;
+
+		case APIC_OFFSET_ESR:
+			vlapic_esr_write_handler(vlapic);
+			break;
+
+		case APIC_OFFSET_SELF_IPI:
+			/* Writes to SELF IPI are ignored outside x2APIC mode. */
+			if (x2apic(vlapic))
+				vlapic_self_ipi_handler(vlapic, data);
+			break;
+
+		case APIC_OFFSET_VER:
+		case APIC_OFFSET_APR:
+		case APIC_OFFSET_PPR:
+		case APIC_OFFSET_RRR:
+		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+		case APIC_OFFSET_TIMER_CCR:
+		default:
+			// Read only.
+			break;
+	}
+
+	return (retval);
+}
+
+/*
+ * Put the virtual local APIC into its reset state.
+ *
+ * The register page is zeroed and then the ID, version, DFR and SVR are
+ * given their reset values, all LVT entries are masked, every vector is
+ * made edge-triggered and the timer divide configuration is cleared.
+ * vcpu 0 is the BSP and starts out running; every other vcpu is an AP that
+ * waits for INIT.
+ */
+static void
+vlapic_reset(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	bzero(lapic, sizeof(*lapic));
+
+	lapic->id = vlapic_get_id(vlapic);
+	lapic->version =
+	    VLAPIC_VERSION | (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
+	lapic->dfr = 0xffffffff;
+	lapic->svr = APIC_SVR_VECTOR;
+	vlapic_mask_lvts(vlapic);
+	vlapic_reset_tmr(vlapic);
+
+	lapic->dcr_timer = 0;
+	vlapic_dcr_write_handler(vlapic);
+
+	/* BSP : AP */
+	vlapic->boot_state = (vlapic->vcpuid == 0) ? BS_RUNNING : BS_INIT;
+
+	vlapic->svr_last = lapic->svr;
+}
+
+/*
+ * One-time initialization of a vlapic whose 'vm', 'vcpuid' and 'apic_page'
+ * fields have already been filled in by the caller.
+ *
+ * Sets up the timer lock and callout, establishes the initial APIC_BASE
+ * MSR value (flagging vcpu 0 as the BSP) and resets the register state.
+ */
+void
+vlapic_init(struct vlapic *vlapic)
+{
+	uint64_t apicbase;
+
+	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
+	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
+	    ("vlapic_init: vcpuid is not initialized"));
+	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
+	    "initialized"));
+
+	/*
+	 * If the vlapic is configured in x2apic mode then it will be
+	 * accessed in the critical section via the MSR emulation code.
+	 *
+	 * Therefore the timer mutex must be a spinlock because blockable
+	 * mutexes cannot be acquired in a critical section.
+	 */
+	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
+	callout_init(&vlapic->callout, 1);
+
+	apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
+	if (vlapic->vcpuid == 0)
+		apicbase |= APICBASE_BSP;
+	vlapic->msr_apicbase = apicbase;
+
+	vlapic_reset(vlapic);
+}
+
+/*
+ * Tear down a vlapic by draining its timer callout.
+ */
+void
+vlapic_cleanup(struct vlapic *vlapic)
+{
+	callout_drain(&vlapic->callout);
+}
+
+/*
+ * Return the current value of the emulated APIC_BASE MSR.
+ */
+uint64_t
+vlapic_get_apicbase(struct vlapic *vlapic)
+{
+	return (vlapic->msr_apicbase);
+}
+
+/*
+ * Emulate a write to the APIC_BASE MSR.  Changing the value is not
+ * supported: returns 0 when 'new' equals the current value and -1
+ * otherwise, leaving the stored value untouched either way.
+ */
+int
+vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
+{
+
+	if (vlapic->msr_apicbase == new)
+		return (0);
+
+	VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
+	    "not supported", vlapic->msr_apicbase, new);
+	return (-1);
+}
+
+/*
+ * Switch the local APIC of 'vcpuid' into or out of x2APIC mode.
+ *
+ * The x2APIC bit of the APIC_BASE MSR is cleared for X2APIC_DISABLED and
+ * set for every other state.  The mode-dependent registers (ID, LDR, DFR)
+ * are then reinitialized, and the 'enable_x2apic_mode' op, if present, is
+ * invoked for X2APIC_ENABLED.
+ */
+void
+vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
+{
+	struct vlapic *vlapic = vm_lapic(vm, vcpuid);
+	struct LAPIC *lapic;
+
+	if (state != X2APIC_DISABLED)
+		vlapic->msr_apicbase |= APICBASE_X2APIC;
+	else
+		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
+
+	/*
+	 * Reset the local APIC registers whose values are mode-dependent.
+	 *
+	 * XXX this works because the APIC mode can be changed only at vcpu
+	 * initialization time.
+	 */
+	lapic = vlapic->apic_page;
+	lapic->id = vlapic_get_id(vlapic);
+	if (!x2apic(vlapic)) {
+		lapic->ldr = 0;
+		lapic->dfr = 0xffffffff;
+	} else {
+		lapic->ldr = x2apic_ldr(vlapic);
+		lapic->dfr = 0;
+	}
+
+	if (state == X2APIC_ENABLED && vlapic->ops.enable_x2apic_mode)
+		(*vlapic->ops.enable_x2apic_mode)(vlapic);
+}
+
+/*
+ * Deliver an interrupt to every vcpu selected by (dest, phys, delmode).
+ *
+ * Only the fixed, lowest-priority and ExtINT delivery modes are accepted;
+ * any other mode is logged and dropped.  ExtINT is injected directly, the
+ * other two modes post 'vec' to the target's local APIC with the given
+ * 'level' trigger mode.
+ */
+void
+vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec)
+{
+	cpuset_t targets;
+	int cpu;
+
+	switch (delmode) {
+	case IOART_DELFIXED:
+	case IOART_DELLOPRI:
+	case IOART_DELEXINT:
+		break;
+	default:
+		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
+		return;
+	}
+
+	/*
+	 * We don't provide any virtual interrupt redirection hardware so
+	 * all interrupts originating from the ioapic or MSI specify the
+	 * 'dest' in the legacy xAPIC format.
+	 */
+	vlapic_calcdest(vm, &targets, dest, phys,
+	    (delmode == IOART_DELLOPRI), false);
+
+	/* CPU_FFS() is 1-based; 0 means the set is empty. */
+	for (;;) {
+		cpu = CPU_FFS(&targets);
+		if (cpu == 0)
+			break;
+		cpu--;
+		CPU_CLR(cpu, &targets);
+
+		if (delmode == IOART_DELEXINT)
+			vm_inject_extint(vm, cpu);
+		else
+			lapic_set_intr(vm, cpu, vec, level);
+	}
+}
+
+/*
+ * Post an interrupt to the vcpu currently running on 'hostcpu'.
+ *
+ * This is done by leveraging features like Posted Interrupts (Intel) or
+ * the Doorbell MSR (AMD AVIC) that avoid a VM exit, exposed here through
+ * the 'post_intr' op.
+ *
+ * If no such op is installed then fall back to sending the IPI 'ipinum'
+ * to 'hostcpu'.
+ */
+void
+vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
+{
+
+	if (vlapic->ops.post_intr == NULL) {
+		ipi_cpu(hostcpu, ipinum);
+		return;
+	}
+
+	(*vlapic->ops.post_intr)(vlapic, hostcpu);
+}
+
+/*
+ * Return true only when the local APIC is enabled both in the APIC_BASE
+ * MSR and by the software-enable bit of the SVR.
+ */
+bool
+vlapic_enabled(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic = vlapic->apic_page;
+
+	return ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
+	    (lapic->svr & APIC_SVR_ENABLE) != 0);
+}
+
+/*
+ * Set the trigger-mode (TMR) bit of 'vector' to level-triggered when
+ * 'level' is true and to edge-triggered otherwise, then inform the
+ * 'set_tmr' op if one is installed.
+ */
+static void
+vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
+{
+	struct LAPIC *lapic;
+	uint32_t *tmrptr, mask;
+	int idx;
+
+	lapic = vlapic->apic_page;
+	tmrptr = &lapic->tmr0;
+	/* TMRx registers are four 32-bit words apart. */
+	idx = (vector / 32) * 4;
+	/*
+	 * Build the mask from an unsigned constant: shifting a signed 1
+	 * into bit 31 is undefined behavior.
+	 */
+	mask = 1U << (vector % 32);
+	if (level)
+		tmrptr[idx] |= mask;
+	else
+		tmrptr[idx] &= ~mask;
+
+	if (vlapic->ops.set_tmr != NULL)
+		(*vlapic->ops.set_tmr)(vlapic, vector, level);
+}
+
+/*
+ * Reset the trigger-mode bit of all 256 vectors to edge-triggered.
+ */
+void
+vlapic_reset_tmr(struct vlapic *vlapic)
+{
+	int v;
+
+	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
+
+	for (v = 0; v < 256; v++)
+		vlapic_set_tmr(vlapic, v, false);
+}
+
+/*
+ * Mark 'vector' as level-triggered in this vlapic if an interrupt
+ * described by (dest, phys, delmode) would be delivered to it.
+ *
+ * Delivery modes other than fixed and lowest-priority are ignored.
+ */
+void
+vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
+    int delmode, int vector)
+{
+	cpuset_t targets;
+
+	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
+
+	/*
+	 * A level trigger is valid only for fixed and lowprio delivery modes.
+	 */
+	if (!(delmode == APIC_DELMODE_FIXED ||
+	    delmode == APIC_DELMODE_LOWPRIO)) {
+		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
+		    "delivery-mode %d", delmode);
+		return;
+	}
+
+	/* 'dest' is always interpreted in the xAPIC format here. */
+	vlapic_calcdest(vlapic->vm, &targets, dest, phys,
+	    (delmode == APIC_DELMODE_LOWPRIO), false);
+
+	if (CPU_ISSET(vlapic->vcpuid, &targets)) {
+		VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
+		vlapic_set_tmr(vlapic, vector, true);
+	}
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
new file mode 100644
index 0000000000..3fa705d818
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h
@@ -0,0 +1,109 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vlapic.h 262281 2014-02-21 06:03:54Z neel $
+ */
+
+#ifndef _VLAPIC_H_
+#define	_VLAPIC_H_
+
+struct vm;
+enum x2apic_state;
+
+/*
+ * Emulate a guest write to, or read from, the local APIC register at
+ * 'offset'.  A non-zero 'mmio_access' identifies a memory-mapped (xAPIC)
+ * access and zero identifies an MSR (x2APIC) access; an access that does
+ * not match the current APIC mode is ignored (reads return zero).
+ */
+int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t data, bool *retu);
+int vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t *data, bool *retu);
+
+/*
+ * Returns 0 if there is no eligible vector that can be delivered to the
+ * guest at this time and non-zero otherwise.
+ *
+ * If an eligible vector number is found and 'vecptr' is not NULL then it will
+ * be stored in the location pointed to by 'vecptr'.
+ *
+ * Note that the vector does not automatically transition to the ISR as a
+ * result of calling this function.
+ */
+int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr);
+
+/*
+ * Transition 'vector' from IRR to ISR. This function is called with the
+ * vector returned by 'vlapic_pending_intr()' when the guest is able to
+ * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
+ * block interrupt delivery).
+ */
+void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
+
+/*
+ * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise.
+ */
+int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
+
+/*
+ * Post an interrupt to the vcpu running on 'hostcpu'. This will use a
+ * hardware assist if available (e.g. Posted Interrupt) or fall back to
+ * sending an 'ipinum' to interrupt the 'hostcpu'.
+ */
+void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum);
+
+/*
+ * vlapic_set_error() records the error bits in 'mask' as pending and fires
+ * the error LVT entry.  vlapic_fire_cmci() fires the CMCI LVT entry.
+ * vlapic_trigger_lvt() fires the LVT entry selected by 'vector', which is
+ * an APIC_LVT_* index; it returns EINVAL for an unrecognized index while
+ * the APIC is enabled and 0 otherwise.
+ */
+void vlapic_set_error(struct vlapic *vlapic, uint32_t mask);
+void vlapic_fire_cmci(struct vlapic *vlapic);
+int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
+
+/*
+ * Accessors for the emulated APIC_BASE MSR.  Changing the value is not
+ * supported: vlapic_set_apicbase() returns -1 if 'val' differs from the
+ * current value and 0 otherwise.
+ */
+uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
+int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
+void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
+/* True when the APIC is enabled in both the APIC_BASE MSR and the SVR. */
+bool vlapic_enabled(struct vlapic *vlapic);
+
+/*
+ * Deliver an interrupt to the vcpus selected by (dest, phys, delmode).
+ * Only the fixed, lowest-priority and ExtINT delivery modes are accepted.
+ */
+void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec);
+
+/* Reset the trigger-mode bits for all vectors to be edge-triggered */
+void vlapic_reset_tmr(struct vlapic *vlapic);
+
+/*
+ * Set the trigger-mode bit associated with 'vector' to level-triggered if
+ * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to
+ * this 'vlapic'.
+ */
+void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
+    int delmode, int vector);
+
+/*
+ * CR8 emulation: CR8 maps onto bits 7:4 of the Task Priority Register.
+ * Writing a value with any bit above the low four set injects #GP.
+ */
+void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
+uint64_t vlapic_get_cr8(struct vlapic *vlapic);
+
+/* APIC write handlers */
+void vlapic_id_write_handler(struct vlapic *vlapic);
+void vlapic_ldr_write_handler(struct vlapic *vlapic);
+void vlapic_dfr_write_handler(struct vlapic *vlapic);
+void vlapic_svr_write_handler(struct vlapic *vlapic);
+void vlapic_esr_write_handler(struct vlapic *vlapic);
+int vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu);
+void vlapic_icrtmr_write_handler(struct vlapic *vlapic);
+void vlapic_dcr_write_handler(struct vlapic *vlapic);
+void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
+void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
+#endif	/* _VLAPIC_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
new file mode 100644
index 0000000000..f9bd2e0e8b
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -0,0 +1,190 @@
+/*-
+ * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/io/vlapic_priv.h 263211 2014-03-15 23:09:34Z tychon $
+ */
+
+#ifndef _VLAPIC_PRIV_H_
+#define	_VLAPIC_PRIV_H_
+
+#include <x86/apicreg.h>
+
+/*
+ * APIC Register:		Offset	   Description
+ */
+#define APIC_OFFSET_ID		0x20	/* Local APIC ID		*/
+#define APIC_OFFSET_VER		0x30	/* Local APIC Version		*/
+#define APIC_OFFSET_TPR		0x80	/* Task Priority Register	*/
+#define APIC_OFFSET_APR		0x90	/* Arbitration Priority		*/
+#define APIC_OFFSET_PPR		0xA0	/* Processor Priority Register	*/
+#define APIC_OFFSET_EOI		0xB0	/* EOI Register			*/
+#define APIC_OFFSET_RRR		0xC0	/* Remote read			*/
+#define APIC_OFFSET_LDR		0xD0	/* Logical Destination		*/
+#define APIC_OFFSET_DFR		0xE0	/* Destination Format Register	*/
+#define APIC_OFFSET_SVR		0xF0	/* Spurious Vector Register	*/
+#define APIC_OFFSET_ISR0	0x100	/* In Service Register		*/
+#define APIC_OFFSET_ISR1	0x110
+#define APIC_OFFSET_ISR2	0x120
+#define APIC_OFFSET_ISR3	0x130
+#define APIC_OFFSET_ISR4	0x140
+#define APIC_OFFSET_ISR5	0x150
+#define APIC_OFFSET_ISR6	0x160
+#define APIC_OFFSET_ISR7	0x170
+#define APIC_OFFSET_TMR0	0x180	/* Trigger Mode Register	*/
+#define APIC_OFFSET_TMR1	0x190
+#define APIC_OFFSET_TMR2	0x1A0
+#define APIC_OFFSET_TMR3	0x1B0
+#define APIC_OFFSET_TMR4	0x1C0
+#define APIC_OFFSET_TMR5	0x1D0
+#define APIC_OFFSET_TMR6	0x1E0
+#define APIC_OFFSET_TMR7	0x1F0
+#define APIC_OFFSET_IRR0	0x200	/* Interrupt Request Register	*/
+#define APIC_OFFSET_IRR1	0x210
+#define APIC_OFFSET_IRR2	0x220
+#define APIC_OFFSET_IRR3	0x230
+#define APIC_OFFSET_IRR4	0x240
+#define APIC_OFFSET_IRR5	0x250
+#define APIC_OFFSET_IRR6	0x260
+#define APIC_OFFSET_IRR7	0x270
+#define APIC_OFFSET_ESR		0x280	/* Error Status Register	*/
+#define APIC_OFFSET_CMCI_LVT	0x2F0	/* Local Vector Table (CMCI)	*/
+#define APIC_OFFSET_ICR_LOW	0x300	/* Interrupt Command Register	*/
+#define APIC_OFFSET_ICR_HI	0x310
+#define APIC_OFFSET_TIMER_LVT	0x320	/* Local Vector Table (Timer)	*/
+#define APIC_OFFSET_THERM_LVT	0x330	/* Local Vector Table (Thermal)	*/
+#define APIC_OFFSET_PERF_LVT	0x340	/* Local Vector Table (PMC)	*/
+#define APIC_OFFSET_LINT0_LVT	0x350	/* Local Vector Table (LINT0)	*/
+#define APIC_OFFSET_LINT1_LVT	0x360	/* Local Vector Table (LINT1)	*/
+#define APIC_OFFSET_ERROR_LVT	0x370	/* Local Vector Table (ERROR)	*/
+#define APIC_OFFSET_TIMER_ICR	0x380	/* Timer's Initial Count	*/
+#define APIC_OFFSET_TIMER_CCR	0x390	/* Timer's Current Count	*/
+#define APIC_OFFSET_TIMER_DCR	0x3E0	/* Timer's Divide Configuration	*/
+#define	APIC_OFFSET_SELF_IPI	0x3F0	/* Self IPI register */
+
+/*
+ * Trace helpers: each forwards to the VCPU_CTRn() macro of the same arity,
+ * supplying the 'vm' and 'vcpuid' members of 'vlapic' so that a caller
+ * holding only a 'struct vlapic *' does not have to spell them out.
+ */
+#define	VLAPIC_CTR0(vlapic, format)					\
+	VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
+
+#define	VLAPIC_CTR1(vlapic, format, p1)					\
+	VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
+
+#define	VLAPIC_CTR2(vlapic, format, p1, p2)				\
+	VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
+
+#define	VLAPIC_CTR3(vlapic, format, p1, p2, p3)				\
+	VCPU_CTR3((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2, p3)
+
+/*
+ * Emit one trace record for each of the eight IRR words (irr0 .. irr7).
+ *
+ * 'msg' must be a string literal: it is pasted in front of the format by
+ * adjacent-literal concatenation.
+ *
+ * 'irrptr' is a uint32_t pointer, so indexing with 'n << 2' advances four
+ * 32-bit words (16 bytes) per register.  That is the same 0x10 spacing the
+ * APIC_OFFSET_IRRn constants use.
+ * NOTE(review): this relies on 'struct LAPIC' laying irr0..irr7 out with
+ * that stride; the structure is declared in <x86/apicreg.h>, not here.
+ */
+#define	VLAPIC_CTR_IRR(vlapic, msg)					\
+do {									\
+	uint32_t *irrptr = &(vlapic)->apic_page->irr0;			\
+	irrptr[0] = irrptr[0];	/* silence compiler */			\
+	VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]);	\
+} while (0)
+
+/*
+ * Emit one trace record for each of the eight ISR words (isr0 .. isr7).
+ *
+ * 'msg' must be a string literal: it is pasted in front of the format by
+ * adjacent-literal concatenation.
+ *
+ * 'isrptr' is a uint32_t pointer, so indexing with 'n << 2' advances four
+ * 32-bit words (16 bytes) per register.  That is the same 0x10 spacing the
+ * APIC_OFFSET_ISRn constants use.
+ * NOTE(review): this relies on 'struct LAPIC' laying isr0..isr7 out with
+ * that stride; the structure is declared in <x86/apicreg.h>, not here.
+ */
+#define	VLAPIC_CTR_ISR(vlapic, msg)					\
+do {									\
+	uint32_t *isrptr = &(vlapic)->apic_page->isr0;			\
+	isrptr[0] = isrptr[0];	/* silence compiler */			\
+	VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]);	\
+	VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);	\
+} while (0)
+
+/*
+ * Value type of the 'boot_state' member of struct vlapic.
+ * NOTE(review): the names suggest the INIT -> SIPI -> running start-up
+ * sequence of a processor; the transitions themselves are not visible in
+ * this header, so confirm against the code that updates 'boot_state'.
+ */
+enum boot_state {
+	BS_INIT,
+	BS_SIPI,
+	BS_RUNNING
+};
+
+/*
+ * 16 priority levels with at most one vector injected per level.
+ */
+#define	ISRVEC_STK_SIZE		(16 + 1)
+
+#define VLAPIC_MAXLVT_INDEX	APIC_LVT_CMCI
+
+struct vlapic;
+
+/*
+ * Table of function pointers stored by value in every struct vlapic (its
+ * 'ops' member).  Each hook receives the vlapic it belongs to as its first
+ * argument.
+ * NOTE(review): presumably filled in by the processor-specific backend to
+ * override the software emulation; confirm against the code that
+ * populates it.
+ */
+struct vlapic_ops {
+	int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level);
+	int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
+	void (*intr_accepted)(struct vlapic *vlapic, int vector);
+	void (*post_intr)(struct vlapic *vlapic, int hostcpu);
+	void (*set_tmr)(struct vlapic *vlapic, int vector, bool level);
+	void (*enable_x2apic_mode)(struct vlapic *vlapic);
+};
+
+/*
+ * State of one virtual local APIC.  The instance is identified by the
+ * owning 'vm' together with 'vcpuid'; 'apic_page' points at the register
+ * page that the VLAPIC_CTR_IRR/VLAPIC_CTR_ISR macros read from.
+ */
+struct vlapic {
+	struct vm		*vm;
+	int			vcpuid;
+	struct LAPIC		*apic_page;
+	struct vlapic_ops	ops;
+
+	uint32_t		esr_pending;
+	int			esr_firing;
+
+	struct callout	callout;	/* vlapic timer */
+	struct bintime	timer_fire_bt;	/* callout expiry time */
+	struct bintime	timer_freq_bt;	/* timer frequency */
+	struct bintime	timer_period_bt; /* timer period */
+	struct mtx	timer_mtx;
+
+	/*
+	 * The 'isrvec_stk' is a stack of vectors injected by the local apic.
+	 * A vector is popped from the stack when the processor does an EOI.
+	 * The vector on the top of the stack is used to compute the
+	 * Processor Priority in conjunction with the TPR.
+	 */
+	uint8_t		isrvec_stk[ISRVEC_STK_SIZE];
+	int		isrvec_stk_top;
+
+	uint64_t	msr_apicbase;
+	enum boot_state	boot_state;
+
+	/*
+	 * Copies of some registers in the virtual APIC page. We do this for
+	 * a couple of different reasons:
+	 * - to be able to detect what changed (e.g. svr_last)
+	 * - to maintain a coherent snapshot of the register (e.g. lvt_last)
+	 *
+	 * 'lvt_last' has one slot per LVT index, 0 .. VLAPIC_MAXLVT_INDEX.
+	 */
+	uint32_t	svr_last;
+	uint32_t	lvt_last[VLAPIC_MAXLVT_INDEX + 1];
+};
+
+void vlapic_init(struct vlapic *vlapic);
+void vlapic_cleanup(struct vlapic *vlapic);
+
+#endif	/* _VLAPIC_PRIV_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/offsets.in b/usr/src/uts/i86pc/io/vmm/offsets.in
new file mode 100644
index 0000000000..4b1fe1d6b6
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/offsets.in
@@ -0,0 +1,72 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuvar.h>
+
+#include <machine/pmap.h>
+
+#include <machine/vmm.h>
+#include "intel/vmx_cpufunc.h"
+#include "intel/vmx.h"
+
+vmxctx
+	tmpstktop	VMXCTX_TMPSTKTOP
+	guest_rdi	VMXCTX_GUEST_RDI
+	guest_rsi	VMXCTX_GUEST_RSI
+	guest_rdx	VMXCTX_GUEST_RDX
+	guest_rcx	VMXCTX_GUEST_RCX
+	guest_r8	VMXCTX_GUEST_R8
+	guest_r9	VMXCTX_GUEST_R9
+	guest_rax	VMXCTX_GUEST_RAX
+	guest_rbx	VMXCTX_GUEST_RBX
+	guest_rbp	VMXCTX_GUEST_RBP
+	guest_r10	VMXCTX_GUEST_R10
+	guest_r11	VMXCTX_GUEST_R11
+	guest_r12	VMXCTX_GUEST_R12
+	guest_r13	VMXCTX_GUEST_R13
+	guest_r14	VMXCTX_GUEST_R14
+	guest_r15	VMXCTX_GUEST_R15
+	guest_cr2	VMXCTX_GUEST_CR2
+	host_r15	VMXCTX_HOST_R15
+	host_r14	VMXCTX_HOST_R14
+	host_r13	VMXCTX_HOST_R13
+	host_r12	VMXCTX_HOST_R12
+	host_rbp	VMXCTX_HOST_RBP
+	host_rsp	VMXCTX_HOST_RSP
+	host_rbx	VMXCTX_HOST_RBX
+	host_rip	VMXCTX_HOST_RIP
+	launch_error	VMXCTX_LAUNCH_ERROR
+
+vmx			VMX_SIZE
+
+\#define	VM_SUCCESS		0
+\#define	VM_FAIL_INVALID		1
+\#define	VM_FAIL_VALID		2
+
+\#define	VMX_RETURN_DIRECT	0
+\#define	VMX_RETURN_LONGJMP	1
+\#define	VMX_RETURN_VMRESUME	2
+\#define	VMX_RETURN_VMLAUNCH	3
+\#define	VMX_RETURN_AST		4
+
+cpu
+	cpu_thread
+
+_kthread
+	t_lwp
+	_tu._ts._t_astflag	T_ASTFLAG
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
new file mode 100644
index 0000000000..7081368f4a
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -0,0 +1,1894 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm.c 280929 2015-04-01 00:15:31Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm.c 280929 2015-04-01 00:15:31Z tychon $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <x86/psl.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include "vmm_ioport.h"
+#include "vmm_ktr.h"
+#include "vmm_host.h"
+#include "vmm_mem.h"
+#include "vmm_util.h"
+#include "vatpic.h"
+#include "vatpit.h"
+#include "vhpet.h"
+#include "vioapic.h"
+#include "vlapic.h"
+#include "vmm_ipi.h"
+#include "vmm_stat.h"
+#include "vmm_lapic.h"
+
+#ifdef	__FreeBSD__
+#include "io/ppt.h"
+#include "io/iommu.h"
+#endif
+
+struct vhpet;
+struct vioapic;
+struct vlapic;
+
+/*
+ * Per-virtual-cpu state kept by the generic vmm layer; one entry per slot
+ * of the 'vcpu' array in struct vm.  'state' and 'hostcpu' are changed by
+ * vcpu_set_state_locked(), which asserts that 'mtx' is held.
+ */
+struct vcpu {
+	int		flags;
+	enum vcpu_state	state;
+	struct mtx	mtx;
+	int		hostcpu;	/* host cpu while RUNNING, else NOCPU */
+	struct vlapic	*vlapic;
+	int		 vcpuid;
+	struct savefpu	*guestfpu;	/* guest fpu state */
+	void		*stats;
+	struct vm_exit	exitinfo;
+	uint64_t	nextrip;	/* (x) next instruction to execute */
+	enum x2apic_state x2apic_state;
+	uint64_t	exitintinfo;
+	int		nmi_pending;
+	int		extint_pending;
+	struct vm_exception exception;
+	int		exception_pending;
+};
+
+#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
+#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
+#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
+
+#define	VM_MAX_MEMORY_SEGMENTS	8
+
+/*
+ * One virtual machine instance.  Allocated zeroed by vm_create() and
+ * released by vm_destroy().  'mem_segs' holds 'num_mem_segs' valid entries
+ * (at most VM_MAX_MEMORY_SEGMENTS), appended by vm_malloc().
+ */
+struct vm {
+	void		*cookie;	/* processor-specific data */
+	void		*iommu;		/* iommu-specific data */
+	struct vcpu	vcpu[VM_MAXCPU];
+	struct vhpet	*vhpet;
+	struct vioapic	*vioapic;	/* virtual ioapic */
+	struct vatpic	*vatpic;	/* virtual atpic */
+	struct vatpit	*vatpit;	/* virtual atpit */
+	int		num_mem_segs;
+	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
+	char		name[VM_MAX_NAMELEN];
+
+	/*
+	 * Set of active vcpus.
+	 * An active vcpu is one that has been started implicitly (BSP) or
+	 * explicitly (AP) by sending it a startup ipi.
+	 */
+	cpuset_t	active_cpus;
+
+	vm_rendezvous_func_t rendezvous_func;
+};
+
+static int vmm_initialized;
+
+/*
+ * Backend operations vector, selected in vmm_init() according to the host
+ * cpu vendor.  Every wrapper macro below dispatches through 'ops' when it
+ * is non-NULL and otherwise evaluates to a fixed fallback (0, NULL or
+ * ENXIO depending on the macro).
+ */
+static struct vmm_ops *ops;
+#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
+#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
+
+#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm): NULL)
+#define	VMRUN(vmi, vcpu, rip) \
+	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
+#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
+    	(ops != NULL ? 							\
+    	(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
+	ENXIO)
+/*
+ * NOTE(review): the fallback here is ENXIO, but the callers of
+ * vm_gpa2hpa() in this file test the result against (vm_paddr_t)-1 to
+ * detect failure, so the fallback would be mistaken for a valid host
+ * physical address.  It is only reachable while 'ops' is NULL.
+ */
+#define	VMMMAP_GET(vmi, gpa) \
+	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
+#define	VMGETREG(vmi, vcpu, num, retval)		\
+	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define	VMSETREG(vmi, vcpu, num, val)		\
+	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define	VMGETDESC(vmi, vcpu, num, desc)		\
+	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
+#define	VMSETDESC(vmi, vcpu, num, desc)		\
+	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
+#define	VMGETCAP(vmi, vcpu, num, retval)	\
+	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define	VMSETCAP(vmi, vcpu, num, val)		\
+	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+#define	VLAPIC_INIT(vmi, vcpu)			\
+	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
+#define	VLAPIC_CLEANUP(vmi, vlapic)		\
+	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
+
+#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
+#define	fpu_stop_emulating()	clts()
+
+static MALLOC_DEFINE(M_VM, "vm", "vm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+    "IPI vector used for vcpu notifications");
+
+/*
+ * Release the resources held by vcpu slot 'i' of 'vm': the vlapic (through
+ * the backend cleanup hook) and the guest FPU save area.  The statistics
+ * buffer is only freed in the FreeBSD build.
+ */
+static void
+vcpu_cleanup(struct vm *vm, int i)
+{
+	struct vcpu *vc;
+
+	vc = &vm->vcpu[i];
+
+	VLAPIC_CLEANUP(vm->cookie, vc->vlapic);
+#ifdef	__FreeBSD__
+	vmm_stat_free(vc->stats);
+#endif
+	fpu_save_area_free(vc->guestfpu);
+}
+
+/*
+ * Initialize vcpu slot 'vcpu_id' of 'vm': set up its lock, mark it as not
+ * running on any host cpu, create its vlapic through the backend hook,
+ * force the x2apic state to X2APIC_DISABLED and allocate a freshly reset
+ * guest FPU save area.  The statistics buffer is only allocated in the
+ * FreeBSD build.
+ */
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id)
+{
+	struct vcpu *vc = &vm->vcpu[vcpu_id];
+
+	vcpu_lock_init(vc);
+	vc->hostcpu = NOCPU;
+	vc->vcpuid = vcpu_id;
+	vc->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
+	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+	vc->exitintinfo = 0;
+	vc->guestfpu = fpu_save_area_alloc();
+	fpu_save_area_reset(vc->guestfpu);
+#ifdef	__FreeBSD__
+	vc->stats = vmm_stat_alloc();
+#endif
+}
+
+/*
+ * Return a pointer to the exit information record of vcpu 'cpuid'.
+ * Panics when 'cpuid' is outside [0, VM_MAXCPU).
+ */
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+	if (!(cpuid >= 0 && cpuid < VM_MAXCPU))
+		panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+	return (&vm->vcpu[cpuid].exitinfo);
+}
+
+/*
+ * One-time module initialization: bring up the platform glue and host
+ * state, initialize the memory allocator, pick the backend matching the
+ * host cpu vendor and run its init hook.
+ *
+ * Returns 0 on success, the error from vmm_mem_init() or the backend, or
+ * ENXIO when the host cpu is neither Intel nor AMD.
+ */
+static int
+vmm_init(void)
+{
+	int rv;
+
+#ifndef	__FreeBSD__
+	vmm_sol_glue_init();
+#endif
+
+	vmm_host_state_init();
+#ifdef	__FreeBSD__
+	vmm_ipi_init();
+#endif
+
+	if ((rv = vmm_mem_init()) != 0)
+		return (rv);
+
+	if (vmm_is_intel()) {
+		ops = &vmm_ops_intel;
+	} else if (vmm_is_amd()) {
+		ops = &vmm_ops_amd;
+	} else {
+		return (ENXIO);
+	}
+
+	return (VMM_INIT());
+}
+
+#ifdef	__FreeBSD__
+/*
+ * FreeBSD module event handler (this whole function is compiled only when
+ * __FreeBSD__ is defined).
+ *
+ * MOD_LOAD:   initialize the device layer, the iommu (only when passthru
+ *             devices exist) and the vmm core; 'vmm_initialized' is set
+ *             only when vmm_init() succeeds.
+ * MOD_UNLOAD: tear down in the reverse direction; if the backend cleanup
+ *             fails, 'vmm_initialized' is cleared so no new VMs can be
+ *             created.
+ * Any other event is accepted and ignored.
+ *
+ * Returns 0 or the error from the failing step.
+ */
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+	int error;
+
+	switch (what) {
+	case MOD_LOAD:
+		vmmdev_init();
+		if (ppt_num_devices() > 0)
+			iommu_init();
+		error = vmm_init();
+		if (error == 0)
+			vmm_initialized = 1;
+		break;
+	case MOD_UNLOAD:
+		error = vmmdev_cleanup();
+		if (error == 0) {
+			/*
+			 * NOTE(review): this #ifndef can never be true here,
+			 * because the enclosing function is only built when
+			 * __FreeBSD__ is defined; the call is dead code.
+			 */
+#ifndef	__FreeBSD__
+			vmm_sol_glue_cleanup();
+#endif
+			iommu_cleanup();
+			vmm_ipi_cleanup();
+			error = VMM_CLEANUP();
+			/*
+			 * Something bad happened - prevent new
+			 * VMs from being created
+			 */
+			if (error)
+				vmm_initialized = 0;
+		}
+		break;
+	default:
+		error = 0;
+		break;
+	}
+	return (error);
+}
+
+static moduledata_t vmm_kmod = {
+	"vmm",
+	vmm_handler,
+	NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - iommu initialization must happen after the pci passthru driver has had
+ *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
+ *
+ * - VT-x initialization requires smp_rendezvous() and therefore must happen
+ *   after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+#else
+/*
+ * Module load entry point for the non-FreeBSD build: initialize the device
+ * layer and then the vmm core.  'vmm_initialized' is set only when
+ * vmm_init() succeeds, which is what allows vm_create() to proceed.
+ *
+ * Returns 0 on success or the error from vmm_init().
+ */
+int
+vmm_mod_load(void)
+{
+	int	error;
+
+	vmmdev_init();
+	error = vmm_init();
+	if (error == 0)
+		vmm_initialized = 1;
+
+	return (error);
+}
+
+/*
+ * Module unload entry point for the non-FreeBSD build: tear down the
+ * device layer, then the backend.  'vmm_initialized' is cleared only when
+ * both steps succeed.
+ *
+ * Returns 0 on success or the error from the step that failed.
+ *
+ * NOTE(review): vmm_init() calls vmm_sol_glue_init() in this build, but
+ * nothing on this path calls a matching cleanup routine.  Confirm whether
+ * the glue layer needs to be torn down here.
+ */
+int
+vmm_mod_unload(void)
+{
+	int	error;
+
+	error = vmmdev_cleanup();
+	if (error)
+		return (error);
+	error = VMM_CLEANUP();
+	if (error)
+		return (error);
+	vmm_initialized = 0;
+
+	return (0);
+}
+#endif
+
+/*
+ * Create a virtual machine called 'name' and hand it back through 'retvm'.
+ *
+ * The vm structure is allocated zeroed, the backend instance and the
+ * in-kernel device models (ioapic, hpet, atpic, atpit) are created, and
+ * every vcpu slot is initialized.  The iommu domain is only created in the
+ * FreeBSD build.
+ *
+ * Returns 0 on success, ENXIO when the module failed to initialize, or
+ * EINVAL when 'name' is NULL or does not fit in VM_MAX_NAMELEN bytes
+ * including the terminating NUL.
+ */
+int
+vm_create(const char *name, struct vm **retvm)
+{
+	int i;
+	struct vm *vm;
+#ifdef	__FreeBSD__
+	vm_paddr_t maxaddr;
+#endif
+
+	/*
+	 * If vmm.ko could not be successfully initialized then don't attempt
+	 * to create the virtual machine.
+	 */
+	if (!vmm_initialized)
+		return (ENXIO);
+
+	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+		return (EINVAL);
+
+	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+	/* The length was validated above, so the copy cannot overflow. */
+	strcpy(vm->name, name);
+	vm->cookie = VMINIT(vm);
+
+	vm->vioapic = vioapic_init(vm);
+	vm->vhpet = vhpet_init(vm);
+	vm->vatpic = vatpic_init(vm);
+	vm->vatpit = vatpit_init(vm);
+
+	for (i = 0; i < VM_MAXCPU; i++) {
+		vcpu_init(vm, i);
+	}
+
+#ifdef	__FreeBSD__
+	maxaddr = vmm_mem_maxaddr();
+	vm->iommu = iommu_create_domain(maxaddr);
+#endif
+
+	*retvm = vm;
+	return (0);
+}
+
+/*
+ * Free every page backing memory segment 'seg' of 'vm' and then zero the
+ * segment descriptor.
+ *
+ * The segment is walked one page at a time: the guest physical address is
+ * translated with vm_gpa2hpa() and the resulting host page is returned to
+ * the vmm memory allocator.  A page that cannot be translated is a fatal
+ * inconsistency and panics.  The iommu bookkeeping is only performed in
+ * the FreeBSD build.
+ */
+static void
+vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+{
+	size_t len;
+	vm_paddr_t hpa;
+#ifdef	__FreeBSD__
+	void *host_domain;
+
+	host_domain = iommu_host_domain();
+#endif
+
+	len = 0;
+	while (len < seg->len) {
+		hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
+		if (hpa == (vm_paddr_t)-1) {
+			panic("%s: cannot free hpa "
+			      "associated with gpa 0x%016lx", __func__,
+			      seg->gpa + len);
+		}
+
+#ifdef	__FreeBSD__
+		/*
+		 * Remove the 'gpa' to 'hpa' mapping in VMs domain.
+		 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
+		 */
+		iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
+		iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);
+#endif
+
+		vmm_mem_free(hpa, PAGE_SIZE);
+
+		len += PAGE_SIZE;
+	}
+
+#ifdef	__FreeBSD__
+	/*
+	 * Invalidate cached translations associated with 'vm->iommu' since
+	 * we have now moved some pages from it.
+	 */
+	iommu_invalidate_tlb(vm->iommu);
+#endif
+
+	bzero(seg, sizeof(struct vm_memory_segment));
+}
+
+/*
+ * Tear down 'vm' and free it.  The order is: guest memory segments, the
+ * per-vcpu state, the in-kernel device models, and finally the backend
+ * instance ('cookie') before the vm structure itself is released.
+ * Passthru device and iommu teardown only happens in the FreeBSD build.
+ */
+void
+vm_destroy(struct vm *vm)
+{
+	int i;
+
+#ifdef	__FreeBSD__
+	ppt_unassign_all(vm);
+#endif
+
+	/* vm_free_mem_seg() translates through the backend, so do it first */
+	for (i = 0; i < vm->num_mem_segs; i++)
+		vm_free_mem_seg(vm, &vm->mem_segs[i]);
+
+	vm->num_mem_segs = 0;
+
+	for (i = 0; i < VM_MAXCPU; i++)
+		vcpu_cleanup(vm, i);
+
+	vatpit_cleanup(vm->vatpit);
+	vhpet_cleanup(vm->vhpet);
+	vatpic_cleanup(vm->vatpic);
+	vioapic_cleanup(vm->vioapic);
+
+#ifdef	__FreeBSD__
+	iommu_destroy_domain(vm->iommu);
+#endif
+
+	VMCLEANUP(vm->cookie);
+
+	free(vm, M_VM);
+}
+
+/*
+ * Return the name given to 'vm' at creation time.  The pointer refers to
+ * storage inside the vm structure.
+ */
+const char *
+vm_name(struct vm *vm)
+{
+	const char *nm = vm->name;
+
+	return (nm);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * Ask the backend to map 'len' bytes at guest physical address 'gpa' onto
+ * host physical address 'hpa', uncacheable and read/write.
+ * Returns the backend's result.
+ */
+int
+vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+	int rv;
+
+	/* the trailing TRUE means superpage mappings are ok */
+	rv = VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
+	    VM_PROT_RW, TRUE);
+
+	return (rv);
+}
+
+/*
+ * Ask the backend to replace the mapping for 'len' bytes at guest physical
+ * address 'gpa' with one that has no access rights (VM_PROT_NONE).
+ * Returns the backend's result.
+ */
+int
+vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+	int rv;
+
+	/* the trailing TRUE means superpage mappings are ok */
+	rv = VMMMAP_SET(vm->cookie, gpa, 0, len, 0, VM_PROT_NONE, TRUE);
+
+	return (rv);
+}
+#endif
+
+/*
+ * Report whether the page at 'gpa' is free for allocation: TRUE when no
+ * existing memory segment of 'vm' covers it, FALSE otherwise.
+ * Panics if 'gpa' is not page aligned.
+ */
+static boolean_t
+vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
+{
+	struct vm_memory_segment *seg, *end;
+	vm_paddr_t base, limit;
+
+	if ((gpa & PAGE_MASK) != 0)
+		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
+
+	end = &vm->mem_segs[vm->num_mem_segs];
+	for (seg = &vm->mem_segs[0]; seg < end; seg++) {
+		base = seg->gpa;
+		limit = base + seg->len;
+		if (gpa < base)
+			continue;
+		if (gpa < limit)
+			return (FALSE);
+	}
+
+	return (TRUE);
+}
+
+/*
+ * Allocate host memory for the guest physical range [gpa, gpa + len) and
+ * record it as a new memory segment of 'vm'.
+ *
+ * 'gpa' and 'len' must be page aligned, 'len' must be non-zero and the
+ * range must not wrap around the end of the address space.
+ *
+ * Returns:
+ *   0       the range is now backed (or was already fully backed)
+ *   EINVAL  bad alignment/length, wrapping range, or the range is only
+ *           partially covered by existing segments
+ *   E2BIG   all VM_MAX_MEMORY_SEGMENTS slots are in use
+ *   ENOMEM  the host page allocator ran dry
+ *   other   error reported by the backend while installing a mapping
+ *
+ * On failure every page obtained so far is released again.
+ */
+int
+vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+	int error, available, allocated;
+	struct vm_memory_segment *seg;
+	vm_paddr_t g, hpa;
+#ifdef	__FreeBSD__
+	void *host_domain;
+#endif
+
+	const boolean_t spok = TRUE;	/* superpage mappings are ok */
+
+	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
+		return (EINVAL);
+
+	/*
+	 * A range that wraps would make the scan below run zero times and
+	 * be mistaken for "nothing allocated yet".
+	 */
+	if (gpa + len < gpa)
+		return (EINVAL);
+
+	available = allocated = 0;
+	g = gpa;
+	while (g < gpa + len) {
+		if (vm_gpa_available(vm, g))
+			available++;
+		else
+			allocated++;
+
+		g += PAGE_SIZE;
+	}
+
+	/*
+	 * If there are some allocated and some available pages in the address
+	 * range then it is an error.
+	 */
+	if (allocated && available)
+		return (EINVAL);
+
+	/*
+	 * If the entire address range being requested has already been
+	 * allocated then there isn't anything more to do.
+	 */
+	if (allocated && available == 0)
+		return (0);
+
+	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
+		return (E2BIG);
+
+#ifdef	__FreeBSD__
+	host_domain = iommu_host_domain();
+#endif
+
+	seg = &vm->mem_segs[vm->num_mem_segs];
+
+	error = 0;
+	seg->gpa = gpa;
+	seg->len = 0;
+	while (seg->len < len) {
+		hpa = vmm_mem_alloc(PAGE_SIZE);
+		if (hpa == 0) {
+			error = ENOMEM;
+			break;
+		}
+
+		error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
+				   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
+		if (error) {
+			/*
+			 * This page was never accounted for in 'seg->len',
+			 * so vm_free_mem_seg() below will not see it; give
+			 * it back here to avoid leaking it.
+			 */
+			vmm_mem_free(hpa, PAGE_SIZE);
+			break;
+		}
+
+#ifdef	__FreeBSD__
+		/*
+		 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
+		 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
+		 */
+		iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
+		iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);
+#endif
+
+		seg->len += PAGE_SIZE;
+	}
+
+	if (error) {
+		/* releases the pages mapped so far and zeroes 'seg' */
+		vm_free_mem_seg(vm, seg);
+		return (error);
+	}
+
+#ifdef	__FreeBSD__
+	/*
+	 * Invalidate cached translations associated with 'host_domain' since
+	 * we have now moved some pages from it.
+	 */
+	iommu_invalidate_tlb(host_domain);
+#endif
+
+	vm->num_mem_segs++;
+
+	return (0);
+}
+
+/*
+ * Translate guest physical address 'gpa' to a host physical address by
+ * asking the backend.  The access described by 'gpa'/'len' must not extend
+ * past the end of the page containing 'gpa'; a request that does panics.
+ */
+vm_paddr_t
+vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+	vm_paddr_t page_end, room;
+
+	/* first byte of the page following the one that holds 'gpa' */
+	page_end = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
+	room = page_end - gpa;
+
+	if (len > room)
+		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+	return (VMMMAP_GET(vm->cookie, gpa));
+}
+
+/*
+ * Return a host kernel virtual address through which the 'len' bytes at
+ * guest physical address 'gpa' can be accessed, or NULL if the address
+ * cannot be resolved.  The range must lie within a single page; a request
+ * that crosses a page boundary panics.
+ *
+ * '*cookie' receives the token to hand to vm_gpa_release().  In the
+ * FreeBSD build that is the held page (NULL on failure).  In the other
+ * build nothing is held, so '*cookie' is always set to NULL rather than
+ * being left indeterminate for the caller.
+ */
+void *
+vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+	    void **cookie)
+{
+#ifdef	__FreeBSD__
+	int count, pageoff;
+	vm_page_t m;
+
+	pageoff = gpa & PAGE_MASK;
+	if (len > PAGE_SIZE - pageoff)
+		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+
+	if (count == 1) {
+		*cookie = m;
+		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+	} else {
+		*cookie = NULL;
+		return (NULL);
+	}
+#else
+	int pageoff;
+	vm_paddr_t hpa;
+
+	/* No page is held in this build, so there is nothing to release. */
+	if (cookie != NULL)
+		*cookie = NULL;
+
+	pageoff = gpa & PAGE_MASK;
+	if (len > PAGE_SIZE - pageoff)
+		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+	hpa = vm_gpa2hpa(vm, gpa, len);
+	if (hpa == (vm_paddr_t)-1)
+		return (NULL);
+
+	return (hat_kpm_pfn2va(btop(hpa)) + pageoff);
+#endif
+}
+
+/*
+ * Drop the hold represented by 'cookie', as produced by vm_gpa_hold().
+ * Only the FreeBSD build holds a page; in the other build this function
+ * does nothing.
+ */
+void
+vm_gpa_release(void *cookie)
+{
+#ifdef	__FreeBSD__
+	vm_page_t pg;
+
+	pg = cookie;
+	vm_page_lock(pg);
+	vm_page_unhold(pg);
+	vm_page_unlock(pg);
+#endif
+}
+
+/*
+ * Look up the memory segment of 'vm' whose base address is exactly
+ * 'gpabase' and copy its descriptor into '*seg'.
+ * Returns 0 when found and -1 otherwise ('*seg' is then left untouched).
+ */
+int
+vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
+		  struct vm_memory_segment *seg)
+{
+	struct vm_memory_segment *cur;
+	int idx;
+
+	idx = 0;
+	while (idx < vm->num_mem_segs) {
+		cur = &vm->mem_segs[idx];
+		if (cur->gpa == gpabase) {
+			*seg = *cur;
+			return (0);
+		}
+		idx++;
+	}
+
+	return (-1);
+}
+
+/*
+ * Read guest register 'reg' of vcpu 'vcpu' into '*retval' via the backend.
+ *
+ * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or 'reg' is outside
+ * [0, VM_REG_LAST); otherwise the backend's result.
+ */
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU)
+		return (EINVAL);
+
+	/* 'reg' is a signed int, so the lower bound must be checked too */
+	if (reg < 0 || reg >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+/*
+ * Write 'val' to guest register 'reg' of vcpu 'vcpuid' via the backend.
+ * When the register is the guest %rip and the write succeeds, the cached
+ * 'nextrip' of the vcpu is updated to match.
+ *
+ * Returns EINVAL when 'vcpuid' is outside [0, VM_MAXCPU) or 'reg' is
+ * outside [0, VM_REG_LAST); otherwise the backend's result.
+ */
+int
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
+{
+	struct vcpu *vcpu;
+	int error;
+
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	/* 'reg' is a signed int, so the lower bound must be checked too */
+	if (reg < 0 || reg >= VM_REG_LAST)
+		return (EINVAL);
+
+	error = VMSETREG(vm->cookie, vcpuid, reg, val);
+	if (error || reg != VM_REG_GUEST_RIP)
+		return (error);
+
+	/* Set 'nextrip' to match the value of %rip */
+	VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
+	vcpu = &vm->vcpu[vcpuid];
+	vcpu->nextrip = val;
+	return (0);
+}
+
+/*
+ * TRUE when 'reg' names one of the guest descriptor-table registers
+ * (IDTR or GDTR), FALSE otherwise.
+ */
+static boolean_t
+is_descriptor_table(int reg)
+{
+	if (reg == VM_REG_GUEST_IDTR || reg == VM_REG_GUEST_GDTR)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+/*
+ * TRUE when 'reg' names one of the guest segment registers
+ * (ES, CS, SS, DS, FS, GS, TR or LDTR), FALSE otherwise.
+ */
+static boolean_t
+is_segment_register(int reg)
+{
+	if (reg == VM_REG_GUEST_ES || reg == VM_REG_GUEST_CS ||
+	    reg == VM_REG_GUEST_SS || reg == VM_REG_GUEST_DS ||
+	    reg == VM_REG_GUEST_FS || reg == VM_REG_GUEST_GS ||
+	    reg == VM_REG_GUEST_TR || reg == VM_REG_GUEST_LDTR)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+/*
+ * Fetch the descriptor of segment or descriptor-table register 'reg' of
+ * vcpu 'vcpu' into '*desc' via the backend.
+ * Returns EINVAL for an out-of-range vcpu or a register of any other kind;
+ * otherwise the backend's result.
+ */
+int
+vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
+		struct seg_desc *desc)
+{
+	if (!(vcpu >= 0 && vcpu < VM_MAXCPU))
+		return (EINVAL);
+
+	if (!(is_segment_register(reg) || is_descriptor_table(reg)))
+		return (EINVAL);
+
+	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
+}
+
+/*
+ * Store '*desc' as the descriptor of segment or descriptor-table register
+ * 'reg' of vcpu 'vcpu' via the backend.
+ * Returns EINVAL for an out-of-range vcpu or a register of any other kind;
+ * otherwise the backend's result.
+ */
+int
+vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
+		struct seg_desc *desc)
+{
+	if (!(vcpu >= 0 && vcpu < VM_MAXCPU))
+		return (EINVAL);
+
+	if (!(is_segment_register(reg) || is_descriptor_table(reg)))
+		return (EINVAL);
+
+	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
+}
+
+/*
+ * Load the guest FPU state of 'vcpu' into the hardware.  The host state
+ * of the current thread is flushed first, and CR0.TS is set again on the
+ * way out (fpu_start_emulating() is 'load_cr0(rcr0() | CR0_TS)').
+ * The statement order matters: fpurestore() runs between clearing and
+ * re-setting CR0.TS.
+ */
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+	/* flush host state to the pcb */
+	fpuexit(curthread);
+
+	/* restore guest FPU state */
+	fpu_stop_emulating();
+	fpurestore(vcpu->guestfpu);
+
+	/*
+	 * The FPU is now "dirty" with the guest's state so turn on emulation
+	 * to trap any access to the FPU by the host.
+	 */
+	fpu_start_emulating();
+}
+
+/*
+ * Save the hardware FPU contents into the guest save area of 'vcpu'.
+ * CR0.TS must be set on entry, as restore_guest_fpustate() leaves it;
+ * otherwise this panics.  CR0.TS is cleared only around fpusave() and set
+ * again afterwards.
+ */
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+
+	if ((rcr0() & CR0_TS) == 0)
+		panic("fpu emulation not enabled in host!");
+
+	/* save guest FPU state */
+	fpu_stop_emulating();
+	fpusave(vcpu->guestfpu);
+	fpu_start_emulating();
+}
+
+static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
+
+/*
+ * Move 'vcpu' to 'newstate'.  The caller must hold the vcpu lock.
+ *
+ * from_idle  true:  wait (re-checking at least every 'hz' ticks) until the
+ *                   vcpu is in VCPU_IDLE before attempting the transition.
+ *            false: the vcpu must not currently be idle (asserted).
+ *
+ * Returns 0 on success or EBUSY when the requested transition is not one
+ * of those listed in the body.  On success 'hostcpu' is set to the current
+ * cpu when entering VCPU_RUNNING and to NOCPU otherwise, and sleepers on
+ * '&vcpu->state' are woken when the vcpu becomes idle.
+ */
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+    bool from_idle)
+{
+	int error;
+
+	vcpu_assert_locked(vcpu);
+
+	/*
+	 * State transitions from the vmmdev_ioctl() must always begin from
+	 * the VCPU_IDLE state. This guarantees that there is only a single
+	 * ioctl() operating on a vcpu at any point.
+	 */
+	if (from_idle) {
+		while (vcpu->state != VCPU_IDLE)
+			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+	} else {
+		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+		    "vcpu idle state"));
+	}
+
+	/* 'hostcpu' is only meaningful while the vcpu is running */
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+	} else {
+		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+		    "vcpu that is not running", vcpu->hostcpu));
+	}
+
+	/*
+	 * The following state transitions are allowed:
+	 * IDLE -> FROZEN -> IDLE
+	 * FROZEN -> RUNNING -> FROZEN
+	 * FROZEN -> SLEEPING -> FROZEN
+	 */
+	switch (vcpu->state) {
+	case VCPU_IDLE:
+	case VCPU_RUNNING:
+	case VCPU_SLEEPING:
+		/* these states may only go to FROZEN */
+		error = (newstate != VCPU_FROZEN);
+		break;
+	case VCPU_FROZEN:
+		/* FROZEN may go anywhere except to itself */
+		error = (newstate == VCPU_FROZEN);
+		break;
+	default:
+		error = 1;
+		break;
+	}
+
+	if (error)
+		return (EBUSY);
+
+	vcpu->state = newstate;
+	if (newstate == VCPU_RUNNING)
+		vcpu->hostcpu = curcpu;
+	else
+		vcpu->hostcpu = NOCPU;
+
+	/* release anyone waiting in the from_idle loop above */
+	if (newstate == VCPU_IDLE)
+		wakeup(&vcpu->state);
+
+	return (0);
+}
+
+/*
+ * Move vcpu 'vcpuid' of 'vm' to 'newstate' through vcpu_set_state(), not
+ * starting from the idle state, and panic if the transition is refused.
+ */
+static void
+vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+	int error;
+
+	/* no trailing newline: matches vcpu_require_state_locked() */
+	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
+		panic("Error %d setting state to %d", error, newstate);
+}
+
+/*
+ * Same contract as vcpu_require_state(), for callers that already hold the
+ * vcpu lock: request the transition to 'newstate' (not starting from the
+ * idle state) and panic if it is refused.
+ */
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+	int rv;
+
+	rv = vcpu_set_state_locked(vcpu, newstate, false);
+	if (rv != 0)
+		panic("Error %d setting state to %d", rv, newstate);
+}
+
+/*
+ * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
+ *
+ * vm, vcpuid     the vcpu that executed 'hlt'
+ * intr_disabled  when true, a pending vlapic interrupt does not prevent
+ *                the sleep (only a rendezvous or a pending NMI does)
+ * retu           set to true only on the FreeBSD spindown path
+ *
+ * The time spent between entering VCPU_SLEEPING and returning to
+ * VCPU_FROZEN is added to the VCPU_IDLE_TICKS statistic.
+ * Always returns 0.
+ *
+ * NOTE(review): 'spindown' is computed in every build but only acted on
+ * inside the __FreeBSD__ section, so 'vmexit' is unused elsewhere and a
+ * halted vcpu with a disabled apic simply returns to the caller there.
+ */
+static int
+vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
+{
+	struct vm_exit *vmexit;
+	struct vcpu *vcpu;
+	int t, timo, spindown;
+
+	vcpu = &vm->vcpu[vcpuid];
+	spindown = 0;
+
+	vcpu_lock(vcpu);
+
+	/*
+	 * Do a final check for pending NMI or interrupts before
+	 * really putting this thread to sleep.
+	 *
+	 * These interrupts could have happened any time after we
+	 * returned from VMRUN() and before we grabbed the vcpu lock.
+	 */
+	if (vm->rendezvous_func == NULL &&
+	    !vm_nmi_pending(vm, vcpuid) &&
+	    (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
+		t = ticks;
+		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+		if (vlapic_enabled(vcpu->vlapic)) {
+			/*
+			 * XXX msleep_spin() is not interruptible so use the
+			 * 'timo' to put an upper bound on the sleep time.
+			 */
+			timo = hz;
+			msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
+		} else {
+			/*
+			 * Spindown the vcpu if the apic is disabled and it
+			 * had entered the halted state.
+			 */
+			spindown = 1;
+		}
+		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
+	}
+	vcpu_unlock(vcpu);
+
+#ifdef	__FreeBSD__
+	/*
+	 * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
+	 * outside the confines of the vcpu spinlock.
+	 */
+	if (spindown) {
+		*retu = true;
+		vmexit = vm_exitinfo(vm, vcpuid);
+		vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
+		vm_deactivate_cpu(vm, vcpuid);
+		VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+	}
+#endif
+
+	return (0);
+}
+
+/*
+ * Handle an instruction-emulation exit: fetch (if the bytes are not already
+ * in 'vie'), decode and emulate the instruction that accessed guest-physical
+ * address 'gpa' recorded in the vcpu's exit information.
+ *
+ * Accesses inside the local APIC, I/O APIC or HPET MMIO windows are emulated
+ * here in the kernel.  Any other address sets '*retu' to true so that the
+ * exit is completed in userland.
+ *
+ * Returns 0 on success (including the case where the guest must be resumed
+ * to handle a fault raised by the instruction fetch), EFAULT when the fetch
+ * or decode fails, or the result of vmm_emulate_instruction().
+ */
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vie *vie;
+	struct vcpu *vcpu;
+	struct vm_exit *vme;
+	uint64_t gla, gpa, cs_base;
+	struct vm_guest_paging *paging;
+	mem_region_read_t mread;
+	mem_region_write_t mwrite;
+	enum vm_cpu_mode cpu_mode;
+	int cs_d, error, length;
+
+	vcpu = &vm->vcpu[vcpuid];
+	vme = &vcpu->exitinfo;
+
+	/* Pull the decode inputs out of the exit information. */
+	gla = vme->u.inst_emul.gla;
+	gpa = vme->u.inst_emul.gpa;
+	cs_base = vme->u.inst_emul.cs_base;
+	cs_d = vme->u.inst_emul.cs_d;
+	vie = &vme->u.inst_emul.vie;
+	paging = &vme->u.inst_emul.paging;
+	cpu_mode = paging->cpu_mode;
+
+	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
+
+	/* Fetch, decode and emulate the faulting instruction */
+	if (vie->num_valid == 0) {
+		/*
+		 * If the instruction length is not known then assume a
+		 * maximum size instruction.
+		 */
+		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
+		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
+		    cs_base, length, vie);
+	} else {
+		/*
+		 * The instruction bytes have already been copied into 'vie'
+		 */
+		error = 0;
+	}
+	/*
+	 * Fetch result: 1 means resume the guest, -1 is a hard failure, and
+	 * any other non-zero value is unexpected.
+	 */
+	if (error == 1)
+		return (0);		/* Resume guest to handle page fault */
+	else if (error == -1)
+		return (EFAULT);
+	else if (error != 0)
+		panic("%s: vmm_fetch_instruction error %d", __func__, error);
+
+	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
+		return (EFAULT);
+
+	/*
+	 * If the instruction length was not specified then update it now
+	 * along with 'nextrip'.
+	 */
+	if (vme->inst_length == 0) {
+		vme->inst_length = vie->num_processed;
+		vcpu->nextrip += vie->num_processed;
+	}
+
+	/* return to userland unless this is an in-kernel emulated device */
+	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+		mread = lapic_mmio_read;
+		mwrite = lapic_mmio_write;
+	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+		mread = vioapic_mmio_read;
+		mwrite = vioapic_mmio_write;
+	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
+		mread = vhpet_mmio_read;
+		mwrite = vhpet_mmio_write;
+	} else {
+		*retu = true;
+		return (0);
+	}
+
+	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
+	    mread, mwrite, retu);
+
+	return (error);
+}
+
+/*
+ * Run the vcpu identified by 'vmrun->cpuid' until an exit occurs that cannot
+ * be completed inside the kernel.
+ *
+ * Each pass enters the guest through VMRUN() inside a critical section with
+ * the guest FPU state loaded.  HLT, instruction-emulation and in/out exits
+ * are offered to the in-kernel handlers first; when a handler succeeds
+ * without setting 'retu' the guest is entered again.  Every other exit code,
+ * or any error, ends the loop.
+ *
+ * Returns EINVAL for an out-of-range vcpu id (nothing is copied out in that
+ * case); otherwise returns the error from VMRUN() or from the exit handler
+ * (0 on success) after copying the vcpu's exit information into
+ * 'vmrun->vm_exit'.
+ */
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+	int error, vcpuid;
+	struct vcpu *vcpu;
+	struct pcb *pcb;	/* only used in the __FreeBSD__ build */
+	uint64_t tscval;
+	struct vm_exit *vme;
+	bool retu, intr_disabled;
+
+	vcpuid = vmrun->cpuid;
+
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	vcpu = &vm->vcpu[vcpuid];
+	vme = &vcpu->exitinfo;
+restart:
+	critical_enter();
+
+	/* Sampled here so the time spent in this pass can be accounted. */
+	tscval = rdtsc();
+
+#ifdef	__FreeBSD__
+	pcb = PCPU_GET(curpcb);
+	set_pcb_flags(pcb, PCB_FULL_IRET);
+#endif
+
+#ifndef	__FreeBSD__
+	/*
+	 * Register the guest FPU save/restore routines as context operations
+	 * on the current thread for the duration of the guest run; they are
+	 * removed again below.  (Presumably so the guest FPU state follows
+	 * the thread across context switches - confirm against installctx.)
+	 */
+	installctx(curthread, vcpu, save_guest_fpustate,
+	    restore_guest_fpustate, NULL, NULL, NULL, NULL);
+#endif
+	restore_guest_fpustate(vcpu);
+
+	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
+	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip);
+	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
+
+	save_guest_fpustate(vcpu);
+#ifndef	__FreeBSD__
+	removectx(curthread, vcpu, save_guest_fpustate,
+	    restore_guest_fpustate, NULL, NULL, NULL, NULL);
+#endif
+
+	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
+
+	critical_exit();
+
+	if (error == 0) {
+		retu = false;
+		/* By default resume after the instruction that exited. */
+		vcpu->nextrip = vme->rip + vme->inst_length;
+		switch (vme->exitcode) {
+		case VM_EXITCODE_HLT:
+			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
+			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
+			break;
+		case VM_EXITCODE_INST_EMUL:
+			error = vm_handle_inst_emul(vm, vcpuid, &retu);
+			break;
+		case VM_EXITCODE_INOUT:
+		case VM_EXITCODE_INOUT_STR:
+			error = vm_handle_inout(vm, vcpuid, vme, &retu);
+			break;
+		default:
+			retu = true;	/* handled in userland */
+			break;
+		}
+	}
+
+	/*
+	 * 'retu' is only examined when 'error' is 0, i.e. after it has been
+	 * initialized in the block above.
+	 */
+	if (error == 0 && retu == false) {
+		goto restart;
+	}
+
+	/* copy the exit information */
+	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
+	return (error);
+}
+
+/*
+ * Arrange for the instruction currently associated with 'vcpuid' to be
+ * executed again on the next VM entry.
+ *
+ * 'arg' is the 'struct vm *' passed as an opaque pointer.
+ *
+ * Returns 0 on success, EINVAL if 'vcpuid' is out of range, or the error
+ * from vm_get_register() if the guest %rip cannot be read.  Panics if the
+ * vcpu is neither running nor frozen.
+ */
+int
+vm_restart_instruction(void *arg, int vcpuid)
+{
+	struct vm *vm;
+	struct vcpu *vcpu;
+	enum vcpu_state state;
+	uint64_t rip;
+	int error;
+
+	vm = arg;
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	vcpu = &vm->vcpu[vcpuid];
+	state = vcpu_get_state(vm, vcpuid, NULL);
+	if (state == VCPU_RUNNING) {
+		/*
+		 * When a vcpu is "running" the next instruction is determined
+		 * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
+		 * Thus setting 'inst_length' to zero will cause the current
+		 * instruction to be restarted.
+		 */
+		vcpu->exitinfo.inst_length = 0;
+		VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
+		    "setting inst_length to zero", vcpu->exitinfo.rip);
+	} else if (state == VCPU_FROZEN) {
+		/*
+		 * When a vcpu is "frozen" it is outside the critical section
+		 * around VMRUN() and 'nextrip' points to the next instruction.
+		 * Thus instruction restart is achieved by setting 'nextrip'
+		 * to the vcpu's %rip.
+		 */
+		error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
+		KASSERT(!error, ("%s: error %d getting rip", __func__, error));
+		/*
+		 * On kernels where KASSERT() is compiled out the check above
+		 * vanishes; never load an uninitialized 'rip' into 'nextrip'.
+		 */
+		if (error != 0)
+			return (error);
+		VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
+		    "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
+		vcpu->nextrip = rip;
+	} else {
+		panic("%s: invalid state %d", __func__, state);
+	}
+	return (0);
+}
+
+/*
+ * Record the event information 'info' reported at VM exit in the vcpu's
+ * 'exitintinfo' (consumed later by vm_entry_intinfo()).
+ *
+ * 'info' without VM_INTINFO_VALID is stored as 0.  Valid 'info' is rejected
+ * with EINVAL when an NMI carries a vector other than IDT_NMI, a hardware
+ * exception carries a vector of 32 or more, or a reserved bit is set.
+ * EINVAL is also returned for an out-of-range 'vcpuid'.
+ */
+int
+vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
+{
+	struct vcpu *vcpu;
+	int type, vector;
+
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	if ((info & VM_INTINFO_VALID) == 0) {
+		info = 0;
+	} else {
+		type = info & VM_INTINFO_TYPE;
+		vector = info & 0xff;
+		if ((type == VM_INTINFO_NMI && vector != IDT_NMI) ||
+		    (type == VM_INTINFO_HWEXCEPTION && vector >= 32) ||
+		    (info & VM_INTINFO_RSVD) != 0)
+			return (EINVAL);
+	}
+
+	vcpu = &vm->vcpu[vcpuid];
+	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
+	vcpu->exitintinfo = info;
+	return (0);
+}
+
+/*
+ * Exception classes produced by exception_class() and used by nested_fault()
+ * to decide whether two events combine into a double fault.
+ */
+enum exc_class {
+	EXC_BENIGN,
+	EXC_CONTRIBUTORY,
+	EXC_PAGEFAULT
+};
+
+/* Vector number; exception_class() places it in the page-fault class. */
+#define	IDT_VE	20	/* Virtualization Exception (Intel specific) */
+
+/*
+ * Classify the event described by 'info' as benign, contributory or
+ * page-fault class.  'info' is asserted to have VM_INTINFO_VALID set.
+ */
+static enum exc_class
+exception_class(uint64_t info)
+{
+	int type, vector;
+
+#ifdef	__FreeBSD__
+	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
+#else
+	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %lx", info));
+#endif
+	type = info & VM_INTINFO_TYPE;
+	vector = info & 0xff;
+
+	/* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
+
+	/* Hardware interrupts, software interrupts and NMIs are benign. */
+	if (type == VM_INTINFO_HWINTR || type == VM_INTINFO_SWINTR ||
+	    type == VM_INTINFO_NMI)
+		return (EXC_BENIGN);
+
+	/*
+	 * Everything else is treated as a hardware exception.
+	 *
+	 * SVM and VT-x use identical type values to represent NMI,
+	 * hardware interrupt and software interrupt.
+	 *
+	 * SVM uses type '3' for all exceptions. VT-x uses type '3'
+	 * for exceptions except #BP and #OF. #BP and #OF use a type
+	 * value of '5' or '6'. Therefore no explicit 'type' value is
+	 * checked before classifying by vector.
+	 */
+	if (vector == IDT_PF || vector == IDT_VE)
+		return (EXC_PAGEFAULT);
+
+	if (vector == IDT_DE || vector == IDT_TS || vector == IDT_NP ||
+	    vector == IDT_SS || vector == IDT_GP)
+		return (EXC_CONTRIBUTORY);
+
+	return (EXC_BENIGN);
+}
+
+/*
+ * Combine the event recorded at VM exit ('info1') with a newly pending
+ * exception ('info2'); both are asserted to be valid.
+ *
+ * Returns 1 with '*retinfo' holding the event to inject: a double fault when
+ * the pair satisfies the double-fault conditions, otherwise 'info2'.
+ * Returns 0 with '*retinfo' cleared when 'info1' is itself a double fault
+ * (triple fault).
+ */
+static int
+nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
+    uint64_t *retinfo)
+{
+	enum exc_class first, second;
+	int type1, vector1;
+
+#ifdef	__FreeBSD__
+	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
+	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
+#else
+	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %lx is not valid", info1));
+	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %lx is not valid", info2));
+#endif
+
+	type1 = info1 & VM_INTINFO_TYPE;
+	vector1 = info1 & 0xff;
+
+	/*
+	 * If an exception occurs while attempting to call the double-fault
+	 * handler the processor enters shutdown mode (aka triple fault).
+	 */
+	if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
+		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
+		    info1, info2);
+#ifdef	__FreeBSD__
+		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
+#endif
+		*retinfo = 0;
+		return (0);
+	}
+
+	/*
+	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
+	 */
+	first = exception_class(info1);
+	second = exception_class(info2);
+	if (!((first == EXC_CONTRIBUTORY && second == EXC_CONTRIBUTORY) ||
+	    (first == EXC_PAGEFAULT && second != EXC_BENIGN))) {
+		/* Handle exceptions serially */
+		*retinfo = info2;
+		return (1);
+	}
+
+	/* Convert nested fault into a double fault. */
+	*retinfo = IDT_DF;
+	*retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION |
+	    VM_INTINFO_DEL_ERRCODE;
+	return (1);
+}
+
+/*
+ * Encode the vcpu's pending exception, if any, in 'intinfo' format: vector
+ * in the low 8 bits, valid and hardware-exception flags, and - when the
+ * exception carries one - the error code in the upper 32 bits together with
+ * VM_INTINFO_DEL_ERRCODE.  Returns 0 when no exception is pending.
+ */
+static uint64_t
+vcpu_exception_intinfo(struct vcpu *vcpu)
+{
+	uint64_t info;
+
+	if (!vcpu->exception_pending)
+		return (0);
+
+	info = VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
+	info |= vcpu->exception.vector & 0xff;
+	if (vcpu->exception.error_code_valid) {
+		info |= VM_INTINFO_DEL_ERRCODE;
+		info |= (uint64_t)vcpu->exception.error_code << 32;
+	}
+	return (info);
+}
+
+/*
+ * Determine the event, if any, to inject on the next VM entry of 'vcpuid'.
+ *
+ * Both the event recorded at the previous exit ('exitintinfo') and any
+ * pending exception are consumed.  When both are valid they are merged by
+ * nested_fault(); when only one is valid it is returned as is.
+ *
+ * Returns non-zero with '*retinfo' filled in when there is something to
+ * inject, 0 otherwise ('*retinfo' is not written when neither is valid).
+ */
+int
+vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
+{
+	struct vcpu *vcpu;
+	uint64_t info1, info2;
+	bool have1, have2;
+	int valid;
+
+	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	/* Take and clear the event recorded at the last VM exit. */
+	info1 = vcpu->exitintinfo;
+	vcpu->exitintinfo = 0;
+
+	/* Take and clear the pending exception, if there is one. */
+	info2 = 0;
+	if (vcpu->exception_pending) {
+		info2 = vcpu_exception_intinfo(vcpu);
+		vcpu->exception_pending = 0;
+		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
+		    vcpu->exception.vector, info2);
+	}
+
+	have1 = (info1 & VM_INTINFO_VALID) != 0;
+	have2 = (info2 & VM_INTINFO_VALID) != 0;
+
+	if (have1 && have2) {
+		valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
+	} else if (have1 || have2) {
+		*retinfo = have1 ? info1 : info2;
+		valid = 1;
+	} else {
+		valid = 0;
+	}
+
+	if (valid) {
+		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
+		    "retinfo(%#lx)", __func__, info1, info2, *retinfo);
+	}
+
+	return (valid);
+}
+
+/*
+ * Queue '*exception' for delivery to 'vcpuid'.
+ *
+ * Returns EINVAL if 'vcpuid' is out of range or the vector is not in
+ * [0, 32), EBUSY if the vcpu already has an exception pending, and 0 once
+ * the exception has been recorded.
+ */
+int
+vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
+{
+	struct vcpu *vcpu;
+
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU ||
+	    exception->vector < 0 || exception->vector >= 32)
+		return (EINVAL);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	if (!vcpu->exception_pending) {
+		vcpu->exception_pending = 1;
+		vcpu->exception = *exception;
+		VCPU_CTR1(vm, vcpuid, "Exception %d pending",
+		    exception->vector);
+		return (0);
+	}
+
+	/* Only one exception may be pending at a time. */
+	VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
+	    "pending exception %d", exception->vector,
+	    vcpu->exception.vector);
+	return (EBUSY);
+}
+
+/*
+ * If 'vcpuid' has an exception pending, copy it to '*exception', clear the
+ * pending flag and return the (non-zero) flag value.  Otherwise return 0 and
+ * leave '*exception' untouched.
+ */
+int
+vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
+{
+	struct vcpu *vcpu;
+	int pending;
+
+	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
+
+	vcpu = &vm->vcpu[vcpuid];
+	pending = vcpu->exception_pending;
+	if (pending == 0)
+		return (pending);
+
+	vcpu->exception_pending = 0;
+	*exception = vcpu->exception;
+	VCPU_CTR1(vm, vcpuid, "Exception %d delivered", exception->vector);
+	return (pending);
+}
+
+/*
+ * Queue a fault-class exception ('vector', with optional 'errcode') on
+ * 'vcpuid' and make sure the faulting instruction is restarted afterwards.
+ *
+ * 'vmarg' is the 'struct vm *' passed as an opaque pointer.  Failure of
+ * vm_inject_exception() is only caught by KASSERT().
+ */
+void
+vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
+    int errcode)
+{
+	struct vm_exception exception;
+	struct vm *vm = vmarg;
+	int rv;
+
+	exception.error_code_valid = errcode_valid;
+	exception.error_code = errcode;
+	exception.vector = vector;
+	rv = vm_inject_exception(vm, vcpuid, &exception);
+	KASSERT(rv == 0, ("vm_inject_exception error %d", rv));
+
+	/*
+	 * A fault-like exception allows the instruction to be restarted
+	 * after the exception handler returns; a zero 'inst_length' keeps the
+	 * instruction pointer at the faulting instruction.
+	 */
+	vm_exitinfo(vm, vcpuid)->inst_length = 0;
+}
+
+/*
+ * Inject a page fault into 'vcpuid': load 'cr2' into the guest's %cr2 and
+ * queue IDT_PF with 'error_code' through vm_inject_fault().
+ *
+ * 'vmarg' is the 'struct vm *' passed as an opaque pointer.
+ */
+void
+vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
+{
+	struct vm *vm = vmarg;
+	int rv;
+
+	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
+	    error_code, cr2);
+
+	rv = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
+	KASSERT(rv == 0, ("vm_set_register(cr2) error %d", rv));
+
+	vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
+}
+
+/* Incremented by vm_nmi_clear() each time a pending NMI is cleared. */
+static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
+
+/*
+ * Mark an NMI as pending on 'vcpuid' and notify the vcpu of the event.
+ * Returns EINVAL for an out-of-range 'vcpuid', 0 otherwise.
+ */
+int
+vm_inject_nmi(struct vm *vm, int vcpuid)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	vm->vcpu[vcpuid].nmi_pending = 1;
+	vcpu_notify_event(vm, vcpuid, false);
+
+	return (0);
+}
+
+/*
+ * Return the 'nmi_pending' flag of 'vcpuid'.  Panics on an out-of-range
+ * 'vcpuid'.
+ */
+int
+vm_nmi_pending(struct vm *vm, int vcpuid)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
+
+	return (vm->vcpu[vcpuid].nmi_pending);
+}
+
+/*
+ * Clear the pending NMI on 'vcpuid' and count it in VCPU_NMI_COUNT.
+ *
+ * Panics if 'vcpuid' is out of range or if no NMI is pending.
+ */
+void
+vm_nmi_clear(struct vm *vm, int vcpuid)
+{
+	struct vcpu *vcpu;
+
+	/* Name this function (not vm_nmi_pending) in the panic message. */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	if (vcpu->nmi_pending == 0)
+		panic("vm_nmi_clear: inconsistent nmi_pending state");
+
+	vcpu->nmi_pending = 0;
+	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
+}
+
+/* Incremented by vm_extint_clear() each time a pending ExtINT is cleared. */
+static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
+
+/*
+ * Mark an ExtINT as pending on 'vcpuid' and notify the vcpu of the event.
+ * Returns EINVAL for an out-of-range 'vcpuid', 0 otherwise.
+ */
+int
+vm_inject_extint(struct vm *vm, int vcpuid)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	vm->vcpu[vcpuid].extint_pending = 1;
+	vcpu_notify_event(vm, vcpuid, false);
+
+	return (0);
+}
+
+/*
+ * Return the 'extint_pending' flag of 'vcpuid'.  Panics on an out-of-range
+ * 'vcpuid'.
+ */
+int
+vm_extint_pending(struct vm *vm, int vcpuid)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
+
+	return (vm->vcpu[vcpuid].extint_pending);
+}
+
+/*
+ * Clear the pending ExtINT on 'vcpuid' and count it in VCPU_EXTINT_COUNT.
+ *
+ * Panics if 'vcpuid' is out of range or if no ExtINT is pending.
+ */
+void
+vm_extint_clear(struct vm *vm, int vcpuid)
+{
+	struct vcpu *vcpu;
+
+	/* Name this function (not vm_extint_pending) in the panic message. */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	if (vcpu->extint_pending == 0)
+		panic("vm_extint_clear: inconsistent extint_pending state");
+
+	vcpu->extint_pending = 0;
+	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
+}
+
+/*
+ * Read capability 'type' of 'vcpu' into '*retval' through VMGETCAP().
+ * Returns EINVAL when 'vcpu' or 'type' is out of range, otherwise the
+ * VMGETCAP() result.
+ */
+int
+vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
+{
+	if (vcpu < 0 || vcpu >= VM_MAXCPU || type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMGETCAP(vm->cookie, vcpu, type, retval));
+}
+
+/*
+ * Set capability 'type' of 'vcpu' to 'val' through VMSETCAP().
+ * Returns EINVAL when 'vcpu' or 'type' is out of range, otherwise the
+ * VMSETCAP() result.
+ */
+int
+vm_set_capability(struct vm *vm, int vcpu, int type, int val)
+{
+	if (vcpu < 0 || vcpu >= VM_MAXCPU || type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMSETCAP(vm->cookie, vcpu, type, val));
+}
+
+/* Return the 'vhpet' pointer stored in 'vm'. */
+struct vhpet *
+vm_hpet(struct vm *vm)
+{
+	return (vm->vhpet);
+}
+
+/* Return the 'vioapic' pointer stored in 'vm'. */
+struct vioapic *
+vm_ioapic(struct vm *vm)
+{
+	return (vm->vioapic);
+}
+
+/*
+ * Return the 'vlapic' pointer of vcpu 'cpu'.
+ * 'cpu' is used as an array index without a range check.
+ */
+struct vlapic *
+vm_lapic(struct vm *vm, int cpu)
+{
+	return (vm->vcpu[cpu].vlapic);
+}
+
+#ifdef	__FreeBSD__
+/*
+ * Report whether the PCI device 'bus'/'slot'/'func' is listed as a passthru
+ * device in any of the "pptdevs", "pptdevs2" or "pptdevs3" environment
+ * variables.  Each variable holds space-separated "b/s/f" triples.
+ *
+ * Returns non-zero when a matching triple is found, 0 otherwise.
+ */
+boolean_t
+vmm_is_pptdev(int bus, int slot, int func)
+{
+	int found, i, n;
+	int b, s, f;
+	char *val, *cp, *cp2;
+
+	/*
+	 * XXX
+	 * The length of an environment variable is limited to 128 bytes which
+	 * puts an upper limit on the number of passthru devices that may be
+	 * specified using a single environment variable.
+	 *
+	 * Work around this by scanning multiple environment variable
+	 * names instead of a single one - yuck!
+	 */
+	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
+
+	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
+	found = 0;
+	for (i = 0; names[i] != NULL && !found; i++) {
+		cp = val = getenv(names[i]);
+		while (cp != NULL && *cp != '\0') {
+			/* Temporarily terminate the current token in place. */
+			if ((cp2 = strchr(cp, ' ')) != NULL)
+				*cp2 = '\0';
+
+			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
+			if (n == 3 && bus == b && slot == s && func == f) {
+				found = 1;
+				break;
+			}
+
+			/* Restore the separator and step past it. */
+			if (cp2 != NULL)
+				*cp2++ = ' ';
+
+			/* 'cp2' is NULL after the last token, ending the scan. */
+			cp = cp2;
+		}
+		freeenv(val);
+	}
+	return (found);
+}
+#endif
+
+/* Return the opaque 'iommu' pointer stored in 'vm'. */
+void *
+vm_iommu_domain(struct vm *vm)
+{
+
+	return (vm->iommu);
+}
+
+/*
+ * Change the state of 'vcpuid' to 'newstate' under the vcpu lock.
+ *
+ * 'from_idle' is passed through unchanged to vcpu_set_state_locked().
+ * Returns the result of vcpu_set_state_locked().  Panics on an out-of-range
+ * 'vcpuid'.
+ */
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+    bool from_idle)
+{
+	int error;
+	struct vcpu *vcpu;
+
+	/* Report the current function name rather than a stale one. */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+	vcpu_unlock(vcpu);
+
+	return (error);
+}
+
+/*
+ * Return the current state of 'vcpuid', sampled under the vcpu lock.
+ *
+ * When 'hostcpu' is not NULL it receives the vcpu's 'hostcpu' value read
+ * under the same lock hold.  Panics on an out-of-range 'vcpuid'.
+ */
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
+{
+	struct vcpu *vcpu;
+	enum vcpu_state state;
+
+	/* Report the current function name rather than a stale one. */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	state = vcpu->state;
+	if (hostcpu != NULL)
+		*hostcpu = vcpu->hostcpu;
+	vcpu_unlock(vcpu);
+
+	return (state);
+}
+
+/*
+ * Add 'vcpuid' to the VM's set of active cpus.
+ *
+ * Returns EINVAL for an out-of-range 'vcpuid', EBUSY if the vcpu is already
+ * in the set, and 0 after adding it.
+ */
+int
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	/*
+	 * NOTE(review): the membership test and the atomic set below are two
+	 * separate steps - confirm callers serialize activation.
+	 */
+	if (CPU_ISSET(vcpuid, &vm->active_cpus))
+		return (EBUSY);
+
+	VCPU_CTR0(vm, vcpuid, "activated");
+	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+	return (0);
+}
+
+/* Return a copy (by value) of the VM's set of active cpus. */
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+	return (vm->active_cpus);
+}
+
+/*
+ * Return the opaque 'stats' pointer of 'vcpuid'.
+ * 'vcpuid' is used as an array index without a range check.
+ */
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+	return (vm->vcpu[vcpuid].stats);
+}
+
+/*
+ * Store the x2apic state of 'vcpuid' in '*state'.
+ * Returns EINVAL for an out-of-range 'vcpuid', 0 otherwise.
+ */
+int
+vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	*state = vm->vcpu[vcpuid].x2apic_state;
+
+	return (0);
+}
+
+/*
+ * Record 'state' as the x2apic state of 'vcpuid' and propagate it to the
+ * virtual local APIC via vlapic_set_x2apic_state().
+ *
+ * Returns EINVAL when 'vcpuid' is out of range or 'state' is not below
+ * X2APIC_STATE_LAST, 0 otherwise.
+ */
+int
+vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU || state >= X2APIC_STATE_LAST)
+		return (EINVAL);
+
+	vm->vcpu[vcpuid].x2apic_state = state;
+	vlapic_set_x2apic_state(vm, vcpuid, state);
+
+	return (0);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ *   to the host_cpu to cause the vcpu to trap into the hypervisor.
+ *
+ * 'lapic_intr' selects how a vcpu running on another host cpu is notified:
+ * true uses vlapic_post_intr(), false sends a plain IPI with ipi_cpu().
+ *
+ * The vcpu lock is held for the whole operation.  'vcpuid' is used as an
+ * array index without a range check.
+ */
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+	int hostcpu;
+	struct vcpu *vcpu;
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	hostcpu = vcpu->hostcpu;
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+		if (hostcpu != curcpu) {
+			if (lapic_intr) {
+				vlapic_post_intr(vcpu->vlapic, hostcpu,
+				    vmm_ipinum);
+			} else {
+				ipi_cpu(hostcpu, vmm_ipinum);
+			}
+		} else {
+			/*
+			 * If the 'vcpu' is running on 'curcpu' then it must
+			 * be sending a notification to itself (e.g. SELF_IPI).
+			 * The pending event will be picked up when the vcpu
+			 * transitions back to guest context.
+			 */
+		}
+	} else {
+		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+		    "with hostcpu %d", vcpu->state, hostcpu));
+		/* Only a sleeping vcpu needs a wakeup; other states do nothing. */
+		if (vcpu->state == VCPU_SLEEPING)
+			wakeup_one(vcpu);
+	}
+	vcpu_unlock(vcpu);
+}
+
+/*
+ * Translate a local APIC id into a vcpu id.  Currently an identity mapping;
+ * 'vm' is unused and 'apicid' is not range checked.
+ */
+int
+vm_apicid2vcpuid(struct vm *vm, int apicid)
+{
+	/*
+	 * XXX apic id is assumed to be numerically identical to vcpu id
+	 */
+	return (apicid);
+}
+
+/* Return the 'vatpic' pointer stored in 'vm'. */
+struct vatpic *
+vm_atpic(struct vm *vm)
+{
+	return (vm->vatpic);
+}
+
+/* Return the 'vatpit' pointer stored in 'vm'. */
+struct vatpit *
+vm_atpit(struct vm *vm)
+{
+	return (vm->vatpit);
+}
+
+/*
+ * Map a segment encoding 'seg' (0 through 5: ES, CS, SS, DS, FS, GS) to the
+ * corresponding VM_REG_GUEST_* register name.  An out-of-range encoding is
+ * only caught by KASSERT().
+ */
+enum vm_reg_name
+vm_segment_name(int seg)
+{
+	static enum vm_reg_name regs_by_encoding[] = {
+		VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS
+	};
+
+	KASSERT(seg >= 0 && seg < nitems(regs_by_encoding),
+	    ("%s: invalid segment encoding %d", __func__, seg));
+	return (regs_by_encoding[seg]);
+}
+
+/*
+ * Undo vm_copy_setup(): on FreeBSD release every held guest page recorded in
+ * 'copyinfo', then zero all 'num_copyinfo' entries.  'vm' and 'vcpuid' are
+ * not used.
+ */
+void
+vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    int num_copyinfo)
+{
+	int idx;
+
+#ifdef	__FreeBSD__
+	idx = 0;
+	while (idx < num_copyinfo) {
+		struct vm_copyinfo *entry = &copyinfo[idx];
+
+		if (entry->cookie != NULL)
+			vm_gpa_release(entry->cookie);
+		idx++;
+	}
+#endif
+	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
+}
+
+/*
+ * Prepare 'copyinfo' for copying 'len' bytes to or from the guest linear
+ * address 'gla'.
+ *
+ * The range is split at page boundaries; each piece is translated to a
+ * guest-physical address with vm_gla2gpa() and then held with vm_gpa_hold()
+ * using protection 'prot'.  All 'num_copyinfo' entries are zeroed first.
+ *
+ * Returns 0 on success, the non-zero result of vm_gla2gpa() if a translation
+ * fails, or -1 if a page cannot be held (in which case 'copyinfo' is torn
+ * down again).  Needing more than 'num_copyinfo' entries is only caught by
+ * KASSERT().
+ */
+int
+vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+    int num_copyinfo)
+{
+	int error, idx, nused;
+	size_t n, off, remaining;
+	void *hva, *cookie;
+	uint64_t gpa;
+
+	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
+
+	/* Pass 1: translate, one entry per page-bounded piece. */
+	nused = 0;
+	remaining = len;
+	while (remaining > 0) {
+		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
+		error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
+		if (error)
+			return (error);
+		off = gpa & PAGE_MASK;
+		/* Never let a single piece cross a page boundary. */
+		n = min(remaining, PAGE_SIZE - off);
+		copyinfo[nused].gpa = gpa;
+		copyinfo[nused].len = n;
+		remaining -= n;
+		gla += n;
+		nused++;
+	}
+
+	/* Pass 2: hold each piece and record its host virtual address. */
+	for (idx = 0; idx < nused; idx++) {
+		hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
+		    prot, &cookie);
+		if (hva == NULL)
+			break;
+		copyinfo[idx].hva = hva;
+		copyinfo[idx].cookie = cookie;
+	}
+
+	/* A short loop above means one of the holds failed. */
+	if (idx != nused) {
+		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
+		return (-1);
+	} else {
+		return (0);
+	}
+}
+
+/*
+ * Copy 'len' bytes from the guest pages described by 'copyinfo' into the
+ * kernel buffer 'kaddr', one entry at a time.  'vm' and 'vcpuid' are not
+ * used.
+ *
+ * 'len' is decremented by each entry's length until it reaches zero, so it
+ * is expected to equal the sum of the entry lengths consumed.
+ */
+void
+vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
+    size_t len)
+{
+	struct vm_copyinfo *entry;
+	char *dst;
+
+	dst = kaddr;
+	for (entry = copyinfo; len > 0; entry++) {
+		bcopy(entry->hva, dst, entry->len);
+		dst += entry->len;
+		len -= entry->len;
+	}
+}
+
+/*
+ * Copy 'len' bytes from the kernel buffer 'kaddr' into the guest pages
+ * described by 'copyinfo', one entry at a time.  'vm' and 'vcpuid' are not
+ * used.
+ *
+ * 'len' is decremented by each entry's length until it reaches zero, so it
+ * is expected to equal the sum of the entry lengths consumed.
+ */
+void
+vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+    struct vm_copyinfo *copyinfo, size_t len)
+{
+	struct vm_copyinfo *entry;
+	const char *src;
+
+	src = kaddr;
+	for (entry = copyinfo; len > 0; entry++) {
+		bcopy(src, entry->hva, entry->len);
+		src += entry->len;
+		len -= entry->len;
+	}
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.conf b/usr/src/uts/i86pc/io/vmm/vmm.conf
new file mode 100644
index 0000000000..8833076014
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm.conf
@@ -0,0 +1 @@
+name="vmm" parent="pseudo";
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.c b/usr/src/uts/i86pc/io/vmm/vmm_host.c
new file mode 100644
index 0000000000..b94caf4009
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_host.c
@@ -0,0 +1,160 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_host.c 242275 2012-10-29 01:51:24Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_host.c 242275 2012-10-29 01:51:24Z neel $");
+
+#include <sys/param.h>
+#include <sys/pcpu.h>
+
+#include <machine/cpufunc.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+
+#include "vmm_host.h"
+
+/* Host register values captured by vmm_host_state_init(). */
+static uint64_t vmm_host_efer, vmm_host_pat, vmm_host_cr0, vmm_host_cr4;
+
+/*
+ * Capture the host EFER and PAT MSRs and the %cr0/%cr4 values into the
+ * file-local variables returned by the vmm_get_host_*() accessors.
+ */
+void
+vmm_host_state_init(void)
+{
+
+	vmm_host_efer = rdmsr(MSR_EFER);
+	vmm_host_pat = rdmsr(MSR_PAT);
+
+	/*
+	 * We always want CR0.TS to be set when the processor does a VM exit.
+	 *
+	 * With emulation turned on unconditionally after a VM exit, we are
+	 * able to trap inadvertent use of the FPU until the guest FPU state
+	 * has been safely squirreled away.
+	 */
+	vmm_host_cr0 = rcr0() | CR0_TS;
+
+	vmm_host_cr4 = rcr4();
+}
+
+/* Return the host PAT MSR value captured by vmm_host_state_init(). */
+uint64_t
+vmm_get_host_pat(void)
+{
+
+	return (vmm_host_pat);
+}
+
+/* Return the host EFER MSR value captured by vmm_host_state_init(). */
+uint64_t
+vmm_get_host_efer(void)
+{
+
+	return (vmm_host_efer);
+}
+
+/*
+ * Return the host %cr0 value captured by vmm_host_state_init(), which always
+ * has CR0_TS set.
+ */
+uint64_t
+vmm_get_host_cr0(void)
+{
+
+	return (vmm_host_cr0);
+}
+
+/* Return the host %cr4 value captured by vmm_host_state_init(). */
+uint64_t
+vmm_get_host_cr4(void)
+{
+
+	return (vmm_host_cr4);
+}
+
+/*
+ * Return the host kernel data segment selector: built with GSEL() on FreeBSD
+ * and with SEL_GDT() from GDT_KDATA otherwise.
+ */
+uint64_t
+vmm_get_host_datasel(void)
+{
+
+#ifdef	__FreeBSD__
+	return (GSEL(GDATA_SEL, SEL_KPL));
+#else
+	return (SEL_GDT(GDT_KDATA, SEL_KPL));
+#endif
+
+}
+
+/*
+ * Return the host kernel code segment selector: built with GSEL() on FreeBSD
+ * and with SEL_GDT() from GDT_KCODE otherwise.
+ */
+uint64_t
+vmm_get_host_codesel(void)
+{
+
+#ifdef	__FreeBSD__
+	return (GSEL(GCODE_SEL, SEL_KPL));
+#else
+	return (SEL_GDT(GDT_KCODE, SEL_KPL));
+#endif
+}
+
+
+/*
+ * Return the host TSS selector: built with GSEL() from GPROC0_SEL on FreeBSD
+ * and with SEL_GDT() from GDT_KTSS otherwise.
+ */
+uint64_t
+vmm_get_host_tsssel(void)
+{
+
+#ifdef	__FreeBSD__
+	return (GSEL(GPROC0_SEL, SEL_KPL));
+#else
+	return (SEL_GDT(GDT_KTSS, SEL_KPL));
+#endif
+}
+
+/*
+ * Return the host %fs base: a constant 0 on FreeBSD, otherwise the current
+ * value of the MSR_FSBASE register (read on every call).
+ */
+uint64_t
+vmm_get_host_fsbase(void)
+{
+
+#ifdef	__FreeBSD__
+	return (0);
+#else
+	return (rdmsr(MSR_FSBASE));
+#endif
+}
+
+/*
+ * Return the base address of the host IDT: taken from 'r_idt' on FreeBSD,
+ * otherwise read from the IDTR with rd_idtr() on every call.
+ */
+uint64_t
+vmm_get_host_idtrbase(void)
+{
+
+#ifdef	__FreeBSD__
+	return (r_idt.rd_base);
+#else
+	desctbr_t idtr;
+
+	rd_idtr(&idtr);
+	return (idtr.dtr_base);
+#endif
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.h b/usr/src/uts/i86pc/io/vmm/vmm_host.h
new file mode 100644
index 0000000000..5de015a228
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_host.h
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_host.h 242275 2012-10-29 01:51:24Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMM_HOST_H_
+#define	_VMM_HOST_H_
+
+#ifndef	__FreeBSD__
+#include <sys/cpuvar.h>
+#endif
+
+#ifndef	_KERNEL
+#error "no user-servicable parts inside"
+#endif
+
+void vmm_host_state_init(void);
+
+uint64_t vmm_get_host_pat(void);
+uint64_t vmm_get_host_efer(void);
+uint64_t vmm_get_host_cr0(void);
+uint64_t vmm_get_host_cr4(void);
+uint64_t vmm_get_host_datasel(void);
+uint64_t vmm_get_host_codesel(void);
+uint64_t vmm_get_host_tsssel(void);
+uint64_t vmm_get_host_fsbase(void);
+uint64_t vmm_get_host_idtrbase(void);
+
+/*
+ * Inline access to host state that is used on every VM entry
+ */
+/*
+ * Return the address of the current cpu's TSS: the per-cpu 'tssp' on
+ * FreeBSD, 'CPU->cpu_tss' otherwise.
+ */
+static __inline uint64_t
+vmm_get_host_trbase(void)
+{
+
+#ifdef	__FreeBSD__
+	return ((uint64_t)PCPU_GET(tssp));
+#else
+	return ((u_long)CPU->cpu_tss);
+#endif
+}
+
+/*
+ * Return the base address of the current cpu's GDT: computed from the 'gdt'
+ * array on FreeBSD, otherwise read from the GDTR with rd_gdtr().
+ */
+static __inline uint64_t
+vmm_get_host_gdtrbase(void)
+{
+
+#ifdef	__FreeBSD__
+	return ((uint64_t)&gdt[NGDT * curcpu]);
+#else
+	desctbr_t gdtr;
+
+	rd_gdtr(&gdtr);
+	return (gdtr.dtr_base);
+#endif
+}
+
+struct pcpu;
+extern struct pcpu __pcpu[];
+
+/*
+ * Return the host %gs base: the address of the current cpu's '__pcpu' entry
+ * on FreeBSD, otherwise the current value of the MSR_GSBASE register.
+ */
+static __inline uint64_t
+vmm_get_host_gsbase(void)
+{
+
+#ifdef	__FreeBSD__
+	return ((uint64_t)&__pcpu[curcpu]);
+#else
+	return (rdmsr(MSR_GSBASE));
+#endif
+}
+
+#ifndef	__FreeBSD__
+/* Return the host %fs selector, KFS_SEL (non-FreeBSD builds only). */
+static __inline uint64_t
+vmm_get_host_fssel(void)
+{
+	return (KFS_SEL);
+}
+
+/* Return the host %gs selector, KGS_SEL (non-FreeBSD builds only). */
+static __inline uint64_t
+vmm_get_host_gssel(void)
+{
+	return (KGS_SEL);
+}
+#endif
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
new file mode 100644
index 0000000000..72c7056e26
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -0,0 +1,2370 @@
+/*-
+ * Copyright (c) 2012 Sandvine, Inc.
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_instruction_emul.c 281987 2015-04-25 19:02:06Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_instruction_emul.c 281987 2015-04-25 19:02:06Z tychon $");
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#else	/* !_KERNEL */
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <vmmapi.h>
+#define	KASSERT(exp,msg)	assert((exp))
+#endif	/* _KERNEL */
+
+#include <machine/vmm_instruction_emul.h>
+#include <x86/psl.h>
+#include <x86/specialreg.h>
+
+/* Instruction classes stored in struct vie_op.op_type */
+enum {
+	VIE_OP_TYPE_NONE = 0,
+	VIE_OP_TYPE_MOV,
+	VIE_OP_TYPE_MOVSX,
+	VIE_OP_TYPE_MOVZX,
+	VIE_OP_TYPE_AND,
+	VIE_OP_TYPE_OR,
+	VIE_OP_TYPE_SUB,
+	VIE_OP_TYPE_TWO_BYTE,		/* assigned to the 0x0F escape byte */
+	VIE_OP_TYPE_PUSH,
+	VIE_OP_TYPE_CMP,
+	VIE_OP_TYPE_POP,
+	VIE_OP_TYPE_MOVS,
+	VIE_OP_TYPE_GROUP1,		/* assigned to opcodes 0x81 and 0x83 */
+	VIE_OP_TYPE_STOS,
+	VIE_OP_TYPE_LAST
+};
+
+/* Bit flags stored in struct vie_op.op_flags */
+#define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
+#define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
+#define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
+#define	VIE_OP_F_NO_MODRM	(1 << 3)  /* set on moffset MOV, MOVS, STOS */
+#define	VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)  /* set on MOVS and STOS */
+
+static const struct vie_op two_byte_opcodes[256] = {
+	[0xB6] = {			/* 0F B6/r: movzx r, r/m8 */
+		.op_byte = 0xB6,
+		.op_type = VIE_OP_TYPE_MOVZX,
+	},
+	[0xB7] = {			/* 0F B7/r: movzx r, r/m16 */
+		.op_byte = 0xB7,
+		.op_type = VIE_OP_TYPE_MOVZX,
+	},
+	[0xBE] = {			/* 0F BE/r: movsx r, r/m8 */
+		.op_byte = 0xBE,
+		.op_type = VIE_OP_TYPE_MOVSX,
+	},
+};
+
+static const struct vie_op one_byte_opcodes[256] = {
+	[0x0F] = {			/* escape byte, see two_byte_opcodes */
+		.op_byte = 0x0F,
+		.op_type = VIE_OP_TYPE_TWO_BYTE
+	},
+	[0x2B] = {			/* sub r, r/m */
+		.op_byte = 0x2B,
+		.op_type = VIE_OP_TYPE_SUB,
+	},
+	[0x3B] = {			/* cmp r, r/m */
+		.op_byte = 0x3B,
+		.op_type = VIE_OP_TYPE_CMP,
+	},
+	[0x88] = {			/* mov r/m8, r8 */
+		.op_byte = 0x88,
+		.op_type = VIE_OP_TYPE_MOV,
+	},
+	[0x89] = {			/* mov r/m, r */
+		.op_byte = 0x89,
+		.op_type = VIE_OP_TYPE_MOV,
+	},
+	[0x8A] = {			/* mov r8, r/m8 */
+		.op_byte = 0x8A,
+		.op_type = VIE_OP_TYPE_MOV,
+	},
+	[0x8B] = {			/* mov r, r/m */
+		.op_byte = 0x8B,
+		.op_type = VIE_OP_TYPE_MOV,
+	},
+	[0xA1] = {			/* mov AX/EAX/RAX, moffs */
+		.op_byte = 0xA1,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xA3] = {			/* mov moffs, AX/EAX/RAX */
+		.op_byte = 0xA3,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xA4] = {			/* movs, byte operand */
+		.op_byte = 0xA4,
+		.op_type = VIE_OP_TYPE_MOVS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
+	[0xA5] = {			/* movs, operand-size operand */
+		.op_byte = 0xA5,
+		.op_type = VIE_OP_TYPE_MOVS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
+	[0xAA] = {			/* stos, byte operand */
+		.op_byte = 0xAA,
+		.op_type = VIE_OP_TYPE_STOS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
+	[0xAB] = {			/* stos, operand-size operand */
+		.op_byte = 0xAB,
+		.op_type = VIE_OP_TYPE_STOS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+	},
+	[0xC6] = {
+		/* XXX Group 11 extended opcode - treated as mov r/m8, imm8 */
+		.op_byte = 0xC6,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_IMM8,
+	},
+	[0xC7] = {			/* mov r/m, imm16/imm32 */
+		.op_byte = 0xC7,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_IMM,
+	},
+	[0x23] = {			/* and r, r/m */
+		.op_byte = 0x23,
+		.op_type = VIE_OP_TYPE_AND,
+	},
+	[0x81] = {
+		/* XXX Group 1 extended opcode, imm16/imm32 operand */
+		.op_byte = 0x81,
+		.op_type = VIE_OP_TYPE_GROUP1,
+		.op_flags = VIE_OP_F_IMM,
+	},
+	[0x83] = {
+		/* XXX Group 1 extended opcode, sign-extended imm8 operand */
+		.op_byte = 0x83,
+		.op_type = VIE_OP_TYPE_GROUP1,
+		.op_flags = VIE_OP_F_IMM8,
+	},
+	[0x8F] = {
+		/* XXX Group 1A extended opcode - not just POP */
+		.op_byte = 0x8F,
+		.op_type = VIE_OP_TYPE_POP,
+	},
+	[0xFF] = {
+		/* XXX Group 5 extended opcode - not just PUSH */
+		.op_byte = 0xFF,
+		.op_type = VIE_OP_TYPE_PUSH,
+	}
+};
+
+/* struct vie.mod */
+#define	VIE_MOD_INDIRECT		0
+#define	VIE_MOD_INDIRECT_DISP8		1
+#define	VIE_MOD_INDIRECT_DISP32		2
+#define	VIE_MOD_DIRECT			3
+
+/* struct vie.rm */
+#define	VIE_RM_SIB			4
+#define	VIE_RM_DISP32			5
+
+#define	GB				(1024 * 1024 * 1024)
+
+static enum vm_reg_name gpr_map[16] = {	/* indexed by e.g. vie->reg */
+	VM_REG_GUEST_RAX,
+	VM_REG_GUEST_RCX,
+	VM_REG_GUEST_RDX,
+	VM_REG_GUEST_RBX,
+	VM_REG_GUEST_RSP,
+	VM_REG_GUEST_RBP,
+	VM_REG_GUEST_RSI,
+	VM_REG_GUEST_RDI,
+	VM_REG_GUEST_R8,
+	VM_REG_GUEST_R9,
+	VM_REG_GUEST_R10,
+	VM_REG_GUEST_R11,
+	VM_REG_GUEST_R12,
+	VM_REG_GUEST_R13,
+	VM_REG_GUEST_R14,
+	VM_REG_GUEST_R15
+};
+
+static uint64_t size2mask[] = {	/* operand size in bytes -> value mask */
+	[1] = 0xff,
+	[2] = 0xffff,
+	[4] = 0xffffffff,
+	[8] = 0xffffffffffffffff,
+};
+
+static int
+vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
+{
+	/*
+	 * Pure pass-through: vm_get_register() stores the value of 'reg'
+	 * through 'rval' and its status code is returned unmodified.
+	 */
+	return (vm_get_register(vm, vcpuid, reg, rval));
+}
+
+static void
+vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
+{
+	int regnum, is_high;
+
+	/*
+	 * Map the 'ModRM:reg' field onto the GPR backing an 8-bit operand.
+	 *
+	 * Without a REX prefix, encodings 4, 5, 6 and 7 name the legacy
+	 * high-byte registers %ah, %ch, %dh and %bh, which are part of
+	 * %rax, %rcx, %rdx and %rbx; the backing register is therefore
+	 * found by dropping bit 2 of the encoding.
+	 *
+	 * With a REX prefix those same encodings select %spl, %bpl, %sil
+	 * and %dil, so the field indexes the register map directly and no
+	 * high-byte handling applies.
+	 */
+	regnum = vie->reg;
+	is_high = (!vie->rex_present && (regnum & 0x4) != 0);
+	if (is_high)
+		regnum &= 0x3;
+
+	*lhbr = is_high;
+	*reg = gpr_map[regnum];
+}
+
+static int
+vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+{
+	uint64_t val;
+	int error, lhbr;
+	enum vm_reg_name reg;
+
+	vie_calc_bytereg(vie, &reg, &lhbr);
+	error = vm_get_register(vm, vcpuid, reg, &val);
+	if (error != 0)
+		return (error);	/* 'val' may be unset; leave *rval alone */
+
+	/*
+	 * Return the 8-bit operand named by ModRM:reg.  A legacy high byte
+	 * register is bits 15:8 of its base (%ah = %rax >> 8), any other
+	 * byte register is bits 7:0; the uint8_t store drops the rest.
+	 */
+	*rval = lhbr ? (val >> 8) : val;
+	return (0);
+}
+
+static int
+vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
+{
+	enum vm_reg_name target;
+	uint64_t current, merged;
+	int high, rc, shift;
+
+	vie_calc_bytereg(vie, &target, &high);
+
+	rc = vm_get_register(vm, vcpuid, target, &current);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Replace exactly one byte lane of the backing register and keep
+	 * every other bit as it was: bits 15:8 for a legacy high byte
+	 * register, bits 7:0 otherwise.  Nothing is written back if the
+	 * current register value could not be read.
+	 */
+	shift = high ? 8 : 0;
+	merged = current & ~((uint64_t)0xff << shift);
+	merged |= (uint64_t)byte << shift;
+
+	return (vm_set_register(vm, vcpuid, target, merged));
+}
+
+int
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+		    uint64_t val, int size)
+{
+	uint64_t keep, prior;
+	int rc;
+
+	if (size != 1 && size != 2 && size != 4 && size != 8)
+		return (EINVAL);
+
+	/*
+	 * Byte and word writes leave the untouched upper bits of the
+	 * register intact, so the old contents are read back and merged.
+	 * A 32-bit write zeroes bits 63:32 and a 64-bit write replaces the
+	 * register outright; neither needs the prior value.
+	 */
+	if (size == 1 || size == 2) {
+		rc = vie_read_register(vm, vcpuid, reg, &prior);
+		if (rc != 0)
+			return (rc);
+		keep = ~size2mask[size];
+		val = (val & ~keep) | (prior & keep);
+	} else if (size == 4) {
+		val &= 0xffffffffUL;
+	}
+
+	return (vm_set_register(vm, vcpuid, reg, val));
+}
+
+#define	RFLAGS_STATUS_BITS    (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
+/*
+ * getcc<sz>(x, y): RFLAGS as left by an <sz>-bit 'sub' computing (x - y).
+ */
+#define	GETCC(sz)							\
+static u_long								\
+getcc##sz(uint##sz##_t x, uint##sz##_t y)				\
+{									\
+	u_long rflags;							\
+									\
+	__asm __volatile("sub %2,%1; pushfq; popq %0" :			\
+	    "=r" (rflags), "+r" (x) : "m" (y));				\
+	return (rflags);						\
+} struct __hack	/* absorbs the ';' written after each GETCC(n) */
+
+GETCC(8);
+GETCC(16);
+GETCC(32);
+GETCC(64);
+
+static u_long
+getcc(int opsize, uint64_t x, uint64_t y)
+{
+	KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
+	    ("getcc: invalid operand size %d", opsize));
+	switch (opsize) {
+	case 1:
+		return (getcc8(x, y));
+	case 2:
+		return (getcc16(x, y));
+	case 4:
+		return (getcc32(x, y));
+	}
+	return (getcc64(x, y));	/* every remaining size is treated as 8 */
+}
+
+static int
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	int error, size;
+	enum vm_reg_name reg;
+	uint8_t byte;
+	uint64_t val;
+
+	size = vie->opsize;
+	error = EINVAL;
+	/* Each case leaves its status in 'error'; other opcodes keep EINVAL. */
+	switch (vie->op.op_byte) {
+	case 0x88:
+		/*
+		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
+		 * 88/r:	mov r/m8, r8
+		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
+		 */
+		size = 1;	/* override for byte operation */
+		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
+		if (error == 0)
+			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
+		break;
+	case 0x89:
+		/*
+		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
+		 * 89/r:	mov r/m16, r16
+		 * 89/r:	mov r/m32, r32
+		 * REX.W + 89/r	mov r/m64, r64
+		 */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vm, vcpuid, reg, &val);
+		if (error == 0) {
+			val &= size2mask[size];
+			error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		}
+		break;
+	case 0x8A:
+		/*
+		 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
+		 * 8A/r:	mov r8, r/m8
+		 * REX + 8A/r:	mov r8, r/m8
+		 */
+		size = 1;	/* override for byte operation */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0)
+			error = vie_write_bytereg(vm, vcpuid, vie, val);
+		break;
+	case 0x8B:
+		/*
+		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
+		 * 8B/r:	mov r16, r/m16
+		 * 8B/r:	mov r32, r/m32
+		 * REX.W 8B/r:	mov r64, r/m64
+		 */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0) {
+			reg = gpr_map[vie->reg];
+			error = vie_update_register(vm, vcpuid, reg, val, size);
+		}
+		break;
+	case 0xA1:
+		/*
+		 * MOV from seg:moffset to AX/EAX/RAX
+		 * A1:		mov AX, moffs16
+		 * A1:		mov EAX, moffs32
+		 * REX.W + A1:	mov RAX, moffs64
+		 */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0) {
+			reg = VM_REG_GUEST_RAX;
+			error = vie_update_register(vm, vcpuid, reg, val, size);
+		}
+		break;
+	case 0xA3:
+		/*
+		 * MOV from AX/EAX/RAX to seg:moffset
+		 * A3:		mov moffs16, AX
+		 * A3:		mov moffs32, EAX
+		 * REX.W + A3:	mov moffs64, RAX
+		 */
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+		if (error == 0) {
+			val &= size2mask[size];
+			error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		}
+		break;
+	case 0xC6:
+		/*
+		 * MOV from imm8 to mem (ModRM:r/m)
+		 * C6/0		mov r/m8, imm8
+		 * REX + C6/0	mov r/m8, imm8
+		 */
+		size = 1;	/* override for byte operation */
+		error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
+		break;
+	case 0xC7:
+		/*
+		 * MOV from imm16/imm32 to mem (ModRM:r/m)
+		 * C7/0		mov r/m16, imm16
+		 * C7/0		mov r/m32, imm32
+		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
+		 */
+		val = vie->immediate & size2mask[size];
+		error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		break;
+	default:
+		break;
+	}
+
+	return (error);
+}
+
+static int
+emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	     mem_region_read_t memread, mem_region_write_t memwrite,
+	     void *arg)
+{
+	int error, size;
+	enum vm_reg_name reg;
+	uint64_t val;
+
+	size = vie->opsize;
+	error = EINVAL;
+	/* Destination is always ModRM:reg; source width and extension vary. */
+	switch (vie->op.op_byte) {
+	case 0xB6:
+		/*
+		 * MOV and zero extend byte from mem (ModRM:r/m) to
+		 * reg (ModRM:reg).
+		 *
+		 * 0F B6/r		movzx r16, r/m8
+		 * 0F B6/r		movzx r32, r/m8
+		 * REX.W + 0F B6/r	movzx r64, r/m8
+		 */
+
+		/* get the first operand */
+		error = memread(vm, vcpuid, gpa, &val, 1, arg);
+		if (error)
+			break;
+
+		/* get the second operand */
+		reg = gpr_map[vie->reg];
+
+		/* zero-extend byte */
+		val = (uint8_t)val;
+
+		/* write the result */
+		error = vie_update_register(vm, vcpuid, reg, val, size);
+		break;
+	case 0xB7:
+		/*
+		 * MOV and zero extend word from mem (ModRM:r/m) to
+		 * reg (ModRM:reg).
+		 *
+		 * 0F B7/r		movzx r32, r/m16
+		 * REX.W + 0F B7/r	movzx r64, r/m16
+		 */
+		error = memread(vm, vcpuid, gpa, &val, 2, arg);
+		if (error)
+			return (error);
+
+		reg = gpr_map[vie->reg];
+
+		/* zero-extend word */
+		val = (uint16_t)val;
+
+		error = vie_update_register(vm, vcpuid, reg, val, size);
+		break;
+	case 0xBE:
+		/*
+		 * MOV and sign extend byte from mem (ModRM:r/m) to
+		 * reg (ModRM:reg).
+		 *
+		 * 0F BE/r		movsx r16, r/m8
+		 * 0F BE/r		movsx r32, r/m8
+		 * REX.W + 0F BE/r	movsx r64, r/m8
+		 */
+
+		/* get the first operand */
+		error = memread(vm, vcpuid, gpa, &val, 1, arg);
+		if (error)
+			break;
+
+		/* get the second operand */
+		reg = gpr_map[vie->reg];
+
+		/* sign extend byte */
+		val = (int8_t)val;
+
+		/* write the result */
+		error = vie_update_register(vm, vcpuid, reg, val, size);
+		break;
+	default:
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Compute and vet the linear address for offset register 'gpr' in 'seg'.
+ *
+ * Returns 0 on success and 1 if an exception was injected into the guest.
+ */
+static int
+get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
+    int opsize, int addrsize, int prot, enum vm_reg_name seg,
+    enum vm_reg_name gpr, uint64_t *gla)
+{
+	struct seg_desc desc;
+	uint64_t cr0, offset, rflags;
+	int error;
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
+	KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
+	    __func__, error, seg));
+
+	error = vie_read_register(vm, vcpuid, gpr, &offset);
+	KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
+	    error, gpr));
+
+	/*
+	 * A failed address calculation and a failed canonical check are
+	 * reported identically: vm_inject_ss() when the access goes through
+	 * the stack segment, vm_inject_gp() for any other segment.  The
+	 * canonical check only runs when the calculation succeeded.
+	 */
+	if (vie_calculate_gla(paging->cpu_mode, seg, &desc, offset, opsize,
+	    addrsize, prot, gla) ||
+	    vie_canonical_check(paging->cpu_mode, *gla)) {
+		if (seg == VM_REG_GUEST_SS)
+			vm_inject_ss(vm, vcpuid, 0);
+		else
+			vm_inject_gp(vm, vcpuid);
+		return (1);
+	}
+
+	/* Alignment violations are reported through vm_inject_ac(). */
+	if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
+		vm_inject_ac(vm, vcpuid, 0);
+		return (1);
+	}
+
+	return (0);
+}
+
+static int
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+	struct vm_copyinfo copyinfo[2];
+#else
+	struct iovec copyinfo[2];
+#endif
+	uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
+	uint64_t rcx, rdi, rsi, rflags;
+	int error, opsize, seg, repeat;
+
+	opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
+	val = 0;
+	error = 0;
+
+	/*
+	 * XXX although the MOVS instruction is only supposed to be used with
+	 * the "rep" prefix some guests like FreeBSD will use "repnz" instead.
+	 *
+	 * Empirically the "repnz" prefix has identical behavior to "rep"
+	 * and the zero flag does not make a difference.
+	 */
+	repeat = vie->repz_present | vie->repnz_present;
+
+	if (repeat) {
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+		KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+		/*
+		 * The count register is %rcx, %ecx or %cx depending on the
+		 * address size of the instruction.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+			return (0);
+	}
+
+	/*
+	 *	Source		Destination	Comments
+	 *	--------------------------------------------
+	 * (1)  memory		memory		n/a
+	 * (2)  memory		mmio		emulated
+	 * (3)  mmio		memory		emulated
+	 * (4)  mmio		mmio		emulated
+	 *
+	 * At this point we don't have sufficient information to distinguish
+	 * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
+	 * out because it will succeed only when operating on regular memory.
+	 *
+	 * XXX the emulation doesn't properly handle the case where 'gpa'
+	 * is straddling the boundary between the normal memory and MMIO.
+	 */
+
+	seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
+	error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+	    PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
+	if (error)
+		goto done;
+
+	error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
+	    copyinfo, nitems(copyinfo));
+	if (error == 0) {
+		/*
+		 * case (2): read from system memory and write to mmio.
+		 */
+		vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
+		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+		error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+		if (error)
+			goto done;
+	} else if (error > 0) {
+		/*
+		 * Resume guest execution to handle fault.
+		 */
+		goto done;
+	} else {
+		/*
+		 * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
+		 * if 'srcaddr' is in the mmio space.
+		 */
+
+		error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+		    PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
+		if (error)
+			goto done;
+
+		error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
+		    PROT_WRITE, copyinfo, nitems(copyinfo));
+		if (error == 0) {
+			/*
+			 * case (3): read from MMIO and write to system memory.
+			 *
+			 * A MMIO read can have side-effects so we commit to
+			 * it only after vm_copy_setup() is successful; any
+			 * page-fault is injected before the read is attempted.
+			 * The copy set up above is torn down even when the
+			 * MMIO read fails so that it is not leaked.
+			 */
+			error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+			if (error == 0)
+				vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
+			vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+			if (error)
+				goto done;
+		} else if (error > 0) {
+			/*
+			 * Resume guest execution to handle fault.
+			 */
+			goto done;
+		} else {
+			/*
+			 * Case (4): read from and write to mmio.
+			 */
+			error = vm_gla2gpa(vm, vcpuid, paging, srcaddr,
+			    PROT_READ, &srcgpa);
+			if (error)
+				goto done;
+			error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
+			if (error)
+				goto done;
+
+			error = vm_gla2gpa(vm, vcpuid, paging, dstaddr,
+			   PROT_WRITE, &dstgpa);
+			if (error)
+				goto done;
+			error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
+			if (error)
+				goto done;
+		}
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+	KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+	KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	if (rflags & PSL_D) {
+		rsi -= opsize;
+		rdi -= opsize;
+	} else {
+		rsi += opsize;
+		rdi += opsize;
+	}
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
+	    vie->addrsize);
+	KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+	    vie->addrsize);
+	KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+	if (repeat) {
+		rcx = rcx - 1;
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+		    rcx, vie->addrsize);
+		KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+		/*
+		 * Repeat the instruction if the count register is not zero.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+			vm_restart_instruction(vm, vcpuid);
+	}
+done:
+	if (error < 0)
+		return (EFAULT);
+	else
+		return (0);
+}
+
+static int
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+	int error, opsize, repeat;
+	uint64_t val;
+	uint64_t rcx, rdi, rflags;
+
+	opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
+	repeat = vie->repz_present | vie->repnz_present;
+	/* With a rep/repnz prefix a zero count register ends the emulation. */
+	if (repeat) {
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+		KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+		/*
+		 * The count register is %rcx, %ecx or %cx depending on the
+		 * address size of the instruction.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+			return (0);
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+	KASSERT(!error, ("%s: error %d getting rax", __func__, error));
+	/* Write %rax to 'gpa' using the operand size chosen above. */
+	error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+	if (error)
+		return (error);
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+	KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+	/* PSL_D selects whether %rdi steps down or up by the operand size. */
+	if (rflags & PSL_D)
+		rdi -= opsize;
+	else
+		rdi += opsize;
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+	    vie->addrsize);
+	KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+	if (repeat) {
+		rcx = rcx - 1;
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+		    rcx, vie->addrsize);
+		KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+		/*
+		 * Repeat the instruction if the count register is not zero.
+		 */
+		if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+			vm_restart_instruction(vm, vcpuid);
+	}
+
+	return (0);
+}
+
+static int
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	int error, size;
+	enum vm_reg_name reg;
+	uint64_t result, rflags, rflags2, val1, val2;
+
+	size = vie->opsize;
+	error = EINVAL;
+	/* Perform the AND; 'result' feeds the flag update after the switch. */
+	switch (vie->op.op_byte) {
+	case 0x23:
+		/*
+		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
+		 * result in reg.
+		 *
+		 * 23/r		and r16, r/m16
+		 * 23/r		and r32, r/m32
+		 * REX.W + 23/r	and r64, r/m64
+		 */
+
+		/* get the first operand */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vm, vcpuid, reg, &val1);
+		if (error)
+			break;
+
+		/* get the second operand */
+		error = memread(vm, vcpuid, gpa, &val2, size, arg);
+		if (error)
+			break;
+
+		/* perform the operation and write the result */
+		result = val1 & val2;
+		error = vie_update_register(vm, vcpuid, reg, result, size);
+		break;
+	case 0x81:
+	case 0x83:
+		/*
+		 * AND mem (ModRM:r/m) with immediate and store the
+		 * result in mem.
+		 *
+		 * 81 /4		and r/m16, imm16
+		 * 81 /4		and r/m32, imm32
+		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
+		 *
+		 * 83 /4		and r/m16, imm8 sign-extended to 16
+		 * 83 /4		and r/m32, imm8 sign-extended to 32
+		 * REX.W + 83/4		and r/m64, imm8 sign-extended to 64
+		 */
+
+		/* get the first operand */
+                error = memread(vm, vcpuid, gpa, &val1, size, arg);
+                if (error)
+			break;
+
+                /*
+		 * perform the operation with the pre-fetched immediate
+		 * operand and write the result
+		 */
+                result = val1 & vie->immediate;
+                error = memwrite(vm, vcpuid, gpa, result, size, arg);
+		break;
+	default:
+		break;
+	}
+	if (error)
+		return (error);
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	if (error)
+		return (error);
+
+	/*
+	 * OF and CF are cleared; the SF, ZF and PF flags are set according
+	 * to the result; AF is undefined.
+	 *
+	 * The updated status flags are obtained by subtracting 0 from 'result'.
+	 */
+	rflags2 = getcc(size, result, 0);
+	rflags &= ~RFLAGS_STATUS_BITS;
+	rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+	return (error);
+}
+
+static int
+emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	uint64_t flags, merged, newflags, operand;
+	int nbytes, rc;
+
+	/*
+	 * OR mem (ModRM:r/m) with an immediate and store the result back
+	 * in mem.  Only the Group 1 immediate encodings are emulated:
+	 *
+	 * 81 /1		or r/m16, imm16
+	 * 81 /1		or r/m32, imm32
+	 * REX.W + 81 /1	or r/m64, imm32 sign-extended to 64
+	 *
+	 * 83 /1		or r/m16, imm8 sign-extended to 16
+	 * 83 /1		or r/m32, imm8 sign-extended to 32
+	 * REX.W + 83/1		or r/m64, imm8 sign-extended to 64
+	 *
+	 * Any other opcode byte is rejected with EINVAL before guest
+	 * memory or registers are touched.
+	 */
+	if (vie->op.op_byte != 0x81 && vie->op.op_byte != 0x83)
+		return (EINVAL);
+
+	nbytes = vie->opsize;
+
+	/* fetch the destination operand from memory */
+	rc = memread(vm, vcpuid, gpa, &operand, nbytes, arg);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * combine it with the pre-fetched immediate operand and write the
+	 * result back
+	 */
+	merged = operand | vie->immediate;
+	rc = memwrite(vm, vcpuid, gpa, merged, nbytes, arg);
+	if (rc != 0)
+		return (rc);
+
+	/* the status flags are only updated once the store has succeeded */
+	rc = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &flags);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * OR clears OF and CF and sets SF, ZF and PF from the result; AF
+	 * is undefined and is left cleared here.
+	 *
+	 * getcc() yields the flags of (merged - 0) at the operand size,
+	 * from which only the SF, ZF and PF bits are taken; every other
+	 * status flag stays cleared.
+	 */
+	newflags = getcc(nbytes, merged, 0);
+	flags &= ~RFLAGS_STATUS_BITS;
+	flags |= newflags & (PSL_PF | PSL_Z | PSL_N);
+
+	rc = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, flags, 8);
+	return (rc);
+}
+
+static int
+emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	uint64_t cmpflags, flags, lhs, rhs;
+	int nbytes, rc;
+
+	nbytes = vie->opsize;
+
+	/*
+	 * CMP subtracts its second operand from its first and keeps only
+	 * the resulting status flags.  The switch below gathers the two
+	 * operands ('lhs' - 'rhs'); the flags are derived afterwards.
+	 * Opcodes other than 3B, 81 and 83 are rejected with EINVAL.
+	 */
+	switch (vie->op.op_byte) {
+	case 0x3B:
+		/*
+		 * 3B/r		CMP r16, r/m16
+		 * 3B/r		CMP r32, r/m32
+		 * REX.W + 3B/r	CMP r64, r/m64
+		 *
+		 * First operand is the register (ModRM:reg), second
+		 * operand is memory (ModRM:r/m).
+		 */
+		rc = vie_read_register(vm, vcpuid, gpr_map[vie->reg], &lhs);
+		if (rc != 0)
+			return (rc);
+
+		rc = memread(vm, vcpuid, gpa, &rhs, nbytes, arg);
+		if (rc != 0)
+			return (rc);
+		break;
+	case 0x81:
+	case 0x83:
+		/*
+		 * 81 /7		cmp r/m16, imm16
+		 * 81 /7		cmp r/m32, imm32
+		 * REX.W + 81 /7	cmp r/m64, imm32 sign-extended to 64
+		 *
+		 * 83 /7		cmp r/m16, imm8 sign-extended to 16
+		 * 83 /7		cmp r/m32, imm8 sign-extended to 32
+		 * REX.W + 83 /7	cmp r/m64, imm8 sign-extended to 64
+		 *
+		 * First operand is memory (ModRM:r/m), second operand is
+		 * the pre-fetched immediate.
+		 */
+		rc = memread(vm, vcpuid, gpa, &lhs, nbytes, arg);
+		if (rc != 0)
+			return (rc);
+
+		rhs = vie->immediate;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/*
+	 * status flags produced by (lhs - rhs) at the operand size
+	 */
+	cmpflags = getcc(nbytes, lhs, rhs);
+
+	rc = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &flags);
+	if (rc != 0)
+		return (rc);
+
+	/* replace every status flag with the freshly computed one */
+	flags &= ~RFLAGS_STATUS_BITS;
+	flags |= cmpflags & RFLAGS_STATUS_BITS;
+
+	rc = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, flags, 8);
+	return (rc);
+}
+
+static int
+emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	enum vm_reg_name dst;
+	uint64_t flags, minuend, subflags, subtrahend;
+	int nbytes, rc;
+
+	/*
+	 * SUB r/m from r and store the result in r
+	 *
+	 * 2B/r            SUB r16, r/m16
+	 * 2B/r            SUB r32, r/m32
+	 * REX.W + 2B/r    SUB r64, r/m64
+	 *
+	 * This is the only encoding emulated; anything else is EINVAL.
+	 */
+	if (vie->op.op_byte != 0x2B)
+		return (EINVAL);
+
+	nbytes = vie->opsize;
+	dst = gpr_map[vie->reg];
+
+	/* first operand: the destination register */
+	rc = vie_read_register(vm, vcpuid, dst, &minuend);
+	if (rc != 0)
+		return (rc);
+
+	/* second operand: the memory location */
+	rc = memread(vm, vcpuid, gpa, &subtrahend, nbytes, arg);
+	if (rc != 0)
+		return (rc);
+
+	/* commit the difference to the destination register */
+	rc = vie_update_register(vm, vcpuid, dst, minuend - subtrahend,
+	    nbytes);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * All six status flags are replaced by those that the same
+	 * subtraction produces at the instruction's operand size.
+	 */
+	subflags = getcc(nbytes, minuend, subtrahend);
+
+	rc = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &flags);
+	if (rc != 0)
+		return (rc);
+
+	flags &= ~RFLAGS_STATUS_BITS;
+	flags |= subflags & RFLAGS_STATUS_BITS;
+
+	rc = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, flags, 8);
+	return (rc);
+}
+
+static int
+emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+	struct vm_copyinfo copyinfo[2];
+#else
+	struct iovec copyinfo[2];
+#endif
+	struct seg_desc ss_desc;
+	uint64_t cr0, rflags, rsp, stack_gla, val;
+	int error, size, stackaddrsize, pushop;
+
+	val = 0;
+	size = vie->opsize;
+	pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+
+	/*
+	 * 'ss_desc' is passed to vie_calculate_gla() below in every CPU mode,
+	 * so load it up front rather than only for protected mode.
+	 */
+	error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
+	KASSERT(error == 0, ("%s: error %d getting SS descriptor",
+	    __func__, error));
+
+	/* "Address-Size Attributes for Stack Accesses", Intel SDM, Vol 1 */
+	if (paging->cpu_mode == CPU_MODE_REAL) {
+		stackaddrsize = 2;
+	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
+		/*
+		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
+		 * - Stack pointer size is always 64-bits.
+		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
+		 * - 16-bit PUSH/POP uses the operand size override (66H).
+		 */
+		stackaddrsize = 8;
+		size = vie->opsize_override ? 2 : 8;
+	} else {
+		/*
+		 * In protected or compatibility mode the stack-segment
+		 * descriptor's 'B' flag determines the stack pointer size.
+		 */
+		if (SEG_DESC_DEF32(ss_desc.access))
+			stackaddrsize = 4;
+		else
+			stackaddrsize = 2;
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+	if (pushop)
+		rsp -= size;
+
+	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
+	    rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
+	    &stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
+		vm_inject_ac(vm, vcpuid, 0);
+		return (0);
+	}
+
+	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
+	    pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo));
+	if (error == -1) {
+		/*
+		 * XXX cannot return a negative error value here because it
+		 * ends up being the return value of the VM_RUN() ioctl and
+		 * is interpreted as a pseudo-error (for e.g. ERESTART).
+		 */
+		return (EFAULT);
+	} else if (error == 1) {
+		/* Resume guest execution to handle page fault */
+		return (0);
+	}
+
+	if (pushop) {
+		error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+		if (error == 0)
+			vm_copyout(vm, vcpuid, &val, copyinfo, size);
+	} else {
+		vm_copyin(vm, vcpuid, copyinfo, &val, size);
+		error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
+		rsp += size;
+	}
+	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+
+	if (error == 0) {
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
+		    stackaddrsize);
+		KASSERT(error == 0, ("error %d updating rsp", error));
+	}
+	return (error);
+}
+
+/*
+ * Emulate the group 5 "PUSH r/m" form. Any other opcode extension in
+ * ModRM:reg yields EINVAL; otherwise the result of emulate_stack_op() is
+ * returned unchanged.
+ */
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+	const int opext = vie->reg & 7;
+
+	/*
+	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+	 *
+	 * Within the group 5 extended opcodes only ModRM:reg = b110
+	 * selects PUSH.
+	 */
+	if (opext == 6) {
+		return (emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging,
+		    memread, memwrite, arg));
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Emulate the group 1A "POP r/m" form. Any other opcode extension in
+ * ModRM:reg yields EINVAL; otherwise the result of emulate_stack_op() is
+ * returned unchanged.
+ */
+static int
+emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+	const int opext = vie->reg & 7;
+
+	/*
+	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+	 *
+	 * Within the group 1A extended opcodes only ModRM:reg = b000
+	 * selects POP.
+	 */
+	if (opext == 0) {
+		return (emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging,
+		    memread, memwrite, arg));
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Emulate a group 1 opcode: ModRM:reg picks OR, AND or CMP. Every other
+ * extension is rejected with EINVAL. 'paging' is accepted for signature
+ * parity with the other emulation routines and is not referenced.
+ */
+static int
+emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
+{
+	switch (vie->reg & 7) {
+	case 0x1:	/* OR */
+		return (emulate_or(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case 0x4:	/* AND */
+		return (emulate_and(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case 0x7:	/* CMP */
+		return (emulate_cmp(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	default:
+		break;
+	}
+
+	/* No emulation for the remaining group 1 extensions */
+	return (EINVAL);
+}
+
+/*
+ * Dispatch an already decoded instruction to the emulation routine for its
+ * operation type.
+ *
+ * Returns EINVAL if 'vie' has not been marked as decoded or if the operation
+ * type has no emulation routine here; otherwise returns whatever the selected
+ * routine returns.
+ */
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
+{
+	/* Nothing can be emulated until the decoder has accepted 'vie' */
+	if (vie->decoded == 0)
+		return (EINVAL);
+
+	switch (vie->op.op_type) {
+	/* Routines that need the guest paging state */
+	case VIE_OP_TYPE_GROUP1:
+		return (emulate_group1(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg));
+	case VIE_OP_TYPE_POP:
+		return (emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg));
+	case VIE_OP_TYPE_PUSH:
+		return (emulate_push(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg));
+	case VIE_OP_TYPE_MOVS:
+		return (emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg));
+	case VIE_OP_TYPE_STOS:
+		return (emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg));
+
+	/* Routines that only need the MMIO callbacks */
+	case VIE_OP_TYPE_CMP:
+		return (emulate_cmp(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case VIE_OP_TYPE_MOV:
+		return (emulate_mov(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case VIE_OP_TYPE_MOVSX:
+	case VIE_OP_TYPE_MOVZX:
+		return (emulate_movx(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case VIE_OP_TYPE_AND:
+		return (emulate_and(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case VIE_OP_TYPE_OR:
+		return (emulate_or(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	case VIE_OP_TYPE_SUB:
+		return (emulate_sub(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg));
+	default:
+		break;
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Report whether an access of 'size' bytes at 'gla' violates alignment
+ * checking. The check is only in force when 'cpl' is 3 and both CR0_AM in
+ * 'cr0' and PSL_AC in 'rf' are set.
+ *
+ * Returns 1 for a misaligned access while checking is in force, else 0.
+ */
+int
+vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+	int enforced;
+
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("%s: invalid size %d", __func__, size));
+	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
+
+	enforced = (cpl == 3 && (cr0 & CR0_AM) != 0 && (rf & PSL_AC) != 0);
+
+	/* 'size' is a power of two, so (size - 1) masks the low offset bits */
+	if (enforced && (gla & (size - 1)) != 0)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Report whether 'gla' is a non-canonical address. Only CPU_MODE_64BIT is
+ * checked; every other mode returns 0.
+ *
+ * Returns 1 if bits 63:48 of 'gla' are not all copies of bit 47, else 0.
+ */
+int
+vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
+{
+	const uint64_t upper = ~((1UL << 48) - 1);
+	uint64_t expected;
+
+	if (cpu_mode != CPU_MODE_64BIT)
+		return (0);
+
+	/*
+	 * The most significant 16 bits must replicate bit 47: all ones when
+	 * it is set, all zeroes when it is clear.
+	 */
+	expected = ((gla & (1UL << 47)) != 0) ? upper : 0;
+
+	return ((gla & upper) != expected);
+}
+
+/*
+ * Look up the entry of the size2mask[] table for an operand or address size
+ * of 1, 2, 4 or 8 bytes. Any other size trips the assertion.
+ */
+uint64_t
+vie_size2mask(int size)
+{
+	uint64_t mask;
+
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("vie_size2mask: invalid size %d", size));
+
+	mask = size2mask[size];
+	return (mask);
+}
+
+/*
+ * Compute the guest linear address of a 'length' byte access at 'offset'
+ * within segment 'seg'.
+ *
+ * Outside 64-bit mode the access is first validated against 'desc': the
+ * segment must be usable, its type must permit the access described by
+ * 'prot' (PROT_READ and/or PROT_WRITE), and every byte of the access must
+ * lie within the segment limit. In 64-bit mode none of those checks are
+ * made and 'desc' is consulted only for the base of %fs and %gs.
+ *
+ * Returns 0 and stores the result in '*gla' on success. Returns -1 when one
+ * of the segment checks fails; '*gla' is left untouched in that case and it
+ * is up to the caller to inject the appropriate exception.
+ */
+int
+vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+    int prot, uint64_t *gla)
+{
+	uint64_t firstoff, low_limit, high_limit, segbase;
+	int glasize, type;
+
+	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
+	    ("%s: invalid segment %d", __func__, seg));
+	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
+	    ("%s: invalid operand size %d", __func__, length));
+	/* The two variants below differ only in the printf format used */
+#ifdef	__FreeBSD__
+	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
+	    ("%s: invalid prot %#x", __func__, prot));
+#else
+	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
+	    ("%s: invalid prot %x", __func__, prot));
+#endif
+
+	/* Remember the starting offset; 'offset' is advanced by the limit loop */
+	firstoff = offset;
+	if (cpu_mode == CPU_MODE_64BIT) {
+		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
+		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
+		glasize = 8;
+	} else {
+		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
+		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
+		glasize = 4;
+		/*
+		 * If the segment selector is loaded with a NULL selector
+		 * then the descriptor is unusable and attempting to use
+		 * it results in a #GP(0).
+		 */
+		if (SEG_DESC_UNUSABLE(desc->access))
+			return (-1);
+
+		/*
+		 * The processor generates a #NP exception when a segment
+		 * register is loaded with a selector that points to a
+		 * descriptor that is not present. If this was the case then
+		 * it would have been checked before the VM-exit.
+		 */
+#ifdef	__FreeBSD__
+		KASSERT(SEG_DESC_PRESENT(desc->access),
+		    ("segment %d not present: %#x", seg, desc->access));
+#else
+		KASSERT(SEG_DESC_PRESENT(desc->access),
+		    ("segment %d not present: %x", seg, desc->access));
+#endif
+
+		/*
+		 * The descriptor type must indicate a code/data segment.
+		 */
+		type = SEG_DESC_TYPE(desc->access);
+#ifdef	__FreeBSD__
+		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
+		    "descriptor type %#x", seg, type));
+#else
+		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
+		    "descriptor type %x", seg, type));
+#endif
+
+		if (prot & PROT_READ) {
+			/* #GP on a read access to a exec-only code segment */
+			if ((type & 0xA) == 0x8)
+				return (-1);
+		}
+
+		if (prot & PROT_WRITE) {
+			/*
+			 * #GP on a write access to a code segment or a
+			 * read-only data segment.
+			 */
+			if (type & 0x8)			/* code segment */
+				return (-1);
+
+			if ((type & 0xA) == 0)		/* read-only data seg */
+				return (-1);
+		}
+
+		/*
+		 * 'desc->limit' is fully expanded taking granularity into
+		 * account.
+		 */
+		if ((type & 0xC) == 0x4) {
+			/* expand-down data segment */
+			low_limit = desc->limit + 1;
+			high_limit = SEG_DESC_DEF32(desc->access) ?
+			    0xffffffff : 0xffff;
+		} else {
+			/* code segment or expand-up data segment */
+			low_limit = 0;
+			high_limit = desc->limit;
+		}
+
+		/*
+		 * Check every byte of the access individually; the offset is
+		 * re-truncated to 'addrsize' on each step so that an access
+		 * that wraps is checked at its wrapped offsets.
+		 */
+		while (length > 0) {
+			offset &= vie_size2mask(addrsize);
+			if (offset < low_limit || offset > high_limit)
+				return (-1);
+			offset++;
+			length--;
+		}
+	}
+
+	/*
+	 * In 64-bit mode all segments except %fs and %gs have a segment
+	 * base address of 0.
+	 */
+	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+	    seg != VM_REG_GUEST_GS) {
+		segbase = 0;
+	} else {
+		segbase = desc->base;
+	}
+
+	/*
+	 * Truncate 'firstoff' to the effective address size before adding
+	 * it to the segment base.
+	 */
+	firstoff &= vie_size2mask(addrsize);
+	*gla = (segbase + firstoff) & vie_size2mask(glasize);
+	return (0);
+}
+
+#ifdef _KERNEL
+/*
+ * Reset 'vie' to a pristine state and optionally preload it with
+ * 'inst_length' instruction bytes from 'inst_bytes'. The base, index and
+ * segment registers start out as VM_REG_LAST. When 'inst_length' is zero
+ * 'inst_bytes' is never dereferenced.
+ */
+void
+vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
+{
+	KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE,
+	    ("%s: invalid instruction length (%d)", __func__, inst_length));
+
+	bzero(vie, sizeof (*vie));
+
+	vie->segment_register = VM_REG_LAST;
+	vie->index_register = VM_REG_LAST;
+	vie->base_register = VM_REG_LAST;
+
+	/* No bytes supplied: the instruction has to be fetched later */
+	if (inst_length == 0)
+		return;
+
+	bcopy(inst_bytes, vie->inst, inst_length);
+	vie->num_valid = inst_length;
+}
+
+/*
+ * Assemble a page fault error code from its ingredients:
+ *   PGEX_P   - 'pte' has PG_V set
+ *   PGEX_W   - 'prot' includes VM_PROT_WRITE
+ *   PGEX_U   - 'usermode' is non-zero
+ *   PGEX_RSV - 'rsvd' is non-zero
+ *   PGEX_I   - 'prot' includes VM_PROT_EXECUTE
+ */
+static int
+pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
+{
+	int code;
+
+	code = ((pte & PG_V) != 0) ? PGEX_P : 0;
+	code |= ((prot & VM_PROT_WRITE) != 0) ? PGEX_W : 0;
+	code |= (usermode != 0) ? PGEX_U : 0;
+	code |= (rsvd != 0) ? PGEX_RSV : 0;
+	code |= ((prot & VM_PROT_EXECUTE) != 0) ? PGEX_I : 0;
+
+	return (code);
+}
+
+/*
+ * Drop the hold recorded in '*cookie', if there is one, and clear the
+ * cookie so that a repeated call does nothing.
+ */
+static void
+ptp_release(void **cookie)
+{
+	void *held = *cookie;
+
+	if (held == NULL)
+		return;
+
+	vm_gpa_release(held);
+	*cookie = NULL;
+}
+
+/*
+ * Release whatever '*cookie' currently refers to, then take a VM_PROT_RW
+ * hold on 'len' bytes of guest physical memory at 'ptpphys'. Returns the
+ * pointer produced by vm_gpa_hold() as-is.
+ */
+static void *
+ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
+{
+	/* At most one page table page is kept held at a time */
+	ptp_release(cookie);
+
+	return (vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie));
+}
+
+/*
+ * Translate the guest linear address 'gla' to a guest physical address by
+ * walking the guest's page tables in software, starting from paging->cr3.
+ * 'prot' describes the access being made; the walk sets accessed/dirty
+ * bits in the guest's page table entries along the way.
+ *
+ * Returns:
+ *   0  - success, '*gpa' holds the translation
+ *   1  - an exception (#GP or #PF) was injected into the guest
+ *  -1  - a page table page could not be held (ptp_hold() returned NULL)
+ *
+ * Whenever an atomic update of a page table entry loses a race the whole
+ * walk is restarted from the root.
+ */
+int
+vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa)
+{
+	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
+#ifdef	__FreeBSD__
+#endif
+	/* Only referenced inside the __FreeBSD__ sections below */
+	u_int retries;
+	uint64_t *ptpbase, ptpphys, pte, pgsize;
+	uint32_t *ptpbase32, pte32;
+	void *cookie;
+
+	usermode = (paging->cpl == 3 ? 1 : 0);
+	writable = prot & VM_PROT_WRITE;
+	cookie = NULL;
+	retval = 0;
+#ifdef	__FreeBSD__
+	retries = 0;
+#endif
+restart:
+	ptpphys = paging->cr3;		/* root of the page tables */
+	ptp_release(&cookie);
+#ifdef	__FreeBSD__
+	if (retries++ > 0)
+		maybe_yield();
+#endif
+
+	if (vie_canonical_check(paging->cpu_mode, gla)) {
+		/*
+		 * XXX assuming a non-stack reference otherwise a stack fault
+		 * should be generated.
+		 */
+		vm_inject_gp(vm, vcpuid);
+		goto fault;
+	}
+
+	/* Without paging the linear address is the physical address */
+	if (paging->paging_mode == PAGING_MODE_FLAT) {
+		*gpa = gla;
+		goto done;
+	}
+
+	/* Two-level walk with 32-bit entries and 10 index bits per level */
+	if (paging->paging_mode == PAGING_MODE_32) {
+		nlevels = 2;
+		while (--nlevels >= 0) {
+			/* Zero out the lower 12 bits. */
+			ptpphys &= ~0xfff;
+
+			ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+
+			if (ptpbase32 == NULL)
+				goto error;
+
+			ptpshift = PAGE_SHIFT + nlevels * 10;
+			ptpindex = (gla >> ptpshift) & 0x3FF;
+			pgsize = 1UL << ptpshift;
+
+			pte32 = ptpbase32[ptpindex];
+
+			if ((pte32 & PG_V) == 0 ||
+			    (usermode && (pte32 & PG_U) == 0) ||
+			    (writable && (pte32 & PG_RW) == 0)) {
+				pfcode = pf_error_code(usermode, prot, 0,
+				    pte32);
+				vm_inject_pf(vm, vcpuid, pfcode, gla);
+				goto fault;
+			}
+
+			/*
+			 * Emulate the x86 MMU's management of the accessed
+			 * and dirty flags. While the accessed flag is set
+			 * at every level of the page table, the dirty flag
+			 * is only set at the last level providing the guest
+			 * physical address.
+			 */
+			if ((pte32 & PG_A) == 0) {
+				if (atomic_cmpset_32(&ptpbase32[ptpindex],
+				    pte32, pte32 | PG_A) == 0) {
+					goto restart;
+				}
+			}
+
+			/* XXX must be ignored if CR4.PSE=0 */
+			if (nlevels > 0 && (pte32 & PG_PS) != 0)
+				break;
+
+			ptpphys = pte32;
+		}
+
+		/* Set the dirty bit in the page table entry if necessary */
+		if (writable && (pte32 & PG_M) == 0) {
+			if (atomic_cmpset_32(&ptpbase32[ptpindex],
+			    pte32, pte32 | PG_M) == 0) {
+				goto restart;
+			}
+		}
+
+		/* Zero out the lower 'ptpshift' bits */
+		pte32 >>= ptpshift; pte32 <<= ptpshift;
+		*gpa = pte32 | (gla & (pgsize - 1));
+		goto done;
+	}
+
+	/*
+	 * PAE first consumes bits 31:30 of 'gla' to pick one of the four
+	 * entries reachable from cr3 and then shares the 64-bit entry walk
+	 * below with two levels remaining; otherwise four levels are walked.
+	 */
+	if (paging->paging_mode == PAGING_MODE_PAE) {
+		/* Zero out the lower 5 bits and the upper 32 bits */
+		ptpphys &= 0xffffffe0UL;
+
+		ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
+		if (ptpbase == NULL)
+			goto error;
+
+		ptpindex = (gla >> 30) & 0x3;
+
+		pte = ptpbase[ptpindex];
+
+		if ((pte & PG_V) == 0) {
+			pfcode = pf_error_code(usermode, prot, 0, pte);
+			vm_inject_pf(vm, vcpuid, pfcode, gla);
+			goto fault;
+		}
+
+		ptpphys = pte;
+
+		nlevels = 2;
+	} else
+		nlevels = 4;
+	while (--nlevels >= 0) {
+		/* Zero out the lower 12 bits and the upper 12 bits */
+		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
+
+		ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+		if (ptpbase == NULL)
+			goto error;
+
+		ptpshift = PAGE_SHIFT + nlevels * 9;
+		ptpindex = (gla >> ptpshift) & 0x1FF;
+		pgsize = 1UL << ptpshift;
+
+		pte = ptpbase[ptpindex];
+
+		if ((pte & PG_V) == 0 ||
+		    (usermode && (pte & PG_U) == 0) ||
+		    (writable && (pte & PG_RW) == 0)) {
+			pfcode = pf_error_code(usermode, prot, 0, pte);
+			vm_inject_pf(vm, vcpuid, pfcode, gla);
+			goto fault;
+		}
+
+		/* Set the accessed bit in the page table entry */
+		if ((pte & PG_A) == 0) {
+			if (atomic_cmpset_64(&ptpbase[ptpindex],
+			    pte, pte | PG_A) == 0) {
+				goto restart;
+			}
+		}
+
+		/* A large page ends the walk early; above 1GB it is a fault */
+		if (nlevels > 0 && (pte & PG_PS) != 0) {
+			if (pgsize > 1 * GB) {
+				pfcode = pf_error_code(usermode, prot, 1, pte);
+				vm_inject_pf(vm, vcpuid, pfcode, gla);
+				goto fault;
+			}
+			break;
+		}
+
+		ptpphys = pte;
+	}
+
+	/* Set the dirty bit in the page table entry if necessary */
+	if (writable && (pte & PG_M) == 0) {
+		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
+			goto restart;
+	}
+
+	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
+	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
+	*gpa = pte | (gla & (pgsize - 1));
+	/* Every exit path funnels through 'done' so the hold is dropped */
+done:
+	ptp_release(&cookie);
+	return (retval);
+error:
+	retval = -1;
+	goto done;
+fault:
+	retval = 1;
+	goto done;
+}
+
+/*
+ * Copy 'inst_length' instruction bytes from guest linear address 'rip' into
+ * vie->inst and record how many bytes are valid. Panics if 'inst_length'
+ * exceeds VIE_INST_SIZE.
+ *
+ * Returns the result of vm_copy_setup(); 'vie' is only modified when that
+ * result is 0.
+ */
+int
+vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t rip, int inst_length, struct vie *vie)
+{
+	struct vm_copyinfo copyinfo[2];
+	int error;
+	const int prot = PROT_READ | PROT_EXEC;
+
+	if (inst_length > VIE_INST_SIZE)
+		panic("vmm_fetch_instruction: invalid length %d", inst_length);
+
+	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
+	    copyinfo, nitems(copyinfo));
+	if (error != 0)
+		return (error);
+
+	vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+	vie->num_valid = inst_length;
+
+	return (error);
+}
+
+/*
+ * Fetch the next undecoded instruction byte into '*x' without consuming it.
+ * Returns 0 on success, or -1 (leaving '*x' alone) once every valid byte
+ * has been processed.
+ */
+static int
+vie_peek(struct vie *vie, uint8_t *x)
+{
+
+	if (vie->num_processed >= vie->num_valid)
+		return (-1);
+
+	*x = vie->inst[vie->num_processed];
+	return (0);
+}
+
+/*
+ * Consume one instruction byte, i.e. the one last returned by vie_peek().
+ */
+static void
+vie_advance(struct vie *vie)
+{
+
+	vie->num_processed += 1;
+}
+
+/*
+ * Map a segment override prefix byte to its segment register. Returns true
+ * and stores the register in '*seg' when 'x' is such a prefix; otherwise
+ * returns false and leaves '*seg' untouched.
+ */
+static bool
+segment_override(uint8_t x, int *seg)
+{
+	int reg;
+
+	switch (x) {
+	case 0x26:
+		reg = VM_REG_GUEST_ES;
+		break;
+	case 0x2E:
+		reg = VM_REG_GUEST_CS;
+		break;
+	case 0x36:
+		reg = VM_REG_GUEST_SS;
+		break;
+	case 0x3E:
+		reg = VM_REG_GUEST_DS;
+		break;
+	case 0x64:
+		reg = VM_REG_GUEST_FS;
+		break;
+	case 0x65:
+		reg = VM_REG_GUEST_GS;
+		break;
+	default:
+		return (false);
+	}
+
+	*seg = reg;
+	return (true);
+}
+
+/*
+ * Consume the legacy prefixes (and, in 64-bit mode, a REX prefix) at the
+ * start of the instruction and derive the effective address and operand
+ * sizes from them, 'cpu_mode' and the code segment's default size 'cs_d'.
+ *
+ * Returns 0 on success, or -1 if the instruction bytes run out while the
+ * prefixes are being scanned.
+ */
+static int
+decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
+{
+	uint8_t x;
+
+	/*
+	 * Each recognized prefix just latches a flag; when several segment
+	 * overrides are present the last one seen is the one recorded. The
+	 * loop exits with 'x' holding the first byte that is not a legacy
+	 * prefix, which is left unconsumed.
+	 */
+	while (1) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		if (x == 0x66)
+			vie->opsize_override = 1;
+		else if (x == 0x67)
+			vie->addrsize_override = 1;
+		else if (x == 0xF3)
+			vie->repz_present = 1;
+		else if (x == 0xF2)
+			vie->repnz_present = 1;
+		else if (segment_override(x, &vie->segment_register))
+			vie->segment_override = 1;
+		else
+			break;
+
+		vie_advance(vie);
+	}
+
+	/*
+	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
+	 * - Only one REX prefix is allowed per instruction.
+	 * - The REX prefix must immediately precede the opcode byte or the
+	 *   escape opcode byte.
+	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
+	 *   the mandatory prefix must come before the REX prefix.
+	 */
+	/* 'x' is still the byte on which the prefix loop stopped */
+	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
+		vie->rex_present = 1;
+		vie->rex_w = x & 0x8 ? 1 : 0;
+		vie->rex_r = x & 0x4 ? 1 : 0;
+		vie->rex_x = x & 0x2 ? 1 : 0;
+		vie->rex_b = x & 0x1 ? 1 : 0;
+		vie_advance(vie);
+	}
+
+	/*
+	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
+	 */
+	if (cpu_mode == CPU_MODE_64BIT) {
+		/*
+		 * Default address size is 64-bits and default operand size
+		 * is 32-bits.
+		 */
+		vie->addrsize = vie->addrsize_override ? 4 : 8;
+		/* REX.W takes precedence over the 0x66 operand size override */
+		if (vie->rex_w)
+			vie->opsize = 8;
+		else if (vie->opsize_override)
+			vie->opsize = 2;
+		else
+			vie->opsize = 4;
+	} else if (cs_d) {
+		/* Default address and operand sizes are 32-bits */
+		vie->addrsize = vie->addrsize_override ? 2 : 4;
+		vie->opsize = vie->opsize_override ? 2 : 4;
+	} else {
+		/* Default address and operand sizes are 16-bits */
+		vie->addrsize = vie->addrsize_override ? 4 : 2;
+		vie->opsize = vie->opsize_override ? 4 : 2;
+	}
+	return (0);
+}
+
+/*
+ * Decode the second byte of a two-byte opcode through two_byte_opcodes[].
+ * Returns 0 and consumes the byte when the table has an entry for it;
+ * returns -1 when no byte is available or the entry is VIE_OP_TYPE_NONE.
+ * Note that vie->op is overwritten with the table entry in either case.
+ */
+static int
+decode_two_byte_opcode(struct vie *vie)
+{
+	uint8_t opbyte;
+
+	if (vie_peek(vie, &opbyte) != 0)
+		return (-1);
+
+	vie->op = two_byte_opcodes[opbyte];
+	if (vie->op.op_type != VIE_OP_TYPE_NONE) {
+		vie_advance(vie);
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Decode the opcode through one_byte_opcodes[], continuing with
+ * decode_two_byte_opcode() when the first byte is flagged as
+ * VIE_OP_TYPE_TWO_BYTE.
+ * Returns 0 on success, or -1 when no byte is available or the opcode has
+ * no table entry.
+ */
+static int
+decode_opcode(struct vie *vie)
+{
+	uint8_t opbyte;
+
+	if (vie_peek(vie, &opbyte) != 0)
+		return (-1);
+
+	vie->op = one_byte_opcodes[opbyte];
+	if (vie->op.op_type == VIE_OP_TYPE_NONE)
+		return (-1);
+
+	vie_advance(vie);
+
+	/* A one-byte opcode is complete at this point */
+	if (vie->op.op_type != VIE_OP_TYPE_TWO_BYTE)
+		return (0);
+
+	return (decode_two_byte_opcode(vie));
+}
+
+/*
+ * Decode the ModRM byte into vie->mod, vie->reg and vie->rm (extended by
+ * the REX.R/REX.B bits), and derive the base register and the number of
+ * displacement bytes unless a SIB byte follows.
+ *
+ * Returns 0 without consuming anything for opcodes flagged
+ * VIE_OP_F_NO_MODRM. Returns -1 in real mode, when no byte is available,
+ * or when the byte selects register-direct addressing.
+ */
+static int
+decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
+{
+	uint8_t x;
+
+	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
+		return (0);
+
+	if (cpu_mode == CPU_MODE_REAL)
+		return (-1);
+
+	if (vie_peek(vie, &x))
+		return (-1);
+
+	vie->mod = (x >> 6) & 0x3;
+	vie->rm =  (x >> 0) & 0x7;
+	vie->reg = (x >> 3) & 0x7;
+
+	/*
+	 * A direct addressing mode makes no sense in the context of an EPT
+	 * fault. There has to be a memory access involved to cause the
+	 * EPT fault.
+	 */
+	if (vie->mod == VIE_MOD_DIRECT)
+		return (-1);
+
+	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
+	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
+		/*
+		 * Table 2-5: Special Cases of REX Encodings
+		 *
+		 * mod=0, r/m=5 is used in the compatibility mode to
+		 * indicate a disp32 without a base register.
+		 *
+		 * mod!=3, r/m=4 is used in the compatibility mode to
+		 * indicate that the SIB byte is present.
+		 *
+		 * The 'b' bit in the REX prefix is don't care in
+		 * this case.
+		 */
+	} else {
+		vie->rm |= (vie->rex_b << 3);
+	}
+
+	vie->reg |= (vie->rex_r << 3);
+
+	/* SIB */
+	/* The base register and displacement are left to decode_sib() */
+	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
+		goto done;
+
+	vie->base_register = gpr_map[vie->rm];
+
+	switch (vie->mod) {
+	case VIE_MOD_INDIRECT_DISP8:
+		vie->disp_bytes = 1;
+		break;
+	case VIE_MOD_INDIRECT_DISP32:
+		vie->disp_bytes = 4;
+		break;
+	case VIE_MOD_INDIRECT:
+		if (vie->rm == VIE_RM_DISP32) {
+			vie->disp_bytes = 4;
+			/*
+			 * Table 2-7. RIP-Relative Addressing
+			 *
+			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
+			 * whereas in compatibility mode it just implies disp32.
+			 */
+
+			if (cpu_mode == CPU_MODE_64BIT)
+				vie->base_register = VM_REG_GUEST_RIP;
+			else
+				vie->base_register = VM_REG_LAST;
+		}
+		break;
+	}
+
+done:
+	/* Consume the ModRM byte */
+	vie_advance(vie);
+
+	return (0);
+}
+
+/*
+ * Decode the SIB byte, if the ModRM byte announced one, into the base
+ * register, index register, scale and number of displacement bytes.
+ *
+ * Returns 0 when there is no SIB byte or it was decoded and consumed, and
+ * -1 when the SIB byte is expected but no instruction byte is available.
+ */
+static int
+decode_sib(struct vie *vie)
+{
+	uint8_t x;
+
+	/* Proceed only if SIB byte is present */
+	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
+		return (0);
+
+	if (vie_peek(vie, &x))
+		return (-1);
+
+	/* De-construct the SIB byte */
+	vie->ss = (x >> 6) & 0x3;
+	vie->index = (x >> 3) & 0x7;
+	vie->base = (x >> 0) & 0x7;
+
+	/* Apply the REX prefix modifiers */
+	vie->index |= vie->rex_x << 3;
+	vie->base |= vie->rex_b << 3;
+
+	switch (vie->mod) {
+	case VIE_MOD_INDIRECT_DISP8:
+		vie->disp_bytes = 1;
+		break;
+	case VIE_MOD_INDIRECT_DISP32:
+		vie->disp_bytes = 4;
+		break;
+	}
+
+	if (vie->mod == VIE_MOD_INDIRECT &&
+	    (vie->base == 5 || vie->base == 13)) {
+		/*
+		 * Special case when base register is unused if mod = 0
+		 * and base = %rbp or %r13.
+		 *
+		 * Documented in:
+		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+		 * Table 2-5: Special Cases of REX Encodings
+		 */
+		vie->disp_bytes = 4;
+	} else {
+		vie->base_register = gpr_map[vie->base];
+	}
+
+	/*
+	 * All encodings of 'index' are valid except for %rsp (4).
+	 *
+	 * Documented in:
+	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+	 * Table 2-5: Special Cases of REX Encodings
+	 */
+	if (vie->index != 4)
+		vie->index_register = gpr_map[vie->index];
+
+	/* 'scale' makes sense only in the context of an index register */
+	if (vie->index_register < VM_REG_LAST)
+		vie->scale = 1 << vie->ss;
+
+	/* Consume the SIB byte */
+	vie_advance(vie);
+
+	return (0);
+}
+
+/*
+ * Read the displacement announced by vie->disp_bytes (0, 1 or 4 bytes) and
+ * store it, sign-extended, in vie->displacement. Any other byte count is
+ * a decoder bug and panics.
+ *
+ * Returns 0 on success (including "no displacement"), or -1 if the
+ * instruction bytes run out.
+ */
+static int
+decode_displacement(struct vie *vie)
+{
+	int nbytes, idx;
+	uint8_t byte;
+
+	/* The raw bytes are reinterpreted as a signed value of matching width */
+	union {
+		char	buf[4];
+		int8_t	signed8;
+		int32_t	signed32;
+	} raw;
+
+	nbytes = vie->disp_bytes;
+	if (nbytes == 0)
+		return (0);
+
+	if (!(nbytes == 1 || nbytes == 4))
+		panic("decode_displacement: invalid disp_bytes %d", nbytes);
+
+	idx = 0;
+	while (idx < nbytes) {
+		if (vie_peek(vie, &byte) != 0)
+			return (-1);
+
+		raw.buf[idx++] = byte;
+		vie_advance(vie);
+	}
+
+	/* Both arms are signed, so the assignment sign-extends */
+	vie->displacement = (nbytes == 1) ? raw.signed8 : raw.signed32;
+
+	return (0);
+}
+
+/*
+ * Work out the size of the immediate operand from the opcode flags and the
+ * operand size, then read it and store it, sign-extended, in
+ * vie->immediate.
+ *
+ * Returns 0 on success (including "no immediate"), or -1 if the
+ * instruction bytes run out.
+ */
+static int
+decode_immediate(struct vie *vie)
+{
+	int i, n;
+	uint8_t x;
+	/* The raw bytes are reinterpreted as a signed value of matching width */
+	union {
+		char	buf[4];
+		int8_t	signed8;
+		int16_t	signed16;
+		int32_t	signed32;
+	} u;
+
+	/* Figure out immediate operand size (if any) */
+	if (vie->op.op_flags & VIE_OP_F_IMM) {
+		/*
+		 * Section 2.2.1.5 "Immediates", Intel SDM:
+		 * In 64-bit mode the typical size of immediate operands
+		 * remains 32-bits. When the operand size is 64-bits, the
+		 * processor sign-extends all immediates to 64-bits prior
+		 * to their use.
+		 */
+		if (vie->opsize == 4 || vie->opsize == 8)
+			vie->imm_bytes = 4;
+		else
+			vie->imm_bytes = 2;
+	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
+		vie->imm_bytes = 1;
+	}
+
+	if ((n = vie->imm_bytes) == 0)
+		return (0);
+
+	KASSERT(n == 1 || n == 2 || n == 4,
+	    ("%s: invalid number of immediate bytes: %d", __func__, n));
+
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
+	}
+
+	/* sign-extend the immediate value before use */
+	if (n == 1)
+		vie->immediate = u.signed8;
+	else if (n == 2)
+		vie->immediate = u.signed16;
+	else
+		vie->immediate = u.signed32;
+
+	return (0);
+}
+
+/*
+ * For opcodes flagged VIE_OP_F_MOFFSET, read the direct memory offset that
+ * follows the opcode and store it, zero-extended, in vie->displacement.
+ *
+ * Returns 0 on success or when the opcode carries no moffset, and -1 if
+ * the instruction bytes run out.
+ */
+static int
+decode_moffset(struct vie *vie)
+{
+	int i, n;
+	uint8_t byte;
+	union {
+		char	buf[8];
+		uint64_t u64;
+	} moff;
+
+	if (!(vie->op.op_flags & VIE_OP_F_MOFFSET))
+		return (0);
+
+	/*
+	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
+	 * The memory offset size follows the address-size of the instruction.
+	 */
+	n = vie->addrsize;
+	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
+
+	/* Start from zero so that the bytes not read stay clear */
+	moff.u64 = 0;
+	i = 0;
+	while (i < n) {
+		if (vie_peek(vie, &byte) != 0)
+			return (-1);
+
+		moff.buf[i++] = byte;
+		vie_advance(vie);
+	}
+
+	vie->displacement = moff.u64;
+	return (0);
+}
+
+/*
+ * Verify that the decoder consumed at least one instruction byte.
+ *
+ * Note that this does not compare 'num_processed' against 'num_valid', so
+ * instruction bytes left over in the buffer after decoding are not treated
+ * as an error.
+ *
+ * Returns 0 if any bytes were processed, -1 otherwise.
+ */
+static int
+verify_inst_length(struct vie *vie)
+{
+
+	if (vie->num_processed)
+		return (0);
+	else
+		return (-1);
+}
+
+/*
+ * Verify that the 'guest linear address' provided as collateral of the nested
+ * page table fault matches with our instruction decoding.
+ *
+ * The address is recomputed as base + scale * index + displacement from the
+ * decoded operands and the current guest register values, truncated to the
+ * effective address size, and compared with 'gla'.
+ *
+ * Returns 0 if the addresses match or if 'gla' is VIE_INVALID_GLA (nothing to
+ * verify); returns -1 on a mismatch or if a guest register cannot be read.
+ */
+static int
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+{
+	int error;
+	uint64_t base, idx, gla2;
+
+	/* Skip 'gla' verification */
+	if (gla == VIE_INVALID_GLA)
+		return (0);
+
+	base = 0;
+	if (vie->base_register != VM_REG_LAST) {
+		error = vm_get_register(vm, cpuid, vie->base_register, &base);
+		if (error) {
+			printf("verify_gla: error %d getting base reg %d\n",
+				error, vie->base_register);
+			return (-1);
+		}
+
+		/*
+		 * RIP-relative addressing starts from the following
+		 * instruction.
+		 *
+		 * The length of this instruction is the number of bytes the
+		 * decoder consumed ('num_processed'), not the number of
+		 * bytes that were fetched into the buffer ('num_valid'):
+		 * the two differ whenever more bytes were fetched than the
+		 * instruction occupies.
+		 */
+		if (vie->base_register == VM_REG_GUEST_RIP)
+			base += vie->num_processed;
+	}
+
+	idx = 0;
+	if (vie->index_register != VM_REG_LAST) {
+		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
+		if (error) {
+			printf("verify_gla: error %d getting index reg %d\n",
+				error, vie->index_register);
+			return (-1);
+		}
+	}
+
+	/* XXX assuming that the base address of the segment is 0 */
+	gla2 = base + vie->scale * idx + vie->displacement;
+	gla2 &= size2mask[vie->addrsize];
+	if (gla != gla2) {
+		printf("verify_gla mismatch: "
+		       "base(0x%0lx), scale(%d), index(0x%0lx), "
+		       "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
+		       base, vie->scale, idx, vie->displacement, gla, gla2);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Decode the instruction bytes held in 'vie'. The stages run strictly in
+ * the order in which the fields appear in the instruction (prefixes,
+ * opcode, ModRM, SIB, displacement, immediate, moffset) and decoding stops
+ * at the first stage that fails.
+ *
+ * Unless the opcode is flagged VIE_OP_F_NO_GLA_VERIFICATION the decoded
+ * operand address is also checked against 'gla'.
+ *
+ * Returns 0 and marks 'vie' as decoded on success, -1 otherwise.
+ */
+int
+vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
+		       enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
+{
+
+	/* '||' short-circuits, so a failed stage stops the pipeline */
+	if (decode_prefixes(vie, cpu_mode, cs_d) != 0 ||
+	    decode_opcode(vie) != 0 ||
+	    decode_modrm(vie, cpu_mode) != 0 ||
+	    decode_sib(vie) != 0 ||
+	    decode_displacement(vie) != 0 ||
+	    decode_immediate(vie) != 0 ||
+	    decode_moffset(vie) != 0 ||
+	    verify_inst_length(vie) != 0)
+		return (-1);
+
+	if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0 &&
+	    verify_gla(vm, cpuid, gla, vie) != 0)
+		return (-1);
+
+	vie->decoded = 1;	/* success */
+
+	return (0);
+}
+#endif	/* _KERNEL */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
new file mode 100644
index 0000000000..bea750f162
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
@@ -0,0 +1,174 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_ioport.c 277168 2015-01-14 07:18:51Z neel $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/systm.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include "vatpic.h"
+#include "vatpit.h"
+#include "vmm_ioport.h"
+#include "vmm_ktr.h"
+
+/* Number of entries in ioport_handler[]; ports at or above this have none */
+#define	MAX_IOPORTS		1280
+
+/*
+ * In-kernel I/O port handlers, indexed by port number. Entries that are not
+ * listed here are NULL, and emulate_inout_port() hands accesses to such
+ * ports to userspace.
+ */
+ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
+	[TIMER_MODE] = vatpit_handler,
+	[TIMER_CNTR0] = vatpit_handler,
+	[TIMER_CNTR1] = vatpit_handler,
+	[TIMER_CNTR2] = vatpit_handler,
+	[NMISC_PORT] = vatpit_nmisc_handler,
+	[IO_ICU1] = vatpic_master_handler,
+	[IO_ICU1 + ICU_IMR_OFFSET] = vatpic_master_handler,
+	[IO_ICU2] = vatpic_slave_handler,
+	[IO_ICU2 + ICU_IMR_OFFSET] = vatpic_slave_handler,
+	[IO_ELCR1] = vatpic_elc_handler,
+	[IO_ELCR2] = vatpic_elc_handler,
+};
+
+#ifdef KTR
+/*
+ * Return the mnemonic of the I/O instruction described by 'vmexit', for use
+ * in trace messages. The mnemonic is selected by operand size (anything
+ * other than 1 or 2 bytes is treated as the 4 byte form), by direction and
+ * by whether it is a string instruction.
+ */
+static const char *
+inout_instruction(struct vm_exit *vmexit)
+{
+	/* Laid out as four groups of {byte, word, long}: out, in, outs, ins */
+	static const char *iodesc[] = {
+		"outb", "outw", "outl",
+		"inb", "inw", "inl",
+		"outsb", "outsw", "outsd",
+		"insb", "insw", "insd",
+	};
+	int index;
+
+	if (vmexit->u.inout.bytes == 1)
+		index = 0;
+	else if (vmexit->u.inout.bytes == 2)
+		index = 1;
+	else
+		index = 2;
+
+	index += vmexit->u.inout.in ? 3 : 0;
+	index += vmexit->u.inout.string ? 6 : 0;
+
+	KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
+	    __func__, index));
+
+	return (iodesc[index]);
+}
+#endif	/* KTR */
+
+/*
+ * Emulate a non-string IN/OUT in the kernel if a handler is registered for
+ * the port.
+ *
+ * '*retu' is set to true when the access has to be completed by userspace
+ * (no handler for the port) and to false when it was handled here; in both
+ * cases 0 is returned. If the handler fails, EIO is returned and '*retu' is
+ * left untouched.
+ */
+static int
+emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
+    bool *retu)
+{
+	ioport_handler_func_t handler;
+	uint32_t mask, val;
+	int error;
+
+	/*
+	 * If there is no handler for the I/O port then punt to userspace.
+	 */
+	if (vmexit->u.inout.port >= MAX_IOPORTS ||
+	    (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
+		*retu = true;
+		return (0);
+	}
+
+	mask = vie_size2mask(vmexit->u.inout.bytes);
+
+	/*
+	 * Always give 'val' a defined value. For an OUT it carries the data
+	 * being written; for an IN it is zero until the handler stores the
+	 * result, so that a handler which succeeds without storing anything
+	 * cannot cause stale kernel stack contents to be merged into the
+	 * guest's %rax below.
+	 */
+	val = 0;
+	if (!vmexit->u.inout.in) {
+		val = vmexit->u.inout.eax & mask;
+	}
+
+	error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
+	    vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
+	if (error) {
+		/*
+		 * The value returned by this function is also the return value
+		 * of vm_run(). This needs to be a positive number otherwise it
+		 * can be interpreted as a "pseudo-error" like ERESTART.
+		 *
+		 * Enforce this by mapping all errors to EIO.
+		 */
+		return (EIO);
+	}
+
+	if (vmexit->u.inout.in) {
+		/* Replace only the bytes covered by the operand size */
+		vmexit->u.inout.eax &= ~mask;
+		vmexit->u.inout.eax |= val & mask;
+		error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
+		    vmexit->u.inout.eax);
+		KASSERT(error == 0, ("%s: error %d setting guest "
+		    "rax register", __func__, error));
+	}
+	*retu = false;
+	return (0);
+}
+
+/*
+ * String I/O instructions are not emulated in the kernel: flag the exit for
+ * userspace and report success. 'vm', 'vcpuid' and 'vmexit' are unused.
+ */
+static int
+emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
+{
+	/* Userspace finishes the emulation */
+	*retu = true;
+
+	return (0);
+}
+
+/*
+ * Handle an I/O instruction VM exit: string forms go to
+ * emulate_inout_str(), everything else to emulate_inout_port(). The outcome
+ * is traced and the emulation routine's return value is passed back
+ * unchanged; '*retu' is set by the routine that was called.
+ */
+int
+vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
+{
+	int bytes, error;
+
+	bytes = vmexit->u.inout.bytes;
+	KASSERT(bytes == 1 || bytes == 2 || bytes == 4,
+	    ("vm_handle_inout: invalid operand size %d", bytes));
+
+	error = vmexit->u.inout.string ?
+	    emulate_inout_str(vm, vcpuid, vmexit, retu) :
+	    emulate_inout_port(vm, vcpuid, vmexit, retu);
+
+	VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
+	    vmexit->u.inout.rep ? "rep " : "",
+	    inout_instruction(vmexit),
+	    vmexit->u.inout.port,
+	    error ? "error" : (*retu ? "userspace" : "handled"));
+
+	return (error);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
new file mode 100644
index 0000000000..624dd8f1d8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_ioport.h 273706 2014-10-26 19:03:06Z neel $
+ */
+
+#ifndef	_VMM_IOPORT_H_
+#define	_VMM_IOPORT_H_
+
+/*
+ * Signature of an in-kernel I/O port handler.
+ *
+ * 'in' is true for a port read and false for a port write, 'port' is the
+ * I/O port number and 'bytes' the operand size.  On a write '*val' carries
+ * the value being written; on a read the handler leaves the result in
+ * '*val'.  A non-zero return value reports failure.
+ */
+typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid,
+    bool in, int port, int bytes, uint32_t *val);
+
+/*
+ * Handle an in/out VM exit described by 'vme'.  '*retu' is set to true when
+ * the access has to be completed by userspace and to false when it was
+ * fully handled in the kernel.  Returns 0 or an errno value.
+ */
+int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu);
+
+#endif	/* _VMM_IOPORT_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ipi.h b/usr/src/uts/i86pc/io/vmm/vmm_ipi.h
new file mode 100644
index 0000000000..4dff03ba1f
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ipi.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_ipi.h 260466 2014-01-09 03:25:54Z neel $
+ */
+
+#ifndef _VMM_IPI_H_
+#define _VMM_IPI_H_
+
+/*
+ * These declarations are only visible when building for FreeBSD; on any
+ * other platform this header declares nothing.
+ */
+#ifdef	__FreeBSD__
+int	vmm_ipi_alloc(void);
+void	vmm_ipi_free(int num);
+#endif	/* __FreeBSD__ */
+
+#endif	/* _VMM_IPI_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ktr.h b/usr/src/uts/i86pc/io/vmm/vmm_ktr.h
new file mode 100644
index 0000000000..917c7f83a4
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ktr.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_ktr.h 258699 2013-11-27 22:18:08Z neel $
+ */
+
+#ifndef _VMM_KTR_H_
+#define	_VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+/* Trace class used by all vmm trace points; falls back to KTR_GEN. */
+#ifndef KTR_VMM
+#define	KTR_VMM	KTR_GEN
+#endif
+
+/*
+ * VCPU_CTRn(vm, vcpuid, format, p1 .. pn)
+ *
+ * Per-vcpu trace points.  Each one prepends "vm <name>[<vcpuid>]: " to
+ * 'format' and forwards to CTR<n+2>, passing vm_name(vm) and 'vcpuid'
+ * ahead of the caller's n parameters.  'format' is pasted onto the prefix
+ * by string-literal concatenation, so it must be a string literal.
+ */
+#define	VCPU_CTR0(vm, vcpuid, format)					\
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define	VCPU_CTR1(vm, vcpuid, format, p1)				\
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define	VCPU_CTR2(vm, vcpuid, format, p1, p2)				\
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define	VCPU_CTR3(vm, vcpuid, format, p1, p2, p3)			\
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define	VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4)			\
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid),		\
+    (p1), (p2), (p3), (p4))
+
+/*
+ * VM_CTRn(vm, format, p1 .. pn)
+ *
+ * VM-wide trace points.  Each one prepends "vm <name>: " to 'format' and
+ * forwards to CTR<n+1>, passing vm_name(vm) ahead of the caller's n
+ * parameters.  As above, 'format' must be a string literal.
+ */
+#define	VM_CTR0(vm, format)						\
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define	VM_CTR1(vm, format, p1)						\
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define	VM_CTR2(vm, format, p1, p2)					\
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define	VM_CTR3(vm, format, p1, p2, p3)					\
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define	VM_CTR4(vm, format, p1, p2, p3, p4)				\
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif	/* _VMM_KTR_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
new file mode 100644
index 0000000000..3215c74a44
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_lapic.c 264509 2014-04-15 17:06:26Z tychon $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_lapic.c 264509 2014-04-15 17:06:26Z tychon $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+
+#include <x86/specialreg.h>
+#include <x86/apicreg.h>
+
+#include <machine/vmm.h>
+#include "vmm_ipi.h"
+#include "vmm_ktr.h"
+#include "vmm_lapic.h"
+#include "vlapic.h"
+
+/*
+ * Some MSI message definitions
+ *
+ * An MSI address is accepted by lapic_intr_msi() only when
+ * (addr & MSI_X86_ADDR_MASK) == MSI_X86_ADDR_BASE.  The RH and LOG bits
+ * together select logical destination mode; any other combination is
+ * treated as physical.
+ */
+#define	MSI_X86_ADDR_MASK	0xfff00000
+#define	MSI_X86_ADDR_BASE	0xfee00000
+#define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
+#define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
+
+/*
+ * Mark interrupt 'vector' as pending in the local APIC of vcpu 'cpu'.
+ * 'level' selects level (true) or edge (false) triggering.
+ *
+ * Returns EINVAL when 'cpu' is outside [0, VM_MAXCPU) or 'vector' is
+ * outside [32, 255], and 0 otherwise.  The vcpu is notified only when
+ * vlapic_set_intr_ready() asks for it.
+ */
+int
+lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
+{
+	const bool cpu_ok = (cpu >= 0 && cpu < VM_MAXCPU);
+	const bool vector_ok = (vector >= 32 && vector <= 255);
+
+	if (!cpu_ok || !vector_ok)
+		return (EINVAL);
+
+	if (vlapic_set_intr_ready(vm_lapic(vm, cpu), vector, level) != 0)
+		vcpu_notify_event(vm, cpu, true);
+
+	return (0);
+}
+
+/*
+ * Trigger the local (LVT) interrupt 'vector' on vcpu 'cpu', or on every
+ * active vcpu when 'cpu' is -1.
+ *
+ * Returns EINVAL for a 'cpu' outside [-1, VM_MAXCPU).  Otherwise the target
+ * vcpus are visited in ascending order and the walk stops at the first
+ * vlapic_trigger_lvt() failure, whose error is returned; 0 means every
+ * target was triggered.
+ */
+int
+lapic_set_local_intr(struct vm *vm, int cpu, int vector)
+{
+	cpuset_t targets;
+	int bit, id, rv;
+
+	if (cpu < -1 || cpu >= VM_MAXCPU)
+		return (EINVAL);
+
+	if (cpu != -1)
+		CPU_SETOF(cpu, &targets);
+	else
+		targets = vm_active_cpus(vm);
+
+	rv = 0;
+	for (;;) {
+		/* CPU_FFS() is 1-based and yields 0 once the set is empty. */
+		bit = CPU_FFS(&targets);
+		if (bit == 0)
+			break;
+
+		id = bit - 1;
+		CPU_CLR(id, &targets);
+
+		rv = vlapic_trigger_lvt(vm_lapic(vm, id), vector);
+		if (rv != 0)
+			break;
+	}
+
+	return (rv);
+}
+
+/*
+ * Deliver the message signalled interrupt described by 'addr' and 'msg'.
+ *
+ * Returns -1, delivering nothing, when 'addr' fails the MSI address check,
+ * and 0 once the interrupt has been passed to vlapic_deliver_intr().
+ */
+int
+lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
+{
+	const uint64_t logical_bits = MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG;
+	uint32_t dest;
+	int delmode, vec;
+	bool phys;
+
+	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
+
+	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
+		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
+		return (-1);
+	}
+
+	/*
+	 * Decode the x86-specific fields of the MSI address and data as
+	 * laid out in the Intel Arch spec, Vol3 Ch 10.
+	 *
+	 * PCI has no level triggered MSI/MSI-X, so the trigger level bit
+	 * in 'msg' is ignored and delivery is always edge triggered.
+	 *
+	 * 'dest' is a logical APIC ID only when the Redirection Hint and
+	 * Destination Mode bits are both set; otherwise it is physical.
+	 */
+	vec = msg & 0xff;
+	delmode = msg & APIC_DELMODE_MASK;
+	dest = (addr >> 12) & 0xff;
+	phys = ((addr & logical_bits) != logical_bits);
+
+	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
+	    phys ? "physical" : "logical", dest, vec);
+
+	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
+	return (0);
+}
+
+/*
+ * Report whether 'msr' falls inside the x2APIC MSR window, 0x800 through
+ * 0xBFF inclusive.
+ */
+static boolean_t
+x2apic_msr(u_int msr)
+{
+	return ((msr >= 0x800 && msr <= 0xBFF) ? TRUE : FALSE);
+}
+
+/*
+ * Convert an x2APIC MSR number into a local APIC register offset: the
+ * distance from the first x2APIC MSR (0x800), scaled by 16.
+ */
+static u_int
+x2apic_msr_to_regoff(u_int msr)
+{
+	const u_int first_msr = 0x800;
+	const u_int index = msr - first_msr;
+
+	return (index << 4);
+}
+
+/*
+ * Report whether 'msr' is served by the local APIC emulation: either
+ * MSR_APICBASE or any MSR accepted by x2apic_msr().
+ */
+boolean_t
+lapic_msr(u_int msr)
+{
+
+	return ((x2apic_msr(msr) || (msr == MSR_APICBASE)) ? TRUE : FALSE);
+}
+
+/*
+ * Read a local APIC MSR of vcpu 'cpu' into '*rval'.
+ *
+ * MSR_APICBASE is answered from vlapic_get_apicbase() and always succeeds.
+ * Any other MSR is mapped to a register offset and read with
+ * vlapic_read(), whose return value and '*retu' result are passed back.
+ */
+int
+lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
+{
+	struct vlapic *vlapic = vm_lapic(vm, cpu);
+
+	if (msr == MSR_APICBASE) {
+		*rval = vlapic_get_apicbase(vlapic);
+		return (0);
+	}
+
+	return (vlapic_read(vlapic, 0, x2apic_msr_to_regoff(msr), rval, retu));
+}
+
+/*
+ * Write 'val' to a local APIC MSR of vcpu 'cpu'.
+ *
+ * MSR_APICBASE goes through vlapic_set_apicbase().  Any other MSR is mapped
+ * to a register offset and written with vlapic_write().  Either way the
+ * callee's return value is passed back.
+ */
+int
+lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
+{
+	struct vlapic *vlapic = vm_lapic(vm, cpu);
+
+	if (msr == MSR_APICBASE)
+		return (vlapic_set_apicbase(vlapic, val));
+
+	return (vlapic_write(vlapic, 0, x2apic_msr_to_regoff(msr), val, retu));
+}
+
+/*
+ * Memory-mapped write of 'wval' to the local APIC of vcpu 'cpu'.  'gpa' is
+ * turned into a register offset relative to DEFAULT_APIC_BASE.
+ *
+ * Returns EINVAL unless the access is exactly 4 bytes wide and the offset
+ * sits on a 16-byte boundary; otherwise returns whatever vlapic_write()
+ * returns.  'arg' is handed to vlapic_write() untouched.
+ */
+int
+lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
+		 void *arg)
+{
+	const uint64_t regoff = gpa - DEFAULT_APIC_BASE;
+
+	if (size != 4 || (regoff & 0xf) != 0)
+		return (EINVAL);
+
+	return (vlapic_write(vm_lapic(vm, cpu), 1, regoff, wval, arg));
+}
+
+/*
+ * Memory-mapped read from the local APIC of vcpu 'cpu' into '*rval'.
+ * 'gpa' is turned into a register offset relative to DEFAULT_APIC_BASE.
+ *
+ * Registers are meant to be read as 4 bytes on a 16-byte boundary, but not
+ * every guest OS does so.  The offset is therefore rounded down to a
+ * multiple of 4 first, and 'size' is not checked.  Returns EINVAL when the
+ * rounded offset is still not 16-byte aligned; otherwise returns whatever
+ * vlapic_read() returns.  'arg' is handed to vlapic_read() untouched.
+ */
+int
+lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
+		void *arg)
+{
+	const uint64_t regoff = (gpa - DEFAULT_APIC_BASE) & ~(uint64_t)3;
+
+	if ((regoff & 0xf) != 0)
+		return (EINVAL);
+
+	return (vlapic_read(vm_lapic(vm, cpu), 1, regoff, rval, arg));
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.h b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h
new file mode 100644
index 0000000000..ee47ee7783
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_lapic.h 259863 2013-12-25 06:46:31Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _VMM_LAPIC_H_
+#define	_VMM_LAPIC_H_
+
+struct vm;
+
+/*
+ * lapic_msr() reports whether MSR 'num' is handled by the local APIC
+ * emulation (MSR_APICBASE or an x2APIC MSR in 0x800-0xBFF).
+ * lapic_rdmsr() and lapic_wrmsr() perform the access for vcpu 'cpu' and
+ * return 0 or an error.
+ */
+boolean_t lapic_msr(u_int num);
+int	lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval,
+	    bool *retu);
+int	lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval,
+	    bool *retu);
+
+/*
+ * Memory-mapped local APIC accesses for vcpu 'cpu'.  'gpa' is interpreted
+ * relative to DEFAULT_APIC_BASE.  Both return EINVAL when the resulting
+ * offset is not 16-byte aligned; the write additionally requires 'size'
+ * to be 4.
+ */
+int	lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
+			uint64_t *rval, int size, void *arg);
+int	lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
+			 uint64_t wval, int size, void *arg);
+
+/*
+ * Signals to the LAPIC that an interrupt at 'vector' needs to be generated
+ * to the 'cpu', the state is recorded in IRR.
+ */
+int	lapic_set_intr(struct vm *vm, int cpu, int vector, bool trig);
+
+/* Values for the 'trig' argument of lapic_set_intr(). */
+#define	LAPIC_TRIG_LEVEL	true
+#define	LAPIC_TRIG_EDGE		false
+/* Convenience wrapper: lapic_set_intr() with level triggering. */
+static __inline int
+lapic_intr_level(struct vm *vm, int cpu, int vector)
+{
+
+	return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_LEVEL));
+}
+
+/* Convenience wrapper: lapic_set_intr() with edge triggering. */
+static __inline int
+lapic_intr_edge(struct vm *vm, int cpu, int vector)
+{
+
+	return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE));
+}
+
+/*
+ * Triggers the LAPIC local interrupt (LVT) 'vector' on 'cpu'.  'cpu' can
+ * be set to -1 to trigger the interrupt on all CPUs.
+ */
+int	lapic_set_local_intr(struct vm *vm, int cpu, int vector);
+
+/*
+ * Delivers the MSI described by 'addr' and 'msg'.  Returns -1 when 'addr'
+ * is not a valid x86 MSI address and 0 otherwise.
+ */
+int	lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg);
+
+#endif	/* _VMM_LAPIC_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_mem.h b/usr/src/uts/i86pc/io/vmm/vmm_mem.h
new file mode 100644
index 0000000000..05dc37fb9a
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_mem.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_mem.h 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMM_MEM_H_
+#define	_VMM_MEM_H_
+
+/*
+ * Memory interface used by the vmm module.
+ * NOTE(review): the implementations are not part of this header; judging
+ * by the signatures, vmm_mem_alloc() returns the physical address of a
+ * 'size'-byte region and vmm_mem_free() releases it -- confirm against the
+ * implementation before relying on this.
+ */
+int		vmm_mem_init(void);
+vm_paddr_t	vmm_mem_alloc(size_t size);
+void		vmm_mem_free(vm_paddr_t start, size_t size);
+vm_paddr_t	vmm_mem_maxaddr(void);
+
+#endif	/* _VMM_MEM_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
new file mode 100644
index 0000000000..79e1cb1a44
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -0,0 +1,1040 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/cpuvar.h>
+#include <sys/ioccom.h>
+#include <sys/stat.h>
+#include <sys/vmsystm.h>
+#include <sys/ddi.h>
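+/*
+ * struct modctl in <sys/modctl.h> has a member declared as
+ * "void *__unused", which cannot coexist with a macro named __unused.
+ * As an (ugly) workaround, drop the macro before including the headers
+ * that pull in <sys/modctl.h>.
+ */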
+#undef	__unused
+#include <sys/sunddi.h>
+#include <sys/fs/dv_node.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_instruction_emul.h>
+#include <sys/vmm_dev.h>
+#include <sys/vmm_impl.h>
+
+#include <vm/vm.h>
+#include <vm/seg_dev.h>
+
+#include "io/vatpic.h"
+#include "io/vioapic.h"
+#include "vmm_lapic.h"
+
+/* NOTE(review): presumably the driver's devinfo node and soft-state anchor. */
+static dev_info_t *vmm_dip;
+static void *vmm_statep;
+
+/* List of vmm_softc instances; vmmdev_cleanup() fails with EBUSY while non-empty. */
+static SLIST_HEAD(, vmm_softc) head;
+
+static kmutex_t vmmdev_mtx;
+
+/*
+ * vmm trace ring
+ *
+ * vmm_dmsg_ring_size is the upper bound, in bytes, on the memory used for
+ * trace messages.  It is copied into the ring's 'maxsize' when the ring is
+ * created, provided it is positive.  vmm_debug_rbuf is the ring itself and
+ * stays NULL until vmm_trace_rbuf_alloc() has run.
+ */
+int	vmm_dmsg_ring_size = VMM_DMSG_RING_SIZE;
+static	vmm_trace_rbuf_t *vmm_debug_rbuf;
+static	vmm_trace_dmsg_t *vmm_trace_dmsg_alloc(void);
+static	void vmm_trace_dmsg_free(void);
+static	void vmm_trace_rbuf_alloc(void);
+static	void vmm_trace_rbuf_free(void);
+
+/*
+ * Return the message slot the next trace record should be written to, or
+ * NULL when none is available.
+ *
+ * While the ring is still growing, a fresh zeroed slot is allocated
+ * (without sleeping) and appended behind the current one.  Once another
+ * slot would push the ring past 'maxsize', the last slot is linked back to
+ * the head and from then on the oldest slot is reused on every call.
+ *
+ * 'size' is charged only for slots that actually end up on the ring, so it
+ * always matches the memory vmm_trace_dmsg_free() will give back.
+ */
+static vmm_trace_dmsg_t *
+vmm_trace_dmsg_alloc(void)
+{
+	vmm_trace_dmsg_t *dmsg_alloc, *dmsg = vmm_debug_rbuf->dmsgp;
+
+	/* The ring is closed: step to the next (oldest) slot and reuse it. */
+	if (vmm_debug_rbuf->looped == TRUE) {
+		vmm_debug_rbuf->dmsgp = dmsg->next;
+		return (vmm_debug_rbuf->dmsgp);
+	}
+
+	/*
+	 * If we're looping for the first time,
+	 * connect the ring.
+	 */
+	if (((vmm_debug_rbuf->size + (sizeof (vmm_trace_dmsg_t))) >
+	    vmm_debug_rbuf->maxsize) && (vmm_debug_rbuf->dmsgh != NULL)) {
+		dmsg->next = vmm_debug_rbuf->dmsgh;
+		vmm_debug_rbuf->dmsgp = vmm_debug_rbuf->dmsgh;
+		vmm_debug_rbuf->looped = TRUE;
+		return (vmm_debug_rbuf->dmsgp);
+	}
+
+	/* If we've gotten this far then memory allocation is needed */
+	dmsg_alloc = kmem_zalloc(sizeof (vmm_trace_dmsg_t), KM_NOSLEEP);
+	if (dmsg_alloc == NULL) {
+		vmm_debug_rbuf->allocfailed++;
+		return (NULL);
+	}
+
+	if (dmsg != NULL) {
+		/* Append behind the slot that was handed out last. */
+		dmsg->next = dmsg_alloc;
+	} else if (vmm_debug_rbuf->dmsgh == NULL) {
+		/* Very first slot: it becomes the head of the ring. */
+		vmm_debug_rbuf->dmsgh = dmsg_alloc;
+	} else {
+		/*
+		 * A head without a current slot means the ring state is
+		 * inconsistent.  Give the memory back; nothing has been
+		 * charged to 'size' yet.
+		 */
+		kmem_free(dmsg_alloc, sizeof (vmm_trace_dmsg_t));
+		return (NULL);
+	}
+
+	vmm_debug_rbuf->size += sizeof (vmm_trace_dmsg_t);
+	vmm_debug_rbuf->dmsgp = dmsg_alloc;
+	return (dmsg_alloc);
+}
+
+/*
+ * Free all messages on debug ring buffer.
+ *
+ * The ring bookkeeping is reset afterwards so that no pointer to freed
+ * memory survives.  That makes it harmless to call this more than once, or
+ * to log again later (the ring simply starts growing from scratch).  Does
+ * nothing when the ring buffer itself has not been set up.
+ */
+static void
+vmm_trace_dmsg_free(void)
+{
+	vmm_trace_dmsg_t *dmsg_next, *dmsg;
+
+	if (vmm_debug_rbuf == NULL)
+		return;
+
+	dmsg = vmm_debug_rbuf->dmsgh;
+	while (dmsg != NULL) {
+		dmsg_next = dmsg->next;
+		kmem_free(dmsg, sizeof (vmm_trace_dmsg_t));
+
+		/*
+		 * If we've looped around the ring then we're done.
+		 */
+		if (dmsg_next == vmm_debug_rbuf->dmsgh)
+			break;
+
+		dmsg = dmsg_next;
+	}
+
+	/* Forget the freed slots; the ring is empty and open again. */
+	vmm_debug_rbuf->dmsgh = NULL;
+	vmm_debug_rbuf->dmsgp = NULL;
+	vmm_debug_rbuf->size = 0;
+	vmm_debug_rbuf->looped = FALSE;
+}
+
+/*
+ * Create the (initially empty) trace ring buffer and its lock.  The byte
+ * limit is taken from vmm_dmsg_ring_size when that is positive; otherwise
+ * it stays at the zero left by kmem_zalloc().
+ */
+static void
+vmm_trace_rbuf_alloc(void)
+{
+	vmm_trace_rbuf_t *rbuf;
+
+	rbuf = vmm_debug_rbuf = kmem_zalloc(sizeof (*rbuf), KM_SLEEP);
+
+	mutex_init(&rbuf->lock, NULL, MUTEX_DRIVER, NULL);
+
+	if (vmm_dmsg_ring_size > 0)
+		rbuf->maxsize = vmm_dmsg_ring_size;
+}
+
+
+/*
+ * Tear down the trace ring buffer: release every message, destroy the lock
+ * and free the ring descriptor.  The global pointer is cleared afterwards
+ * so vmm_vtrace_log() sees that tracing is gone instead of touching freed
+ * memory.  Does nothing when the ring buffer does not exist.
+ */
+static void
+vmm_trace_rbuf_free(void)
+{
+	if (vmm_debug_rbuf == NULL)
+		return;
+
+	vmm_trace_dmsg_free();
+	mutex_destroy(&vmm_debug_rbuf->lock);
+	kmem_free(vmm_debug_rbuf, sizeof (vmm_trace_rbuf_t));
+	vmm_debug_rbuf = NULL;
+}
+
+/*
+ * Format one trace record into the ring buffer.
+ *
+ * Nothing is recorded when the ring buffer does not exist, when its byte
+ * limit cannot hold even a single message, or when no message slot can be
+ * obtained.  Otherwise the slot is stamped with the current time and the
+ * formatted text, all under the ring lock.
+ */
+static void
+vmm_vtrace_log(const char *fmt, va_list ap)
+{
+	vmm_trace_rbuf_t *rbuf = vmm_debug_rbuf;
+	vmm_trace_dmsg_t *slot;
+
+	if (rbuf == NULL || rbuf->maxsize < (sizeof (vmm_trace_dmsg_t)))
+		return;
+
+	mutex_enter(&rbuf->lock);
+
+	/* Either a freshly allocated slot or the oldest one on the ring. */
+	slot = vmm_trace_dmsg_alloc();
+	if (slot != NULL) {
+		gethrestime(&slot->timestamp);
+		(void) vsnprintf(slot->buf, sizeof (slot->buf), fmt, ap);
+	}
+
+	mutex_exit(&rbuf->lock);
+}
+
+/*
+ * printf-style front end of the vmm trace ring: bundles the variable
+ * arguments and passes them to vmm_vtrace_log().
+ */
+void
+vmm_trace_log(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmm_vtrace_log(fmt, args);
+	va_end(args);
+}
+
+/*
+ * One-time initialisation of the vmm device layer; currently this only
+ * creates the trace ring buffer.
+ */
+void
+vmmdev_init(void)
+{
+	vmm_trace_rbuf_alloc();
+}
+
+/*
+ * Counterpart of vmmdev_init().  Returns EBUSY, changing nothing, while
+ * any vmm_softc is still on the list; otherwise releases the trace
+ * messages and returns 0.
+ */
+int
+vmmdev_cleanup(void)
+{
+	if (!SLIST_EMPTY(&head))
+		return (EBUSY);
+
+	vmm_trace_dmsg_free();
+	return (0);
+}
+
+int
+vmmdev_do_ioctl(struct vmm_softc *sc, int cmd, intptr_t arg, int mode,
+    cred_t *credp, int *rvalp)
+{
+	int error, vcpu, state_changed;
+	struct vm_memory_segment seg;
+	struct vm_register vmreg;
+	struct vm_seg_desc vmsegdesc;
+	struct vm_run vmrun;
+	struct vm_exception vmexc;
+	struct vm_lapic_irq vmirq;
+	struct vm_lapic_msi vmmsi;
+	struct vm_ioapic_irq ioapic_irq;
+	struct vm_isa_irq isa_irq;
+	struct vm_capability vmcap;
+	struct vm_nmi vmnmi;
+	struct vm_x2apic x2apic;
+	struct vm_gla2gpa gg;
+	struct vm_activate_cpu vac;
+	int pincount;
+	int i;
+
+	vcpu = -1;
+	state_changed = 0;
+
+	/*
+	 * Some VMM ioctls can operate only on vcpus that are not running.
+	 */
+	switch (cmd) {
+	case VM_RUN:
+	case VM_GET_REGISTER:
+	case VM_SET_REGISTER:
+	case VM_GET_SEGMENT_DESCRIPTOR:
+	case VM_SET_SEGMENT_DESCRIPTOR:
+	case VM_INJECT_EXCEPTION:
+	case VM_GET_CAPABILITY:
+	case VM_SET_CAPABILITY:
+	case VM_PPTDEV_MSI:
+	case VM_PPTDEV_MSIX:
+	case VM_SET_X2APIC_STATE:
+	case VM_GLA2GPA:
+	case VM_ACTIVATE_CPU:
+	case VM_RESTART_INSTRUCTION:
+		/*
+		 * XXX fragile, handle with care
+		 * Assumes that the first field of the ioctl data is the vcpu.
+		 */
+		if (ddi_copyin((void *)arg, &vcpu, sizeof (vcpu), mode)) {
+			return (EFAULT);
+		}
+		if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+			error = EINVAL;
+			goto done;
+		}
+
+		error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+		if (error)
+			goto done;
+
+		state_changed = 1;
+		break;
+	case VM_MAP_MEMORY:
+		/*
+		 * ioctls that operate on the entire virtual machine must
+		 * prevent all vcpus from running.
+		 */
+		error = 0;
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+			error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+			if (error)
+				break;
+		}
+
+		if (error) {
+			while (--vcpu >= 0)
+				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+			goto done;
+		}
+
+		state_changed = 2;
+		break;
+
+	default:
+		break;
+	}
+
+	switch(cmd) {
+	case VM_RUN:
+		if (ddi_copyin((void *)arg, &vmrun,
+		    sizeof (struct vm_run), mode)) {
+			return (EFAULT);
+		}
+		error = vm_run(sc->vm, &vmrun);
+		if (ddi_copyout(&vmrun, (void *)arg,
+		    sizeof (struct vm_run), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_LAPIC_IRQ:
+		if (ddi_copyin((void *)arg, &vmirq,
+		    sizeof (struct vm_lapic_irq), mode)) {
+			return (EFAULT);
+		}
+		error = lapic_intr_edge(sc->vm, vmirq.cpuid, vmirq.vector);
+		if (ddi_copyout(&vmirq, (void *)arg,
+		    sizeof (struct vm_lapic_irq), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_LAPIC_LOCAL_IRQ:
+		if (ddi_copyin((void *)arg, &vmirq,
+		    sizeof (struct vm_lapic_irq), mode)) {
+			return (EFAULT);
+		}
+		error = lapic_set_local_intr(sc->vm, vmirq.cpuid,
+		    vmirq.vector);
+		if (ddi_copyout(&vmirq, (void *)arg,
+		    sizeof (struct vm_lapic_irq), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_LAPIC_MSI:
+		/*
+		 * Inject a message signalled interrupt: copy the request in,
+		 * deliver it, and copy the (unchanged) request back out.
+		 */
+		if (ddi_copyin((void *)arg, &vmmsi,
+		    sizeof (struct vm_lapic_msi), mode)) {
+			return (EFAULT);
+		}
+		error = lapic_intr_msi(sc->vm, vmmsi.addr, vmmsi.msg);
+		if (ddi_copyout(&vmmsi, (void *)arg,
+		    sizeof (struct vm_lapic_msi), mode)) {
+			return (EFAULT);
+		}
+		/*
+		 * Must not fall through: the next arm would reinterpret
+		 * 'arg' as a struct vm_ioapic_irq, assert an I/O APIC pin
+		 * and overwrite 'error'.
+		 */
+		break;
+	case VM_IOAPIC_ASSERT_IRQ:
+		if (ddi_copyin((void *)arg, &ioapic_irq,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vioapic_assert_irq(sc->vm, ioapic_irq.irq);;
+		if (ddi_copyout(&ioapic_irq, (void *)arg,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_IOAPIC_DEASSERT_IRQ:
+		if (ddi_copyin((void *)arg, &ioapic_irq,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vioapic_deassert_irq(sc->vm, ioapic_irq.irq);
+		if (ddi_copyout(&ioapic_irq, (void *)arg,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_IOAPIC_PULSE_IRQ:
+		if (ddi_copyin((void *)arg, &ioapic_irq,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vioapic_pulse_irq(sc->vm, ioapic_irq.irq);
+		if (ddi_copyout(&ioapic_irq, (void *)arg,
+		    sizeof (struct vm_ioapic_irq), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_IOAPIC_PINCOUNT:
+		error = 0;
+		pincount = vioapic_pincount(sc->vm);
+		if (ddi_copyout(&pincount, (void *)arg, sizeof (int), mode)) {
+			return (EFAULT);
+		}
+		break;
+	case VM_ISA_ASSERT_IRQ:
+		if (ddi_copyin((void *)arg, &isa_irq,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vatpic_assert_irq(sc->vm, isa_irq.atpic_irq);
+		if (error == 0 && isa_irq.ioapic_irq != -1)
+			error = vioapic_assert_irq(sc->vm,
+			    isa_irq.ioapic_irq);
+		if (ddi_copyout(&isa_irq, (void *)arg,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		
+		}
+		break;
+	case VM_ISA_DEASSERT_IRQ:
+		if (ddi_copyin((void *)arg, &isa_irq,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vatpic_deassert_irq(sc->vm, isa_irq.atpic_irq);
+		if (error == 0 && isa_irq.ioapic_irq != -1)
+			error = vioapic_deassert_irq(sc->vm,
+			    isa_irq.ioapic_irq);
+		if (ddi_copyout(&isa_irq, (void *)arg,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		
+		}
+		break;
+	case VM_ISA_PULSE_IRQ:
+		if (ddi_copyin((void *)arg, &isa_irq,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		}
+		error = vatpic_pulse_irq(sc->vm, isa_irq.atpic_irq);
+		if (error == 0 && isa_irq.ioapic_irq != -1)
+			error = vioapic_pulse_irq(sc->vm, isa_irq.ioapic_irq);
+		if (ddi_copyout(&isa_irq, (void *)arg,
+		    sizeof (struct vm_isa_irq), mode)) {
+			return (EFAULT);
+		
+		}
+		break;
+	case VM_MAP_MEMORY:
+		if (ddi_copyin((void *)arg, &seg,
+		    sizeof (struct vm_memory_segment), mode)) {
+			return (EFAULT);
+		}
+		error = vm_malloc(sc->vm, seg.gpa, seg.len);
+		break;
+	case VM_GET_MEMORY_SEG:
+		if (ddi_copyin((void *)arg, &seg,
+		    sizeof (struct vm_memory_segment), mode)) {
+			return (EFAULT);
+		}
+		seg.len = 0;
+		(void)vm_gpabase2memseg(sc->vm, seg.gpa, &seg);
+		if (ddi_copyout(&seg, (void *)arg,
+		    sizeof (struct vm_memory_segment), mode)) {
+			return (EFAULT);
+		}
+		error = 0;
+		break;
+	case VM_GET_REGISTER:
+		if (ddi_copyin((void *)arg, &vmreg,
+		    sizeof (struct vm_register), mode)) {
+			return (EFAULT);
+		}
+		error = vm_get_register(sc->vm, vmreg.cpuid, vmreg.regnum,
+					&vmreg.regval);
+		if (!error) {
+			if (ddi_copyout(&vmreg, (void *)arg,
+				 sizeof (struct vm_register), mode)) {
+				return (EFAULT);
+			}
+		}
+		break;
+	case VM_SET_REGISTER:
+		if (ddi_copyin((void *)arg, &vmreg,
+		    sizeof (struct vm_register), mode)) {
+			return (EFAULT);
+		}
+		error = vm_set_register(sc->vm, vmreg.cpuid, vmreg.regnum,
+					vmreg.regval);
+		break;
+	case VM_SET_SEGMENT_DESCRIPTOR:
+		if (ddi_copyin((void *)arg, &vmsegdesc,
+		    sizeof (struct vm_seg_desc), mode)) {
+			return (EFAULT);
+		}
+		error = vm_set_seg_desc(sc->vm, vmsegdesc.cpuid,
+					vmsegdesc.regnum,
+					&vmsegdesc.desc);
+		break;
+	case VM_GET_SEGMENT_DESCRIPTOR:
+		if (ddi_copyin((void *)arg, &vmsegdesc,
+		    sizeof (struct vm_seg_desc), mode)) {
+			return (EFAULT);
+		}
+		error = vm_get_seg_desc(sc->vm, vmsegdesc.cpuid,
+					vmsegdesc.regnum,
+					&vmsegdesc.desc);
+		if (!error) {
+			if (ddi_copyout(&vmsegdesc, (void *)arg,
+			    sizeof (struct vm_seg_desc), mode)) {
+				return (EFAULT);
+			}
+		}
+		break;
+	case VM_GET_CAPABILITY:
+		if (ddi_copyin((void *)arg, &vmcap,
+		    sizeof (struct vm_capability), mode)) {
+			return (EFAULT);
+		}
+		error = vm_get_capability(sc->vm, vmcap.cpuid,
+					  vmcap.captype,
+					  &vmcap.capval);
+		if (!error) {
+			if (ddi_copyout(&vmcap, (void *)arg,
+			    sizeof (struct vm_capability), mode)) {
+				return (EFAULT);
+			}
+		}
+		break;
+	case VM_SET_CAPABILITY:
+		if (ddi_copyin((void *)arg, &vmcap,
+		    sizeof (struct vm_capability), mode)) {
+			return (EFAULT);
+		}
+		error = vm_set_capability(sc->vm, vmcap.cpuid,
+					  vmcap.captype,
+					  vmcap.capval);
+		break;
+	case VM_SET_X2APIC_STATE:
+		if (ddi_copyin((void *)arg, &x2apic,
+		    sizeof (struct vm_x2apic), mode)) {
+			return (EFAULT);
+		}
+		error = vm_set_x2apic_state(sc->vm,
+					    x2apic.cpuid, x2apic.state);
+		break;
+	case VM_GET_X2APIC_STATE:
+		if (ddi_copyin((void *)arg, &x2apic,
+		    sizeof (struct vm_x2apic), mode)) {
+			return (EFAULT);
+		}
+		error = vm_get_x2apic_state(sc->vm,
+					    x2apic.cpuid, &x2apic.state);
+		if (!error) {
+			if (ddi_copyout(&x2apic, (void *)arg,
+			    sizeof (struct vm_x2apic), mode)) {
+				return (EFAULT);
+			}
+		}
+		break;
+	case VM_GLA2GPA: {
+		CTASSERT(PROT_READ == VM_PROT_READ);
+		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
+		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
+		if (ddi_copyin((void *)arg, &gg,
+		    sizeof (struct vm_gla2gpa), mode)) {
+			return (EFAULT);
+		}
+		error = vm_gla2gpa(sc->vm, gg.vcpuid, &gg.paging, gg.gla,
+		    gg.prot, &gg.gpa);
+		KASSERT(error == 0 || error == 1 || error == -1,
+		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
+		if (error >= 0) {
+			/*
+			 * error = 0: the translation was successful
+			 * error = 1: a fault was injected into the guest
+			 */
+			gg.fault = error;
+			error = 0;
+			if (ddi_copyout(&gg, (void *)arg,
+			    sizeof (struct vm_gla2gpa), mode)) {
+				return (EFAULT);
+			}
+		} else {
+			error = EFAULT;
+		}
+		break;
+	}
+	case VM_ACTIVATE_CPU:
+		if (ddi_copyin((void *)arg, &vac,
+		    sizeof (struct vm_activate_cpu), mode)) {
+			return (EFAULT);
+		}
+		error = vm_activate_cpu(sc->vm, vac.vcpuid);
+		break;
+	case VM_RESTART_INSTRUCTION:
+		error = vm_restart_instruction(sc->vm, vcpu);
+		break;
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	if (state_changed == 1) {
+		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+	} else if (state_changed == 2) {
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+	}
+
+done:
+	/* Make sure that no handler returns a bogus value like ERESTART */
+	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
+	return (error);
+}
+
+/*
+ * Return the lowest minor number, starting the search at 1, for which no
+ * soft state is currently allocated in vmm_statep.
+ */
+static minor_t
+vmm_find_free_minor(void)
+{
+	minor_t		candidate = 1;
+
+	while (ddi_get_soft_state(vmm_statep, candidate) != NULL)
+		candidate++;
+
+	return (candidate);
+}
+
+/*
+ * Create a VM instance named 'name'.
+ *
+ * Allocates a free minor number and its soft state, publishes a character
+ * minor node called 'name' under 'dip', creates the underlying vm and links
+ * the soft state onto the global list.  All of this is done while holding
+ * vmmdev_mtx.
+ *
+ * Returns 0 on success or a positive errno value on failure.  The result is
+ * handed straight back to the ioctl caller by vmm_ioctl(), so DDI_FAILURE
+ * (a negative value) must never be returned from here.
+ */
+int
+vmmdev_do_vm_create(dev_info_t *dip, char *name)
+{
+	struct vmm_softc	*sc;
+	minor_t			minor;
+	int			error;
+
+	/* Reject over-long names before anything is copied into sc->name. */
+	if (strlen(name) >= VM_MAX_NAMELEN)
+		return (EINVAL);
+
+	mutex_enter(&vmmdev_mtx);
+
+	minor = vmm_find_free_minor();
+	if (ddi_soft_state_zalloc(vmm_statep, minor) == DDI_FAILURE) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
+		error = ENXIO;
+		goto free_state;
+	}
+	strcpy(sc->name, name);
+	sc->minor = minor;
+
+	if (ddi_create_minor_node(dip, name, S_IFCHR, minor,
+	    DDI_PSEUDO, 0) == DDI_FAILURE) {
+		error = ENXIO;
+		goto free_state;
+	}
+
+	error = vm_create(name, &sc->vm);
+	if (error != 0) {
+		ddi_remove_minor_node(dip, name);
+		goto free_state;
+	}
+	SLIST_INSERT_HEAD(&head, sc, link);
+
+	mutex_exit(&vmmdev_mtx);
+
+	return (0);
+
+free_state:
+	ddi_soft_state_free(vmm_statep, minor);
+out:
+	mutex_exit(&vmmdev_mtx);
+
+	return (error);
+}
+
+/*
+ * Walk the global soft state list looking for an entry whose name matches
+ * 'name' exactly.  Returns the entry, or NULL when there is no match.
+ * The function takes no lock itself; the callers visible in this file hold
+ * vmmdev_mtx around the call.
+ */
+static struct vmm_softc *
+vmm_lookup(char *name)
+{
+	struct vmm_softc	*cur;
+
+	SLIST_FOREACH(cur, &head, link) {
+		if (strcmp(cur->name, name) == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the vm belonging to the instance called 'name', or NULL if no such
+ * instance exists.
+ *
+ * The soft state is only dereferenced while vmmdev_mtx is held, so a
+ * concurrent vmmdev_do_vm_destroy() cannot free it underneath us.
+ * NOTE(review): the returned vm pointer itself is not reference counted
+ * here; callers must make sure the VM outlives their use of it.
+ */
+struct vm *
+vm_lookup_by_name(char *name)
+{
+	struct vmm_softc	*sc;
+	struct vm		*vm = NULL;
+
+	mutex_enter(&vmmdev_mtx);
+
+	if ((sc = vmm_lookup(name)) != NULL)
+		vm = sc->vm;
+
+	mutex_exit(&vmmdev_mtx);
+
+	return (vm);
+}
+
+/*
+ * Tear down the VM instance called 'name'.
+ *
+ * Returns ENOENT when no such instance exists and EBUSY while its device
+ * node is open.  Otherwise the vm is destroyed, the soft state is unlinked
+ * and freed, the minor node is removed and devfs is cleaned under the
+ * parent of 'dip'; 0 is returned.
+ */
+int
+vmmdev_do_vm_destroy(dev_info_t *dip, char *name)
+{
+	dev_info_t		*parent = ddi_get_parent(dip);
+	struct vmm_softc	*sc;
+	int			rv = 0;
+
+	mutex_enter(&vmmdev_mtx);
+
+	sc = vmm_lookup(name);
+	if (sc == NULL) {
+		rv = ENOENT;
+	} else if (sc->open) {
+		rv = EBUSY;
+	} else {
+		vm_destroy(sc->vm);
+		SLIST_REMOVE(&head, sc, vmm_softc, link);
+		ddi_remove_minor_node(dip, name);
+		ddi_soft_state_free(vmm_statep, sc->minor);
+		(void) devfs_clean(parent, NULL, DV_CLEAN_FORCE);
+	}
+
+	mutex_exit(&vmmdev_mtx);
+
+	return (rv);
+}
+
+/*
+ * Translate the guest-physical offset 'off' of the given VM into a host
+ * page frame number for the mmap entry point.
+ *
+ * Returns the page frame number, or -1 when vm_gpa2hpa() reports failure.
+ * vmmdev_mtx is held only around the translation and is released on every
+ * path (the failure path previously returned with the mutex still held).
+ * 'nprot' is not used.
+ */
+int
+vmmdev_do_vm_mmap(struct vmm_softc *vmm_sc, off_t off, int nprot)
+{
+	vm_paddr_t	paddr;
+
+	mutex_enter(&vmmdev_mtx);
+	paddr = vm_gpa2hpa(vmm_sc->vm, (vm_paddr_t)off, PAGE_SIZE);
+	mutex_exit(&vmmdev_mtx);
+
+	if (paddr == -1) {
+		return (-1);
+	}
+
+	return (btop(paddr));
+}
+
+
+/*
+ * open(9E) entry point.
+ *
+ * The control minor only accepts exclusive (FEXCL) character opens and
+ * fails with EINVAL otherwise.  A VM minor may be open once at a time: the
+ * open fails with ENXIO when the minor has no soft state and with EBUSY
+ * when it is already marked open.
+ */
+static int
+vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+	const minor_t		minor = getminor(*devp);
+	struct vmm_softc	*sc;
+	int			rv;
+
+	if (minor == VMM_CTL_MINOR) {
+		/*
+		 * Master control device must be opened exclusively.
+		 */
+		if (otyp != OTYP_CHR || (flag & FEXCL) != FEXCL)
+			return (EINVAL);
+
+		return (0);
+	}
+
+	mutex_enter(&vmmdev_mtx);
+	sc = ddi_get_soft_state(vmm_statep, minor);
+	if (sc == NULL) {
+		rv = ENXIO;
+	} else if (sc->open) {
+		rv = EBUSY;
+	} else {
+		sc->open = B_TRUE;
+		rv = 0;
+	}
+	mutex_exit(&vmmdev_mtx);
+
+	return (rv);
+}
+
+/*
+ * close(9E) entry point.  Closing the control minor is a no-op; closing a
+ * VM minor clears its open flag under vmmdev_mtx.  Returns ENXIO when the
+ * minor has no soft state, 0 otherwise.
+ */
+static int
+vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	const minor_t		minor = getminor(dev);
+	struct vmm_softc	*sc;
+	int			rv = 0;
+
+	if (minor == VMM_CTL_MINOR)
+		return (0);
+
+	mutex_enter(&vmmdev_mtx);
+	sc = ddi_get_soft_state(vmm_statep, minor);
+	if (sc != NULL)
+		sc->open = B_FALSE;
+	else
+		rv = ENXIO;
+	mutex_exit(&vmmdev_mtx);
+
+	return (rv);
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * On the control minor, VMM_CREATE_VM and VMM_DESTROY_VM are handled here;
+ * both require the descriptor to be open for writing (EPERM otherwise) and
+ * take the VM name from a struct vmm_ioctl copied in from 'arg'.
+ * Everything else is forwarded to vmmdev_do_ioctl() with the soft state of
+ * the minor.
+ *
+ * A minor without soft state is rejected with an errno rather than being
+ * guarded only by ASSERT(): non-DEBUG kernels would otherwise hand a NULL
+ * softc to vmmdev_do_ioctl().  For the control minor this means "unknown
+ * command" (ENOTTY); for any other minor the instance is gone (ENXIO).
+ *
+ * NOTE(review): kvi.vmm_name comes from userland and is passed on without
+ * checking that it is NUL-terminated - confirm against struct vmm_ioctl.
+ */
+static int
+vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	struct vmm_softc	*sc;
+	struct vmm_ioctl	kvi;
+	minor_t			minor;
+
+	minor = getminor(dev);
+
+	if (minor == VMM_CTL_MINOR) {
+		if (ddi_copyin((void *)arg, &kvi, sizeof (struct vmm_ioctl),
+		    mode)) {
+			return (EFAULT);
+		}
+		switch (cmd) {
+		case VMM_CREATE_VM:
+			if ((mode & FWRITE) == 0)
+				return (EPERM);
+			return (vmmdev_do_vm_create(vmm_dip, kvi.vmm_name));
+		case VMM_DESTROY_VM:
+			if ((mode & FWRITE) == 0)
+				return (EPERM);
+			return (vmmdev_do_vm_destroy(vmm_dip, kvi.vmm_name));
+		default:
+			break;
+		}
+	}
+
+	sc = ddi_get_soft_state(vmm_statep, minor);
+	if (sc == NULL)
+		return ((minor == VMM_CTL_MINOR) ? ENOTTY : ENXIO);
+
+	return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp));
+}
+
+/*
+ * mmap(9E) entry point: resolve 'off' within the VM that owns this minor
+ * to a page frame number via vmmdev_do_vm_mmap().
+ *
+ * Returns -1 (the same failure value vmmdev_do_vm_mmap() uses) when the
+ * minor has no soft state, instead of relying on ASSERT() and then
+ * dereferencing NULL on non-DEBUG kernels.
+ */
+static int
+vmm_mmap(dev_t dev, off_t off, int prot)
+{
+	struct vmm_softc	*sc;
+
+	sc = ddi_get_soft_state(vmm_statep, getminor(dev));
+	if (sc == NULL)
+		return (-1);
+
+	return (vmmdev_do_vm_mmap(sc, off, prot));
+}
+
+/*
+ * segmap(9E) entry point.  With the address space range lock held, pick a
+ * user address with choose_addr() and create a segdev mapping of 'len'
+ * bytes at device offset 'off', using vmm_mmap() as the map function.
+ * Returns 0 or the error from choose_addr()/as_map().
+ */
+static int
+vmm_segmap(dev_t dev, off_t off, struct as *as,
+		  caddr_t *addrp, off_t len, unsigned int prot,
+		  unsigned int maxprot, unsigned int flags, cred_t *credp)
+{
+	struct segdev_crargs	args;
+	int			rv;
+
+	as_rangelock(as);
+
+	rv = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+	if (rv == 0) {
+		args.dev = dev;
+		args.offset = off;
+		args.mapfunc = vmm_mmap;
+		args.type = (flags & MAP_TYPE);
+		args.prot = (uchar_t)prot;
+		args.maxprot = (uchar_t)maxprot;
+		args.hat_flags = HAT_LOAD_NOCONSIST;
+		args.hat_attr = 0;
+		args.devmap_data = NULL;
+
+		rv = as_map(as, *addrp, len, segdev_create, &args);
+	}
+
+	as_rangeunlock(as);
+
+	return (rv);
+}
+
+/*
+ * getinfo stub: ignores every argument and returns 0.  It is not referenced
+ * by the vmm_ops table in this file, which uses ddi_no_info instead.
+ */
+static int
+vmm_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	return (0);
+}
+
+/*
+ * probe(9E) entry point.  Refuse to probe successfully while the "kvm"
+ * driver is installed; a warning is logged in that case.
+ */
+static int
+vmm_probe(dev_info_t *dip)
+{
+	if (!driver_installed(ddi_name_to_major("kvm")))
+		return (DDI_PROBE_SUCCESS);
+
+	cmn_err(CE_WARN, "kvm is installed\n");
+	return (DDI_PROBE_FAILURE);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH is supported.
+ *
+ * Loads the vmm module state with vmm_mod_load() and creates the control
+ * minor node; per-VM nodes are created on demand.  If the control node
+ * cannot be created the module load is undone with vmm_mod_unload() and
+ * vmm_dip is left untouched, so a failed attach leaves nothing behind.
+ */
+static int
+vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_ATTACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	if (vmm_mod_load()) {
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Create control node.  Other nodes will be created on demand.
+	 */
+	if (ddi_create_minor_node(dip, VMM_CTL_MINOR_NODE, S_IFCHR,
+	    VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) {
+		(void) vmm_mod_unload();
+		return (DDI_FAILURE);
+	}
+
+	/* Publish the devinfo pointer only once attach can no longer fail. */
+	vmm_dip = dip;
+
+	ddi_report_dev(dip);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported.  The detach fails
+ * if vmm_mod_unload() reports an error; otherwise the control minor node
+ * is removed and vmm_dip is cleared.
+ */
+static int
+vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	if (vmm_mod_unload()) {
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Remove the control node.
+	 */
+	ddi_remove_minor_node(dip, VMM_CTL_MINOR_NODE);
+	vmm_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Character device entry points.  Only open, close, ioctl, mmap and segmap
+ * are implemented by this driver; the remaining slots are filled with the
+ * generic nodev/nochpoll/ddi_prop_op handlers.
+ */
+static struct cb_ops vmm_cb_ops = {
+	vmm_open,	/* open */
+	vmm_close,	/* close */
+	nodev,		/* strategy */
+	nodev,		/* print */
+	nodev,		/* dump */
+	nodev,		/* read */
+	nodev,		/* write */
+	vmm_ioctl,	/* ioctl */
+	nodev,		/* devmap */
+	vmm_mmap,	/* mmap */
+	vmm_segmap,	/* segmap */
+	nochpoll,	/* poll */
+	ddi_prop_op,	/* prop_op */
+	NULL,		/* streamtab (not a STREAMS driver) */
+	D_NEW | D_MP | D_DEVMAP	/* driver compatibility flags */
+};
+
+/*
+ * Device operations: autoconfiguration entry points plus a pointer to the
+ * character device entry points above.  The driver is not a nexus, so no
+ * bus operations are supplied.
+ */
+static struct dev_ops vmm_ops = {
+	DEVO_REV,	/* devo_rev */
+	0,		/* reference count */
+	ddi_no_info,	/* getinfo */
+	nulldev,	/* identify */
+	vmm_probe,	/* probe */
+	vmm_attach,	/* attach */
+	vmm_detach,	/* detach */
+	nodev,		/* reset */
+	&vmm_cb_ops,	/* cb_ops */
+	(struct bus_ops *)NULL	/* bus_ops */
+};
+
+/* Loadable driver linkage: module type, description string, dev_ops. */
+static struct modldrv modldrv = {
+	&mod_driverops,
+	"vmm",
+	&vmm_ops
+};
+
+/* Module linkage handed to mod_install()/mod_remove()/mod_info(). */
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	&modldrv,
+	NULL
+};
+
+/*
+ * Module load entry point.  Initializes vmmdev_mtx and the soft state
+ * anchor, then installs the module.  Everything set up here is torn down
+ * again on failure, including the mutex.
+ */
+int
+_init(void)
+{
+	int	error;
+
+	mutex_init(&vmmdev_mtx, NULL, MUTEX_DRIVER, NULL);
+
+	error = ddi_soft_state_init(&vmm_statep, sizeof (struct vmm_softc), 0);
+	if (error) {
+		mutex_destroy(&vmmdev_mtx);
+		return (error);
+	}
+
+	error = mod_install(&modlinkage);
+	if (error) {
+		ddi_soft_state_fini(&vmm_statep);
+		mutex_destroy(&vmmdev_mtx);
+	}
+
+	return (error);
+}
+
+/*
+ * Module unload entry point.  If mod_remove() succeeds, release what
+ * _init() set up: the soft state anchor and vmmdev_mtx.
+ */
+int
+_fini(void)
+{
+	int	error;
+
+	error = mod_remove(&modlinkage);
+	if (error) {
+		return (error);
+	}
+	ddi_soft_state_fini(&vmm_statep);
+	mutex_destroy(&vmmdev_mtx);
+
+	return (0);
+}
+
+/* Module information entry point: report on modlinkage via mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
new file mode 100644
index 0000000000..6588f5a46d
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/kern/subr_sleepqueue.c 261520 2014-02-05 18:13:27Z jhb $
+ */
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/archsystm.h>
+#include <sys/cpuset.h>
+#include <sys/errno.h>
+#include <sys/fp.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/spl.h>
+#include <sys/systm.h>
+
+#include <machine/cpufunc.h>
+#include <machine/fpu.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include <sys/vmm_impl.h>
+
+#include <vm/as.h>
+#include <vm/seg_kmem.h>
+
+/*
+ * Translate a kernel virtual address into a physical address: look up the
+ * page frame backing 'va' in the kernel address space and combine it with
+ * the offset of 'va' within its page.
+ */
+vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+	const pfn_t	pfn = hat_getpfnum(kas.a_hat, (caddr_t)va);
+	const uintptr_t	pgoff = (uintptr_t)va & PAGE_MASK;
+
+	ASSERT(pfn != PFN_INVALID);
+	return ((pfn << PAGE_SHIFT) | pgoff);
+}
+
+/*
+ * Find the first set bit in a cpuset.
+ *
+ * Returns the 1-based index of the lowest set bit (as ffsl() does for a
+ * single word), or 0 when the set is empty.
+ *
+ * For multi-word sets the index found in word 'i' is offset by the number
+ * of BITS held in the preceding words; the previous code offset it by the
+ * number of bytes per word, which gave wrong results for any CPU beyond
+ * the first word.
+ */
+int
+cpusetobj_ffs(const cpuset_t *set)
+{
+#if	CPUSET_WORDS > 1
+	int	i, cbit;
+
+	cbit = 0;
+	for (i = 0; i < CPUSET_WORDS; i++) {
+		if (set->cpub[i] != 0) {
+			cbit = ffsl(set->cpub[i]);
+			/* 8 bits per byte: bits per cpub[] word. */
+			cbit += i * (int)(8 * sizeof (set->cpub[0]));
+			break;
+		}
+	}
+	return (cbit);
+#else
+	return(ffsl(*set));
+#endif
+}
+
+/*
+ * Run 'action_func(arg)' on all CPUs through a synchronous cross call.
+ * Setup and teardown callbacks are not supported by this implementation
+ * and must be NULL.
+ */
+void
+smp_rendezvous(void (* setup_func)(void *),
+	       void (* action_func)(void *),
+	       void (* teardown_func)(void *),
+	       void *arg)
+{
+	cpuset_t	targets;
+	xc_func_t	action = (xc_func_t)action_func;
+
+	ASSERT(setup_func == NULL);
+	ASSERT(teardown_func == NULL);
+
+	CPUSET_ALL(targets);
+	xc_sync((xc_arg_t)arg, 0, 0, CPUSET2BV(targets), action);
+}
+
+/*
+ * Bookkeeping record for one malloc() allocation.  malloc() places it
+ * directly behind the caller's buffer and links it onto kmem_items so that
+ * free() can recover the allocation size from the address alone.
+ */
+struct kmem_item {
+	void			*addr;	/* start of the caller's buffer */
+	size_t			size;	/* size requested by the caller */
+	LIST_ENTRY(kmem_item)	next;	/* linkage on kmem_items */
+};
+/* kmem_items_lock protects the kmem_items list. */
+static kmutex_t kmem_items_lock;
+static LIST_HEAD(, kmem_item) kmem_items;
+
+/*
+ * FreeBSD-style malloc(9) on top of kmem_alloc()/kmem_zalloc().
+ *
+ * 'size' bytes are allocated together with a trailing struct kmem_item that
+ * records the address and size, so that free() can later release the
+ * buffer with the correct length.  M_ZERO selects a zeroed allocation and
+ * M_NOWAIT a non-sleeping one; 'mtp' is ignored.
+ *
+ * Returns the buffer, or NULL if a non-sleeping allocation fails (the
+ * failure was previously dereferenced).
+ *
+ * NOTE(review): the trailer lives at p + size, so it is only naturally
+ * aligned when 'size' is a multiple of the pointer size.
+ */
+void *
+malloc(unsigned long size, struct malloc_type *mtp, int flags)
+{
+	void			*p;
+	struct kmem_item	*i;
+	int			kmem_flag = KM_SLEEP;
+
+	if (flags & M_NOWAIT)
+		kmem_flag = KM_NOSLEEP;
+
+	if (flags & M_ZERO) {
+		p = kmem_zalloc(size + sizeof(struct kmem_item), kmem_flag);
+	} else {
+		p = kmem_alloc(size + sizeof(struct kmem_item), kmem_flag);
+	}
+
+	if (p == NULL)
+		return (NULL);
+
+	/* Fill in the trailer before publishing it on the list. */
+	i = (struct kmem_item *)((char *)p + size);
+	i->addr = p;
+	i->size = size;
+
+	mutex_enter(&kmem_items_lock);
+	LIST_INSERT_HEAD(&kmem_items, i, next);
+	mutex_exit(&kmem_items_lock);
+
+	return (p);
+}
+
+/*
+ * FreeBSD-style free(9): release a buffer obtained from malloc() above.
+ *
+ * The matching struct kmem_item is looked up by address to recover the
+ * allocation size, unlinked, and the buffer plus trailer are returned to
+ * kmem.  As with FreeBSD's free(9), a NULL 'addr' is a no-op.  'mtp' is
+ * ignored.
+ */
+void
+free(void *addr, struct malloc_type *mtp)
+{
+	struct kmem_item	*i;
+
+	if (addr == NULL)
+		return;
+
+	mutex_enter(&kmem_items_lock);
+	LIST_FOREACH(i, &kmem_items, next) {
+		if (i->addr == addr)
+			break;
+	}
+	/* Every buffer handed out by malloc() is on the list. */
+	ASSERT(i != NULL);
+	LIST_REMOVE(i, next);
+	mutex_exit(&kmem_items_lock);
+
+	kmem_free(addr, i->size + sizeof(struct kmem_item));
+}
+
+/*
+ * Initialize a FreeBSD-style mutex on top of a kmutex.  MTX_SPIN in 'opts'
+ * selects a spin mutex at DISP_LEVEL; anything else yields a MUTEX_DRIVER
+ * mutex.  'type_name' is ignored.
+ */
+void
+mtx_init(struct mtx *mtx, char *name, const char *type_name, int opts)
+{
+	if ((opts & MTX_SPIN) == 0) {
+		mutex_init(&mtx->m, name, MUTEX_DRIVER, NULL);
+		return;
+	}
+
+	mutex_init(&mtx->m, name, MUTEX_SPIN,
+	    (ddi_iblock_cookie_t)ipltospl(DISP_LEVEL));
+}
+
+/* Destroy the kmutex embedded in a FreeBSD-style mutex. */
+void
+mtx_destroy(struct mtx *mtx)
+{
+	mutex_destroy(&mtx->m);
+}
+
+/*
+ * Enter a critical section: disable kernel preemption and bind the
+ * current thread to the CPU it is running on.  Undone by critical_exit().
+ */
+void
+critical_enter(void)
+{
+	kpreempt_disable();
+	thread_affinity_set(curthread, CPU_CURRENT);
+}
+
+/*
+ * Leave a critical section: undo critical_enter() in reverse order by
+ * clearing the CPU binding and re-enabling kernel preemption.
+ */
+void
+critical_exit(void)
+{
+	thread_affinity_clear(curthread);
+	kpreempt_enable();
+}
+
+/* One allocated unit number, chained on a hash bucket of its unrhdr. */
+struct unr {
+	u_int		item;
+	struct unr	*link;
+};
+
+/* Number of hash buckets; HASH_UNR() masks with (UNR_HASHSIZE - 1). */
+#define	UNR_HASHSIZE	8
+
+/*
+ * Unit number allocator state: the numbers currently in use are kept in a
+ * small hash table, and 'next' is the candidate tried first by the next
+ * alloc_unr() call.
+ */
+struct unrhdr {
+	struct mtx	*mtx;			/* protects this header */
+	struct unr	*hash[UNR_HASHSIZE];	/* allocated numbers */
+	u_int		min;
+	u_int		max;
+	u_int		next;
+};
+
+/* Hash bucket (an lvalue) that holds unit number 'i' in header 'uh'. */
+#define	HASH_UNR(uh, i)	((uh)->hash[(i) & ((UNR_HASHSIZE) - 1)])
+
+/* Default lock used by headers created without a caller-supplied mutex. */
+static struct mtx unr_mtx;
+
+/*
+ * Create a unit number allocator.
+ *
+ * 'low' and 'high' bound the range of numbers handed out; allocation
+ * starts at 'low'.  When 'mtx' is NULL the shared unr_mtx protects the
+ * new header.
+ */
+struct unrhdr *
+new_unrhdr(int low, int high, struct mtx *mtx)
+{
+	struct unrhdr	*uh = kmem_zalloc(sizeof (struct unrhdr), KM_SLEEP);
+
+	uh->mtx = (mtx != NULL) ? mtx : &unr_mtx;
+	uh->min = low;
+	uh->max = high;
+	uh->next = uh->min;
+
+	return (uh);
+}
+
+/*
+ * Free a unit number allocator header.  Only the header itself is freed;
+ * unit numbers still allocated from it are not released here.
+ */
+void
+delete_unrhdr(struct unrhdr *uh)
+{
+	kmem_free(uh, sizeof (struct unrhdr));
+}
+
+/*
+ * Find the record for unit number 'item' in the hash table of 'uh'.
+ * Returns NULL when the number is not allocated.  The header's mutex must
+ * be held by the caller.
+ */
+static struct unr *
+unr_lookup(struct unrhdr *uh, int item)
+{
+	struct unr	*cur;
+
+	ASSERT(MUTEX_HELD(&uh->mtx->m));
+
+	cur = HASH_UNR(uh, item);
+	while (cur != NULL && cur->item != item)
+		cur = cur->link;
+
+	return (cur);
+}
+
+/*
+ * Allocate a unit number from 'uh'.
+ *
+ * Candidates are tried round-robin starting at uh->next.  The first one
+ * that is not already in the hash table is recorded there and returned.
+ * If the search comes all the way back round to where it started, every
+ * candidate is in use and -1 is returned.
+ *
+ * The previous termination test compared the candidate just examined with
+ * the starting point, which is always true on the first pass; the call
+ * therefore failed as soon as the first candidate was taken, even though
+ * other numbers were free.
+ *
+ * NOTE(review): the wrap test means uh->max itself is never handed out,
+ * and the KM_SLEEP allocation happens with the header mutex held - confirm
+ * both are intended.
+ */
+int
+alloc_unr(struct unrhdr *uh)
+{
+	struct unr	*unr;
+	u_int		start;
+	int		item;
+
+	mutex_enter(&uh->mtx->m);
+	start = uh->next;
+	for (;;) {
+		item = uh->next;
+		if (++uh->next == uh->max) {
+			uh->next = uh->min;
+		}
+
+		if (unr_lookup(uh, item) == NULL) {
+			unr = kmem_zalloc(sizeof (struct unr), KM_SLEEP);
+			unr->item = item;
+			unr->link = HASH_UNR(uh, item);
+			HASH_UNR(uh, item) = unr;
+			break;
+		}
+
+		/* Wrapped back to the starting point: nothing is free. */
+		if (uh->next == start) {
+			item = -1;
+			break;
+		}
+	}
+	mutex_exit(&uh->mtx->m);
+
+	return (item);
+}
+
+/*
+ * Release unit number 'item' back to 'uh': unlink its record from the
+ * hash bucket under the header mutex and free it.  The number must
+ * currently be allocated.
+ */
+void
+free_unr(struct unrhdr *uh, u_int item)
+{
+	struct unr	**linkp;
+	struct unr	*victim;
+
+	mutex_enter(&uh->mtx->m);
+	linkp = &HASH_UNR(uh, item);
+	ASSERT(*linkp != NULL);
+	while ((*linkp)->item != item) {
+		linkp = &(*linkp)->link;
+		ASSERT(*linkp != NULL);
+	}
+	victim = *linkp;
+	*linkp = victim->link;
+	mutex_exit(&uh->mtx->m);
+
+	kmem_free(victim, sizeof(struct unr));
+}
+
+
+/*
+ * Cyclic handler behind every callout: clear the pending flag and, if the
+ * callout is still marked active, invoke its function with its argument.
+ */
+static void
+vmm_glue_callout_handler(void *arg)
+{
+	struct callout *co = arg;
+
+	co->c_flags &= ~CALLOUT_PENDING;
+	if ((co->c_flags & CALLOUT_ACTIVE) == 0)
+		return;
+
+	(co->c_func)(co->c_arg);
+}
+
+/*
+ * Initialize a callout by adding a low-level cyclic for it that never
+ * fires until it is reprogrammed, and mark the callout active.  'mpsafe'
+ * is ignored.
+ */
+void
+vmm_glue_callout_init(struct callout *c, int mpsafe)
+{
+	cyc_time_t	when;
+	cyc_handler_t	hdlr;
+
+	when.cyt_when = CY_INFINITY;
+	when.cyt_interval = CY_INFINITY;
+
+	hdlr.cyh_func = vmm_glue_callout_handler;
+	hdlr.cyh_arg = c;
+	hdlr.cyh_level = CY_LOW_LEVEL;
+
+	/* The cyclic is added with cpu_lock held. */
+	mutex_enter(&cpu_lock);
+	c->c_cyc_id = cyclic_add(&hdlr, &when);
+	c->c_flags |= CALLOUT_ACTIVE;
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * (Re)arm a callout to call 'func(arg)'.
+ *
+ * With C_ABSOLUTE in 'flags', 'sbt' is used as the expiry time as is;
+ * otherwise it is taken relative to gethrtime().  'pr' is ignored.
+ * Always returns 0.
+ */
+int
+vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
+    void (*func)(void *), void *arg, int flags)
+{
+	ASSERT(c->c_cyc_id != CYCLIC_NONE);
+
+	c->c_func = func;
+	c->c_arg = arg;
+	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+
+	if ((flags & C_ABSOLUTE) == 0)
+		cyclic_reprogram(c->c_cyc_id, sbt + gethrtime());
+	else
+		cyclic_reprogram(c->c_cyc_id, sbt);
+
+	return (0);
+}
+
+/*
+ * Stop a callout: push the expiry of its cyclic out to CY_INFINITY and
+ * clear the active and pending flags.  The cyclic itself stays allocated
+ * so the callout can be re-armed.  Always returns 0.
+ */
+int
+vmm_glue_callout_stop(struct callout *c)
+{
+	ASSERT(c->c_cyc_id != CYCLIC_NONE);
+	cyclic_reprogram(c->c_cyc_id, CY_INFINITY);
+	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+
+	return (0);
+}
+
+/*
+ * Tear a callout down: remove its cyclic under cpu_lock, reset the cyclic
+ * id to CYCLIC_NONE and clear the active and pending flags.  The callout
+ * must be initialized again before it can be re-armed, since the other
+ * callout routines assert a valid cyclic id.  Always returns 0.
+ */
+int
+vmm_glue_callout_drain(struct callout *c)
+{
+	ASSERT(c->c_cyc_id != CYCLIC_NONE);
+	mutex_enter(&cpu_lock);
+	cyclic_remove(c->c_cyc_id);
+	c->c_cyc_id = CYCLIC_NONE;
+	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+	mutex_exit(&cpu_lock);
+
+	return (0);
+}
+
+/*
+ * Cross-call handler that does nothing: ipi_cpu() uses it when the only
+ * purpose of the cross call is to interrupt the target CPU.
+ */
+static int
+ipi_cpu_justreturn(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
+{
+	return (0);
+}
+
+/*
+ * Interrupt CPU 'cpu' by sending it a no-wait cross call whose handler
+ * does nothing.  The 'ipi' argument is ignored.
+ */
+void
+ipi_cpu(int cpu, u_int ipi)
+{
+	cpuset_t	set;
+
+	CPUSET_ONLY(set, cpu);
+	xc_call_nowait(NULL, NULL, NULL, CPUSET2BV(set),
+		       ipi_cpu_justreturn);
+}
+
+/*
+ * Sleep queue chain hash.  A wait channel address is folded with a copy of
+ * itself shifted right by SC_SHIFT and masked down to an index into
+ * sleepq_chains[].  SC_LOOKUP() yields a pointer to the chain for a wait
+ * channel; its expansion is parenthesized so that it can be used safely
+ * inside larger expressions such as SC_LOOKUP(wc)->sc_lock.
+ */
+#define	SC_TABLESIZE	256			/* Must be power of 2. */
+#define	SC_MASK		(SC_TABLESIZE - 1)
+#define	SC_SHIFT	8
+#define	SC_HASH(wc)	((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
+			    SC_MASK)
+#define	SC_LOOKUP(wc)	(&sleepq_chains[SC_HASH(wc)])
+
+/*
+ * One sleep queue per wait channel that currently has sleepers.  Sleepers
+ * block on sq_cv; sq_blockedcnt counts them so that the queue can be
+ * released when the last one leaves (see sleepq_remove()).
+ */
+struct sleepqueue {
+	u_int sq_blockedcnt;			/* Num. of blocked threads. */
+	LIST_ENTRY(sleepqueue) sq_hash;		/* Chain. */
+	void		*sq_wchan;		/* Wait channel. */
+	kcondvar_t	sq_cv;
+};
+
+/* Hash bucket: the sleep queues of all wait channels hashing to it. */
+struct sleepqueue_chain {
+	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
+	struct mtx	sc_lock;		/* Spin lock for this chain. */
+};
+
+static struct sleepqueue_chain	sleepq_chains[SC_TABLESIZE];
+
+/* Passed as the alignment argument when vmm_sleepq_cache is created. */
+#define	SLEEPQ_CACHE_SZ		(64)
+static kmem_cache_t		*vmm_sleepq_cache;
+
+/*
+ * kmem cache constructor for sleep queues: zero the object and initialize
+ * its condition variable.  Always succeeds.
+ */
+static int
+vmm_sleepq_cache_init(void *buf, void *user_arg, int kmflags)
+{
+	struct sleepqueue *sq = buf;
+
+	bzero(sq, sizeof (*sq));
+	cv_init(&sq->sq_cv, NULL, CV_DRIVER, NULL);
+
+	return (0);
+}
+
+/* kmem cache destructor for sleep queues: destroy the condition variable. */
+static void
+vmm_sleepq_cache_fini(void *buf, void *user_arg)
+{
+	struct sleepqueue *sq = (struct sleepqueue *)buf;
+	cv_destroy(&sq->sq_cv);
+}
+
+/*
+ * Set up the sleep queue machinery: give every hash chain an empty list
+ * and a spin lock, then create the kmem cache sleep queues come from.
+ */
+static void
+init_sleepqueues(void)
+{
+	struct sleepqueue_chain	*chain;
+
+	for (chain = &sleepq_chains[0];
+	    chain < &sleepq_chains[SC_TABLESIZE]; chain++) {
+		LIST_INIT(&chain->sc_queues);
+		mtx_init(&chain->sc_lock, "sleepq chain", NULL, MTX_SPIN);
+	}
+
+	vmm_sleepq_cache = kmem_cache_create("vmm_sleepq_cache",
+	    sizeof (struct sleepqueue), SLEEPQ_CACHE_SZ, vmm_sleepq_cache_init,
+	    vmm_sleepq_cache_fini, NULL, NULL, NULL, 0);
+}
+
+/*
+ * Acquire the spin lock of the hash chain that the wait channel 'wchan'
+ * maps to.  Paired with sleepq_release().
+ */
+static void
+sleepq_lock(void *wchan)
+{
+	struct sleepqueue_chain *chain = SC_LOOKUP(wchan);
+
+	mtx_lock_spin(&chain->sc_lock);
+}
+
+/*
+ * Find the sleep queue for the wait channel 'wchan' on its hash chain.
+ * The chain lock must already be held by the caller (see sleepq_lock());
+ * this is only asserted here, not acquired.  Returns NULL when no queue
+ * exists for the channel.
+ */
+static struct sleepqueue *
+sleepq_lookup(void *wchan)
+{
+	struct sleepqueue_chain	*chain;
+	struct sleepqueue	*cur;
+
+	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+	chain = SC_LOOKUP(wchan);
+	mtx_assert(&chain->sc_lock, MA_OWNED);
+	LIST_FOREACH(cur, &chain->sc_queues, sq_hash) {
+		if (cur->sq_wchan == wchan)
+			break;
+	}
+
+	/* 'cur' is NULL here if the walk ran off the end of the chain. */
+	return (cur);
+}
+
+/*
+ * Drop the spin lock of the hash chain that the wait channel 'wchan' maps
+ * to.  Paired with sleepq_lock().
+ */
+static void
+sleepq_release(void *wchan)
+{
+	struct sleepqueue_chain *chain = SC_LOOKUP(wchan);
+
+	mtx_unlock_spin(&chain->sc_lock);
+}
+
+/*
+ * Register the calling thread as a sleeper on the wait channel 'wchan'.
+ *
+ * The sleep queue for the channel is looked up, or allocated and inserted
+ * on the hash chain if none exists yet, and its blocked count is
+ * incremented.  Returns the queue; the caller undoes this with
+ * sleepq_remove().  The chain lock must be held (sleepq_lookup() asserts
+ * it).
+ *
+ * NOTE(review): msleep_spin() calls this with the chain spin lock held, so
+ * the KM_SLEEP cache allocation below may block under a spin lock -
+ * confirm this is safe or allocate before taking the lock.
+ */
+struct sleepqueue *
+sleepq_add(void *wchan)
+{
+	struct sleepqueue_chain	*sc;
+	struct sleepqueue	*sq;
+
+	sc = SC_LOOKUP(wchan);
+
+	/* Look up the sleep queue associated with the wait channel 'wchan'. */
+	sq = sleepq_lookup(wchan);
+
+	if (sq == NULL) {
+		sq = kmem_cache_alloc(vmm_sleepq_cache, KM_SLEEP);
+		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
+		sq->sq_wchan = wchan;
+	}
+
+        sq->sq_blockedcnt++;
+
+	return (sq);
+}
+
+/*
+ * Drop one sleeper from 'sq'.  When the last sleeper leaves, the queue is
+ * unlinked from its hash chain and handed back to the cache.
+ */
+void
+sleepq_remove(struct sleepqueue *sq)
+{
+	if (--sq->sq_blockedcnt != 0)
+		return;
+
+	LIST_REMOVE(sq, sq_hash);
+	kmem_cache_free(vmm_sleepq_cache, sq);
+}
+
+/*
+ * Sleep on the wait channel 'chan' for at most 'ticks' clock ticks.
+ *
+ * The caller holds 'mtx'; it is handed to cv_reltimedwait() together with
+ * the condition variable of the channel's sleep queue.  'wmesg' is
+ * ignored.
+ *
+ * Returns 0 if the thread was woken up and EWOULDBLOCK if the timeout
+ * expired (cv_reltimedwait() returned -1).  The function used to return
+ * an uninitialized local.
+ *
+ * NOTE(review): FreeBSD treats a timeout of 0 as "sleep until woken",
+ * whereas it is passed to cv_reltimedwait() unchanged here - verify what
+ * callers expect.
+ */
+int
+msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int ticks)
+{
+	struct sleepqueue	*sq;
+	clock_t			rv;
+
+	sleepq_lock(chan);
+	sq = sleepq_add(chan);
+	sleepq_release(chan);
+
+	rv = cv_reltimedwait(&sq->sq_cv, &mtx->m, ticks, TR_CLOCK_TICK);
+
+	sleepq_lock(chan);
+	sleepq_remove(sq);
+	sleepq_release(chan);
+
+	return ((rv == -1) ? EWOULDBLOCK : 0);
+}
+
+/*
+ * Wake every thread sleeping on the wait channel 'chan'.  Does nothing if
+ * the channel has no sleep queue.
+ */
+void
+wakeup(void *chan)
+{
+	struct sleepqueue	*queue;
+
+	sleepq_lock(chan);
+	if ((queue = sleepq_lookup(chan)) != NULL)
+		cv_broadcast(&queue->sq_cv);
+	sleepq_release(chan);
+}
+
+/*
+ * Signal the condition variable of the wait channel 'chan' once, waking a
+ * single sleeper.  Does nothing if the channel has no sleep queue.
+ */
+void
+wakeup_one(void *chan)
+{
+	struct sleepqueue	*queue;
+
+	sleepq_lock(chan);
+	if ((queue = sleepq_lookup(chan)) != NULL)
+		cv_signal(&queue->sq_cv);
+	sleepq_release(chan);
+}
+
+/* CPU identification globals, filled in by vmm_cpuid_init() below. */
+u_int	cpu_high;		/* Highest arg to CPUID */
+u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
+u_int	cpu_id;			/* Stepping ID (first register of CPUID 1) */
+char	cpu_vendor[20];		/* CPU Origin code */
+
+/*
+ * Populate the cpu_* globals from CPUID:
+ *   leaf 0          - cpu_high and the 12-character vendor string
+ *   leaf 1          - cpu_id
+ *   leaf 0x80000000 - cpu_exthigh
+ */
+static void
+vmm_cpuid_init(void)
+{
+	u_int regs[4];
+
+	do_cpuid(0, regs);
+	cpu_high = regs[0];
+	/* The vendor string is assembled from regs[1], regs[3], regs[2]. */
+	((u_int *)&cpu_vendor)[0] = regs[1];
+	((u_int *)&cpu_vendor)[1] = regs[3];
+	((u_int *)&cpu_vendor)[2] = regs[2];
+	cpu_vendor[12] = '\0';
+
+	do_cpuid(1, regs);
+	cpu_id = regs[0];
+
+	do_cpuid(0x80000000, regs);
+	cpu_exthigh = regs[0];
+}
+
+/* FPU save area handed to the vmm code: a wrapper around an fpu_ctx_t. */
+struct savefpu {
+	fpu_ctx_t	fsa_fp_ctx;
+};
+
+/* vmem arena that all save areas are allocated from. */
+static vmem_t *fpu_save_area_arena;
+
+/*
+ * Create the arena FPU save areas are allocated from.  It imports memory
+ * from heap_arena through segkmem_alloc()/segkmem_free() and uses
+ * XSAVE_AREA_ALIGN as its quantum.
+ */
+static void
+fpu_save_area_init(void)
+{
+	fpu_save_area_arena = vmem_create("fpu_save_area",
+	    NULL, 0, XSAVE_AREA_ALIGN,
+	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_BESTFIT | VM_SLEEP);
+}
+
+/* Destroy the arena created by fpu_save_area_init(). */
+static void
+fpu_save_area_cleanup(void)
+{
+	vmem_destroy(fpu_save_area_arena);
+}
+
+/*
+ * Allocate one FPU save area from the arena.  The allocation may sleep
+ * (VM_SLEEP).  The contents are not initialized here; see
+ * fpu_save_area_reset().
+ */
+struct savefpu *
+fpu_save_area_alloc(void)
+{
+	return (vmem_alloc(fpu_save_area_arena, sizeof (struct savefpu),
+			   VM_SLEEP));
+}
+
+/* Return a save area obtained from fpu_save_area_alloc() to the arena. */
+void
+fpu_save_area_free(struct savefpu *fsa)
+{
+	vmem_free(fpu_save_area_arena, fsa, sizeof (struct savefpu));
+}
+
+/*
+ * Reset a save area to the initial FPU state.
+ *
+ * The status words are cleared and the register image is overwritten with
+ * the kernel's initial image for the save mechanism in use: sse_initial
+ * for FXSAVE, avx_initial for XSAVE.  In the XSAVE case the save mask is
+ * also set to x87 | SSE | AVX.  Any other mechanism panics.
+ */
+void
+fpu_save_area_reset(struct savefpu *fsa)
+{
+	extern const struct fxsave_state sse_initial;
+	extern const struct xsave_state avx_initial;
+	struct fpu_ctx *fp;
+	struct fxsave_state *fx;
+	struct xsave_state *xs;
+
+	fp = &fsa->fsa_fp_ctx;
+
+	fp->fpu_regs.kfpu_status = 0;
+	fp->fpu_regs.kfpu_xstatus = 0;
+
+	switch (fp_save_mech) {
+	case FP_FXSAVE:
+		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
+		bcopy(&sse_initial, fx, sizeof (*fx));
+		break;
+	case FP_XSAVE:
+		fp->fpu_xsave_mask = (XFEATURE_ENABLED_X87 |
+		    XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX);
+		xs = &fp->fpu_regs.kfpu_u.kfpu_xs;
+		bcopy(&avx_initial, xs, sizeof (*xs));
+		break;
+	default:
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
+	}
+}
+
+/*
+ * Save the FPU state of the current thread into its LWP's pcb.
+ * The 'td' argument is not used; the function always operates on
+ * curthread.
+ * NOTE(review): curthread->t_lwp is dereferenced unconditionally -
+ * presumably callers always run in LWP context; confirm.
+ */
+void
+fpuexit(kthread_t *td)
+{
+	fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu);
+}
+
+/* Load FPU state from '*addr' with the FXRSTOR instruction. */
+static __inline void
+vmm_fxrstor(struct fxsave_state *addr)
+{
+	__asm __volatile("fxrstor %0" : : "m" (*(addr)));
+}
+
+/* Store the current FPU state to '*addr' with the FXSAVE instruction. */
+static __inline void
+vmm_fxsave(struct fxsave_state *addr)
+{
+	__asm __volatile("fxsave %0" : "=m" (*(addr)));
+}
+
+/*
+ * Load extended state from '*addr' with XRSTOR.  The 64-bit component
+ * mask is split into its low and high halves, which are passed to the
+ * instruction in the "a" and "d" registers.
+ */
+static __inline void
+vmm_xrstor(struct xsave_state *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
+	__asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+/*
+ * Store extended state to '*addr' with XSAVE.  The 64-bit component mask
+ * is split into its low and high halves, which are passed to the
+ * instruction in the "a" and "d" registers.
+ */
+static __inline void
+vmm_xsave(struct xsave_state *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
+	__asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+	    "memory");
+}
+
+/*
+ * Load the FPU from the save area 'arg' (a struct savefpu), using the
+ * instruction that matches the kernel's save mechanism.  Panics on an
+ * unknown mechanism.
+ */
+void
+fpurestore(void *arg)
+{
+	struct savefpu *area = arg;
+	struct fpu_ctx *ctx = &area->fsa_fp_ctx;
+
+	if (fp_save_mech == FP_FXSAVE) {
+		vmm_fxrstor(&ctx->fpu_regs.kfpu_u.kfpu_fx);
+	} else if (fp_save_mech == FP_XSAVE) {
+		vmm_xrstor(&ctx->fpu_regs.kfpu_u.kfpu_xs, ctx->fpu_xsave_mask);
+	} else {
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
+	}
+}
+
+/*
+ * Store the FPU into the save area 'arg' (a struct savefpu), using the
+ * instruction that matches the kernel's save mechanism.  Panics on an
+ * unknown mechanism.
+ */
+void
+fpusave(void *arg)
+{
+	struct savefpu *area = arg;
+	struct fpu_ctx *ctx = &area->fsa_fp_ctx;
+
+	if (fp_save_mech == FP_FXSAVE) {
+		vmm_fxsave(&ctx->fpu_regs.kfpu_u.kfpu_fx);
+	} else if (fp_save_mech == FP_XSAVE) {
+		vmm_xsave(&ctx->fpu_regs.kfpu_u.kfpu_xs, ctx->fpu_xsave_mask);
+	} else {
+		panic("Invalid fp_save_mech");
+		/*NOTREACHED*/
+	}
+}
+
+/*
+ * One-time setup of the glue layer: read the CPUID globals, create the
+ * FPU save area arena and initialize the sleep queue machinery.
+ */
+void
+vmm_sol_glue_init(void)
+{
+	vmm_cpuid_init();
+	fpu_save_area_init();
+	init_sleepqueues();
+}
+
+/*
+ * Undo vmm_sol_glue_init(): destroy the FPU save area arena and the sleep
+ * queue cache.
+ * NOTE(review): the per-chain spin locks set up by init_sleepqueues() are
+ * not destroyed here.
+ */
+void
+vmm_sol_glue_cleanup(void)
+{
+	fpu_save_area_cleanup();
+	kmem_cache_destroy(vmm_sleepq_cache);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_mem.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_mem.c
new file mode 100644
index 0000000000..3bb5412d16
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_mem.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_mem.c 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_mem.c 245678 2013-01-20 03:42:49Z neel $");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+#include <machine/pmap.h>
+
+#include <sys/ddi.h>
+
+#include "vmm_util.h"
+#include "vmm_mem.h"
+
+/* Nothing to set up for this memory backend; always returns 0. */
+int
+vmm_mem_init(void)
+{
+	return (0);
+}
+
+/*
+ * Allocate one zeroed page and return its physical address.
+ *
+ * Only PAGE_SIZE requests are supported; any other size panics.  The
+ * allocation is attempted without sleeping and retried every half second
+ * for up to two seconds.  Returns 0 if no page could be obtained in that
+ * time (the return type is a physical address, not a pointer, so 0 is
+ * used rather than NULL).
+ */
+vm_paddr_t
+vmm_mem_alloc(size_t size)
+{
+	clock_t usec = 2 * 1000000;
+	vm_paddr_t pa;
+	caddr_t addr;
+
+	if (size != PAGE_SIZE)
+		panic("vmm_mem_alloc: invalid allocation size %lu", size);
+
+	while (usec > 0) {
+		if ((addr = kmem_zalloc(PAGE_SIZE, KM_NOSLEEP)) != NULL) {
+			/* The page is expected to be page aligned. */
+			ASSERT(((uintptr_t)addr & PAGE_MASK) == 0);
+			pa = vtophys((vm_offset_t)addr);
+			return (pa);
+		}
+		delay(drv_usectohz((clock_t)500000));
+		usec -= 500000;
+	}
+
+	return (0);
+}
+
+/*
+ * Free a page previously returned by vmm_mem_alloc().
+ *
+ * 'base' must be page aligned and 'length' must be PAGE_SIZE; anything
+ * else panics.  The kernel virtual address handed to kmem_free() is taken
+ * from the p_offset field of the page_t that backs 'base'.
+ *
+ * NOTE(review): page_numtopp_nolock() is not checked for a NULL return,
+ * and using p_offset as the virtual address assumes the page belongs to a
+ * kernel heap mapping - confirm both.
+ */
+void
+vmm_mem_free(vm_paddr_t base, size_t length)
+{
+	page_t	*pp;
+
+	if (base & PAGE_MASK) {
+		panic("vmm_mem_free: base 0x%0lx must be aligned on a "
+		      "0x%0x boundary\n", base, PAGE_SIZE);
+	}
+
+	if (length != PAGE_SIZE) {
+		panic("vmm_mem_free: invalid length %lu", length);
+	}
+
+	pp = page_numtopp_nolock(btop(base));
+	kmem_free((void *)pp->p_offset, PAGE_SIZE);
+}
+
+/*
+ * Return the byte address of page frame (physmax + 1), i.e. the address
+ * just past the page numbered physmax.
+ */
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+	return (ptob(physmax + 1));
+}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.h b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
new file mode 100644
index 0000000000..9bf7a60e0b
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_stat.h 250427 2013-05-10 02:59:49Z neel $
+ */
+
+#ifndef _VMM_STAT_H_
+#define	_VMM_STAT_H_
+
+struct vm;
+
+#define	MAX_VMM_STAT_ELEMS	64		/* arbitrary */
+
+enum vmm_stat_scope {
+	VMM_STAT_SCOPE_ANY,
+	VMM_STAT_SCOPE_INTEL,		/* Intel VMX specific statistic */
+	VMM_STAT_SCOPE_AMD,		/* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type {
+	int	index;			/* position in the stats buffer */
+	int	nelems;			/* standalone or array */
+	const char *desc;		/* description of statistic */
+	enum vmm_stat_scope scope;
+};
+
+void	vmm_stat_init(void *arg);
+
+#define	VMM_STAT_DEFINE(type, nelems, desc, scope)			\
+	struct vmm_stat_type type[1] = {				\
+		{ -1, nelems, desc, scope }				\
+	};								\
+	SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type)
+
+#define	VMM_STAT_DECLARE(type)						\
+	extern struct vmm_stat_type type[1]
+
+#define	VMM_STAT(type, desc)		\
+	VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+#define	VMM_STAT_INTEL(type, desc)	\
+	VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL)
+#define	VMM_STAT_AMD(type, desc)	\
+	VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD)
+
+#define	VMM_STAT_ARRAY(type, nelems, desc)	\
+	VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void	*vmm_stat_alloc(void);
+void 	vmm_stat_free(void *vp);
+
+/*
+ * 'buf' should be large enough to hold 'MAX_VMM_STAT_ELEMS' entries
+ */
+int	vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
+int	vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+/* Add 'x' to slot 'statidx' of stat 'vst'; no-op without VMM_KEEP_STATS. */
+static void __inline
+vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+		    int statidx, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+	uint64_t *counters = vcpu_stats(vm, vcpu);
+	/* Skip unassigned stats (index < 0) and a too-large statidx. */
+	if (vst->index < 0 || statidx >= vst->nelems)
+		return;
+	counters[vst->index + statidx] += x;
+#endif
+}
+		   
+
+static void __inline
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+	/* Scalar case: add 'x' to element 0 of 'vst' (if stats are kept). */
+#ifdef VMM_KEEP_STATS
+	vmm_stat_array_incr(vm, vcpu, vst, 0, x);
+#endif
+}
+
+/* Extern declarations of the statistics; each one is listed exactly once. */
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
+VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.c b/usr/src/uts/i86pc/io/vmm/vmm_util.c
new file mode 100644
index 0000000000..fabd42e13c
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_util.c
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_util.c 245678 2013-01-20 03:42:49Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm_util.c 245678 2013-01-20 03:42:49Z neel $");
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+
+#include <machine/md_var.h>
+
+#include "vmm_util.h"
+
+/* Return TRUE iff the 'cpu_vendor' string is exactly "GenuineIntel". */
+boolean_t
+vmm_is_intel(void)
+{
+	boolean_t match;
+
+	match = (strcmp(cpu_vendor, "GenuineIntel") == 0) ? TRUE : FALSE;
+	return (match);
+}
+
+/* Return TRUE iff the 'cpu_vendor' string is exactly "AuthenticAMD". */
+boolean_t
+vmm_is_amd(void)
+{
+	if (strcmp(cpu_vendor, "AuthenticAMD") != 0)
+		return (FALSE);
+	return (TRUE);
+}
+
+/* Report whether the host CPU advertises 1GB page support via CPUID. */
+boolean_t
+vmm_supports_1G_pages(void)
+{
+	unsigned int cpuid_out[4];
+
+	/* The extended leaf must exist before it can be queried. */
+	if (cpu_exthigh < 0x80000001)
+		return (FALSE);
+
+	/*
+	 * CPUID.80000001:EDX[bit 26] = 1 indicates support for 1GB pages;
+	 * both Intel and AMD support this bit.  EDX is element 3.
+	 */
+	do_cpuid(0x80000001, cpuid_out);
+	return ((cpuid_out[3] & (1 << 26)) ? TRUE : FALSE);
+}
+
+#ifdef	__FreeBSD__
+#include <sys/proc.h>
+#include <machine/frame.h>
+#define	DUMP_REG(x)	printf(#x "\t\t0x%016lx\n", (long)(tf->tf_ ## x))
+#define	DUMP_SEG(x)	printf(#x "\t\t0x%04x\n", (unsigned)(tf->tf_ ## x))
+void
+dump_trapframe(struct trapframe *tf)
+{
+	DUMP_REG(rdi);
+	DUMP_REG(rsi);
+	DUMP_REG(rdx);
+	DUMP_REG(rcx);
+	DUMP_REG(r8);
+	DUMP_REG(r9);
+	DUMP_REG(rax);
+	DUMP_REG(rbx);
+	DUMP_REG(rbp);
+	DUMP_REG(r10);
+	DUMP_REG(r11);
+	DUMP_REG(r12);
+	DUMP_REG(r13);
+	DUMP_REG(r14);
+	DUMP_REG(r15);
+	DUMP_REG(trapno);
+	DUMP_REG(addr);
+	DUMP_REG(flags);
+	DUMP_REG(err);
+	DUMP_REG(rip);
+	DUMP_REG(rflags);
+	DUMP_REG(rsp);
+	DUMP_SEG(cs);
+	DUMP_SEG(ss);
+	DUMP_SEG(fs);
+	DUMP_SEG(gs);
+	DUMP_SEG(es);
+	DUMP_SEG(ds);
+}
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.h b/usr/src/uts/i86pc/io/vmm/vmm_util.h
new file mode 100644
index 0000000000..fe1c1c9449
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmm_util.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/vmm_util.h 245678 2013-01-20 03:42:49Z neel $
+ */
+
+#ifndef _VMM_UTIL_H_
+#define	_VMM_UTIL_H_
+
+struct trapframe;
+
+boolean_t	vmm_is_intel(void);
+boolean_t	vmm_is_amd(void);
+boolean_t	vmm_supports_1G_pages(void);
+
+void		dump_trapframe(struct trapframe *tf);
+
+#endif
diff --git a/usr/src/uts/i86pc/io/vmm/vmx_assym.s b/usr/src/uts/i86pc/io/vmm/vmx_assym.s
new file mode 100644
index 0000000000..d84ca30275
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/vmx_assym.s
@@ -0,0 +1 @@
+#include "vmx_assym.h"
diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c
new file mode 100644
index 0000000000..02222ef5e7
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/x86.c
@@ -0,0 +1,276 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/x86.c 255645 2013-09-17 17:56:53Z grehan $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/amd64/vmm/x86.c 255645 2013-09-17 17:56:53Z grehan $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+
+#include <machine/clock.h>
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+
+#include <machine/vmm.h>
+
+#include "x86.h"
+
+#define	CPUID_VM_HIGH		0x40000000
+
+static const char bhyve_id[12] = "bhyve bhyve ";
+
+static uint64_t bhyve_xcpuids;
+
+/*
+ * Emulate the CPUID instruction for guest vcpu 'vcpu_id' of 'vm'.
+ *
+ * On entry *eax holds the requested leaf and *ecx the sub-leaf; on return
+ * the four pointers hold the values the guest should see in eax, ebx, ecx
+ * and edx.  Most leaves start from the host's CPUID output and then mask
+ * off features that are not exposed to guests.  Always returns 1.
+ */
+int
+x86_emulate_cpuid(struct vm *vm, int vcpu_id,
+		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+	int error;
+	unsigned int func, regs[4];
+	enum x2apic_state x2apic_state;
+
+	/*
+	 * Requests for invalid CPUID levels should map to the highest
+	 * available level instead.
+	 */
+	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
+		if (*eax > cpu_exthigh)
+			*eax = cpu_exthigh;
+	} else if (*eax >= 0x40000000) {
+		if (*eax > CPUID_VM_HIGH)
+			*eax = CPUID_VM_HIGH;
+	} else if (*eax > cpu_high) {
+		*eax = cpu_high;
+	}
+
+	func = *eax;
+
+	/*
+	 * In general the approach used for CPU topology is to
+	 * advertise a flat topology where all CPUs are packages with
+	 * no multi-core or SMT.
+	 */
+	switch (func) {
+		/*
+		 * Pass these through to the guest
+		 */
+		case CPUID_0000_0000:
+		case CPUID_0000_0002:
+		case CPUID_0000_0003:
+		case CPUID_8000_0000:
+		case CPUID_8000_0002:
+		case CPUID_8000_0003:
+		case CPUID_8000_0004:
+		case CPUID_8000_0006:
+		case CPUID_8000_0008:
+			cpuid_count(*eax, *ecx, regs);
+			break;
+
+		case CPUID_8000_0001:
+			/*
+			 * Hide rdtscp/ia32_tsc_aux until we know how
+			 * to deal with them.
+			 */
+			cpuid_count(*eax, *ecx, regs);
+			regs[3] &= ~AMDID_RDTSCP;
+			break;
+
+		case CPUID_8000_0007:
+			cpuid_count(*eax, *ecx, regs);
+#ifdef	__FreeBSD__
+			/*
+			 * If the host TSCs are not synchronized across
+			 * physical cpus then we cannot advertise an
+			 * invariant tsc to a vcpu.
+			 *
+			 * XXX This still falls short because the vcpu
+			 * can observe the TSC moving backwards as it
+			 * migrates across physical cpus. But at least
+			 * it should discourage the guest from using the
+			 * TSC to keep track of time.
+			 */
+			if (!smp_tsc)
+				regs[3] &= ~AMDPM_TSC_INVARIANT;
+#endif
+			break;
+
+		case CPUID_0000_0001:
+			do_cpuid(1, regs);
+
+			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
+			if (error)
+				panic("x86_emulate_cpuid: error %d "
+				      "fetching x2apic state", error);
+
+			/*
+			 * Override the APIC ID only in ebx
+			 */
+			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
+			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
+
+			/*
+			 * Don't expose VMX, SpeedStep or Thermal Monitor 2.
+			 * Advertise x2APIC capability and Hypervisor guest.
+			 */
+			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
+			regs[2] |= CPUID2_HV;
+
+			if (x2apic_state != X2APIC_DISABLED)
+				regs[2] |= CPUID2_X2APIC;
+
+			/*
+			 * Hide xsave/osxsave/avx until the FPU save/restore
+			 * issues are resolved
+			 */
+			regs[2] &= ~(CPUID2_XSAVE | CPUID2_OSXSAVE |
+				     CPUID2_AVX);
+
+			/*
+			 * Hide monitor/mwait until we know how to deal with
+			 * these instructions.
+			 */
+			regs[2] &= ~CPUID2_MON;
+
+			/*
+			 * Hide the performance and debug features.
+			 */
+			regs[2] &= ~CPUID2_PDCM;
+
+			/*
+			 * No TSC deadline support in the APIC yet
+			 */
+			regs[2] &= ~CPUID2_TSCDLT;
+
+			/*
+			 * Hide thermal monitoring
+			 */
+			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
+
+			/*
+			 * Machine check handling is done in the host.
+			 */
+			regs[3] &= ~(CPUID_MCA | CPUID_MCE);
+
+			/*
+			 * Hide the debug store capability.
+			 */
+			regs[3] &= ~CPUID_DS;
+
+			/*
+			 * Disable multi-core.
+			 */
+			regs[1] &= ~CPUID_HTT_CORES;
+			regs[3] &= ~CPUID_HTT;
+			break;
+
+		case CPUID_0000_0004:
+			/*
+			 * Deterministic cache parameters are selected by
+			 * the sub-leaf in ecx, so pass it through to CPUID.
+			 */
+			cpuid_count(*eax, *ecx, regs);
+			/*
+			 * Do not expose topology: keep only eax[9:0] (cache
+			 * type, level and attributes) and clear the two
+			 * "plus 1" encoded counts above them (threads
+			 * sharing this cache, cores per package).  A value
+			 * of 0 in both indicates 1 core per package and no
+			 * cache sharing.
+			 */
+			regs[0] &= 0x3ff;
+			break;
+
+		case CPUID_0000_0006:
+		case CPUID_0000_0007:
+		case CPUID_0000_000A:
+		case CPUID_0000_000D:
+			/* Handle the access, but report 0 for all options */
+			regs[0] = 0;
+			regs[1] = 0;
+			regs[2] = 0;
+			regs[3] = 0;
+			break;
+
+		case CPUID_0000_000B:
+			/*
+			 * Processor topology enumeration
+			 */
+			regs[0] = 0;
+			regs[1] = 0;
+			regs[2] = *ecx & 0xff;
+			regs[3] = vcpu_id;
+			break;
+
+		case 0x40000000:
+			regs[0] = CPUID_VM_HIGH;
+			bcopy(bhyve_id, &regs[1], 4);
+			bcopy(bhyve_id + 4, &regs[2], 4);
+			bcopy(bhyve_id + 8, &regs[3], 4);
+			break;
+
+		default:
+			/*
+			 * The leaf value has already been clamped so
+			 * simply pass this through, keeping count of
+			 * how many unhandled leaf values have been seen.
+			 */
+			atomic_add_long(&bhyve_xcpuids, 1);
+			cpuid_count(*eax, *ecx, regs);
+			break;
+	}
+
+	*eax = regs[0];
+	*ebx = regs[1];
+	*ecx = regs[2];
+	*edx = regs[3];
+
+	return (1);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h
new file mode 100644
index 0000000000..db2340b37b
--- /dev/null
+++ b/usr/src/uts/i86pc/io/vmm/x86.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/vmm/x86.h 255287 2013-09-06 05:16:10Z grehan $
+ */
+
+#ifndef _X86_H_
+#define	_X86_H_
+
+#define CPUID_0000_0000 (0x0)
+#define CPUID_0000_0001	(0x1)
+#define CPUID_0000_0002 (0x2)
+#define CPUID_0000_0003 (0x3)
+#define CPUID_0000_0004 (0x4)
+#define CPUID_0000_0006 (0x6)
+#define CPUID_0000_0007 (0x7)
+#define	CPUID_0000_000A	(0xA)
+#define	CPUID_0000_000B	(0xB)
+#define	CPUID_0000_000D	(0xD)
+#define CPUID_8000_0000	(0x80000000)
+#define CPUID_8000_0001	(0x80000001)
+#define CPUID_8000_0002	(0x80000002)
+#define CPUID_8000_0003	(0x80000003)
+#define CPUID_8000_0004	(0x80000004)
+#define CPUID_8000_0006	(0x80000006)
+#define CPUID_8000_0007	(0x80000007)
+#define CPUID_8000_0008	(0x80000008)
+
+/*
+ * CPUID instruction Fn0000_0001:
+ */
+#define CPUID_0000_0001_APICID_MASK			(0xff<<24)
+#define CPUID_0000_0001_APICID_SHIFT			24
+
+/*
+ * CPUID instruction Fn0000_0001 ECX
+ */
+#define CPUID_0000_0001_FEAT0_VMX	(1<<5)
+
+int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx,
+		      uint32_t *ecx, uint32_t *edx);
+
+#endif
diff --git a/usr/src/uts/i86pc/sys/Makefile b/usr/src/uts/i86pc/sys/Makefile
index 7fa9f8f6ef..80461a55c1 100644
--- a/usr/src/uts/i86pc/sys/Makefile
+++ b/usr/src/uts/i86pc/sys/Makefile
@@ -21,6 +21,7 @@
 #
 # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
+# Copyright 2017 Joyent, Inc.
 #
 # uts/i86pc/sys/Makefile
 #
@@ -36,7 +37,7 @@ include ../Makefile.i86pc
 #
 FILEMODE = 644
 
-HDRS=  \
+CHKHDRS=  \
 	acpidev.h	\
 	amd_iommu.h	\
 	asm_misc.h	\
@@ -66,6 +67,15 @@ HDRS=  \
 	xc_levels.h	\
 	xsvc.h
 
+NOCHKHDRS= \
+	vmm.h		\
+	vmm_impl.h	\
+	vmm_instruction_emul.h
+
+HDRS= \
+	$(CHKHDRS)	\
+	$(NOCHKHDRS)
+
 ROOTHDRS=	$(HDRS:%=$(USR_PSM_ISYS_DIR)/%)
 
 ROOTDIR=	$(ROOT)/usr/share/src
@@ -74,7 +84,7 @@ ROOTDIRS=	$(ROOTDIR)/uts $(ROOTDIR)/uts/$(PLATFORM)
 ROOTLINK=	$(ROOTDIR)/uts/$(PLATFORM)/sys
 LINKDEST=	../../../../platform/$(PLATFORM)/include/sys
 
-CHECKHDRS=	$(HDRS:%.h=%.check)
+CHECKHDRS=	$(CHKHDRS:%.h=%.check)
 
 .KEEP_STATE:
 
diff --git a/usr/src/uts/i86pc/sys/viona_io.h b/usr/src/uts/i86pc/sys/viona_io.h
new file mode 100644
index 0000000000..a4fb0f2527
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/viona_io.h
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2013 Pluribus Networks Inc.
+ */
+
+#ifndef	_VIONA_IO_H_
+#define	_VIONA_IO_H_
+
+#define	VNA_IOC			(('V' << 16)|('C' << 8))
+#define	VNA_IOC_CREATE		(VNA_IOC | 1)
+#define	VNA_IOC_DELETE		(VNA_IOC | 2)
+#define	VNA_IOC_RX_RING_INIT	(VNA_IOC | 3)
+#define	VNA_IOC_TX_RING_INIT	(VNA_IOC | 4)
+#define	VNA_IOC_RX_RING_RESET	(VNA_IOC | 5)
+#define	VNA_IOC_TX_RING_RESET	(VNA_IOC | 6)
+#define	VNA_IOC_RX_RING_KICK	(VNA_IOC | 7)
+#define	VNA_IOC_TX_RING_KICK	(VNA_IOC | 8)
+#define	VNA_IOC_RX_INTR_CLR	(VNA_IOC | 9)
+#define	VNA_IOC_TX_INTR_CLR	(VNA_IOC | 10)
+#define VNA_IOC_SET_FEATURES	(VNA_IOC | 11)
+#define VNA_IOC_GET_FEATURES	(VNA_IOC | 12)
+
+typedef struct vioc_create {
+	datalink_id_t	c_linkid;
+	char		c_vmname[64];
+	size_t		c_lomem_size;
+	size_t		c_himem_size;
+} vioc_create_t;
+
+typedef struct vioc_ring_init {
+	uint16_t	ri_qsize;
+	uint64_t	ri_qaddr;
+} vioc_ring_init_t;
+
+#endif	/* _VIONA_IO_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
new file mode 100644
index 0000000000..e876ce748f
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/vmm.h 273375 2014-10-21 07:10:43Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef _VMM_H_
+#define	_VMM_H_
+
+#include <x86/segments.h>
+
+enum vm_suspend_how {
+	VM_SUSPEND_NONE,
+	VM_SUSPEND_RESET,
+	VM_SUSPEND_POWEROFF,
+	VM_SUSPEND_HALT,
+	VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+	VM_REG_GUEST_RAX,
+	VM_REG_GUEST_RBX,
+	VM_REG_GUEST_RCX,
+	VM_REG_GUEST_RDX,
+	VM_REG_GUEST_RSI,
+	VM_REG_GUEST_RDI,
+	VM_REG_GUEST_RBP,
+	VM_REG_GUEST_R8,
+	VM_REG_GUEST_R9,
+	VM_REG_GUEST_R10,
+	VM_REG_GUEST_R11,
+	VM_REG_GUEST_R12,
+	VM_REG_GUEST_R13,
+	VM_REG_GUEST_R14,
+	VM_REG_GUEST_R15,
+	VM_REG_GUEST_CR0,
+	VM_REG_GUEST_CR3,
+	VM_REG_GUEST_CR4,
+	VM_REG_GUEST_DR7,
+	VM_REG_GUEST_RSP,
+	VM_REG_GUEST_RIP,
+	VM_REG_GUEST_RFLAGS,
+	VM_REG_GUEST_ES,
+	VM_REG_GUEST_CS,
+	VM_REG_GUEST_SS,
+	VM_REG_GUEST_DS,
+	VM_REG_GUEST_FS,
+	VM_REG_GUEST_GS,
+	VM_REG_GUEST_LDTR,
+	VM_REG_GUEST_TR,
+	VM_REG_GUEST_IDTR,
+	VM_REG_GUEST_GDTR,
+	VM_REG_GUEST_EFER,
+	VM_REG_GUEST_CR2,
+	VM_REG_LAST
+};
+
+enum x2apic_state {
+	X2APIC_DISABLED,
+	X2APIC_ENABLED,
+	X2APIC_STATE_LAST
+};
+
+#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
+#define	VM_INTINFO_DEL_ERRCODE	0x800
+#define	VM_INTINFO_RSVD		0x7ffff000
+#define	VM_INTINFO_VALID	0x80000000
+#define	VM_INTINFO_TYPE		0x700
+#define	VM_INTINFO_HWINTR	(0 << 8)
+#define	VM_INTINFO_NMI		(2 << 8)
+#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
+#define	VM_INTINFO_SWINTR	(4 << 8)
+
+#define	VM_MAX_NAMELEN	32
+
+#ifdef _KERNEL
+
+struct vm;
+struct vm_exception;
+struct vm_memory_segment;
+struct seg_desc;
+struct vm_exit;
+struct vm_run;
+struct vhpet;
+struct vioapic;
+struct vlapic;
+struct vm_guest_paging;
+
+typedef int	(*vmm_init_func_t)(void);
+typedef int	(*vmm_cleanup_func_t)(void);
+typedef void *	(*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
+typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip);
+typedef void	(*vmi_cleanup_func_t)(void *vmi);
+typedef int	(*vmi_mmap_set_func_t)(void *vmi, vm_paddr_t gpa,
+				       vm_paddr_t hpa, size_t length,
+				       vm_memattr_t attr, int prot,
+				       boolean_t superpages_ok);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *vmi, vm_paddr_t gpa);
+typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
+				      uint64_t *retval);
+typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
+				      uint64_t val);
+typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
+				  struct seg_desc *desc);
+typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
+				  struct seg_desc *desc);
+typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+
+struct vmm_ops {
+	vmm_init_func_t		init;		/* module wide initialization */
+	vmm_cleanup_func_t	cleanup;
+
+	vmi_init_func_t		vminit;		/* vm-specific initialization */
+	vmi_run_func_t		vmrun;
+	vmi_cleanup_func_t	vmcleanup;
+	vmi_mmap_set_func_t	vmmmap_set;
+	vmi_mmap_get_func_t	vmmmap_get;
+	vmi_get_register_t	vmgetreg;
+	vmi_set_register_t	vmsetreg;
+	vmi_get_desc_t		vmgetdesc;
+	vmi_set_desc_t		vmsetdesc;
+	vmi_get_cap_t		vmgetcap;
+	vmi_set_cap_t		vmsetcap;
+	vmi_vlapic_init		vlapic_init;
+	vmi_vlapic_cleanup	vlapic_cleanup;
+};
+
+extern struct vmm_ops vmm_ops_intel;
+extern struct vmm_ops vmm_ops_amd;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+const char *vm_name(struct vm *vm);
+int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
+#ifdef	__FreeBSD__
+int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+#endif
+int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+#ifndef	__FreeBSD__
+vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
+#endif
+void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
+		  void **cookie);
+void vm_gpa_release(void *cookie);
+int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
+	      struct vm_memory_segment *seg);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
+		    struct seg_desc *ret_desc);
+int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
+		    struct seg_desc *desc);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_inject_nmi(struct vm *vm, int vcpu);
+int vm_nmi_pending(struct vm *vm, int vcpuid);
+void vm_nmi_clear(struct vm *vm, int vcpuid);
+int vm_inject_extint(struct vm *vm, int vcpu);
+int vm_extint_pending(struct vm *vm, int vcpuid);
+void vm_extint_clear(struct vm *vm, int vcpuid);
+struct vlapic *vm_lapic(struct vm *vm, int cpu);
+struct vioapic *vm_ioapic(struct vm *vm);
+struct vhpet *vm_hpet(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
+int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
+int vm_apicid2vcpuid(struct vm *vm, int apicid);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+cpuset_t vm_active_cpus(struct vm *vm);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+
+/*
+ * Return 1 if device indicated by bus/slot/func is supposed to be a
+ * pci passthrough device.
+ *
+ * Return 0 otherwise.
+ */
+int vmm_is_pptdev(int bus, int slot, int func);
+
+void *vm_iommu_domain(struct vm *vm);
+
+enum vcpu_state {
+	VCPU_IDLE,
+	VCPU_FROZEN,
+	VCPU_RUNNING,
+	VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+    bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+struct vatpic *vm_atpic(struct vm *vm);
+struct vatpit *vm_atpit(struct vm *vm);
+
+/*
+ * Inject exception 'vme' into the guest vcpu. This function returns 0 on
+ * success and non-zero on failure.
+ *
+ * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
+ * this function directly because they enforce the trap-like or fault-like
+ * behavior of an exception.
+ *
+ * This function should only be called in the context of the thread that is
+ * executing this vcpu.
+ */
+int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For example,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+	uint64_t	gpa;		/* guest physical address */
+	size_t		len;		/* byte count (assumed; confirm) */
+	void		*hva;		/* presumably host virtual addr */
+	void		*cookie;	/* opaque to callers? TODO confirm */
+};
+
+/*
+ * Set up 'copyinfo[]' to copy to/from guest linear address space starting
+ * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
+ * a copyin or PROT_WRITE for a copyout.
+ *
+ * Returns 0 on success.
+ * Returns 1 if an exception was injected into the guest.
+ * Returns -1 otherwise.
+ *
+ * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
+ * the return value is 0. The 'copyinfo[]' resources should be freed by calling
+ * 'vm_copy_teardown()' after the copy is done.
+ */
+int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+    int num_copyinfo);
+void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    int num_copyinfo);
+void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+    void *kaddr, size_t len);
+void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+    struct vm_copyinfo *copyinfo, size_t len);
+#endif	/* _KERNEL */
+
+#define	VM_MAXCPU	16			/* maximum virtual cpus */
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+	VM_CAP_HALT_EXIT,
+	VM_CAP_MTRAP_EXIT,
+	VM_CAP_PAUSE_EXIT,
+	VM_CAP_UNRESTRICTED_GUEST,
+	VM_CAP_ENABLE_INVPCID,
+	VM_CAP_MAX
+};
+
+enum vm_intr_trigger {
+	EDGE_TRIGGER,
+	LEVEL_TRIGGER
+};
+
+/*
+ * The 'access' field has the format specified in Table 21-2 of the Intel
+ * Architecture Manual vol 3b.
+ *
+ * XXX The contents of the 'access' field are architecturally defined except
+ * bit 16 - Segment Unusable.
+ */
+struct seg_desc {
+	uint64_t	base;
+	uint32_t	limit;
+	uint32_t	access;
+};
+
+#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
+#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
+#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
+#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
+#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
+#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
+
+enum vm_cpu_mode {
+	CPU_MODE_REAL,
+	CPU_MODE_PROTECTED,
+	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
+	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+	PAGING_MODE_FLAT,
+	PAGING_MODE_32,
+	PAGING_MODE_PAE,
+	PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+	uint64_t	cr3;
+	int		cpl;
+	enum vm_cpu_mode cpu_mode;
+	enum vm_paging_mode paging_mode;
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+	uint8_t		op_byte;	/* actual opcode byte */
+	uint8_t		op_type;	/* type of operation (e.g. MOV) */
+	uint16_t	op_flags;
+};
+
+#define	VIE_INST_SIZE	15
+struct vie {
+	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
+	uint8_t		num_valid;		/* size of the instruction */
+	uint8_t		num_processed;
+
+	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
+	uint8_t		rex_w:1,		/* REX prefix */
+			rex_r:1,
+			rex_x:1,
+			rex_b:1,
+			rex_present:1,
+			repz_present:1,		/* REP/REPE/REPZ prefix */
+			repnz_present:1,	/* REPNE/REPNZ prefix */
+			opsize_override:1,	/* Operand size override */
+			addrsize_override:1,	/* Address size override */
+			segment_override:1;	/* Segment override */
+
+	uint8_t		mod:2,			/* ModRM byte */
+			reg:4,
+			rm:4;
+
+	uint8_t		ss:2,			/* SIB byte */
+			index:4,
+			base:4;
+
+	uint8_t		disp_bytes;
+	uint8_t		imm_bytes;
+
+	uint8_t		scale;
+	int		base_register;		/* VM_REG_GUEST_xyz */
+	int		index_register;		/* VM_REG_GUEST_xyz */
+	int		segment_register;	/* VM_REG_GUEST_xyz */
+
+	int64_t		displacement;		/* optional addr displacement */
+	int64_t		immediate;		/* optional immediate operand */
+
+	uint8_t		decoded;	/* set to 1 if successfully decoded */
+
+	struct vie_op	op;			/* opcode description */
+};
+
+enum vm_exitcode {
+	VM_EXITCODE_INOUT,
+	VM_EXITCODE_VMX,
+	VM_EXITCODE_BOGUS,
+	VM_EXITCODE_RDMSR,
+	VM_EXITCODE_WRMSR,
+	VM_EXITCODE_HLT,
+	VM_EXITCODE_MTRAP,
+	VM_EXITCODE_PAUSE,
+	VM_EXITCODE_PAGING,
+	VM_EXITCODE_INST_EMUL,
+	VM_EXITCODE_SPINUP_AP,
+	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
+	VM_EXITCODE_INOUT_STR,
+	VM_EXITCODE_MAX
+};
+
+struct vm_inout {
+	uint16_t	bytes:3;	/* 1 or 2 or 4 */
+	uint16_t	in:1;
+	uint16_t	string:1;
+	uint16_t	rep:1;
+	uint16_t	port;
+	uint32_t	eax;		/* valid for out */
+};
+
+struct vm_inout_str {
+	struct vm_inout	inout;		/* must be the first element */
+	struct vm_guest_paging paging;
+	uint64_t	rflags;
+	uint64_t	cr0;
+	uint64_t	index;
+	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
+	int		addrsize;
+	enum vm_reg_name seg_name;
+	struct seg_desc seg_desc;
+};
+
+struct vm_exit {
+	enum vm_exitcode	exitcode;
+	int			inst_length;	/* 0 means unknown */
+	uint64_t		rip;
+	union {
+		struct vm_inout	inout;
+		struct vm_inout_str inout_str;
+		struct {
+			uint64_t	gpa;
+			int		fault_type;
+		} paging;
+		struct {
+			uint64_t	gpa;
+			uint64_t	gla;
+			uint64_t	cs_base;
+			int		cs_d;		/* CS.D */
+			struct vm_guest_paging paging;
+			struct vie	vie;
+		} inst_emul;
+		/*
+		 * VMX specific payload. Used when there is no "better"
+		 * exitcode to represent the VM-exit.
+		 */
+		struct {
+			int		status;		/* vmx inst status */
+			/*
+			 * 'exit_reason' and 'exit_qualification' are valid
+			 * only if 'status' is zero.
+			 */
+			uint32_t	exit_reason;
+			uint64_t	exit_qualification;
+			/*
+			 * 'inst_error' and 'inst_type' are valid
+			 * only if 'status' is non-zero.
+			 */
+			int		inst_type;
+			int		inst_error;
+		} vmx;
+		struct {
+			uint32_t	code;		/* ecx value */
+			uint64_t	wval;
+		} msr;
+		struct {
+			int		vcpu;
+			uint64_t	rip;
+		} spinup_ap;
+		struct {
+			uint64_t	rflags;
+		} hlt;
+	} u;
+};
+
+/* APIs to inject faults into the guest */
+void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
+    int errcode);
+
+static __inline void
+vm_inject_ud(void *vm, int vcpuid)
+{
+	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
+}
+
+static __inline void
+vm_inject_gp(void *vm, int vcpuid)
+{
+	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
+}
+
+static __inline void
+vm_inject_ac(void *vm, int vcpuid, int errcode)
+{
+	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
+}
+
+static __inline void
+vm_inject_ss(void *vm, int vcpuid, int errcode)
+{
+	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
+}
+
+void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
+
+int vm_restart_instruction(void *vm, int vcpuid);
+
+#ifndef	__FreeBSD__
+#ifdef	_KERNEL
+extern void vmm_sol_glue_init(void);
+extern void vmm_sol_glue_cleanup(void);
+
+extern int vmm_mod_load(void);
+extern int vmm_mod_unload(void);
+#endif	/* _KERNEL */
+#endif	/* !__FreeBSD__ */
+
+#endif	/* _VMM_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
new file mode 100644
index 0000000000..3e74eb8786
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -0,0 +1,334 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/vmm_dev.h 268889 2014-07-19 20:59:08Z neel $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMM_DEV_H_
+#define	_VMM_DEV_H_
+
+#ifdef _KERNEL
+void	vmmdev_init(void);
+int	vmmdev_cleanup(void);
+#endif	/* _KERNEL */
+
+struct vm_memory_segment {
+	vm_paddr_t	gpa;	/* in */
+	size_t		len;
+	int		wired;
+};
+
+struct vm_register {
+	int		cpuid;
+	int		regnum;		/* enum vm_reg_name */
+	uint64_t	regval;
+};
+
+struct vm_seg_desc {			/* data or code segment */
+	int		cpuid;
+	int		regnum;		/* enum vm_reg_name */
+	struct seg_desc desc;
+};
+
+struct vm_run {
+	int		cpuid;
+	struct vm_exit	vm_exit;
+};
+
+struct vm_exception {
+	int		cpuid;
+	int		vector;
+	uint32_t	error_code;
+	int		error_code_valid;
+	int		restart_instruction;
+};
+
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
+struct vm_lapic_irq {
+	int		cpuid;
+	int		vector;
+};
+
+struct vm_ioapic_irq {
+	int		irq;
+};
+
+struct vm_isa_irq {
+	int		atpic_irq;
+	int		ioapic_irq;
+};
+
+struct vm_isa_irq_trigger {
+	int		atpic_irq;
+	enum vm_intr_trigger trigger;
+};
+
+struct vm_capability {
+	int		cpuid;
+	enum vm_cap_type captype;
+	int		capval;
+	int		allcpus;
+};
+
+struct vm_pptdev {
+	int		bus;
+	int		slot;
+	int		func;
+};
+
+struct vm_pptdev_mmio {
+	int		bus;
+	int		slot;
+	int		func;
+	vm_paddr_t	gpa;
+	vm_paddr_t	hpa;
+	size_t		len;
+};
+
+struct vm_pptdev_msi {
+	int		vcpu;
+	int		bus;
+	int		slot;
+	int		func;
+	int		numvec;		/* 0 means disabled */
+	uint32_t	msg;
+	uint64_t	addr;
+};
+
+struct vm_pptdev_msix {
+	int		vcpu;
+	int		bus;
+	int		slot;
+	int		func;
+	int		idx;
+	uint32_t	msg;
+	uint32_t	vector_control;
+	uint64_t	addr;
+};
+
+struct vm_nmi {
+	int		cpuid;
+};
+
+#define	MAX_VM_STATS	64
+struct vm_stats {
+	int		cpuid;				/* in */
+	int		num_entries;			/* out */
+	struct timeval	tv;
+	uint64_t	statbuf[MAX_VM_STATS];
+};
+
+struct vm_stat_desc {
+	int		index;				/* in */
+	char		desc[128];			/* out */
+};
+
+struct vm_x2apic {
+	int			cpuid;
+	enum x2apic_state	state;
+};
+
+struct vm_gpa_pte {
+	uint64_t	gpa;				/* in */
+	uint64_t	pte[4];				/* out */
+	int		ptenum;
+};
+
+struct vm_hpet_cap {
+	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
+};
+
+struct vm_activate_cpu {
+	int		vcpuid;
+};
+
+struct vm_gla2gpa {
+	int		vcpuid;		/* inputs */
+	int		prot;		/* PROT_READ or PROT_WRITE */
+	uint64_t	gla;		/* guest linear address */
+	struct vm_guest_paging paging;
+	int		fault;		/* outputs */
+	uint64_t	gpa;		/* guest physical address */
+};
+
+struct vm_cpuset {
+	int		which;	/* one of VM_*_CPUS below (assumed) */
+	int		cpusetsize;
+	cpuset_t	*cpus;
+};
+#define	VM_ACTIVE_CPUS		0
+#define	VM_SUSPENDED_CPUS	1
+
+enum {
+	/* general routines */
+	IOCNUM_ABIVERS = 0,
+	IOCNUM_RUN = 1,
+	IOCNUM_SET_CAPABILITY = 2,
+	IOCNUM_GET_CAPABILITY = 3,
+
+	/* memory apis */
+	IOCNUM_MAP_MEMORY = 10,
+	IOCNUM_GET_MEMORY_SEG = 11,
+	IOCNUM_GET_GPA_PMAP = 12,
+	IOCNUM_GLA2GPA = 13,
+
+	/* register/state accessors */
+	IOCNUM_SET_REGISTER = 20,
+	IOCNUM_GET_REGISTER = 21,
+	IOCNUM_SET_SEGMENT_DESCRIPTOR = 22,
+	IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
+
+	/* interrupt injection */
+	IOCNUM_INJECT_EXCEPTION = 30,
+	IOCNUM_LAPIC_IRQ = 31,
+	IOCNUM_INJECT_NMI = 32,
+	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
+	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
+	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
+	IOCNUM_LAPIC_LOCAL_IRQ = 37,
+	IOCNUM_IOAPIC_PINCOUNT = 38,
+	IOCNUM_RESTART_INSTRUCTION = 39,
+
+	/* PCI pass-thru */
+	IOCNUM_BIND_PPTDEV = 40,
+	IOCNUM_UNBIND_PPTDEV = 41,
+	IOCNUM_MAP_PPTDEV_MMIO = 42,
+	IOCNUM_PPTDEV_MSI = 43,
+	IOCNUM_PPTDEV_MSIX = 44,
+
+	/* statistics */
+	IOCNUM_VM_STATS = 50,
+	IOCNUM_VM_STAT_DESC = 51,
+
+	/* kernel device state */
+	IOCNUM_SET_X2APIC_STATE = 60,
+	IOCNUM_GET_X2APIC_STATE = 61,
+	IOCNUM_GET_HPET_CAPABILITIES = 62,
+
+	/* legacy interrupt injection */
+	IOCNUM_ISA_ASSERT_IRQ = 80,
+	IOCNUM_ISA_DEASSERT_IRQ = 81,
+	IOCNUM_ISA_PULSE_IRQ = 82,
+	IOCNUM_ISA_SET_IRQ_TRIGGER = 83,
+
+	/* vcpu activation, vm_cpuset */
+	IOCNUM_ACTIVATE_CPU = 90,
+	IOCNUM_GET_CPUSET = 91,
+};
+
+#define	VM_RUN		\
+	_IOWR('v', IOCNUM_RUN, struct vm_run)
+#define	VM_MAP_MEMORY	\
+	_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
+#define	VM_GET_MEMORY_SEG \
+	_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define	VM_SET_REGISTER \
+	_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define	VM_GET_REGISTER \
+	_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define	VM_SET_SEGMENT_DESCRIPTOR \
+	_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define	VM_GET_SEGMENT_DESCRIPTOR \
+	_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
+#define	VM_INJECT_EXCEPTION	\
+	_IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define	VM_LAPIC_IRQ		\
+	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_LOCAL_IRQ	\
+	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI		\
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
+#define	VM_IOAPIC_ASSERT_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_DEASSERT_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_PULSE_IRQ	\
+	_IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq)
+#define	VM_IOAPIC_PINCOUNT	\
+	_IOR('v', IOCNUM_IOAPIC_PINCOUNT, int)
+#define	VM_ISA_ASSERT_IRQ	\
+	_IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq)
+#define	VM_ISA_DEASSERT_IRQ	\
+	_IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq)
+#define	VM_ISA_PULSE_IRQ	\
+	_IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq)
+#define	VM_ISA_SET_IRQ_TRIGGER	\
+	_IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger)
+#define	VM_SET_CAPABILITY \
+	_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define	VM_GET_CAPABILITY \
+	_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define	VM_BIND_PPTDEV \
+	_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
+#define	VM_UNBIND_PPTDEV \
+	_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
+#define	VM_MAP_PPTDEV_MMIO \
+	_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
+#define	VM_PPTDEV_MSI \
+	_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
+#define	VM_PPTDEV_MSIX \
+	_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
+#define	VM_INJECT_NMI \
+	_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
+#ifdef	__FreeBSD__
+#define	VM_STATS \
+	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#endif	/* __FreeBSD__ */
+#define	VM_STAT_DESC \
+	_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define	VM_SET_X2APIC_STATE \
+	_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
+#define	VM_GET_X2APIC_STATE \
+	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
+#define	VM_GET_HPET_CAPABILITIES \
+	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
+#define	VM_GET_GPA_PMAP \
+	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
+#define	VM_GLA2GPA	\
+	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define	VM_ACTIVATE_CPU	\
+	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define	VM_GET_CPUS	\
+	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define	VM_RESTART_INSTRUCTION \
+	_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
+#endif	/* _VMM_DEV_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_impl.h b/usr/src/uts/i86pc/sys/vmm_impl.h
new file mode 100644
index 0000000000..1602fa286d
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_impl.h
@@ -0,0 +1,86 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2014 Pluribus Networks Inc.
+ */
+
+#ifndef _VMM_IMPL_H_
+#define _VMM_IMPL_H_
+
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/varargs.h>
+
+/*
+ * /dev names:
+ *      /dev/vmmctl         - control device
+ *      /dev/vmm/<name>     - vm devices
+ */
+#define	VMM_DRIVER_NAME		"vmm"
+
+#define	VMM_CTL_MINOR_NODE	"ctl"
+#define	VMM_CTL_MINOR_NAME	VMM_DRIVER_NAME VMM_CTL_MINOR_NODE
+#define	VMM_CTL_MINOR		0
+
+#define	VMM_IOC_BASE		(('V' << 16) | ('M' << 8))
+
+#define	VMM_CREATE_VM		(VMM_IOC_BASE | 0x01)
+#define	VMM_DESTROY_VM		(VMM_IOC_BASE | 0x02)
+
+struct vmm_ioctl {
+	char vmm_name[VM_MAX_NAMELEN];
+};
+
+#ifdef	_KERNEL
+struct vmm_softc {
+	boolean_t			open;
+	minor_t				minor;
+	struct vm			*vm;
+	char				name[VM_MAX_NAMELEN];
+	SLIST_ENTRY(vmm_softc)		link;	/* list linkage */
+};
+#endif	/* _KERNEL */
+
+/*
+ * VMM trace ring buffer constants
+ */
+#define	VMM_DMSG_RING_SIZE		0x100000	/* 1MB */
+#define	VMM_DMSG_BUF_SIZE		256
+
+/*
+ * VMM trace ring buffer content
+ */
+typedef struct vmm_trace_dmsg {
+	timespec_t		timestamp;
+	char			buf[VMM_DMSG_BUF_SIZE];
+	struct vmm_trace_dmsg	*next;
+} vmm_trace_dmsg_t;
+
+/*
+ * VMM trace ring buffer header
+ */
+typedef struct vmm_trace_rbuf {
+	kmutex_t		lock;		/* lock to avoid clutter */
+	int			looped;		/* completed ring */
+	int			allocfailed;	/* dmsg mem alloc failed */
+	size_t			size;		/* current size */
+	size_t			maxsize;	/* max size */
+	vmm_trace_dmsg_t	*dmsgh;		/* messages head */
+	vmm_trace_dmsg_t	*dmsgp;		/* ptr to last message */
+} vmm_trace_rbuf_t;
+
+/*
+ * VMM trace ring buffer interfaces
+ */
+void vmm_trace_log(const char *fmt, ...);
+
+#endif	/* _VMM_IMPL_H_ */
diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
new file mode 100644
index 0000000000..8138890a2c
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/vmm_instruction_emul.h 276479 2014-12-31 20:31:32Z dim $
+ */
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2015 Pluribus Networks Inc.
+ */
+
+#ifndef	_VMM_INSTRUCTION_EMUL_H_
+#define	_VMM_INSTRUCTION_EMUL_H_
+
+#include <sys/mman.h>
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+				 uint64_t *rval, int rsize, void *arg);
+
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+				  uint64_t wval, int wsize, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * The type behind 'void *vm' depends on the caller: it should be
+ * 'struct vm *' when called from kernel context and 'struct vmctx *'
+ * when called from user context.
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t mrr,
+    mem_region_write_t mrw, void *mrarg);
+
+int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+    uint64_t val, int size);
+
+/*
+ * Returns 1 if an alignment check exception should be injected and 0 otherwise.
+ */
+int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
+    uint64_t rflags, uint64_t gla);
+
+/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
+int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+
+uint64_t vie_size2mask(int size);
+
+int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
+    uint64_t *gla);
+
+#ifdef _KERNEL
+/*
+ * APIs to fetch and decode the instruction from nested page fault handler.
+ *
+ * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
+ */
+int vmm_fetch_instruction(struct vm *vm, int cpuid,
+			  struct vm_guest_paging *guest_paging,
+			  uint64_t rip, int inst_length, struct vie *vie);
+
+/*
+ * Translate the guest linear address 'gla' to a guest physical address.
+ *
+ * Returns 0 on success and '*gpa' contains the result of the translation.
+ * Returns 1 if an exception was injected into the guest.
+ * Returns -1 otherwise.
+ */
+int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa);
+
+void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
+
+/*
+ * Decode the instruction fetched into 'vie' so it can be emulated.
+ *
+ * 'gla' is the guest linear address provided by the hardware assist
+ * that caused the nested page table fault. It is used to verify that
+ * the software instruction decoding is in agreement with the hardware.
+ *
+ * Some hardware assists do not provide the 'gla' to the hypervisor.
+ * To skip the 'gla' verification for this or any other reason pass
+ * in VIE_INVALID_GLA instead.
+ */
+#define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
+int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
+			   enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
+#endif	/* _KERNEL */
+
+#endif	/* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/usr/src/uts/i86pc/viona/Makefile b/usr/src/uts/i86pc/viona/Makefile
new file mode 100644
index 0000000000..c2b8bd8dcf
--- /dev/null
+++ b/usr/src/uts/i86pc/viona/Makefile
@@ -0,0 +1,72 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE		= viona
+OBJECTS		= $(VIONA_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(VIONA_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/i86pc/io/viona
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Overrides (-N records the kernel modules this driver depends on)
+#
+CFLAGS		+= $(CCVERBOSE)
+LDFLAGS		+= -dy -Ndrv/dld -Nmisc/mac -Nmisc/dls -Ndrv/vmm
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile
new file mode 100644
index 0000000000..b3ab735781
--- /dev/null
+++ b/usr/src/uts/i86pc/vmm/Makefile
@@ -0,0 +1,94 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2013 Pluribus Networks Inc.
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= vmm
+OBJECTS		= $(VMM_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(VMM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/i86pc/io/vmm
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#	Overrides and additions
+#
+
+# These sources only compile with gcc.  Work around a confluence of cruft
+# regarding dmake and shadow compilation by neutering the sun compiler.
+amd64_CC	= $(ONBLD_TOOLS)/bin/$(MACH)/cw -_gcc
+CFLAGS 		+= -_cc=-xdryrun
+
+ALL_BUILDS	= $(ALL_BUILDSONLY64)
+DEF_BUILDS	= $(DEF_BUILDSONLY64)
+PRE_INC_PATH	= -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64 \
+	-I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64
+INC_PATH	+= -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io
+AS_INC_PATH	+= -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
+
+CFLAGS		+= -_gcc=-Wimplicit-function-declaration
+
+OFFSETS_SRC	= $(CONF_SRCDIR)/offsets.in
+ASSYM_H		= $(OBJS_DIR)/vmx_assym.h
+
+CLEANFILES	+= $(ASSYM_H)
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
+
+$(OBJECTS): $(ASSYM_H)
+
+$(ASSYM_H): $(OFFSETS_SRC) $(GENASSYM)
+	$(OFFSETS_CREATE) -I$(UTSBASE)/i86pc/io/vmm < $(OFFSETS_SRC) >$@
author	Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>	2017-09-26 12:19:41 +0200
committer	Patrick Mooney <pmooney@pfmooney.com>	2018-02-22 15:57:20 +0000
commit	43f85cd4da7e7860e4d240f14e6b5dd45700c7b6 (patch)
tree	fcf7f982094418a90da65ed16d48689bbc72ca5b
parent	44db5f1c904128c3fd7c7ec37e9d894a10e93f8c (diff)
download	illumos-joyent-43f85cd4da7e7860e4d240f14e6b5dd45700c7b6.tar.gz