summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2020-06-02 11:42:39 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2020-06-02 11:42:39 +0000
commitacc9d73392734e746e249c426a1558b911389dfb (patch)
treed7fd81266a9a2c842c257909e78eed5e1dc4e4cd /usr/src
parent5cd90a78a9b9930e642ce913b44316f5a4ae6632 (diff)
parentb39b008f8a57ea7ddfd0f69b24529deba7c25ae1 (diff)
downloadillumos-joyent-acc9d73392734e746e249c426a1558b911389dfb.tar.gz
[illumos-gate merge]
commit b39b008f8a57ea7ddfd0f69b24529deba7c25ae1 12783 async unlinked drain races with ZFS unmount commit 94bce860c16a04a3d8eaeaa18807723c026e23b5 12559 zs: NULL pointer errors commit fdf1a8710a94b4953ba782ed5bdc1549b0356ddc 12804 sysevent.h: C++11 requires a space between string literal and macro commit e86372a01d2d16a5dd4a64e144ed978ba17fe7dd 12668 ZFS support for vectorized algorithms on x86 (initial support) commit 82049ff560eed6fbdf4cf222d894467f5809f9b3 12119 ndmpd: cast between incompatible function types commit 8e3a263e496f0b896b62a8ebe101c0b2b56c9a15 12750 bhyve should not hold HMA registration with no running VMs commit d0b3c59ba652f183eeec1414dd9fbdc56bf05cc8 12775 reorganize bhyve contrib headers commit e213fe2d75a0e47cc0b3f67c473e24dfa9304e0b 12545 ixgbe: IXGBE_LE32_TO_CPUS macro is missing assignment
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/Makefile.master2
-rw-r--r--usr/src/cmd/Makefile3
-rw-r--r--usr/src/cmd/bhyve/Makefile8
-rw-r--r--usr/src/cmd/bhyve/test/Makefile.com8
-rw-r--r--usr/src/cmd/bhyvectl/Makefile6
-rw-r--r--usr/src/cmd/ndmpd/include/tlm.h12
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd.h6
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_common.h6
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_data.c15
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_mover.c52
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_tar.c37
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_tar3.c50
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c89
-rw-r--r--usr/src/cmd/ndmpd/tlm/tlm_proto.h6
-rw-r--r--usr/src/cmd/ndmpd/tlm/tlm_restore_writer.c21
-rw-r--r--usr/src/cmd/raidz_test/Makefile61
-rw-r--r--usr/src/cmd/raidz_test/raidz_bench.c228
-rw-r--r--usr/src/cmd/raidz_test/raidz_test.c761
-rw-r--r--usr/src/cmd/raidz_test/raidz_test.h117
-rw-r--r--usr/src/compat/bhyve/README9
-rw-r--r--usr/src/compat/bhyve/amd64/machine/asmacros.h (renamed from usr/src/compat/freebsd/amd64/machine/asmacros.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/atomic.h (renamed from usr/src/compat/freebsd/amd64/machine/atomic.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/clock.h (renamed from usr/src/compat/freebsd/amd64/machine/clock.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/cpu.h (renamed from usr/src/compat/freebsd/amd64/machine/cpu.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/cpufunc.h (renamed from usr/src/compat/freebsd/amd64/machine/cpufunc.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/fpu.h (renamed from usr/src/compat/freebsd/amd64/machine/fpu.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/iodev.h (renamed from usr/src/compat/freebsd/amd64/machine/iodev.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/md_var.h (renamed from usr/src/compat/freebsd/amd64/machine/md_var.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/param.h (renamed from usr/src/compat/freebsd/amd64/machine/param.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/pcb.h (renamed from usr/src/compat/freebsd/amd64/machine/pcb.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/pmap.h (renamed from usr/src/compat/freebsd/amd64/machine/pmap.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/reg.h (renamed from usr/src/compat/freebsd/amd64/machine/reg.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/segments.h (renamed from usr/src/compat/freebsd/amd64/machine/segments.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/smp.h (renamed from usr/src/compat/freebsd/amd64/machine/smp.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/specialreg.h (renamed from usr/src/compat/freebsd/amd64/machine/specialreg.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/vmm.h (renamed from usr/src/compat/freebsd/amd64/machine/vmm.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/vmm_dev.h (renamed from usr/src/compat/freebsd/amd64/machine/vmm_dev.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/vmm_instruction_emul.h (renamed from usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h)0
-rw-r--r--usr/src/compat/bhyve/amd64/machine/vmparam.h (renamed from usr/src/compat/freebsd/amd64/machine/vmparam.h)0
-rw-r--r--usr/src/compat/bhyve/contrib/dev/acpica/include/acpi.h (renamed from usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h)0
-rw-r--r--usr/src/compat/bhyve/dev/pci/pcivar.h (renamed from usr/src/compat/freebsd/dev/pci/pcivar.h)0
-rw-r--r--usr/src/compat/bhyve/err.h (renamed from usr/src/compat/freebsd/err.h)0
-rw-r--r--usr/src/compat/bhyve/libutil.h (renamed from usr/src/compat/freebsd/libutil.h)0
-rw-r--r--usr/src/compat/bhyve/net/ethernet.h (renamed from usr/src/compat/freebsd/net/ethernet.h)0
-rw-r--r--usr/src/compat/bhyve/net/ieee_oui.h (renamed from usr/src/compat/freebsd/net/ieee_oui.h)0
-rw-r--r--usr/src/compat/bhyve/paths.h (renamed from usr/src/compat/freebsd/paths.h)0
-rw-r--r--usr/src/compat/bhyve/pthread_np.h (renamed from usr/src/compat/freebsd/pthread_np.h)0
-rw-r--r--usr/src/compat/bhyve/string.h (renamed from usr/src/compat/freebsd/string.h)0
-rw-r--r--usr/src/compat/bhyve/strings.h (renamed from usr/src/compat/freebsd/strings.h)0
-rw-r--r--usr/src/compat/bhyve/sys/_cpuset.h (renamed from usr/src/compat/freebsd/sys/_cpuset.h)0
-rw-r--r--usr/src/compat/bhyve/sys/_iovec.h (renamed from usr/src/compat/freebsd/sys/_iovec.h)0
-rw-r--r--usr/src/compat/bhyve/sys/_pthreadtypes.h (renamed from usr/src/compat/freebsd/sys/_pthreadtypes.h)0
-rw-r--r--usr/src/compat/bhyve/sys/_types.h (renamed from usr/src/compat/freebsd/sys/_types.h)0
-rw-r--r--usr/src/compat/bhyve/sys/bus.h (renamed from usr/src/compat/freebsd/sys/bus.h)0
-rw-r--r--usr/src/compat/bhyve/sys/callout.h (renamed from usr/src/compat/freebsd/sys/callout.h)0
-rw-r--r--usr/src/compat/bhyve/sys/cdefs.h (renamed from usr/src/compat/freebsd/sys/cdefs.h)0
-rw-r--r--usr/src/compat/bhyve/sys/clock.h (renamed from usr/src/compat/freebsd/sys/clock.h)0
-rw-r--r--usr/src/compat/bhyve/sys/cpuset.h (renamed from usr/src/compat/freebsd/sys/cpuset.h)0
-rw-r--r--usr/src/compat/bhyve/sys/disk.h (renamed from usr/src/compat/freebsd/sys/disk.h)0
-rw-r--r--usr/src/compat/bhyve/sys/endian.h (renamed from usr/src/compat/freebsd/sys/endian.h)0
-rw-r--r--usr/src/compat/bhyve/sys/errno.h (renamed from usr/src/compat/freebsd/sys/errno.h)0
-rw-r--r--usr/src/compat/bhyve/sys/eventhandler.h (renamed from usr/src/compat/freebsd/sys/eventhandler.h)0
-rw-r--r--usr/src/compat/bhyve/sys/fcntl.h (renamed from usr/src/compat/freebsd/sys/fcntl.h)0
-rw-r--r--usr/src/compat/bhyve/sys/ioctl.h (renamed from usr/src/compat/freebsd/sys/ioctl.h)0
-rw-r--r--usr/src/compat/bhyve/sys/kernel.h (renamed from usr/src/compat/freebsd/sys/kernel.h)0
-rw-r--r--usr/src/compat/bhyve/sys/ktr.h (renamed from usr/src/compat/freebsd/sys/ktr.h)0
-rw-r--r--usr/src/compat/bhyve/sys/libkern.h (renamed from usr/src/compat/freebsd/sys/libkern.h)0
-rw-r--r--usr/src/compat/bhyve/sys/limits.h (renamed from usr/src/compat/freebsd/sys/limits.h)0
-rw-r--r--usr/src/compat/bhyve/sys/lock.h (renamed from usr/src/compat/freebsd/sys/lock.h)0
-rw-r--r--usr/src/compat/bhyve/sys/malloc.h (renamed from usr/src/compat/freebsd/sys/malloc.h)0
-rw-r--r--usr/src/compat/bhyve/sys/module.h (renamed from usr/src/compat/freebsd/sys/module.h)0
-rw-r--r--usr/src/compat/bhyve/sys/mutex.h (renamed from usr/src/compat/freebsd/sys/mutex.h)0
-rw-r--r--usr/src/compat/bhyve/sys/param.h (renamed from usr/src/compat/freebsd/sys/param.h)0
-rw-r--r--usr/src/compat/bhyve/sys/pcpu.h (renamed from usr/src/compat/freebsd/sys/pcpu.h)0
-rw-r--r--usr/src/compat/bhyve/sys/sched.h (renamed from usr/src/compat/freebsd/sys/sched.h)0
-rw-r--r--usr/src/compat/bhyve/sys/sdt.h (renamed from usr/src/compat/freebsd/sys/sdt.h)0
-rw-r--r--usr/src/compat/bhyve/sys/select.h (renamed from usr/src/compat/freebsd/sys/select.h)0
-rw-r--r--usr/src/compat/bhyve/sys/sglist.h (renamed from usr/src/compat/freebsd/sys/sglist.h)0
-rw-r--r--usr/src/compat/bhyve/sys/smp.h (renamed from usr/src/compat/freebsd/sys/smp.h)0
-rw-r--r--usr/src/compat/bhyve/sys/socket.h (renamed from usr/src/compat/freebsd/sys/socket.h)0
-rw-r--r--usr/src/compat/bhyve/sys/sysctl.h (renamed from usr/src/compat/freebsd/sys/sysctl.h)0
-rw-r--r--usr/src/compat/bhyve/sys/systm.h (renamed from usr/src/compat/freebsd/sys/systm.h)0
-rw-r--r--usr/src/compat/bhyve/sys/time.h (renamed from usr/src/compat/freebsd/sys/time.h)0
-rw-r--r--usr/src/compat/bhyve/sys/types.h (renamed from usr/src/compat/freebsd/sys/types.h)0
-rw-r--r--usr/src/compat/bhyve/sys/uio.h (renamed from usr/src/compat/freebsd/sys/uio.h)0
-rw-r--r--usr/src/compat/bhyve/termios.h (renamed from usr/src/compat/freebsd/termios.h)0
-rw-r--r--usr/src/compat/bhyve/unistd.h (renamed from usr/src/compat/freebsd/unistd.h)0
-rw-r--r--usr/src/compat/bhyve/uuid.h (renamed from usr/src/compat/freebsd/uuid.h)0
-rw-r--r--usr/src/compat/bhyve/vm/vm.h (renamed from usr/src/compat/freebsd/vm/vm.h)0
-rw-r--r--usr/src/compat/bhyve/vm/vm_param.h (renamed from usr/src/compat/freebsd/vm/vm_param.h)0
-rw-r--r--usr/src/compat/bhyve/x86/_types.h (renamed from usr/src/compat/freebsd/x86/_types.h)0
-rw-r--r--usr/src/compat/bhyve/x86/segments.h (renamed from usr/src/compat/freebsd/x86/segments.h)0
-rw-r--r--usr/src/contrib/bhyve/README12
-rw-r--r--usr/src/contrib/bhyve/amd64/machine/_types.h6
-rw-r--r--usr/src/contrib/bhyve/amd64/machine/pmap.h455
-rw-r--r--usr/src/contrib/bhyve/amd64/machine/psl.h6
-rw-r--r--usr/src/contrib/bhyve/amd64/machine/timerreg.h54
-rw-r--r--usr/src/contrib/bhyve/amd64/machine/vm.h45
-rw-r--r--usr/src/contrib/bhyve/dev/acpica/acpi_hpet.h67
-rw-r--r--usr/src/contrib/bhyve/dev/ic/i8253reg.h78
-rw-r--r--usr/src/contrib/bhyve/dev/ic/i8259.h86
-rw-r--r--usr/src/contrib/bhyve/dev/ic/ns16550.h240
-rw-r--r--usr/src/contrib/bhyve/dev/io/iodev.h44
-rw-r--r--usr/src/contrib/bhyve/dev/mii/mii.h239
-rw-r--r--usr/src/contrib/bhyve/dev/nvme/nvme.h1511
-rw-r--r--usr/src/contrib/bhyve/dev/pci/pcireg.h922
-rw-r--r--usr/src/contrib/bhyve/dev/usb/controller/xhcireg.h224
-rw-r--r--usr/src/contrib/bhyve/dev/usb/usb.h801
-rw-r--r--usr/src/contrib/bhyve/dev/usb/usb_endian.h121
-rw-r--r--usr/src/contrib/bhyve/dev/usb/usb_freebsd.h101
-rw-r--r--usr/src/contrib/bhyve/dev/usb/usbdi.h657
-rw-r--r--usr/src/contrib/bhyve/isa/isareg.h70
-rw-r--r--usr/src/contrib/bhyve/isa/rtc.h125
-rw-r--r--usr/src/contrib/bhyve/lib/libutil/expand_number.c93
-rw-r--r--usr/src/contrib/bhyve/lib/libutil/humanize_number.c179
-rw-r--r--usr/src/contrib/bhyve/sys/ata.h1015
-rw-r--r--usr/src/contrib/bhyve/sys/pciio.h146
-rw-r--r--usr/src/contrib/bhyve/sys/queue.h787
-rw-r--r--usr/src/contrib/bhyve/sys/tree.h765
-rw-r--r--usr/src/contrib/bhyve/x86/apicreg.h455
-rw-r--r--usr/src/contrib/bhyve/x86/mptable.h204
-rw-r--r--usr/src/contrib/bhyve/x86/psl.h92
-rw-r--r--usr/src/contrib/bhyve/x86/segments.h274
-rw-r--r--usr/src/contrib/bhyve/x86/specialreg.h1143
-rw-r--r--usr/src/lib/libvmm/Makefile.com4
-rw-r--r--usr/src/lib/libvmmapi/Makefile.com4
-rw-r--r--usr/src/lib/libvmmapi/amd64/Makefile2
-rw-r--r--usr/src/pkg/manifests/system-file-system-zfs-tests.mf2
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf5
-rw-r--r--usr/src/req.flg2
-rw-r--r--usr/src/test/zfs-tests/include/commands.cfg3
-rw-r--r--usr/src/test/zfs-tests/runfiles/delphix.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/omnios.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/openindiana.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/smartos.run3
-rw-r--r--usr/src/test/zfs-tests/tests/functional/raidz/Makefile21
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh30
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh38
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh41
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/setup.ksh32
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/fs/zfs/abd.c181
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/abd.h9
-rw-r--r--usr/src/uts/common/fs/zfs/sys/simd.h40
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_raidz.h65
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h351
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz.c265
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math.c571
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h1477
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c337
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c39
-rw-r--r--usr/src/uts/common/io/ixgbe/ixgbe_osdep.h2
-rw-r--r--usr/src/uts/common/sys/sysevent.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c99
-rw-r--r--usr/src/uts/i86pc/ppt/Makefile4
-rw-r--r--usr/src/uts/i86pc/vmm/Makefile4
-rw-r--r--usr/src/uts/req.flg4
-rw-r--r--usr/src/uts/sun/io/zs_async.c143
-rw-r--r--usr/src/uts/sun/io/zs_common.c14
-rw-r--r--usr/src/uts/sun4u/zs/Makefile22
161 files changed, 15861 insertions, 483 deletions
diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master
index 7c036ac21a..7dc553209d 100644
--- a/usr/src/Makefile.master
+++ b/usr/src/Makefile.master
@@ -61,7 +61,7 @@ NATIVE_ADJUNCT= /usr
# Compatibility code for FreeBSD etc.
#
COMPAT= $(SRC)/compat
-CONTRIB= $(SRC)/../contrib
+CONTRIB= $(SRC)/contrib
#
# RELEASE_BUILD should be cleared for final release builds.
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index dd035124e9..acb3717b0a 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,7 +21,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -343,6 +343,7 @@ COMMON_SUBDIRS= \
pwconv \
pwd \
raidctl \
+ raidz_test \
ramdiskadm \
rcap \
rcm_daemon \
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index ea4537596a..b766cb8357 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -111,10 +111,10 @@ MEVENT_TEST_OBJS = $(MEVENT_TEST_SRCS:.c=.o)
CLEANFILES = $(PROG) $(ZHYVE_PROG) $(MEVENT_TEST_PROG) $(MEVENT_TEST_OBJS)
CFLAGS += $(CCVERBOSE) -_gcc=-Wimplicit-function-declaration -_gcc=-Wno-parentheses
-CPPFLAGS = -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
- -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64 \
- -I$(CONTRIB)/freebsd/dev/usb/controller \
- -I$(CONTRIB)/freebsd/dev/mii \
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
+ -I$(CONTRIB)/bhyve/dev/usb/controller \
+ -I$(CONTRIB)/bhyve/dev/mii \
-I$(SRC)/uts/common/io/e1000api \
$(CPPFLAGS.master) \
-I$(ROOT)/usr/platform/i86pc/include \
diff --git a/usr/src/cmd/bhyve/test/Makefile.com b/usr/src/cmd/bhyve/test/Makefile.com
index 2bd67140f5..aeba956e12 100644
--- a/usr/src/cmd/bhyve/test/Makefile.com
+++ b/usr/src/cmd/bhyve/test/Makefile.com
@@ -28,9 +28,9 @@ CFLAGS += $(CCVERBOSE) -_gcc=-Wimplicit-function-declaration \
CFLAGS64 += $(CCVERBOSE) -_gcc=-Wimplicit-function-declaration \
-_gcc=-Wno-parentheses
CPPFLAGS = -I$(SRC)/cmd/bhyve \
- -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
- -I$(CONTRIB)/freebsd/dev/usb/controller \
- -I$(CONTRIB)/freebsd/dev/mii \
+ -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(CONTRIB)/bhyve/dev/usb/controller \
+ -I$(CONTRIB)/bhyve/dev/mii \
$(CPPFLAGS.master) \
-I$(ROOT)/usr/platform/i86pc/include \
-I$(SRC)/uts/i86pc/io/vmm \
@@ -38,7 +38,7 @@ CPPFLAGS = -I$(SRC)/cmd/bhyve \
-I$(SRC)/uts/i86pc \
-I$(SRC)/lib/libdladm/common \
-DWITHOUT_CAPSICUM
-CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+CPPFLAGS += -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64
SMOFF += all_func_returns
diff --git a/usr/src/cmd/bhyvectl/Makefile b/usr/src/cmd/bhyvectl/Makefile
index 72b3cd913f..c35b590196 100644
--- a/usr/src/cmd/bhyvectl/Makefile
+++ b/usr/src/cmd/bhyvectl/Makefile
@@ -28,8 +28,8 @@ CLOBBERFILES += $(ROOTUSRSBINPROG)
.KEEP_STATE:
CFLAGS += $(CCVERBOSE)
-CPPFLAGS = -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
- -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64 \
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
$(CPPFLAGS.master) \
-I$(ROOT)/usr/platform/i86pc/include \
-I$(SRC)/uts/i86pc/io/vmm
@@ -53,6 +53,6 @@ clean:
include ../Makefile.targ
-%.o: $(CONTRIB)/freebsd/lib/libutil/%.c
+%.o: $(CONTRIB)/bhyve/lib/libutil/%.c
$(COMPILE.c) -o $@ $<
$(POST_PROCESS_O)
diff --git a/usr/src/cmd/ndmpd/include/tlm.h b/usr/src/cmd/ndmpd/include/tlm.h
index f382d2eca3..8c667a334b 100644
--- a/usr/src/cmd/ndmpd/include/tlm.h
+++ b/usr/src/cmd/ndmpd/include/tlm.h
@@ -11,10 +11,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -113,9 +113,9 @@ typedef struct fs_fhandle {
} fs_fhandle_t;
typedef struct scsi_link {
- struct scsi_link *sl_next;
- struct scsi_link *sl_prev;
- struct scsi_adapter *sl_sa;
+ struct scsi_link *sl_next;
+ struct scsi_link *sl_prev;
+ struct scsi_adapter *sl_sa;
unsigned int sl_sid;
unsigned int sl_lun;
unsigned int sl_requested_max_active;
@@ -427,7 +427,7 @@ typedef struct tm_ops {
int (*tm_putfile)();
int (*tm_putdir)();
int (*tm_putvol)(); /* Reserved */
- int (*tm_getfile)();
+ void * (*tm_getfile)(void *);
int (*tm_getdir)();
int (*tm_getvol)(); /* Reserved */
} tm_ops_t;
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd.h b/usr/src/cmd/ndmpd/ndmp/ndmpd.h
index 4f9fcd9a09..929a833c1c 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd.h
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd.h
@@ -11,10 +11,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -998,7 +998,7 @@ extern int ndmp_backup_extract_params(ndmpd_session_t *,
ndmpd_module_params_t *);
extern int ndmp_restore_extract_params(ndmpd_session_t *,
ndmpd_module_params_t *);
-extern int ndmp_tar_reader(ndmp_tar_reader_arg_t *);
+extern void *ndmp_tar_reader(void *);
extern int tape_open(char *, int);
extern int tape_is_at_bot(ndmpd_session_t *);
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_common.h b/usr/src/cmd/ndmpd/ndmp/ndmpd_common.h
index 846f64e66f..383d5a673c 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_common.h
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_common.h
@@ -10,10 +10,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -281,6 +281,6 @@ extern int ndmp_log_msg_id;
/*
* Module function prototypes.
*/
-typedef int module_start_func_t(void *);
+typedef void *module_start_func_t(void *);
typedef int module_abort_func_t(void *);
#endif /* _NDMP_COMMON_H */
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_data.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_data.c
index 9350cb05f7..2713f8bec2 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_data.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_data.c
@@ -11,10 +11,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -1466,7 +1466,7 @@ ndmpd_tar_start_backup_v3(ndmpd_session_t *session, char *bu_type,
* client request here.
*/
err = pthread_create(NULL, NULL,
- (funct_t)session->ns_data.dd_module.dm_start_func,
+ session->ns_data.dd_module.dm_start_func,
params);
if (err != 0) {
NDMP_LOG(LOG_ERR, "Can't start backup session.");
@@ -1595,7 +1595,7 @@ ndmpd_tar_start_recover_v3(ndmpd_session_t *session,
* client request here.
*/
err = pthread_create(NULL, NULL,
- (funct_t)session->ns_data.dd_module.dm_start_func,
+ session->ns_data.dd_module.dm_start_func,
params);
if (err != 0) {
@@ -1692,7 +1692,8 @@ ndmpd_zfs_start_op(ndmpd_session_t *session, ndmp_pval *env_val,
}
err = pthread_create(&tid, NULL,
- (funct_t)session->ns_data.dd_module.dm_start_func, ndmpd_zfs_args);
+ session->ns_data.dd_module.dm_start_func,
+ ndmpd_zfs_args);
if (err) {
NDMP_LOG(LOG_ERR, "Can't start %s session (errno %d)",
@@ -2185,7 +2186,7 @@ ndmpd_tar_start_backup_v2(ndmpd_session_t *session, char *bu_type,
* client request here.
*/
(void) pthread_create(NULL, NULL,
- (funct_t)session->ns_data.dd_module.dm_start_func,
+ session->ns_data.dd_module.dm_start_func,
params);
return (NDMP_NO_ERR);
@@ -2322,7 +2323,7 @@ ndmpd_tar_start_recover_v2(ndmpd_session_t *session, char *bu_type,
* client request here.
*/
(void) pthread_create(NULL, NULL,
- (funct_t)session->ns_data.dd_module.dm_start_func,
+ session->ns_data.dd_module.dm_start_func,
params);
return (NDMP_NO_ERR);
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_mover.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_mover.c
index 2f7f311e75..8428679e93 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_mover.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_mover.c
@@ -10,10 +10,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -2503,8 +2503,8 @@ mover_tape_read_one_buf(ndmpd_session_t *session, tlm_buffer_t *buf)
* 0: on success
* -1: otherwise
*/
-int
-mover_tape_reader(ndmpd_session_t *session)
+void *
+mover_tape_reader(void *ptr)
{
int bidx; /* buffer index */
int rv;
@@ -2513,10 +2513,11 @@ mover_tape_reader(ndmpd_session_t *session)
tlm_buffers_t *bufs;
tlm_cmd_t *lcmd; /* Local command */
tlm_commands_t *cmds; /* Commands structure */
+ ndmpd_session_t *session = ptr;
if ((nlp = ndmp_get_nlp(session)) == NULL) {
NDMP_LOG(LOG_DEBUG, "nlp == NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
cmds = &nlp->nlp_cmds;
@@ -2582,7 +2583,7 @@ mover_tape_reader(ndmpd_session_t *session)
cmds->tcs_reader_count--;
lcmd->tc_ref--;
lcmd->tc_writer = TLM_STOP;
- return (0);
+ return (NULL);
}
@@ -2652,8 +2653,8 @@ mover_socket_write_one_buf(ndmpd_session_t *session, tlm_buffer_t *buf)
* 0: on success
* -1: otherwise
*/
-int
-mover_socket_writer(ndmpd_session_t *session)
+void *
+mover_socket_writer(void *ptr)
{
int bidx; /* buffer index */
ndmp_lbr_params_t *nlp;
@@ -2661,10 +2662,11 @@ mover_socket_writer(ndmpd_session_t *session)
tlm_buffers_t *bufs;
tlm_cmd_t *lcmd; /* Local command */
tlm_commands_t *cmds; /* Commands structure */
+ ndmpd_session_t *session = ptr;
if ((nlp = ndmp_get_nlp(session)) == NULL) {
NDMP_LOG(LOG_DEBUG, "nlp == NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
cmds = &nlp->nlp_cmds;
@@ -2725,7 +2727,7 @@ mover_socket_writer(ndmpd_session_t *session)
cmds->tcs_writer_count--;
lcmd->tc_ref--;
lcmd->tc_reader = TLM_STOP;
- return (0);
+ return (NULL);
}
@@ -2772,7 +2774,7 @@ start_mover_for_restore(ndmpd_session_t *session)
* must be sent to the client before probable errors are sent
* to the client.
*/
- rc = pthread_create(NULL, NULL, (funct_t)mover_tape_reader, session);
+ rc = pthread_create(NULL, NULL, mover_tape_reader, session);
if (rc == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_TAPE_READER);
} else {
@@ -2781,7 +2783,7 @@ start_mover_for_restore(ndmpd_session_t *session)
return (-1);
}
- rc = pthread_create(NULL, NULL, (funct_t)mover_socket_writer, session);
+ rc = pthread_create(NULL, NULL, mover_socket_writer, session);
if (rc == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_SOCK_WRITER);
} else {
@@ -2874,8 +2876,8 @@ mover_socket_read_one_buf(ndmpd_session_t *session, tlm_buffer_t *buf,
* 0: on success
* -1: otherwise
*/
-int
-mover_socket_reader(ndmpd_session_t *session)
+void *
+mover_socket_reader(void *ptr)
{
int bidx; /* buffer index */
ndmp_lbr_params_t *nlp;
@@ -2883,11 +2885,12 @@ mover_socket_reader(ndmpd_session_t *session)
tlm_buffers_t *bufs;
tlm_cmd_t *lcmd; /* Local command */
tlm_commands_t *cmds; /* Commands structure */
+ ndmpd_session_t *session = ptr;
static int nr = 0;
if ((nlp = ndmp_get_nlp(session)) == NULL) {
NDMP_LOG(LOG_DEBUG, "nlp == NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
cmds = &nlp->nlp_cmds;
@@ -2951,7 +2954,7 @@ mover_socket_reader(ndmpd_session_t *session)
cmds->tcs_reader_count--;
lcmd->tc_ref--;
lcmd->tc_writer = TLM_STOP;
- return (0);
+ return (NULL);
}
@@ -3014,8 +3017,8 @@ mover_tape_write_one_buf(ndmpd_session_t *session, tlm_buffer_t *buf)
* 0: on success
* -1: otherwise
*/
-int
-mover_tape_writer(ndmpd_session_t *session)
+void *
+mover_tape_writer(void *ptr)
{
int bidx;
ndmp_lbr_params_t *nlp;
@@ -3023,11 +3026,12 @@ mover_tape_writer(ndmpd_session_t *session)
tlm_buffers_t *bufs;
tlm_cmd_t *lcmd;
tlm_commands_t *cmds;
+ ndmpd_session_t *session = ptr;
static int nw = 0;
if ((nlp = ndmp_get_nlp(session)) == NULL) {
NDMP_LOG(LOG_DEBUG, "nlp == NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
cmds = &nlp->nlp_cmds;
@@ -3096,7 +3100,7 @@ mover_tape_writer(ndmpd_session_t *session)
cmds->tcs_writer_count--;
lcmd->tc_ref--;
lcmd->tc_reader = TLM_STOP;
- return (0);
+ return (NULL);
}
@@ -3143,7 +3147,7 @@ start_mover_for_backup(ndmpd_session_t *session)
* must be sent to the client before probable errors are sent
* to the client.
*/
- rc = pthread_create(NULL, NULL, (funct_t)mover_socket_reader, session);
+ rc = pthread_create(NULL, NULL, mover_socket_reader, session);
if (rc == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_SOCK_READER);
} else {
@@ -3152,7 +3156,7 @@ start_mover_for_backup(ndmpd_session_t *session)
return (-1);
}
- rc = pthread_create(NULL, NULL, (funct_t)mover_tape_writer, session);
+ rc = pthread_create(NULL, NULL, mover_tape_writer, session);
if (rc == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_TAPE_WRITER);
} else {
@@ -3180,7 +3184,7 @@ start_mover_for_backup(ndmpd_session_t *session)
* Note: non-zero is also returned if the backup type is
* neither TAR nor DUMP. I.e. the is_writer_running()
* check does not apply in this case and things should
- * appear successful.
+ * appear successful.
*/
static boolean_t
is_writer_running(ndmpd_session_t *session)
@@ -3216,7 +3220,7 @@ is_writer_running(ndmpd_session_t *session)
* Note: non-zero is also returned if the backup type is
* neither TAR nor DUMP. I.e. the is_writer_running()
* check does not apply in this case and things should
- * appear successful.
+ * appear successful.
*/
static boolean_t
is_writer_running_v3(ndmpd_session_t *session)
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_tar.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_tar.c
index 8040355eb0..89dc343051 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_tar.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_tar.c
@@ -10,10 +10,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -370,8 +370,7 @@ ndmp_write_utf8magic(tlm_cmd_t *cmd)
*
*/
static boolean_t
-timecmp(bk_selector_t *bksp,
- struct stat64 *attr)
+timecmp(bk_selector_t *bksp, struct stat64 *attr)
{
ndmp_lbr_params_t *nlp;
@@ -476,7 +475,7 @@ get_dir_acl_info(char *dir, tlm_acls_t *tlm_acls, tlm_job_stats_t *js)
char *spot;
char *fil;
acl_t *aclp = NULL;
- char *acltp;
+ char *acltp;
checkpointed_dir = ndmp_malloc(TLM_MAX_PATH_NAME);
if (checkpointed_dir == NULL)
@@ -656,7 +655,7 @@ backup_work(char *bk_path, tlm_job_stats_t *job_stats,
longlong_t fsize;
bk_selector_t bks;
tlm_cmd_t *local_commands;
- long dpos;
+ long dpos;
NDMP_LOG(LOG_DEBUG, "nr_chkpnted %d nr_ldate: %u bk_path: \"%s\"",
NLP_ISCHKPNTED(nlp), nlp->nlp_ldate, bk_path);
@@ -1067,8 +1066,8 @@ read_one_buf(ndmpd_module_params_t *mod_params, tlm_buffers_t *bufs,
* file from the tape and wakes up the consumer thread to extract
* it on the disk
*/
-int
-ndmp_tar_reader(ndmp_tar_reader_arg_t *argp)
+void *
+ndmp_tar_reader(void *ptr)
{
int bidx;
int err;
@@ -1078,9 +1077,10 @@ ndmp_tar_reader(ndmp_tar_reader_arg_t *argp)
ndmpd_session_t *session;
ndmpd_module_params_t *mod_params;
tlm_commands_t *cmds;
+ ndmp_tar_reader_arg_t *argp = ptr;
if (!argp)
- return (-1);
+ return ((void *)(uintptr_t)-1);
session = argp->tr_session;
mod_params = argp->tr_mod_params;
@@ -1097,7 +1097,7 @@ ndmp_tar_reader(ndmp_tar_reader_arg_t *argp)
if (err != 0) {
tlm_cmd_signal(cmds->tcs_command, TLM_TAR_READER);
- return (err);
+ return ((void *)(uintptr_t)err);
}
lcmd = cmds->tcs_command;
@@ -1160,7 +1160,7 @@ ndmp_tar_reader(ndmp_tar_reader_arg_t *argp)
*/
cmds->tcs_reader_count--;
lcmd->tc_ref--;
- return (err);
+ return ((void *)(uintptr_t)err);
}
@@ -1314,8 +1314,7 @@ ndmpd_tar_restore(ndmpd_session_t *session, ndmpd_module_params_t *mod_params,
arg.tr_mod_params = mod_params;
arg.tr_cmds = cmds;
- err = pthread_create(&rdtp, NULL, (funct_t)ndmp_tar_reader,
- (void *)&arg);
+ err = pthread_create(&rdtp, NULL, ndmp_tar_reader, &arg);
if (err == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_TAR_READER);
} else {
@@ -1346,8 +1345,8 @@ ndmpd_tar_restore(ndmpd_session_t *session, ndmpd_module_params_t *mod_params,
if (tm_tar_ops.tm_getfile != NULL) {
- err = pthread_create(&wrtp, NULL,
- (funct_t)tm_tar_ops.tm_getfile, (void *)&tlm_arg);
+ err = pthread_create(&wrtp, NULL, tm_tar_ops.tm_getfile,
+ &tlm_arg);
} else {
(void) pthread_barrier_destroy(&tlm_arg.ba_barrier);
NDMP_LOG(LOG_DEBUG,
@@ -1859,7 +1858,7 @@ ndmp_restore_extract_params(ndmpd_session_t *session,
* and calls ndmp_tar_backup to perform the actual backup. It does the cleanup
* and release the snapshot at the end.
*/
-int
+void *
ndmpd_tar_backup_starter(void *arg)
{
ndmpd_module_params_t *mod_params = arg;
@@ -1933,7 +1932,7 @@ ndmpd_tar_backup_starter(void *arg)
NS_DEC(nbk);
ndmp_session_unref(session);
- return (err);
+ return ((void *)(uintptr_t)err);
}
@@ -1966,7 +1965,7 @@ ndmpd_tar_backup_abort(void *module_cookie)
* Starts the restore by running ndmpd_tar_restore function (V2 only)
*/
-int
+void *
ndmpd_tar_restore_starter(void *arg)
{
ndmpd_module_params_t *mod_params = arg;
@@ -1986,7 +1985,7 @@ ndmpd_tar_restore_starter(void *arg)
NS_DEC(nrs);
ndmp_session_unref(session);
- return (err);
+ return ((void *)(uintptr_t)err);
}
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_tar3.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_tar3.c
index d3e89d5a3d..001368ac44 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_tar3.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_tar3.c
@@ -10,10 +10,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -2222,8 +2222,8 @@ lbrbk_v3(void *arg, fst_node_t *pnp, fst_node_t *enp)
* 0: on success
* != 0: otherwise
*/
-static int
-backup_reader_v3(backup_reader_arg_t *argp)
+static void *
+backup_reader_v3(void *ptr)
{
int rv;
tlm_cmd_t *lcmd;
@@ -2234,9 +2234,10 @@ backup_reader_v3(backup_reader_arg_t *argp)
char *jname;
ndmp_lbr_params_t *nlp;
tlm_commands_t *cmds;
+ backup_reader_arg_t *argp = ptr;
if (!argp)
- return (-1);
+ return ((void *)(uintptr_t)-1);
jname = argp->br_jname;
nlp = argp->br_nlp;
@@ -2265,7 +2266,7 @@ backup_reader_v3(backup_reader_arg_t *argp)
bp.bp_tmp = ndmp_malloc(sizeof (char) * TLM_MAX_PATH_NAME);
if (!bp.bp_tmp)
- return (-1);
+ return ((void *)(uintptr_t)-1);
/*
* Make the checkpointed paths for traversing the
@@ -2277,7 +2278,7 @@ backup_reader_v3(backup_reader_arg_t *argp)
bp.bp_chkpnm = ndmp_malloc(sizeof (char) * TLM_MAX_PATH_NAME);
if (!bp.bp_chkpnm) {
NDMP_FREE(bp.bp_tmp);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
(void) tlm_build_snapshot_name(nlp->nlp_backup_path,
bp.bp_chkpnm, nlp->nlp_jstat->js_job_name);
@@ -2341,8 +2342,7 @@ backup_reader_v3(backup_reader_arg_t *argp)
lcmd->tc_writer = TLM_STOP;
tlm_release_reader_writer_ipc(lcmd);
tlm_un_ref_job_stats(jname);
- return (rv);
-
+ return ((void *)(uintptr_t)rv);
}
@@ -2438,8 +2438,7 @@ tar_backup_v3(ndmpd_session_t *session, ndmpd_module_params_t *params,
(void) pthread_barrier_init(&arg.br_barrier, 0, 2);
- err = pthread_create(&rdtp, NULL, (funct_t)backup_reader_v3,
- (void *)&arg);
+ err = pthread_create(&rdtp, NULL, backup_reader_v3, &arg);
if (err == 0) {
(void) pthread_barrier_wait(&arg.br_barrier);
(void) pthread_barrier_destroy(&arg.br_barrier);
@@ -2512,9 +2511,10 @@ backup_out:
* Find the estimate of backup size. This is used to get an estimate
* of the progress of backup during NDMP backup.
*/
-void
-get_backup_size(ndmp_bkup_size_arg_t *sarg)
+void *
+get_backup_size(void *ptr)
{
+ ndmp_bkup_size_arg_t *sarg = ptr;
fs_traverse_t ft;
u_longlong_t bk_size;
char spath[PATH_MAX];
@@ -2543,6 +2543,7 @@ get_backup_size(ndmp_bkup_size_arg_t *sarg)
bk_size, bk_size / 1024, bk_size /(1024 * 1024));
}
sarg->bs_session->ns_data.dd_data_size = bk_size;
+ return (NULL);
}
/*
@@ -3186,8 +3187,7 @@ ndmpd_dar_tar_v3(ndmpd_session_t *session, ndmpd_module_params_t *params,
arg.tr_mod_params = params;
arg.tr_cmds = cmds;
- err = pthread_create(&rdtp, NULL, (funct_t)ndmp_tar_reader,
- (void *)&arg);
+ err = pthread_create(&rdtp, NULL, ndmp_tar_reader, &arg);
if (err == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_TAR_READER);
} else {
@@ -3445,8 +3445,8 @@ ndmp_plugin_pre_restore(ndmp_context_t *ctxp, ndmpd_module_params_t *params,
* /link-to-backup-path -> /backup/path
*
* Returns:
- * Pointer to the new path (allocated)
- * NULL if the path doesnt exist
+ * Pointer to the new path (allocated)
+ * NULL if the path doesnt exist
*/
static char *
get_absolute_path(const char *bkpath)
@@ -3548,8 +3548,7 @@ ndmpd_rs_sar_tar_v3(ndmpd_session_t *session, ndmpd_module_params_t *params,
arg.tr_session = session;
arg.tr_mod_params = params;
arg.tr_cmds = cmds;
- err = pthread_create(&rdtp, NULL, (funct_t)ndmp_tar_reader,
- (void *)&arg);
+ err = pthread_create(&rdtp, NULL, ndmp_tar_reader, &arg);
if (err == 0) {
tlm_cmd_wait(cmds->tcs_command, TLM_TAR_READER);
} else {
@@ -3746,7 +3745,7 @@ ndmp_backup_get_params_v3(ndmpd_session_t *session,
* 0: on success
* != 0: otherwise
*/
-int
+void *
ndmpd_tar_backup_starter_v3(void *arg)
{
ndmpd_module_params_t *params = arg;
@@ -3780,8 +3779,7 @@ ndmpd_tar_backup_starter_v3(void *arg)
sarg.bs_path = nlp->nlp_backup_path;
/* Get an estimate of the data size */
- if (pthread_create(&tid, NULL, (funct_t)get_backup_size,
- (void *)&sarg) == 0)
+ if (pthread_create(&tid, NULL, get_backup_size, &sarg) == 0)
(void) pthread_detach(tid);
err = ndmp_get_cur_bk_time(nlp, &nlp->nlp_cdate, jname);
@@ -3810,8 +3808,7 @@ ndmpd_tar_backup_starter_v3(void *arg)
NS_DEC(nbk);
ndmp_session_unref(session);
- return (err);
-
+ return ((void *)(uintptr_t)err);
}
@@ -3924,7 +3921,7 @@ ndmp_restore_get_params_v3(ndmpd_session_t *session,
* NDMP_NO_ERR: on success
* != NDMP_NO_ERR: otherwise
*/
-int
+void *
ndmpd_tar_restore_starter_v3(void *arg)
{
ndmpd_module_params_t *params = arg;
@@ -3948,8 +3945,7 @@ ndmpd_tar_restore_starter_v3(void *arg)
/* nlp_params is allocated in start_recover() */
NDMP_FREE(nlp->nlp_params);
ndmp_session_unref(session);
- return (err);
-
+ return ((void *)(uintptr_t)err);
}
/*
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c
index c124794fc4..72708f0475 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c
@@ -11,10 +11,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -74,12 +74,12 @@ static void ndmpd_zfs_close_one_fd(ndmpd_zfs_args_t *, int);
static int ndmpd_zfs_header_write(ndmpd_session_t *);
static int ndmpd_zfs_header_read(ndmpd_zfs_args_t *);
-static int ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *);
-static int ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *);
+static void *ndmpd_zfs_backup_send_read(void *);
+static void *ndmpd_zfs_backup_tape_write(void *);
static int ndmpd_zfs_restore(ndmpd_zfs_args_t *);
-static int ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *);
-static int ndmpd_zfs_restore_recv_write(ndmpd_zfs_args_t *);
+static void *ndmpd_zfs_restore_tape_read(void *);
+static void *ndmpd_zfs_restore_recv_write(void *);
static int ndmpd_zfs_reader_writer(ndmpd_zfs_args_t *, int **, int **);
@@ -143,8 +143,8 @@ static int ndmpd_zfs_backup(ndmpd_zfs_args_t *);
*
* Examples:
*
- * 0.0.n/0.bob.p
- * 0.0.u/1.bob.p/0.jane.d
+ * 0.0.n/0.bob.p
+ * 0.0.u/1.bob.p/0.jane.d
*
* Note: NDMPD_ZFS_SUBPROP_MAX is calculated based on ZFS_MAXPROPLEN
*/
@@ -164,8 +164,8 @@ static int ndmpd_zfs_backup(ndmpd_zfs_args_t *);
#define NDMPD_ZFS_LOG_ZERR(ndmpd_zfs_args, ...) { \
NDMP_LOG(LOG_ERR, __VA_ARGS__); \
NDMP_LOG(LOG_ERR, "%s--%s", \
- libzfs_error_action((ndmpd_zfs_args)->nz_zlibh), \
- libzfs_error_description((ndmpd_zfs_args)->nz_zlibh)); \
+ libzfs_error_action((ndmpd_zfs_args)->nz_zlibh), \
+ libzfs_error_description((ndmpd_zfs_args)->nz_zlibh)); \
ndmpd_zfs_zerr_dma_log((ndmpd_zfs_args)); \
}
@@ -459,7 +459,7 @@ _err:
return (-1);
}
-int
+void *
ndmpd_zfs_backup_starter(void *arg)
{
ndmpd_zfs_args_t *ndmpd_zfs_args = arg;
@@ -492,7 +492,7 @@ _done:
ndmp_session_unref(session);
ndmpd_zfs_fini(ndmpd_zfs_args);
- return (err);
+ return ((void *)(uintptr_t)err);
}
static int
@@ -599,17 +599,18 @@ ndmpd_zfs_backup(ndmpd_zfs_args_t *ndmpd_zfs_args)
*
* This routine executes zfs_send() to create the backup data stream.
* The value of ZFS_MODE determines the type of zfs_send():
- * dataset ('d'): Only the dataset specified (i.e., top level) is backed up
- * recursive ('r'): The dataset and its child file systems are backed up
- * package ('p'): Same as 'r', except all intermediate snapshots are also
+ * dataset ('d'): Only the dataset specified (i.e., top level) is backed up
+ * recursive ('r'): The dataset and its child file systems are backed up
+ * package ('p'): Same as 'r', except all intermediate snapshots are also
* backed up
*
* Volumes do not have descednants, so 'd' and 'r' produce equivalent results.
*/
-static int
-ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
+static void *
+ndmpd_zfs_backup_send_read(void *ptr)
{
+ ndmpd_zfs_args_t *ndmpd_zfs_args = ptr;
ndmpd_session_t *session = (ndmpd_session_t *)
(ndmpd_zfs_params->mp_daemon_cookie);
sendflags_t flags = { 0 };
@@ -623,7 +624,7 @@ ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
if (!zhp) {
if (!session->ns_data.dd_abort)
NDMPD_ZFS_LOG_ZERR(ndmpd_zfs_args, "zfs_open");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
switch (ndmpd_zfs_args->nz_zfs_mode) {
@@ -641,14 +642,14 @@ ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
NDMP_LOG(LOG_ERR, "unknown zfs_mode: %c",
ndmpd_zfs_args->nz_zfs_mode);
zfs_close(zhp);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
if (ndmpd_zfs_is_incremental(ndmpd_zfs_args)) {
if (ndmpd_zfs_args->nz_fromsnap[0] == '\0') {
NDMP_LOG(LOG_ERR, "no fromsnap");
zfs_close(zhp);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
fromsnap = ndmpd_zfs_args->nz_fromsnap;
}
@@ -661,7 +662,7 @@ ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
zfs_close(zhp);
- return (err);
+ return ((void *)(uintptr_t)err);
}
/*
@@ -672,9 +673,10 @@ ndmpd_zfs_backup_send_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
* ndmpd_zfs_args->nz_bufsize).
*/
-static int
-ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
+static void *
+ndmpd_zfs_backup_tape_write(void *ptr)
{
+ ndmpd_zfs_args_t *ndmpd_zfs_args = ptr;
ndmpd_session_t *session = (ndmpd_session_t *)
(ndmpd_zfs_params->mp_daemon_cookie);
int bufsize = ndmpd_zfs_args->nz_bufsize;
@@ -685,7 +687,7 @@ ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
buf = ndmp_malloc(bufsize);
if (buf == NULL) {
NDMP_LOG(LOG_DEBUG, "buf NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
bytes_totalp =
@@ -704,7 +706,8 @@ ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
*bytes_totalp - bufsize, *bytes_totalp);
free(buf);
- return (ndmpd_zfs_addenv_backup_size(ndmpd_zfs_args,
+ return ((void *)(uintptr_t)
+ ndmpd_zfs_addenv_backup_size(ndmpd_zfs_args,
*bytes_totalp));
}
@@ -712,7 +715,7 @@ ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
NDMP_LOG(LOG_DEBUG, "pipe read error (errno %d)",
errno);
free(buf);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
NS_ADD(rdisk, count);
@@ -720,7 +723,7 @@ ndmpd_zfs_backup_tape_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
(void) ndmpd_zfs_abort((void *) ndmpd_zfs_args);
NDMP_LOG(LOG_ERR, "MOD_WRITE error");
free(buf);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
*bytes_totalp += count;
@@ -750,7 +753,7 @@ ndmpd_zfs_addenv_backup_size(ndmpd_zfs_args_t *ndmpd_zfs_args,
return (0);
}
-int
+void *
ndmpd_zfs_restore_starter(void *arg)
{
ndmpd_zfs_args_t *ndmpd_zfs_args = arg;
@@ -770,7 +773,7 @@ ndmpd_zfs_restore_starter(void *arg)
ndmpd_zfs_fini(ndmpd_zfs_args);
- return (err);
+ return ((void *)(uintptr_t)err);
}
static int
@@ -827,9 +830,10 @@ ndmpd_zfs_restore(ndmpd_zfs_args_t *ndmpd_zfs_args)
return (err);
}
-static int
-ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
+static void *
+ndmpd_zfs_restore_tape_read(void *ptr)
{
+ ndmpd_zfs_args_t *ndmpd_zfs_args = ptr;
ndmpd_session_t *session = (ndmpd_session_t *)
(ndmpd_zfs_params->mp_daemon_cookie);
int bufsize = ndmpd_zfs_args->nz_bufsize;
@@ -843,7 +847,7 @@ ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
buf = ndmp_malloc(bufsize);
if (buf == NULL) {
NDMP_LOG(LOG_DEBUG, "buf NULL");
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
bytes_totalp = &ndmpd_zfs_args->nz_nlp->nlp_bytes_total;
@@ -861,7 +865,7 @@ ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
NDMP_LOG(LOG_ERR, "MOD_READ error: %d; returning -1",
err);
free(buf);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
count = write(ndmpd_zfs_args->nz_pipe_fd[PIPE_TAPE], buf,
@@ -880,7 +884,7 @@ ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
}
free(buf);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
NS_ADD(wdisk, count);
@@ -889,7 +893,7 @@ ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
}
free(buf);
- return (0);
+ return (NULL);
}
/*
@@ -898,9 +902,10 @@ ndmpd_zfs_restore_tape_read(ndmpd_zfs_args_t *ndmpd_zfs_args)
* This routine executes zfs_receive() to restore the backup.
*/
-static int
-ndmpd_zfs_restore_recv_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
+static void *
+ndmpd_zfs_restore_recv_write(void *ptr)
{
+ ndmpd_zfs_args_t *ndmpd_zfs_args = ptr;
ndmpd_session_t *session = (ndmpd_session_t *)
(ndmpd_zfs_params->mp_daemon_cookie);
recvflags_t flags;
@@ -921,7 +926,7 @@ ndmpd_zfs_restore_recv_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
if (err && !session->ns_data.dd_abort)
NDMPD_ZFS_LOG_ZERR(ndmpd_zfs_args, "zfs_receive: %d", err);
- return (err);
+ return ((void *)(uintptr_t)err);
}
/*
@@ -941,12 +946,12 @@ ndmpd_zfs_reader_writer(ndmpd_zfs_args_t *ndmpd_zfs_args,
switch (ndmpd_zfs_params->mp_operation) {
case NDMP_DATA_OP_BACKUP:
- sendrecv_func = (funct_t)ndmpd_zfs_backup_send_read;
- tape_func = (funct_t)ndmpd_zfs_backup_tape_write;
+ sendrecv_func = ndmpd_zfs_backup_send_read;
+ tape_func = ndmpd_zfs_backup_tape_write;
break;
case NDMP_DATA_OP_RECOVER:
- sendrecv_func = (funct_t)ndmpd_zfs_restore_recv_write;
- tape_func = (funct_t)ndmpd_zfs_restore_tape_read;
+ sendrecv_func = ndmpd_zfs_restore_recv_write;
+ tape_func = ndmpd_zfs_restore_tape_read;
break;
}
diff --git a/usr/src/cmd/ndmpd/tlm/tlm_proto.h b/usr/src/cmd/ndmpd/tlm/tlm_proto.h
index f37f206e6e..6bdd214a5d 100644
--- a/usr/src/cmd/ndmpd/tlm/tlm_proto.h
+++ b/usr/src/cmd/ndmpd/tlm/tlm_proto.h
@@ -10,10 +10,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -134,7 +134,7 @@ extern int tar_putdir(char *,
tlm_cmd_t *,
tlm_job_stats_t *);
-extern int tar_getfile(tlm_backup_restore_arg_t *);
+extern void * tar_getfile(void *);
extern int
tar_getdir(tlm_commands_t *,
diff --git a/usr/src/cmd/ndmpd/tlm/tlm_restore_writer.c b/usr/src/cmd/ndmpd/tlm/tlm_restore_writer.c
index 82af6967ef..f757e59deb 100644
--- a/usr/src/cmd/ndmpd/tlm/tlm_restore_writer.c
+++ b/usr/src/cmd/ndmpd/tlm/tlm_restore_writer.c
@@ -11,10 +11,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * - Redistributions of source code must retain the above copyright
+ * - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
- * - Redistributions in binary form must reproduce the above copyright
+ * - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
@@ -286,8 +286,8 @@ tar_getdir(tlm_commands_t *commands,
long nm_end, lnk_end;
char *name, *nmp;
cstack_t *stp;
- char *bkpath;
- char *parentlnk;
+ char *bkpath;
+ char *parentlnk;
int dir_dar = 0;
/*
@@ -987,9 +987,10 @@ tar_getdir(tlm_commands_t *commands,
/*
* Main file restore function for tar (should run as a thread)
*/
-int
-tar_getfile(tlm_backup_restore_arg_t *argp)
+void *
+tar_getfile(void *ptr)
{
+ tlm_backup_restore_arg_t *argp = ptr;
tlm_job_stats_t *job_stats;
char **sels; /* list of files desired */
char **exls; /* list of files not wanted */
@@ -1012,7 +1013,7 @@ tar_getfile(tlm_backup_restore_arg_t *argp)
if (dir == NULL) {
local_commands->tc_reader = TLM_STOP;
(void) pthread_barrier_wait(&argp->ba_barrier);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
(void) strlcpy(job, argp->ba_job, TLM_MAX_BACKUP_JOB_NAME+1);
@@ -1031,7 +1032,7 @@ tar_getfile(tlm_backup_restore_arg_t *argp)
local_commands->tc_reader = TLM_STOP;
free(dir);
(void) pthread_barrier_wait(&argp->ba_barrier);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
sels = argp->ba_sels;
@@ -1039,7 +1040,7 @@ tar_getfile(tlm_backup_restore_arg_t *argp)
local_commands->tc_reader = TLM_STOP;
free(dir);
(void) pthread_barrier_wait(&argp->ba_barrier);
- return (-1);
+ return ((void *)(uintptr_t)-1);
}
exls = &list;
@@ -1081,7 +1082,7 @@ tar_getfile(tlm_backup_restore_arg_t *argp)
local_commands->tc_reader = TLM_STOP;
tlm_release_reader_writer_ipc(local_commands);
free(dir);
- return (erc);
+ return ((void *)(uintptr_t)erc);
}
/*
diff --git a/usr/src/cmd/raidz_test/Makefile b/usr/src/cmd/raidz_test/Makefile
new file mode 100644
index 0000000000..43e0c07829
--- /dev/null
+++ b/usr/src/cmd/raidz_test/Makefile
@@ -0,0 +1,61 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.cmd
+include ../Makefile.cmd.64
+
+PROG= raidz_test
+OBJS= raidz_test.o raidz_bench.o
+SRCS= $(OBJS:%.o=%.c)
+POFILES= $(PROG:%=%.po)
+
+# No msg catalog here.
+POFILE=
+
+LDLIBS += -lzpool -lfakekernel -lumem
+
+INCS += -I../../lib/libzpool/common
+INCS += -I../../uts/common/fs/zfs
+
+CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common -D_FAKE_KERNEL
+CPPFLAGS += -D_LARGEFILE64_SOURCE=1
+CPPFLAGS += $(INCS)
+
+CSTD = $(CSTD_GNU99)
+
+CERRWARN += -_gcc=-Wno-type-limits
+
+SMATCH=off
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $(PROG) $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all $(ROOTPROG)
+
+clean:
+ $(RM) $(OBJS)
+
+_msg: $(MSGDOMAIN) $(POFILES)
+ $(CP) $(POFILES) $(MSGDOMAIN)
+
+$(MSGDOMAIN):
+ $(INS.dir)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/raidz_test/raidz_bench.c b/usr/src/cmd/raidz_test/raidz_bench.c
new file mode 100644
index 0000000000..9dc22af6fd
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_bench.c
@@ -0,0 +1,228 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/zio.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <sys/time.h>
+
+#include "raidz_test.h"
+
+#define GEN_BENCH_MEMORY (((uint64_t)1ULL)<<32)
+#define REC_BENCH_MEMORY (((uint64_t)1ULL)<<29)
+#define BENCH_ASHIFT 12
+#define MIN_CS_SHIFT BENCH_ASHIFT
+#define MAX_CS_SHIFT SPA_MAXBLOCKSHIFT
+
+static zio_t zio_bench;
+static raidz_map_t *rm_bench;
+static size_t max_data_size = SPA_MAXBLOCKSIZE;
+
+static void
+bench_init_raidz_map(void)
+{
+ zio_bench.io_offset = 0;
+ zio_bench.io_size = max_data_size;
+
+ /*
+ * To permit larger column sizes these have to be done
+ * allocated using aligned alloc instead of zio_abd_buf_alloc
+ */
+ zio_bench.io_abd = raidz_alloc(max_data_size);
+
+ init_zio_abd(&zio_bench);
+}
+
+static void
+bench_fini_raidz_maps(void)
+{
+ /* tear down golden zio */
+ raidz_free(zio_bench.io_abd, max_data_size);
+ bzero(&zio_bench, sizeof (zio_t));
+}
+
+static inline void
+run_gen_bench_impl(const char *impl)
+{
+ int fn, ncols;
+ uint64_t ds, iter_cnt, iter, disksize;
+ hrtime_t start;
+ double elapsed, d_bw;
+
+ /* Benchmark generate functions */
+ for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+
+ for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
+ /* create suitable raidz_map */
+ ncols = rto_opts.rto_dcols + fn + 1;
+ zio_bench.io_size = 1ULL << ds;
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, fn+1);
+
+ /* estimate iteration count */
+ iter_cnt = GEN_BENCH_MEMORY;
+ iter_cnt /= zio_bench.io_size;
+
+ start = gethrtime();
+ for (iter = 0; iter < iter_cnt; iter++)
+ vdev_raidz_generate_parity(rm_bench);
+ elapsed = NSEC2SEC((double)(gethrtime() - start));
+
+ disksize = (1ULL << ds) / rto_opts.rto_dcols;
+ d_bw = (double)iter_cnt * (double)disksize;
+ d_bw /= (1024.0 * 1024.0 * elapsed);
+
+ LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
+ impl,
+ raidz_gen_name[fn],
+ rto_opts.rto_dcols,
+ (1ULL<<ds),
+ d_bw,
+ d_bw * (double)(ncols),
+ (unsigned)iter_cnt);
+
+ vdev_raidz_map_free(rm_bench);
+ }
+ }
+}
+
+void
+run_gen_bench(void)
+{
+ char **impl_name;
+
+ LOG(D_INFO, DBLSEP "\nBenchmarking parity generation...\n\n");
+ LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
+
+ for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
+ impl_name++) {
+
+ if (vdev_raidz_impl_set(*impl_name) != 0)
+ continue;
+
+ run_gen_bench_impl(*impl_name);
+ }
+}
+
+static void
+run_rec_bench_impl(const char *impl)
+{
+ int fn, ncols, nbad;
+ uint64_t ds, iter_cnt, iter, disksize;
+ hrtime_t start;
+ double elapsed, d_bw;
+ static const int tgt[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
+ for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
+
+ /* create suitable raidz_map */
+ ncols = rto_opts.rto_dcols + PARITY_PQR;
+ zio_bench.io_size = 1ULL << ds;
+
+ /*
+ * raidz block is too short to test
+ * the requested method
+ */
+ if (zio_bench.io_size / rto_opts.rto_dcols <
+ (1ULL << BENCH_ASHIFT))
+ continue;
+
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, PARITY_PQR);
+
+ /* estimate iteration count */
+ iter_cnt = (REC_BENCH_MEMORY);
+ iter_cnt /= zio_bench.io_size;
+
+ /* calculate how many bad columns there are */
+ nbad = MIN(3, raidz_ncols(rm_bench) -
+ raidz_parity(rm_bench));
+
+ start = gethrtime();
+ for (iter = 0; iter < iter_cnt; iter++)
+ vdev_raidz_reconstruct(rm_bench, tgt[fn], nbad);
+ elapsed = NSEC2SEC((double)(gethrtime() - start));
+
+ disksize = (1ULL << ds) / rto_opts.rto_dcols;
+ d_bw = (double)iter_cnt * (double)(disksize);
+ d_bw /= (1024.0 * 1024.0 * elapsed);
+
+ LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
+ impl,
+ raidz_rec_name[fn],
+ rto_opts.rto_dcols,
+ (1ULL<<ds),
+ d_bw,
+ d_bw * (double)ncols,
+ (unsigned)iter_cnt);
+
+ vdev_raidz_map_free(rm_bench);
+ }
+ }
+}
+
+void
+run_rec_bench(void)
+{
+ char **impl_name;
+
+ LOG(D_INFO, DBLSEP "\nBenchmarking data reconstruction...\n\n");
+ LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
+
+ for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
+ impl_name++) {
+
+ if (vdev_raidz_impl_set(*impl_name) != 0)
+ continue;
+
+ run_rec_bench_impl(*impl_name);
+ }
+}
+
+void
+run_raidz_benchmark(void)
+{
+ bench_init_raidz_map();
+
+ run_gen_bench();
+ run_rec_bench();
+
+ bench_fini_raidz_maps();
+}
diff --git a/usr/src/cmd/raidz_test/raidz_test.c b/usr/src/cmd/raidz_test/raidz_test.c
new file mode 100644
index 0000000000..8d025b479d
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_test.c
@@ -0,0 +1,761 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/zio.h>
+#include <umem.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include "raidz_test.h"
+
+static int *rand_data;
+raidz_test_opts_t rto_opts;
+
+static char gdb[256];
+static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";
+
+#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
+
+static void print_opts(raidz_test_opts_t *opts, boolean_t force)
+{
+ char *verbose;
+ switch (opts->rto_v) {
+ case 0:
+ verbose = "no";
+ break;
+ case 1:
+ verbose = "info";
+ break;
+ default:
+ verbose = "debug";
+ break;
+ }
+
+ if (force || opts->rto_v >= D_INFO) {
+ (void) fprintf(stdout, DBLSEP "Running with options:\n"
+ " (-a) zio ashift : %zu\n"
+ " (-o) zio offset : 1 << %zu\n"
+ " (-d) number of raidz data columns : %zu\n"
+ " (-s) size of DATA : 1 << %zu\n"
+ " (-S) sweep parameters : %s \n"
+ " (-v) verbose : %s \n\n",
+ opts->rto_ashift, /* -a */
+ ilog2(opts->rto_offset), /* -o */
+ opts->rto_dcols, /* -d */
+ ilog2(opts->rto_dsize), /* -s */
+ opts->rto_sweep ? "yes" : "no", /* -S */
+ verbose); /* -v */
+ }
+}
+
+static void usage(boolean_t requested)
+{
+ const raidz_test_opts_t *o = &rto_opts_defaults;
+
+ FILE *fp = requested ? stdout : stderr;
+
+ (void) fprintf(fp, "Usage:\n"
+ "\t[-a zio ashift (default: %zu)]\n"
+ "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
+ "\t[-d number of raidz data columns (default: %zu)]\n"
+ "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
+ "\t[-S parameter sweep (default: %s)]\n"
+ "\t[-t timeout for parameter sweep test]\n"
+ "\t[-B benchmark all raidz implementations]\n"
+ "\t[-v increase verbosity (default: %zu)]\n"
+ "\t[-h (print help)]\n"
+ "\t[-T test the test, see if failure would be detected]\n"
+ "\t[-D debug (attach gdb on SIGSEGV)]\n"
+ "",
+ o->rto_ashift, /* -a */
+ ilog2(o->rto_offset), /* -o */
+ o->rto_dcols, /* -d */
+ ilog2(o->rto_dsize), /* -s */
+ rto_opts.rto_sweep ? "yes" : "no", /* -S */
+ o->rto_v); /* -d */
+
+ exit(requested ? 0 : 1);
+}
+
+static void process_options(int argc, char **argv)
+{
+ size_t value;
+ int opt;
+
+ raidz_test_opts_t *o = &rto_opts;
+
+ bcopy(&rto_opts_defaults, o, sizeof (*o));
+
+ while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
+ value = 0;
+
+ switch (opt) {
+ case 'a':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_ashift = MIN(13, MAX(9, value));
+ break;
+ case 'o':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
+ break;
+ case 'd':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_dcols = MIN(255, MAX(1, value));
+ break;
+ case 's':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
+ MAX(SPA_MINBLOCKSHIFT, value));
+ break;
+ case 't':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_sweep_timeout = value;
+ break;
+ case 'v':
+ o->rto_v++;
+ break;
+ case 'S':
+ o->rto_sweep = 1;
+ break;
+ case 'B':
+ o->rto_benchmark = 1;
+ break;
+ case 'D':
+ o->rto_gdb = 1;
+ break;
+ case 'T':
+ o->rto_sanity = 1;
+ break;
+ case 'h':
+ usage(B_TRUE);
+ break;
+ case '?':
+ default:
+ usage(B_FALSE);
+ break;
+ }
+ }
+}
+
+#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
+#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
+
+#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
+#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
+
+static int
+cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
+{
+ int i, ret = 0;
+
+ VERIFY(parity >= 1 && parity <= 3);
+
+ for (i = 0; i < parity; i++) {
+ if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i),
+ CODE_COL(rm, i)->abd_size) != 0) {
+ ret++;
+ LOG_OPT(D_DEBUG, opts,
+ "\nParity block [%d] different!\n", i);
+ }
+ }
+ return (ret);
+}
+
+static int
+cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
+{
+ int i, ret = 0;
+ int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
+
+ for (i = 0; i < dcols; i++) {
+ if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i),
+ DATA_COL(opts->rm_golden, i)->abd_size) != 0) {
+ ret++;
+
+ LOG_OPT(D_DEBUG, opts,
+ "\nData block [%d] different!\n", i);
+ }
+ }
+ return (ret);
+}
+
+static int
+init_rand(void *data, size_t size, void *private)
+{
+ int i;
+ int *dst = (int *)data;
+
+ for (i = 0; i < size / sizeof (int); i++)
+ dst[i] = rand_data[i];
+
+ return (0);
+}
+
+static void
+corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
+{
+ int i;
+ raidz_col_t *col;
+
+ for (i = 0; i < cnt; i++) {
+ col = &rm->rm_col[tgts[i]];
+ (void) abd_iterate_func(col->rc_abd, 0, col->rc_size,
+ init_rand, NULL);
+ }
+}
+
+void
+init_zio_abd(zio_t *zio)
+{
+ (void) abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
+}
+
+static void
+fini_raidz_map(zio_t **zio, raidz_map_t **rm)
+{
+ vdev_raidz_map_free(*rm);
+ raidz_free((*zio)->io_abd, (*zio)->io_size);
+ umem_free(*zio, sizeof (zio_t));
+
+ *zio = NULL;
+ *rm = NULL;
+}
+
+static int
+init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
+{
+ int err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+ const size_t total_ncols = opts->rto_dcols + parity;
+
+ if (opts->rm_golden) {
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+ }
+
+ opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+ zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+
+ opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
+ opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
+
+ opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
+ zio_test->io_abd = raidz_alloc(opts->rto_dsize);
+
+ init_zio_abd(opts->zio_golden);
+ init_zio_abd(zio_test);
+
+ VERIFY0(vdev_raidz_impl_set("original"));
+
+ opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
+ opts->rto_ashift, total_ncols, parity);
+ rm_test = vdev_raidz_map_alloc(zio_test,
+ opts->rto_ashift, total_ncols, parity);
+
+ VERIFY(opts->zio_golden);
+ VERIFY(opts->rm_golden);
+
+ vdev_raidz_generate_parity(opts->rm_golden);
+ vdev_raidz_generate_parity(rm_test);
+
+ /* sanity check */
+ err |= cmp_data(opts, rm_test);
+ err |= cmp_code(opts, rm_test, parity);
+
+ if (err)
+ ERRMSG("initializing the golden copy ... [FAIL]!\n");
+
+ /* tear down raidz_map of test zio */
+ fini_raidz_map(&zio_test, &rm_test);
+
+ return (err);
+}
+
+static raidz_map_t *
+init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
+{
+ raidz_map_t *rm = NULL;
+ const size_t alloc_dsize = opts->rto_dsize;
+ const size_t total_ncols = opts->rto_dcols + parity;
+ const int ccols[] = { 0, 1, 2 };
+
+ VERIFY(zio);
+ VERIFY(parity <= 3 && parity >= 1);
+
+ *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+
+ (*zio)->io_offset = 0;
+ (*zio)->io_size = alloc_dsize;
+ (*zio)->io_abd = raidz_alloc(alloc_dsize);
+ init_zio_abd(*zio);
+
+ rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
+ total_ncols, parity);
+ VERIFY(rm);
+
+ /* Make sure code columns are destroyed */
+ corrupt_colums(rm, ccols, parity);
+
+ return (rm);
+}
+
+static int
+run_gen_check(raidz_test_opts_t *opts)
+{
+ char **impl_name;
+ int fn, err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+
+ err = init_raidz_golden_map(opts, PARITY_PQR);
+ if (0 != err)
+ return (err);
+
+ LOG(D_INFO, DBLSEP);
+ LOG(D_INFO, "Testing parity generation...\n");
+
+ for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
+ impl_name++) {
+
+ LOG(D_INFO, SEP);
+ LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
+
+ if (0 != vdev_raidz_impl_set(*impl_name)) {
+ LOG(D_INFO, "[SKIP]\n");
+ continue;
+ } else {
+ LOG(D_INFO, "[SUPPORTED]\n");
+ }
+
+ for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ /* create suitable raidz_map */
+ rm_test = init_raidz_map(opts, &zio_test, fn+1);
+ VERIFY(rm_test);
+
+ LOG(D_INFO, "\t\tTesting method [%s] ...",
+ raidz_gen_name[fn]);
+
+ if (!opts->rto_sanity)
+ vdev_raidz_generate_parity(rm_test);
+
+ if (cmp_code(opts, rm_test, fn+1) != 0) {
+ LOG(D_INFO, "[FAIL]\n");
+ err++;
+ } else
+ LOG(D_INFO, "[PASS]\n");
+
+ fini_raidz_map(&zio_test, &rm_test);
+ }
+ }
+
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+
+ return (err);
+}
+
+static int
+run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
+{
+ int x0, x1, x2;
+ int tgtidx[3];
+ int err = 0;
+ static const int rec_tgts[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
+
+ if (fn < RAIDZ_REC_PQ) {
+ /* can reconstruct 1 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d] ", x0);
+
+ tgtidx[2] = x0 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx+2, 1);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(rm, tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
+ }
+ }
+
+ } else if (fn < RAIDZ_REC_PQR) {
+ /* can reconstruct 2 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
+ if (x1 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d %d] ", x0, x1);
+
+ tgtidx[1] = x0 + raidz_parity(rm);
+ tgtidx[2] = x1 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx+1, 2);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(rm,
+ tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG, "\nREC D[%d %d]... "
+ "[FAIL]\n", x0, x1);
+ }
+ }
+ }
+ } else {
+ /* can reconstruct 3 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
+ if (x1 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
+ if (x2 >=
+ rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
+
+ tgtidx[0] = x0 + raidz_parity(rm);
+ tgtidx[1] = x1 + raidz_parity(rm);
+ tgtidx[2] = x2 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx, 3);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(
+ rm, tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG,
+ "\nREC D[%d %d %d]... "
+ "[FAIL]\n", x0, x1, x2);
+ }
+ }
+ }
+ }
+ }
+ return (err);
+}
+
+static int
+run_rec_check(raidz_test_opts_t *opts)
+{
+ char **impl_name;
+ unsigned fn, err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+
+ err = init_raidz_golden_map(opts, PARITY_PQR);
+ if (0 != err)
+ return (err);
+
+ LOG(D_INFO, DBLSEP);
+ LOG(D_INFO, "Testing data reconstruction...\n");
+
+ for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
+ impl_name++) {
+
+ LOG(D_INFO, SEP);
+ LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
+
+ if (vdev_raidz_impl_set(*impl_name) != 0) {
+ LOG(D_INFO, "[SKIP]\n");
+ continue;
+ } else
+ LOG(D_INFO, "[SUPPORTED]\n");
+
+
+ /* create suitable raidz_map */
+ rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
+ /* generate parity */
+ vdev_raidz_generate_parity(rm_test);
+
+ for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
+
+ LOG(D_INFO, "\t\tTesting method [%s] ...",
+ raidz_rec_name[fn]);
+
+ if (run_rec_check_impl(opts, rm_test, fn) != 0) {
+ LOG(D_INFO, "[FAIL]\n");
+ err++;
+
+ } else
+ LOG(D_INFO, "[PASS]\n");
+
+ }
+ /* tear down test raidz_map */
+ fini_raidz_map(&zio_test, &rm_test);
+ }
+
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+
+ return (err);
+}
+
+static int
+run_test(raidz_test_opts_t *opts)
+{
+ int err = 0;
+
+ if (opts == NULL)
+ opts = &rto_opts;
+
+ print_opts(opts, B_FALSE);
+
+ err |= run_gen_check(opts);
+ err |= run_rec_check(opts);
+
+ return (err);
+}
+
+#define SWEEP_RUNNING 0
+#define SWEEP_FINISHED 1
+#define SWEEP_ERROR 2
+#define SWEEP_TIMEOUT 3
+
+static int sweep_state = 0;
+static raidz_test_opts_t failed_opts;
+
+static kmutex_t sem_mtx;
+static kcondvar_t sem_cv;
+static int max_free_slots;
+static int free_slots;
+
+static void
+sweep_thread(void *arg)
+{
+ int err = 0;
+ raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
+ VERIFY(opts != NULL);
+
+ err = run_test(opts);
+
+ if (rto_opts.rto_sanity) {
+ /* 25% chance that a sweep test fails */
+ if (rand() < (RAND_MAX/4))
+ err = 1;
+ }
+
+ if (0 != err) {
+ mutex_enter(&sem_mtx);
+ memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
+ sweep_state = SWEEP_ERROR;
+ mutex_exit(&sem_mtx);
+ }
+
+ umem_free(opts, sizeof (raidz_test_opts_t));
+
+ /* signal the next thread */
+ mutex_enter(&sem_mtx);
+ free_slots++;
+ cv_signal(&sem_cv);
+ mutex_exit(&sem_mtx);
+
+ thread_exit();
+}
+
+static int
+run_sweep(void)
+{
+ static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
+ static const size_t ashift_v[] = { 9, 12, 14 };
+ static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
+ 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
+
+ (void) setvbuf(stdout, NULL, _IONBF, 0);
+
+ ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
+ ARRAY_SIZE(dcols_v);
+ ulong_t tried_comb = 0;
+ hrtime_t time_diff, start_time = gethrtime();
+ raidz_test_opts_t *opts;
+ int a, d, s;
+
+ max_free_slots = free_slots = MAX(2, boot_ncpus);
+
+ mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
+
+ for (s = 0; s < ARRAY_SIZE(size_v); s++)
+ for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
+ for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
+
+ if (size_v[s] < (1 << ashift_v[a])) {
+ total_comb--;
+ continue;
+ }
+
+ if (++tried_comb % 20 == 0)
+ LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
+
+ /* wait for signal to start new thread */
+ mutex_enter(&sem_mtx);
+ while (cv_timedwait_sig(&sem_cv, &sem_mtx,
+ ddi_get_lbolt() + hz)) {
+
+ /* check if should stop the test (timeout) */
+ time_diff = (gethrtime() - start_time) / NANOSEC;
+ if (rto_opts.rto_sweep_timeout > 0 &&
+ time_diff >= rto_opts.rto_sweep_timeout) {
+ sweep_state = SWEEP_TIMEOUT;
+ rto_opts.rto_should_stop = B_TRUE;
+ mutex_exit(&sem_mtx);
+ goto exit;
+ }
+
+ /* check if should stop the test (error) */
+ if (sweep_state != SWEEP_RUNNING) {
+ mutex_exit(&sem_mtx);
+ goto exit;
+ }
+
+ /* exit loop if a slot is available */
+ if (free_slots > 0) {
+ break;
+ }
+ }
+
+ free_slots--;
+ mutex_exit(&sem_mtx);
+
+ opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
+ opts->rto_ashift = ashift_v[a];
+ opts->rto_dcols = dcols_v[d];
+ opts->rto_offset = (1 << ashift_v[a]) * rand();
+ opts->rto_dsize = size_v[s];
+ opts->rto_v = 0; /* be quiet */
+
+ VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
+ 0, NULL, TS_RUN, maxclsyspri), !=, NULL);
+ }
+
+exit:
+ LOG(D_ALL, "\nWaiting for test threads to finish...\n");
+ mutex_enter(&sem_mtx);
+ VERIFY(free_slots <= max_free_slots);
+ while (free_slots < max_free_slots) {
+ (void) cv_wait(&sem_cv, &sem_mtx);
+ }
+ mutex_exit(&sem_mtx);
+
+ if (sweep_state == SWEEP_ERROR) {
+ ERRMSG("Sweep test failed! Failed option: \n");
+ print_opts(&failed_opts, B_TRUE);
+ } else {
+ if (sweep_state == SWEEP_TIMEOUT)
+ LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
+ (ulong_t)rto_opts.rto_sweep_timeout);
+
+ LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
+ (ulong_t)tried_comb);
+ }
+
+ mutex_destroy(&sem_mtx);
+
+ return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
+}
+
+int
+main(int argc, char **argv)
+{
+ size_t i;
+ int err = 0;
+
+ /* init gdb string early */
+ (void) sprintf(gdb, gdb_tmpl, getpid());
+
+ (void) setvbuf(stdout, NULL, _IOLBF, 0);
+
+ dprintf_setup(&argc, argv);
+
+ process_options(argc, argv);
+
+ kernel_init(FREAD);
+
+ /* setup random data because rand() is not reentrant */
+ rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ srand((unsigned)time(NULL) * getpid());
+ for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
+ rand_data[i] = rand();
+
+ mprotect((void *)rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
+
+ if (rto_opts.rto_benchmark) {
+ run_raidz_benchmark();
+ } else if (rto_opts.rto_sweep) {
+ err = run_sweep();
+ } else {
+ err = run_test(NULL);
+ }
+
+ umem_free(rand_data, SPA_MAXBLOCKSIZE);
+ kernel_fini();
+
+ return (err);
+}
diff --git a/usr/src/cmd/raidz_test/raidz_test.h b/usr/src/cmd/raidz_test/raidz_test.h
new file mode 100644
index 0000000000..c91e521436
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_test.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef RAIDZ_TEST_H
+#define RAIDZ_TEST_H
+
+#include <sys/spa.h>
+
+static const char *raidz_impl_names[] = {
+ "original",
+ "scalar",
+ "sse2",
+ "ssse3",
+ "avx2",
+ "avx512f",
+ "avx512bw",
+ "aarch64_neon",
+ "aarch64_neonx2",
+ "powerpc_altivec",
+ NULL
+};
+
+typedef struct raidz_test_opts {
+ size_t rto_ashift;
+ size_t rto_offset;
+ size_t rto_dcols;
+ size_t rto_dsize;
+ size_t rto_v;
+ size_t rto_sweep;
+ size_t rto_sweep_timeout;
+ size_t rto_benchmark;
+ size_t rto_sanity;
+ size_t rto_gdb;
+
+ /* non-user options */
+ boolean_t rto_should_stop;
+
+ zio_t *zio_golden;
+ raidz_map_t *rm_golden;
+} raidz_test_opts_t;
+
+static const raidz_test_opts_t rto_opts_defaults = {
+ .rto_ashift = 9,
+ .rto_offset = 1ULL << 0,
+ .rto_dcols = 8,
+ .rto_dsize = 1<<19,
+ .rto_v = 0,
+ .rto_sweep = 0,
+ .rto_benchmark = 0,
+ .rto_sanity = 0,
+ .rto_gdb = 0,
+ .rto_should_stop = B_FALSE
+};
+
+extern raidz_test_opts_t rto_opts;
+
+static inline size_t ilog2(size_t a)
+{
+ return (a > 1 ? 1 + ilog2(a >> 1) : 0);
+}
+
+
+#define D_ALL 0
+#define D_INFO 1
+#define D_DEBUG 2
+
+#define LOG(lvl, a...) \
+{ \
+ if (rto_opts.rto_v >= lvl) \
+ (void) fprintf(stdout, a); \
+} \
+
+#define LOG_OPT(lvl, opt, a...) \
+{ \
+ if (opt->rto_v >= lvl) \
+ (void) fprintf(stdout, a); \
+} \
+
+#define ERRMSG(a...) (void) fprintf(stderr, a)
+
+
+#define DBLSEP "================\n"
+#define SEP "----------------\n"
+
+
+#define raidz_alloc(size) abd_alloc(size, B_FALSE)
+#define raidz_free(p, size) abd_free(p)
+
+
+void init_zio_abd(zio_t *zio);
+
+void run_raidz_benchmark(void);
+
+#endif /* RAIDZ_TEST_H */
diff --git a/usr/src/compat/bhyve/README b/usr/src/compat/bhyve/README
new file mode 100644
index 0000000000..87ceb83695
--- /dev/null
+++ b/usr/src/compat/bhyve/README
@@ -0,0 +1,9 @@
+These are the compatibility headers for building bhyve on illumos. They are
+comprised of the shims and glue needed to use native functionality as the
+backing for FreeBSD interfaces. It often means heavy use of #include_next,
+#define renames, and forward definitions to some glue functions.
+
+For headers which would otherwise be copied verbatim from FreeBSD, the
+usr/src/contrib/bhyve area is the appropriate home. This allows us to carry
+only the bare minimum in the 'compat' headers while making updates for the
+'contrib' headers easy (simply copy over the new version).
diff --git a/usr/src/compat/freebsd/amd64/machine/asmacros.h b/usr/src/compat/bhyve/amd64/machine/asmacros.h
index 1f6955130b..1f6955130b 100644
--- a/usr/src/compat/freebsd/amd64/machine/asmacros.h
+++ b/usr/src/compat/bhyve/amd64/machine/asmacros.h
diff --git a/usr/src/compat/freebsd/amd64/machine/atomic.h b/usr/src/compat/bhyve/amd64/machine/atomic.h
index 1da9724b7d..1da9724b7d 100644
--- a/usr/src/compat/freebsd/amd64/machine/atomic.h
+++ b/usr/src/compat/bhyve/amd64/machine/atomic.h
diff --git a/usr/src/compat/freebsd/amd64/machine/clock.h b/usr/src/compat/bhyve/amd64/machine/clock.h
index f50b42a126..f50b42a126 100644
--- a/usr/src/compat/freebsd/amd64/machine/clock.h
+++ b/usr/src/compat/bhyve/amd64/machine/clock.h
diff --git a/usr/src/compat/freebsd/amd64/machine/cpu.h b/usr/src/compat/bhyve/amd64/machine/cpu.h
index 40253af108..40253af108 100644
--- a/usr/src/compat/freebsd/amd64/machine/cpu.h
+++ b/usr/src/compat/bhyve/amd64/machine/cpu.h
diff --git a/usr/src/compat/freebsd/amd64/machine/cpufunc.h b/usr/src/compat/bhyve/amd64/machine/cpufunc.h
index 0b7bcdaa59..0b7bcdaa59 100644
--- a/usr/src/compat/freebsd/amd64/machine/cpufunc.h
+++ b/usr/src/compat/bhyve/amd64/machine/cpufunc.h
diff --git a/usr/src/compat/freebsd/amd64/machine/fpu.h b/usr/src/compat/bhyve/amd64/machine/fpu.h
index 6bc651d996..6bc651d996 100644
--- a/usr/src/compat/freebsd/amd64/machine/fpu.h
+++ b/usr/src/compat/bhyve/amd64/machine/fpu.h
diff --git a/usr/src/compat/freebsd/amd64/machine/iodev.h b/usr/src/compat/bhyve/amd64/machine/iodev.h
index c7cdddc817..c7cdddc817 100644
--- a/usr/src/compat/freebsd/amd64/machine/iodev.h
+++ b/usr/src/compat/bhyve/amd64/machine/iodev.h
diff --git a/usr/src/compat/freebsd/amd64/machine/md_var.h b/usr/src/compat/bhyve/amd64/machine/md_var.h
index ed57a8bebc..ed57a8bebc 100644
--- a/usr/src/compat/freebsd/amd64/machine/md_var.h
+++ b/usr/src/compat/bhyve/amd64/machine/md_var.h
diff --git a/usr/src/compat/freebsd/amd64/machine/param.h b/usr/src/compat/bhyve/amd64/machine/param.h
index b152f4d526..b152f4d526 100644
--- a/usr/src/compat/freebsd/amd64/machine/param.h
+++ b/usr/src/compat/bhyve/amd64/machine/param.h
diff --git a/usr/src/compat/freebsd/amd64/machine/pcb.h b/usr/src/compat/bhyve/amd64/machine/pcb.h
index 75b5de640c..75b5de640c 100644
--- a/usr/src/compat/freebsd/amd64/machine/pcb.h
+++ b/usr/src/compat/bhyve/amd64/machine/pcb.h
diff --git a/usr/src/compat/freebsd/amd64/machine/pmap.h b/usr/src/compat/bhyve/amd64/machine/pmap.h
index ce3185629b..ce3185629b 100644
--- a/usr/src/compat/freebsd/amd64/machine/pmap.h
+++ b/usr/src/compat/bhyve/amd64/machine/pmap.h
diff --git a/usr/src/compat/freebsd/amd64/machine/reg.h b/usr/src/compat/bhyve/amd64/machine/reg.h
index 4a73463603..4a73463603 100644
--- a/usr/src/compat/freebsd/amd64/machine/reg.h
+++ b/usr/src/compat/bhyve/amd64/machine/reg.h
diff --git a/usr/src/compat/freebsd/amd64/machine/segments.h b/usr/src/compat/bhyve/amd64/machine/segments.h
index d0655f4a0e..d0655f4a0e 100644
--- a/usr/src/compat/freebsd/amd64/machine/segments.h
+++ b/usr/src/compat/bhyve/amd64/machine/segments.h
diff --git a/usr/src/compat/freebsd/amd64/machine/smp.h b/usr/src/compat/bhyve/amd64/machine/smp.h
index 9c4f2d111b..9c4f2d111b 100644
--- a/usr/src/compat/freebsd/amd64/machine/smp.h
+++ b/usr/src/compat/bhyve/amd64/machine/smp.h
diff --git a/usr/src/compat/freebsd/amd64/machine/specialreg.h b/usr/src/compat/bhyve/amd64/machine/specialreg.h
index ead63aaaab..ead63aaaab 100644
--- a/usr/src/compat/freebsd/amd64/machine/specialreg.h
+++ b/usr/src/compat/bhyve/amd64/machine/specialreg.h
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm.h b/usr/src/compat/bhyve/amd64/machine/vmm.h
index 1c54c0830d..1c54c0830d 100644
--- a/usr/src/compat/freebsd/amd64/machine/vmm.h
+++ b/usr/src/compat/bhyve/amd64/machine/vmm.h
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm_dev.h b/usr/src/compat/bhyve/amd64/machine/vmm_dev.h
index fe9cb6c705..fe9cb6c705 100644
--- a/usr/src/compat/freebsd/amd64/machine/vmm_dev.h
+++ b/usr/src/compat/bhyve/amd64/machine/vmm_dev.h
diff --git a/usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h b/usr/src/compat/bhyve/amd64/machine/vmm_instruction_emul.h
index 02c3f391c7..02c3f391c7 100644
--- a/usr/src/compat/freebsd/amd64/machine/vmm_instruction_emul.h
+++ b/usr/src/compat/bhyve/amd64/machine/vmm_instruction_emul.h
diff --git a/usr/src/compat/freebsd/amd64/machine/vmparam.h b/usr/src/compat/bhyve/amd64/machine/vmparam.h
index c76a3259f3..c76a3259f3 100644
--- a/usr/src/compat/freebsd/amd64/machine/vmparam.h
+++ b/usr/src/compat/bhyve/amd64/machine/vmparam.h
diff --git a/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h b/usr/src/compat/bhyve/contrib/dev/acpica/include/acpi.h
index 2668f98ab3..2668f98ab3 100644
--- a/usr/src/compat/freebsd/contrib/dev/acpica/include/acpi.h
+++ b/usr/src/compat/bhyve/contrib/dev/acpica/include/acpi.h
diff --git a/usr/src/compat/freebsd/dev/pci/pcivar.h b/usr/src/compat/bhyve/dev/pci/pcivar.h
index 064d983117..064d983117 100644
--- a/usr/src/compat/freebsd/dev/pci/pcivar.h
+++ b/usr/src/compat/bhyve/dev/pci/pcivar.h
diff --git a/usr/src/compat/freebsd/err.h b/usr/src/compat/bhyve/err.h
index 40d144e025..40d144e025 100644
--- a/usr/src/compat/freebsd/err.h
+++ b/usr/src/compat/bhyve/err.h
diff --git a/usr/src/compat/freebsd/libutil.h b/usr/src/compat/bhyve/libutil.h
index f899d4425e..f899d4425e 100644
--- a/usr/src/compat/freebsd/libutil.h
+++ b/usr/src/compat/bhyve/libutil.h
diff --git a/usr/src/compat/freebsd/net/ethernet.h b/usr/src/compat/bhyve/net/ethernet.h
index dcd3a58925..dcd3a58925 100644
--- a/usr/src/compat/freebsd/net/ethernet.h
+++ b/usr/src/compat/bhyve/net/ethernet.h
diff --git a/usr/src/compat/freebsd/net/ieee_oui.h b/usr/src/compat/bhyve/net/ieee_oui.h
index 068328d833..068328d833 100644
--- a/usr/src/compat/freebsd/net/ieee_oui.h
+++ b/usr/src/compat/bhyve/net/ieee_oui.h
diff --git a/usr/src/compat/freebsd/paths.h b/usr/src/compat/bhyve/paths.h
index e43c963f93..e43c963f93 100644
--- a/usr/src/compat/freebsd/paths.h
+++ b/usr/src/compat/bhyve/paths.h
diff --git a/usr/src/compat/freebsd/pthread_np.h b/usr/src/compat/bhyve/pthread_np.h
index c4f76b259c..c4f76b259c 100644
--- a/usr/src/compat/freebsd/pthread_np.h
+++ b/usr/src/compat/bhyve/pthread_np.h
diff --git a/usr/src/compat/freebsd/string.h b/usr/src/compat/bhyve/string.h
index 7e0f5c7ddc..7e0f5c7ddc 100644
--- a/usr/src/compat/freebsd/string.h
+++ b/usr/src/compat/bhyve/string.h
diff --git a/usr/src/compat/freebsd/strings.h b/usr/src/compat/bhyve/strings.h
index fa3539fb96..fa3539fb96 100644
--- a/usr/src/compat/freebsd/strings.h
+++ b/usr/src/compat/bhyve/strings.h
diff --git a/usr/src/compat/freebsd/sys/_cpuset.h b/usr/src/compat/bhyve/sys/_cpuset.h
index 286d26fc00..286d26fc00 100644
--- a/usr/src/compat/freebsd/sys/_cpuset.h
+++ b/usr/src/compat/bhyve/sys/_cpuset.h
diff --git a/usr/src/compat/freebsd/sys/_iovec.h b/usr/src/compat/bhyve/sys/_iovec.h
index b755ae7e21..b755ae7e21 100644
--- a/usr/src/compat/freebsd/sys/_iovec.h
+++ b/usr/src/compat/bhyve/sys/_iovec.h
diff --git a/usr/src/compat/freebsd/sys/_pthreadtypes.h b/usr/src/compat/bhyve/sys/_pthreadtypes.h
index d746da3712..d746da3712 100644
--- a/usr/src/compat/freebsd/sys/_pthreadtypes.h
+++ b/usr/src/compat/bhyve/sys/_pthreadtypes.h
diff --git a/usr/src/compat/freebsd/sys/_types.h b/usr/src/compat/bhyve/sys/_types.h
index 62c327d216..62c327d216 100644
--- a/usr/src/compat/freebsd/sys/_types.h
+++ b/usr/src/compat/bhyve/sys/_types.h
diff --git a/usr/src/compat/freebsd/sys/bus.h b/usr/src/compat/bhyve/sys/bus.h
index e3b5e0e69d..e3b5e0e69d 100644
--- a/usr/src/compat/freebsd/sys/bus.h
+++ b/usr/src/compat/bhyve/sys/bus.h
diff --git a/usr/src/compat/freebsd/sys/callout.h b/usr/src/compat/bhyve/sys/callout.h
index 11823e6321..11823e6321 100644
--- a/usr/src/compat/freebsd/sys/callout.h
+++ b/usr/src/compat/bhyve/sys/callout.h
diff --git a/usr/src/compat/freebsd/sys/cdefs.h b/usr/src/compat/bhyve/sys/cdefs.h
index 0b857437e3..0b857437e3 100644
--- a/usr/src/compat/freebsd/sys/cdefs.h
+++ b/usr/src/compat/bhyve/sys/cdefs.h
diff --git a/usr/src/compat/freebsd/sys/clock.h b/usr/src/compat/bhyve/sys/clock.h
index ebf7f171a3..ebf7f171a3 100644
--- a/usr/src/compat/freebsd/sys/clock.h
+++ b/usr/src/compat/bhyve/sys/clock.h
diff --git a/usr/src/compat/freebsd/sys/cpuset.h b/usr/src/compat/bhyve/sys/cpuset.h
index 626b323d7d..626b323d7d 100644
--- a/usr/src/compat/freebsd/sys/cpuset.h
+++ b/usr/src/compat/bhyve/sys/cpuset.h
diff --git a/usr/src/compat/freebsd/sys/disk.h b/usr/src/compat/bhyve/sys/disk.h
index c9bdc6a2d8..c9bdc6a2d8 100644
--- a/usr/src/compat/freebsd/sys/disk.h
+++ b/usr/src/compat/bhyve/sys/disk.h
diff --git a/usr/src/compat/freebsd/sys/endian.h b/usr/src/compat/bhyve/sys/endian.h
index 24ea02d251..24ea02d251 100644
--- a/usr/src/compat/freebsd/sys/endian.h
+++ b/usr/src/compat/bhyve/sys/endian.h
diff --git a/usr/src/compat/freebsd/sys/errno.h b/usr/src/compat/bhyve/sys/errno.h
index bd37f43065..bd37f43065 100644
--- a/usr/src/compat/freebsd/sys/errno.h
+++ b/usr/src/compat/bhyve/sys/errno.h
diff --git a/usr/src/compat/freebsd/sys/eventhandler.h b/usr/src/compat/bhyve/sys/eventhandler.h
index 133aa664f0..133aa664f0 100644
--- a/usr/src/compat/freebsd/sys/eventhandler.h
+++ b/usr/src/compat/bhyve/sys/eventhandler.h
diff --git a/usr/src/compat/freebsd/sys/fcntl.h b/usr/src/compat/bhyve/sys/fcntl.h
index 062a3b84ac..062a3b84ac 100644
--- a/usr/src/compat/freebsd/sys/fcntl.h
+++ b/usr/src/compat/bhyve/sys/fcntl.h
diff --git a/usr/src/compat/freebsd/sys/ioctl.h b/usr/src/compat/bhyve/sys/ioctl.h
index 72a46b8085..72a46b8085 100644
--- a/usr/src/compat/freebsd/sys/ioctl.h
+++ b/usr/src/compat/bhyve/sys/ioctl.h
diff --git a/usr/src/compat/freebsd/sys/kernel.h b/usr/src/compat/bhyve/sys/kernel.h
index adf96f40fc..adf96f40fc 100644
--- a/usr/src/compat/freebsd/sys/kernel.h
+++ b/usr/src/compat/bhyve/sys/kernel.h
diff --git a/usr/src/compat/freebsd/sys/ktr.h b/usr/src/compat/bhyve/sys/ktr.h
index 96c499ef18..96c499ef18 100644
--- a/usr/src/compat/freebsd/sys/ktr.h
+++ b/usr/src/compat/bhyve/sys/ktr.h
diff --git a/usr/src/compat/freebsd/sys/libkern.h b/usr/src/compat/bhyve/sys/libkern.h
index 94675a0d66..94675a0d66 100644
--- a/usr/src/compat/freebsd/sys/libkern.h
+++ b/usr/src/compat/bhyve/sys/libkern.h
diff --git a/usr/src/compat/freebsd/sys/limits.h b/usr/src/compat/bhyve/sys/limits.h
index 0e66319791..0e66319791 100644
--- a/usr/src/compat/freebsd/sys/limits.h
+++ b/usr/src/compat/bhyve/sys/limits.h
diff --git a/usr/src/compat/freebsd/sys/lock.h b/usr/src/compat/bhyve/sys/lock.h
index fd6021a87e..fd6021a87e 100644
--- a/usr/src/compat/freebsd/sys/lock.h
+++ b/usr/src/compat/bhyve/sys/lock.h
diff --git a/usr/src/compat/freebsd/sys/malloc.h b/usr/src/compat/bhyve/sys/malloc.h
index 341d57b807..341d57b807 100644
--- a/usr/src/compat/freebsd/sys/malloc.h
+++ b/usr/src/compat/bhyve/sys/malloc.h
diff --git a/usr/src/compat/freebsd/sys/module.h b/usr/src/compat/bhyve/sys/module.h
index 87b73e3fa3..87b73e3fa3 100644
--- a/usr/src/compat/freebsd/sys/module.h
+++ b/usr/src/compat/bhyve/sys/module.h
diff --git a/usr/src/compat/freebsd/sys/mutex.h b/usr/src/compat/bhyve/sys/mutex.h
index 9e588cb98a..9e588cb98a 100644
--- a/usr/src/compat/freebsd/sys/mutex.h
+++ b/usr/src/compat/bhyve/sys/mutex.h
diff --git a/usr/src/compat/freebsd/sys/param.h b/usr/src/compat/bhyve/sys/param.h
index 5ba21a2809..5ba21a2809 100644
--- a/usr/src/compat/freebsd/sys/param.h
+++ b/usr/src/compat/bhyve/sys/param.h
diff --git a/usr/src/compat/freebsd/sys/pcpu.h b/usr/src/compat/bhyve/sys/pcpu.h
index 1bad53c159..1bad53c159 100644
--- a/usr/src/compat/freebsd/sys/pcpu.h
+++ b/usr/src/compat/bhyve/sys/pcpu.h
diff --git a/usr/src/compat/freebsd/sys/sched.h b/usr/src/compat/bhyve/sys/sched.h
index b426ee757e..b426ee757e 100644
--- a/usr/src/compat/freebsd/sys/sched.h
+++ b/usr/src/compat/bhyve/sys/sched.h
diff --git a/usr/src/compat/freebsd/sys/sdt.h b/usr/src/compat/bhyve/sys/sdt.h
index 32d887c0d8..32d887c0d8 100644
--- a/usr/src/compat/freebsd/sys/sdt.h
+++ b/usr/src/compat/bhyve/sys/sdt.h
diff --git a/usr/src/compat/freebsd/sys/select.h b/usr/src/compat/bhyve/sys/select.h
index fcb40c23b1..fcb40c23b1 100644
--- a/usr/src/compat/freebsd/sys/select.h
+++ b/usr/src/compat/bhyve/sys/select.h
diff --git a/usr/src/compat/freebsd/sys/sglist.h b/usr/src/compat/bhyve/sys/sglist.h
index 519c67915f..519c67915f 100644
--- a/usr/src/compat/freebsd/sys/sglist.h
+++ b/usr/src/compat/bhyve/sys/sglist.h
diff --git a/usr/src/compat/freebsd/sys/smp.h b/usr/src/compat/bhyve/sys/smp.h
index 3d6413ce16..3d6413ce16 100644
--- a/usr/src/compat/freebsd/sys/smp.h
+++ b/usr/src/compat/bhyve/sys/smp.h
diff --git a/usr/src/compat/freebsd/sys/socket.h b/usr/src/compat/bhyve/sys/socket.h
index 3bf7a8f440..3bf7a8f440 100644
--- a/usr/src/compat/freebsd/sys/socket.h
+++ b/usr/src/compat/bhyve/sys/socket.h
diff --git a/usr/src/compat/freebsd/sys/sysctl.h b/usr/src/compat/bhyve/sys/sysctl.h
index 9f6a695e34..9f6a695e34 100644
--- a/usr/src/compat/freebsd/sys/sysctl.h
+++ b/usr/src/compat/bhyve/sys/sysctl.h
diff --git a/usr/src/compat/freebsd/sys/systm.h b/usr/src/compat/bhyve/sys/systm.h
index 43fa16d450..43fa16d450 100644
--- a/usr/src/compat/freebsd/sys/systm.h
+++ b/usr/src/compat/bhyve/sys/systm.h
diff --git a/usr/src/compat/freebsd/sys/time.h b/usr/src/compat/bhyve/sys/time.h
index 4e0fbfc02c..4e0fbfc02c 100644
--- a/usr/src/compat/freebsd/sys/time.h
+++ b/usr/src/compat/bhyve/sys/time.h
diff --git a/usr/src/compat/freebsd/sys/types.h b/usr/src/compat/bhyve/sys/types.h
index 63731da42e..63731da42e 100644
--- a/usr/src/compat/freebsd/sys/types.h
+++ b/usr/src/compat/bhyve/sys/types.h
diff --git a/usr/src/compat/freebsd/sys/uio.h b/usr/src/compat/bhyve/sys/uio.h
index 05c6f2a028..05c6f2a028 100644
--- a/usr/src/compat/freebsd/sys/uio.h
+++ b/usr/src/compat/bhyve/sys/uio.h
diff --git a/usr/src/compat/freebsd/termios.h b/usr/src/compat/bhyve/termios.h
index feaa705358..feaa705358 100644
--- a/usr/src/compat/freebsd/termios.h
+++ b/usr/src/compat/bhyve/termios.h
diff --git a/usr/src/compat/freebsd/unistd.h b/usr/src/compat/bhyve/unistd.h
index b4357e1da5..b4357e1da5 100644
--- a/usr/src/compat/freebsd/unistd.h
+++ b/usr/src/compat/bhyve/unistd.h
diff --git a/usr/src/compat/freebsd/uuid.h b/usr/src/compat/bhyve/uuid.h
index 72ef2c7787..72ef2c7787 100644
--- a/usr/src/compat/freebsd/uuid.h
+++ b/usr/src/compat/bhyve/uuid.h
diff --git a/usr/src/compat/freebsd/vm/vm.h b/usr/src/compat/bhyve/vm/vm.h
index f5bb7b6eb8..f5bb7b6eb8 100644
--- a/usr/src/compat/freebsd/vm/vm.h
+++ b/usr/src/compat/bhyve/vm/vm.h
diff --git a/usr/src/compat/freebsd/vm/vm_param.h b/usr/src/compat/bhyve/vm/vm_param.h
index fd76b62a37..fd76b62a37 100644
--- a/usr/src/compat/freebsd/vm/vm_param.h
+++ b/usr/src/compat/bhyve/vm/vm_param.h
diff --git a/usr/src/compat/freebsd/x86/_types.h b/usr/src/compat/bhyve/x86/_types.h
index 8bbae549d8..8bbae549d8 100644
--- a/usr/src/compat/freebsd/x86/_types.h
+++ b/usr/src/compat/bhyve/x86/_types.h
diff --git a/usr/src/compat/freebsd/x86/segments.h b/usr/src/compat/bhyve/x86/segments.h
index 11edc582b5..11edc582b5 100644
--- a/usr/src/compat/freebsd/x86/segments.h
+++ b/usr/src/compat/bhyve/x86/segments.h
diff --git a/usr/src/contrib/bhyve/README b/usr/src/contrib/bhyve/README
new file mode 100644
index 0000000000..415048d584
--- /dev/null
+++ b/usr/src/contrib/bhyve/README
@@ -0,0 +1,12 @@
+These are headers (and sources) contributed directly from FreeBSD which are
+required to build bhyve. Updates are made to our copies here when corresponding
+updates to bhyve require them.
+
+With few exceptions (noted below) they are verbatim copies taken from the
+FreeBSD source tree. Files requiring extensive modification are likely a
+candidate to exist as a shim in usr/src/compat/bhyve. Conversely, if only a
+tiny modification is needed to make a file suitable for contrib, that trade-off
+is probably worthwhile (if done with a '#ifdef _FreeBSD_' guard for clarity).
+
+Files modified for compatibility:
+- isa/rtc.h
diff --git a/usr/src/contrib/bhyve/amd64/machine/_types.h b/usr/src/contrib/bhyve/amd64/machine/_types.h
new file mode 100644
index 0000000000..59994352b5
--- /dev/null
+++ b/usr/src/contrib/bhyve/amd64/machine/_types.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/_types.h 232261 2012-02-28 18:15:28Z tijl $ */
+
+#include <x86/_types.h>
diff --git a/usr/src/contrib/bhyve/amd64/machine/pmap.h b/usr/src/contrib/bhyve/amd64/machine/pmap.h
new file mode 100644
index 0000000000..a0b8ee37f2
--- /dev/null
+++ b/usr/src/contrib/bhyve/amd64/machine/pmap.h
@@ -0,0 +1,455 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm.
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Derived from hp300 version by Mike Hibler, this version by William
+ * Jolitz uses a recursive map [a pde points to the page directory] to
+ * map the page tables using the pagetables themselves. This is done to
+ * reduce the impact on kernel virtual memory for lots of sparse address
+ * space, and to reduce the cost of memory to each process.
+ *
+ * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90
+ * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PMAP_H_
+#define _MACHINE_PMAP_H_
+
+/*
+ * Page-directory and page-table entries follow this format, with a few
+ * of the fields not present here and there, depending on a lot of things.
+ */
+ /* ---- Intel Nomenclature ---- */
+#define X86_PG_V 0x001 /* P Valid */
+#define X86_PG_RW 0x002 /* R/W Read/Write */
+#define X86_PG_U 0x004 /* U/S User/Supervisor */
+#define X86_PG_NC_PWT 0x008 /* PWT Write through */
+#define X86_PG_NC_PCD 0x010 /* PCD Cache disable */
+#define X86_PG_A 0x020 /* A Accessed */
+#define X86_PG_M 0x040 /* D Dirty */
+#define X86_PG_PS 0x080 /* PS Page size (0=4k,1=2M) */
+#define X86_PG_PTE_PAT 0x080 /* PAT PAT index */
+#define X86_PG_G 0x100 /* G Global */
+#define X86_PG_AVAIL1 0x200 /* / Available for system */
+#define X86_PG_AVAIL2 0x400 /* < programmers use */
+#define X86_PG_AVAIL3 0x800 /* \ */
+#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
+#define X86_PG_NX (1ul<<63) /* No-execute */
+#define X86_PG_AVAIL(x) (1ul << (x))
+
+/* Page level cache control fields used to determine the PAT type */
+#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
+#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
+
+/*
+ * Intel extended page table (EPT) bit definitions.
+ */
+#define EPT_PG_READ 0x001 /* R Read */
+#define EPT_PG_WRITE 0x002 /* W Write */
+#define EPT_PG_EXECUTE 0x004 /* X Execute */
+#define EPT_PG_IGNORE_PAT 0x040 /* IPAT Ignore PAT */
+#define EPT_PG_PS 0x080 /* PS Page size */
+#define EPT_PG_A 0x100 /* A Accessed */
+#define EPT_PG_M 0x200 /* D Dirty */
+#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) /* MT Memory Type */
+
+/*
+ * Define the PG_xx macros in terms of the bits on x86 PTEs.
+ */
+#define PG_V X86_PG_V
+#define PG_RW X86_PG_RW
+#define PG_U X86_PG_U
+#define PG_NC_PWT X86_PG_NC_PWT
+#define PG_NC_PCD X86_PG_NC_PCD
+#define PG_A X86_PG_A
+#define PG_M X86_PG_M
+#define PG_PS X86_PG_PS
+#define PG_PTE_PAT X86_PG_PTE_PAT
+#define PG_G X86_PG_G
+#define PG_AVAIL1 X86_PG_AVAIL1
+#define PG_AVAIL2 X86_PG_AVAIL2
+#define PG_AVAIL3 X86_PG_AVAIL3
+#define PG_PDE_PAT X86_PG_PDE_PAT
+#define PG_NX X86_PG_NX
+#define PG_PDE_CACHE X86_PG_PDE_CACHE
+#define PG_PTE_CACHE X86_PG_PTE_CACHE
+
+/* Our various interpretations of the above */
+#define PG_W X86_PG_AVAIL3 /* "Wired" pseudoflag */
+#define PG_MANAGED X86_PG_AVAIL2
+#define EPT_PG_EMUL_V X86_PG_AVAIL(52)
+#define EPT_PG_EMUL_RW X86_PG_AVAIL(53)
+#define PG_PROMOTED X86_PG_AVAIL(54) /* PDE only */
+#define PG_FRAME (0x000ffffffffff000ul)
+#define PG_PS_FRAME (0x000fffffffe00000ul)
+
+/*
+ * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB
+ * (PTE) page mappings have identical settings for the following fields:
+ */
+#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
+ PG_M | PG_A | PG_U | PG_RW | PG_V)
+
+/*
+ * Page Protection Exception bits
+ */
+
+#define PGEX_P 0x01 /* Protection violation vs. not present */
+#define PGEX_W 0x02 /* during a Write cycle */
+#define PGEX_U 0x04 /* access from User mode (UPL) */
+#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
+#define PGEX_I 0x10 /* during an instruction fetch */
+
+/*
+ * undef the PG_xx macros that define bits in the regular x86 PTEs that
+ * have a different position in nested PTEs. This is done when compiling
+ * code that needs to be aware of the differences between regular x86 and
+ * nested PTEs.
+ *
+ * The appropriate bitmask will be calculated at runtime based on the pmap
+ * type.
+ */
+#ifdef AMD64_NPT_AWARE
+#undef PG_AVAIL1 /* X86_PG_AVAIL1 aliases with EPT_PG_M */
+#undef PG_G
+#undef PG_A
+#undef PG_M
+#undef PG_PDE_PAT
+#undef PG_PDE_CACHE
+#undef PG_PTE_PAT
+#undef PG_PTE_CACHE
+#undef PG_RW
+#undef PG_V
+#endif
+
+/*
+ * Pte related macros. This is complicated by having to deal with
+ * the sign extension of the 48th bit.
+ */
+#define KVADDR(l4, l3, l2, l1) ( \
+ ((unsigned long)-1 << 47) | \
+ ((unsigned long)(l4) << PML4SHIFT) | \
+ ((unsigned long)(l3) << PDPSHIFT) | \
+ ((unsigned long)(l2) << PDRSHIFT) | \
+ ((unsigned long)(l1) << PAGE_SHIFT))
+
+#define UVADDR(l4, l3, l2, l1) ( \
+ ((unsigned long)(l4) << PML4SHIFT) | \
+ ((unsigned long)(l3) << PDPSHIFT) | \
+ ((unsigned long)(l2) << PDRSHIFT) | \
+ ((unsigned long)(l1) << PAGE_SHIFT))
+
+/*
+ * Number of kernel PML4 slots. Can be anywhere from 1 to 64 or so,
+ * but setting it larger than NDMPML4E makes no sense.
+ *
+ * Each slot provides .5 TB of kernel virtual space.
+ */
+#define NKPML4E 4
+
+#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */
+#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
+#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
+
+/*
+ * NDMPML4E is the maximum number of PML4 entries that will be
+ * used to implement the direct map. It must be a power of two,
+ * and should generally exceed NKPML4E. The maximum possible
+ * value is 64; using 128 will make the direct map intrude into
+ * the recursive page table map.
+ */
+#define NDMPML4E 8
+
+/*
+ * These values control the layout of virtual memory. The starting address
+ * of the direct map, which is controlled by DMPML4I, must be a multiple of
+ * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.)
+ *
+ * Note: KPML4I is the index of the (single) level 4 page that maps
+ * the KVA that holds KERNBASE, while KPML4BASE is the index of the
+ * first level 4 page that maps VM_MIN_KERNEL_ADDRESS. If NKPML4E
+ * is 1, these are the same, otherwise KPML4BASE < KPML4I and extra
+ * level 4 PDEs are needed to map from VM_MIN_KERNEL_ADDRESS up to
+ * KERNBASE.
+ *
+ * (KPML4I combines with KPDPI to choose where KERNBASE starts.
+ * Or, in other words, KPML4I provides bits 39..47 of KERNBASE,
+ * and KPDPI provides bits 30..38.)
+ */
+#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
+
+#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
+#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
+
+#define KPML4I (NPML4EPG-1)
+#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
+
+/*
+ * XXX doesn't really belong here I guess...
+ */
+#define ISA_HOLE_START 0xa0000
+#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
+
+#define PMAP_PCID_NONE 0xffffffff
+#define PMAP_PCID_KERN 0
+#define PMAP_PCID_OVERMAX 0x1000
+
+#ifndef LOCORE
+
+#include <sys/queue.h>
+#include <sys/_cpuset.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+
+#include <vm/_vm_radix.h>
+
+typedef u_int64_t pd_entry_t;
+typedef u_int64_t pt_entry_t;
+typedef u_int64_t pdp_entry_t;
+typedef u_int64_t pml4_entry_t;
+
+/*
+ * Address of current address space page table maps and directories.
+ */
+#ifdef _KERNEL
+#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0))
+#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0))
+#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
+#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
+#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
+#define PTmap ((pt_entry_t *)(addr_PTmap))
+#define PDmap ((pd_entry_t *)(addr_PDmap))
+#define PDPmap ((pd_entry_t *)(addr_PDPmap))
+#define PML4map ((pd_entry_t *)(addr_PML4map))
+#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
+
+extern int nkpt; /* Initial number of kernel page tables */
+extern u_int64_t KPDPphys; /* physical address of kernel level 3 */
+extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
+
+/*
+ * virtual address to page table entry and
+ * to physical address.
+ * Note: these work recursively, thus vtopte of a pte will give
+ * the corresponding pde that in turn maps it.
+ */
+pt_entry_t *vtopte(vm_offset_t);
+#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
+
+#define pte_load_store(ptep, pte) atomic_swap_long(ptep, pte)
+#define pte_load_clear(ptep) atomic_swap_long(ptep, 0)
+#define pte_store(ptep, pte) do { \
+ *(u_long *)(ptep) = (u_long)(pte); \
+} while (0)
+#define pte_clear(ptep) pte_store(ptep, 0)
+
+#define pde_store(pdep, pde) pte_store(pdep, pde)
+
+extern pt_entry_t pg_nx;
+
+#endif /* _KERNEL */
+
+/*
+ * Pmap stuff
+ */
+struct pv_entry;
+struct pv_chunk;
+
+/*
+ * Locks
+ * (p) PV list lock
+ */
+struct md_page {
+ TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
+ int pv_gen; /* (p) */
+ int pat_mode;
+};
+
+enum pmap_type {
+ PT_X86, /* regular x86 page tables */
+ PT_EPT, /* Intel's nested page tables */
+ PT_RVI, /* AMD's nested page tables */
+};
+
+struct pmap_pcids {
+ uint32_t pm_pcid;
+ uint32_t pm_gen;
+};
+
+/*
+ * The kernel virtual address (KVA) of the level 4 page table page is always
+ * within the direct map (DMAP) region.
+ */
+struct pmap {
+ struct mtx pm_mtx;
+ pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
+ uint64_t pm_cr3;
+ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
+ cpuset_t pm_active; /* active on cpus */
+ enum pmap_type pm_type; /* regular or nested tables */
+ struct pmap_statistics pm_stats; /* pmap statistics */
+ struct vm_radix pm_root; /* spare page table pages */
+ long pm_eptgen; /* EPT pmap generation id */
+ int pm_flags;
+ struct pmap_pcids pm_pcids[MAXCPU];
+};
+
+/* flags */
+#define PMAP_NESTED_IPIMASK 0xff
+#define PMAP_PDE_SUPERPAGE (1 << 8) /* supports 2MB superpages */
+#define PMAP_EMULATE_AD_BITS (1 << 9) /* needs A/D bits emulation */
+#define PMAP_SUPPORTS_EXEC_ONLY (1 << 10) /* execute only mappings ok */
+
+typedef struct pmap *pmap_t;
+
+#ifdef _KERNEL
+extern struct pmap kernel_pmap_store;
+#define kernel_pmap (&kernel_pmap_store)
+
+#define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx)
+#define PMAP_LOCK_ASSERT(pmap, type) \
+ mtx_assert(&(pmap)->pm_mtx, (type))
+#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
+#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \
+ NULL, MTX_DEF | MTX_DUPOK)
+#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx)
+#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
+#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
+#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx)
+
+int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags);
+int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype);
+#endif
+
+/*
+ * For each vm_page_t, there is a list of all currently valid virtual
+ * mappings of that page. An entry is a pv_entry_t, the list is pv_list.
+ */
+typedef struct pv_entry {
+ vm_offset_t pv_va; /* virtual address for mapping */
+ TAILQ_ENTRY(pv_entry) pv_next;
+} *pv_entry_t;
+
+/*
+ * pv_entries are allocated in chunks per-process. This avoids the
+ * need to track per-pmap assignments.
+ */
+#define _NPCM 3
+#define _NPCPV 168
+struct pv_chunk {
+ pmap_t pc_pmap;
+ TAILQ_ENTRY(pv_chunk) pc_list;
+ uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
+ TAILQ_ENTRY(pv_chunk) pc_lru;
+ struct pv_entry pc_pventry[_NPCPV];
+};
+
+#ifdef _KERNEL
+
+extern caddr_t CADDR1;
+extern pt_entry_t *CMAP1;
+extern vm_paddr_t phys_avail[];
+extern vm_paddr_t dump_avail[];
+extern vm_offset_t virtual_avail;
+extern vm_offset_t virtual_end;
+extern vm_paddr_t dmaplimit;
+extern int pmap_pcid_enabled;
+extern int invpcid_works;
+
+#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
+#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
+#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
+
+struct thread;
+
+void pmap_activate_sw(struct thread *);
+void pmap_bootstrap(vm_paddr_t *);
+int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
+int pmap_change_attr(vm_offset_t, vm_size_t, int);
+void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);
+void pmap_init_pat(void);
+void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
+void *pmap_kenter_temporary(vm_paddr_t pa, int i);
+vm_paddr_t pmap_kextract(vm_offset_t);
+void pmap_kremove(vm_offset_t);
+void *pmap_mapbios(vm_paddr_t, vm_size_t);
+void *pmap_mapdev(vm_paddr_t, vm_size_t);
+void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
+boolean_t pmap_page_is_mapped(vm_page_t m);
+void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
+void pmap_pinit_pml4(vm_page_t);
+void pmap_unmapdev(vm_offset_t, vm_size_t);
+void pmap_invalidate_page(pmap_t, vm_offset_t);
+void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
+void pmap_invalidate_all(pmap_t);
+void pmap_invalidate_cache(void);
+void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
+void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
+ boolean_t force);
+void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+#endif /* _KERNEL */
+
+/* Return various clipped indexes for a given VA */
+static __inline vm_pindex_t
+pmap_pte_index(vm_offset_t va)
+{
+
+ return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pde_index(vm_offset_t va)
+{
+
+ return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pdpe_index(vm_offset_t va)
+{
+
+ return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pml4e_index(vm_offset_t va)
+{
+
+ return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
+}
+
+#endif /* !LOCORE */
+
+#endif /* !_MACHINE_PMAP_H_ */
diff --git a/usr/src/contrib/bhyve/amd64/machine/psl.h b/usr/src/contrib/bhyve/amd64/machine/psl.h
new file mode 100644
index 0000000000..c660bfbab0
--- /dev/null
+++ b/usr/src/contrib/bhyve/amd64/machine/psl.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD: head/sys/amd64/include/psl.h 233204 2012-03-19 21:29:57Z tijl $ */
+
+#include <x86/psl.h>
diff --git a/usr/src/contrib/bhyve/amd64/machine/timerreg.h b/usr/src/contrib/bhyve/amd64/machine/timerreg.h
new file mode 100644
index 0000000000..bca7b4dd19
--- /dev/null
+++ b/usr/src/contrib/bhyve/amd64/machine/timerreg.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (C) 2005 TAKAHASHI Yoshihiro. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/timerreg.h 177642 2008-03-26 20:09:21Z phk $
+ */
+
+/*
+ * The outputs of the three timers are connected as follows:
+ *
+ * timer 0 -> irq 0
+ * timer 1 -> dma chan 0 (for dram refresh)
+ * timer 2 -> speaker (via keyboard controller)
+ *
+ * Timer 0 is used to call hardclock.
+ * Timer 2 is used to generate console beeps.
+ */
+
+#ifndef _MACHINE_TIMERREG_H_
+#define _MACHINE_TIMERREG_H_
+
+#ifdef _KERNEL
+
+#include <dev/ic/i8253reg.h>
+
+#define IO_TIMER1 0x40 /* 8253 Timer #1 */
+#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0)
+#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1)
+#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2)
+#define TIMER_MODE (IO_TIMER1 + TIMER_REG_MODE)
+
+#endif /* _KERNEL */
+
+#endif /* _MACHINE_TIMERREG_H_ */
diff --git a/usr/src/contrib/bhyve/amd64/machine/vm.h b/usr/src/contrib/bhyve/amd64/machine/vm.h
new file mode 100644
index 0000000000..885c1607ea
--- /dev/null
+++ b/usr/src/contrib/bhyve/amd64/machine/vm.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/amd64/include/vm.h 233671 2012-03-29 16:51:22Z jhb $
+ */
+
+#ifndef _MACHINE_VM_H_
+#define _MACHINE_VM_H_
+
+#include <machine/specialreg.h>
+
+/* Memory attributes. */
+#define VM_MEMATTR_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHEABLE)
+#define VM_MEMATTR_WRITE_COMBINING ((vm_memattr_t)PAT_WRITE_COMBINING)
+#define VM_MEMATTR_WRITE_THROUGH ((vm_memattr_t)PAT_WRITE_THROUGH)
+#define VM_MEMATTR_WRITE_PROTECTED ((vm_memattr_t)PAT_WRITE_PROTECTED)
+#define VM_MEMATTR_WRITE_BACK ((vm_memattr_t)PAT_WRITE_BACK)
+#define VM_MEMATTR_WEAK_UNCACHEABLE ((vm_memattr_t)PAT_UNCACHED)
+
+#define VM_MEMATTR_DEFAULT VM_MEMATTR_WRITE_BACK
+
+#endif /* !_MACHINE_VM_H_ */
diff --git a/usr/src/contrib/bhyve/dev/acpica/acpi_hpet.h b/usr/src/contrib/bhyve/dev/acpica/acpi_hpet.h
new file mode 100644
index 0000000000..df817b7a2b
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/acpica/acpi_hpet.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 2005 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/acpica/acpi_hpet.h 224919 2011-08-16 21:51:29Z mav $
+ */
+
+#ifndef __ACPI_HPET_H__
+#define __ACPI_HPET_H__
+
+#define HPET_MEM_WIDTH 0x400 /* Expected memory region size */
+
+/* General registers */
+#define HPET_CAPABILITIES 0x0 /* General capabilities and ID */
+#define HPET_CAP_VENDOR_ID 0xffff0000
+#define HPET_CAP_LEG_RT 0x00008000
+#define HPET_CAP_COUNT_SIZE 0x00002000 /* 1 = 64-bit, 0 = 32-bit */
+#define HPET_CAP_NUM_TIM 0x00001f00
+#define HPET_CAP_REV_ID 0x000000ff
+#define HPET_PERIOD 0x4 /* Period (1/hz) of timer */
+#define HPET_CONFIG 0x10 /* General configuration register */
+#define HPET_CNF_LEG_RT 0x00000002
+#define HPET_CNF_ENABLE 0x00000001
+#define HPET_ISR 0x20 /* General interrupt status register */
+#define HPET_MAIN_COUNTER 0xf0 /* Main counter register */
+
+/* Timer registers */
+#define HPET_TIMER_CAP_CNF(x) ((x) * 0x20 + 0x100)
+#define HPET_TCAP_INT_ROUTE 0xffffffff00000000
+#define HPET_TCAP_FSB_INT_DEL 0x00008000
+#define HPET_TCNF_FSB_EN 0x00004000
+#define HPET_TCNF_INT_ROUTE 0x00003e00
+#define HPET_TCNF_32MODE 0x00000100
+#define HPET_TCNF_VAL_SET 0x00000040
+#define HPET_TCAP_SIZE 0x00000020 /* 1 = 64-bit, 0 = 32-bit */
+#define HPET_TCAP_PER_INT 0x00000010 /* Supports periodic interrupts */
+#define HPET_TCNF_TYPE 0x00000008 /* 1 = periodic, 0 = one-shot */
+#define HPET_TCNF_INT_ENB 0x00000004
+#define HPET_TCNF_INT_TYPE 0x00000002 /* 1 = level triggered, 0 = edge */
+#define HPET_TIMER_COMPARATOR(x) ((x) * 0x20 + 0x108)
+#define HPET_TIMER_FSB_VAL(x) ((x) * 0x20 + 0x110)
+#define HPET_TIMER_FSB_ADDR(x) ((x) * 0x20 + 0x114)
+
+#define HPET_MIN_CYCLES 128 /* Period considered reliable. */
+
+#endif /* !__ACPI_HPET_H__ */
diff --git a/usr/src/contrib/bhyve/dev/ic/i8253reg.h b/usr/src/contrib/bhyve/dev/ic/i8253reg.h
new file mode 100644
index 0000000000..47568b3436
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/ic/i8253reg.h
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Header: timerreg.h,v 1.2 93/02/28 15:08:58 mccanne Exp
+ * $FreeBSD: head/sys/dev/ic/i8253reg.h 146215 2005-05-14 10:26:31Z nyan $
+ */
+
+/*
+ * Register definitions for the Intel 8253 Programmable Interval Timer.
+ *
+ * This chip has three independent 16-bit down counters that can be
+ * read on the fly. There are three mode registers and three countdown
+ * registers. The countdown registers are addressed directly, via the
+ * first three I/O ports. The three mode registers are accessed via
+ * the fourth I/O port, with two bits in the mode byte indicating the
+ * register. (Why are hardware interfaces always so braindead?).
+ *
+ * To write a value into the countdown register, the mode register
+ * is first programmed with a command indicating the which byte of
+ * the two byte register is to be modified. The three possibilities
+ * are load msb (TMR_MR_MSB), load lsb (TMR_MR_LSB), or load lsb then
+ * msb (TMR_MR_BOTH).
+ *
+ * To read the current value ("on the fly") from the countdown register,
+ * you write a "latch" command into the mode register, then read the stable
+ * value from the corresponding I/O port. For example, you write
+ * TMR_MR_LATCH into the corresponding mode register. Presumably,
+ * after doing this, a write operation to the I/O port would result
+ * in undefined behavior (but hopefully not fry the chip).
+ * Reading in this manner has no side effects.
+ */
+
+/*
+ * Macros for specifying values to be written into a mode register.
+ */
+#define TIMER_REG_CNTR0 0 /* timer 0 counter port */
+#define TIMER_REG_CNTR1 1 /* timer 1 counter port */
+#define TIMER_REG_CNTR2 2 /* timer 2 counter port */
+#define TIMER_REG_MODE 3 /* timer mode port */
+#define TIMER_SEL0 0x00 /* select counter 0 */
+#define TIMER_SEL1 0x40 /* select counter 1 */
+#define TIMER_SEL2 0x80 /* select counter 2 */
+#define TIMER_INTTC 0x00 /* mode 0, intr on terminal cnt */
+#define TIMER_ONESHOT 0x02 /* mode 1, one shot */
+#define TIMER_RATEGEN 0x04 /* mode 2, rate generator */
+#define TIMER_SQWAVE 0x06 /* mode 3, square wave */
+#define TIMER_SWSTROBE 0x08 /* mode 4, s/w triggered strobe */
+#define TIMER_HWSTROBE 0x0a /* mode 5, h/w triggered strobe */
+#define TIMER_LATCH 0x00 /* latch counter for reading */
+#define TIMER_LSB 0x10 /* r/w counter LSB */
+#define TIMER_MSB 0x20 /* r/w counter MSB */
+#define TIMER_16BIT 0x30 /* r/w counter 16 bits, LSB first */
+#define TIMER_BCD 0x01 /* count in BCD */
diff --git a/usr/src/contrib/bhyve/dev/ic/i8259.h b/usr/src/contrib/bhyve/dev/ic/i8259.h
new file mode 100644
index 0000000000..be523c1df4
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/ic/i8259.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/ic/i8259.h 151580 2005-10-23 09:05:51Z glebius $
+ */
+
+/*
+ * Register defintions for the i8259A programmable interrupt controller.
+ */
+
+#ifndef _DEV_IC_I8259_H_
+#define _DEV_IC_I8259_H_
+
+/* Initialization control word 1. Written to even address. */
+#define ICW1_IC4 0x01 /* ICW4 present */
+#define ICW1_SNGL 0x02 /* 1 = single, 0 = cascaded */
+#define ICW1_ADI 0x04 /* 1 = 4, 0 = 8 byte vectors */
+#define ICW1_LTIM 0x08 /* 1 = level trigger, 0 = edge */
+#define ICW1_RESET 0x10 /* must be 1 */
+/* 0x20 - 0x80 - in 8080/8085 mode only */
+
+/* Initialization control word 2. Written to the odd address. */
+/* No definitions, it is the base vector of the IDT for 8086 mode */
+
+/* Initialization control word 3. Written to the odd address. */
+/* For a master PIC, bitfield indicating a slave 8259 on given input */
+/* For slave, lower 3 bits are the slave's ID binary id on master */
+
+/* Initialization control word 4. Written to the odd address. */
+#define ICW4_8086 0x01 /* 1 = 8086, 0 = 8080 */
+#define ICW4_AEOI 0x02 /* 1 = Auto EOI */
+#define ICW4_MS 0x04 /* 1 = buffered master, 0 = slave */
+#define ICW4_BUF 0x08 /* 1 = enable buffer mode */
+#define ICW4_SFNM 0x10 /* 1 = special fully nested mode */
+
+/* Operation control words. Written after initialization. */
+
+/* Operation control word type 1 */
+/*
+ * No definitions. Written to the odd address. Bitmask for interrupts.
+ * 1 = disabled.
+ */
+
+/* Operation control word type 2. Bit 3 (0x08) must be zero. Even address. */
+#define OCW2_L0 0x01 /* Level */
+#define OCW2_L1 0x02
+#define OCW2_L2 0x04
+/* 0x08 must be 0 to select OCW2 vs OCW3 */
+/* 0x10 must be 0 to select OCW2 vs ICW1 */
+#define OCW2_EOI 0x20 /* 1 = EOI */
+#define OCW2_SL 0x40 /* EOI mode */
+#define OCW2_R 0x80 /* EOI mode */
+
+/* Operation control word type 3. Bit 3 (0x08) must be set. Even address. */
+#define OCW3_RIS 0x01 /* 1 = read IS, 0 = read IR */
+#define OCW3_RR 0x02 /* register read */
+#define OCW3_P 0x04 /* poll mode command */
+/* 0x08 must be 1 to select OCW3 vs OCW2 */
+#define OCW3_SEL 0x08 /* must be 1 */
+/* 0x10 must be 0 to select OCW3 vs ICW1 */
+#define OCW3_SMM 0x20 /* special mode mask */
+#define OCW3_ESMM 0x40 /* enable SMM */
+
+#endif /* !_DEV_IC_I8259_H_ */
diff --git a/usr/src/contrib/bhyve/dev/ic/ns16550.h b/usr/src/contrib/bhyve/dev/ic/ns16550.h
new file mode 100644
index 0000000000..5e8f30e3e8
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/ic/ns16550.h
@@ -0,0 +1,240 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)ns16550.h 7.1 (Berkeley) 5/9/91
+ * $FreeBSD: head/sys/dev/ic/ns16550.h 257170 2013-10-26 17:24:59Z zbb $
+ */
+
+/*
+ * NS8250... UART registers.
+ */
+
+/* 8250 registers #[0-6]. */
+
+#define com_data 0 /* data register (R/W) */
+#define REG_DATA com_data
+
+#define com_ier 1 /* interrupt enable register (W) */
+#define REG_IER com_ier
+#define IER_ERXRDY 0x1
+#define IER_ETXRDY 0x2
+#define IER_ERLS 0x4
+#define IER_EMSC 0x8
+
+#define IER_BITS "\20\1ERXRDY\2ETXRDY\3ERLS\4EMSC"
+
+#define com_iir 2 /* interrupt identification register (R) */
+#define REG_IIR com_iir
+#define IIR_IMASK 0xf
+#define IIR_RXTOUT 0xc
+#define IIR_BUSY 0x7
+#define IIR_RLS 0x6
+#define IIR_RXRDY 0x4
+#define IIR_TXRDY 0x2
+#define IIR_NOPEND 0x1
+#define IIR_MLSC 0x0
+#define IIR_FIFO_MASK 0xc0 /* set if FIFOs are enabled */
+
+#define IIR_BITS "\20\1NOPEND\2TXRDY\3RXRDY"
+
+#define com_lcr 3 /* line control register (R/W) */
+#define com_cfcr com_lcr /* character format control register (R/W) */
+#define REG_LCR com_lcr
+#define LCR_DLAB 0x80
+#define CFCR_DLAB LCR_DLAB
+#define LCR_EFR_ENABLE 0xbf /* magic to enable EFR on 16650 up */
+#define CFCR_EFR_ENABLE LCR_EFR_ENABLE
+#define LCR_SBREAK 0x40
+#define CFCR_SBREAK LCR_SBREAK
+#define LCR_PZERO 0x30
+#define CFCR_PZERO LCR_PZERO
+#define LCR_PONE 0x20
+#define CFCR_PONE LCR_PONE
+#define LCR_PEVEN 0x10
+#define CFCR_PEVEN LCR_PEVEN
+#define LCR_PODD 0x00
+#define CFCR_PODD LCR_PODD
+#define LCR_PENAB 0x08
+#define CFCR_PENAB LCR_PENAB
+#define LCR_STOPB 0x04
+#define CFCR_STOPB LCR_STOPB
+#define LCR_8BITS 0x03
+#define CFCR_8BITS LCR_8BITS
+#define LCR_7BITS 0x02
+#define CFCR_7BITS LCR_7BITS
+#define LCR_6BITS 0x01
+#define CFCR_6BITS LCR_6BITS
+#define LCR_5BITS 0x00
+#define CFCR_5BITS LCR_5BITS
+
+#define com_mcr 4 /* modem control register (R/W) */
+#define REG_MCR com_mcr
+#define MCR_PRESCALE 0x80 /* only available on 16650 up */
+#define MCR_LOOPBACK 0x10
+#define MCR_IE 0x08
+#define MCR_IENABLE MCR_IE
+#define MCR_DRS 0x04
+#define MCR_RTS 0x02
+#define MCR_DTR 0x01
+
+#define MCR_BITS "\20\1DTR\2RTS\3DRS\4IE\5LOOPBACK\10PRESCALE"
+
+#define com_lsr 5 /* line status register (R/W) */
+#define REG_LSR com_lsr
+#define LSR_RCV_FIFO 0x80
+#define LSR_TEMT 0x40
+#define LSR_TSRE LSR_TEMT
+#define LSR_THRE 0x20
+#define LSR_TXRDY LSR_THRE
+#define LSR_BI 0x10
+#define LSR_FE 0x08
+#define LSR_PE 0x04
+#define LSR_OE 0x02
+#define LSR_RXRDY 0x01
+#define LSR_RCV_MASK 0x1f
+
+#define LSR_BITS "\20\1RXRDY\2OE\3PE\4FE\5BI\6THRE\7TEMT\10RCV_FIFO"
+
+#define com_msr 6 /* modem status register (R/W) */
+#define REG_MSR com_msr
+#define MSR_DCD 0x80
+#define MSR_RI 0x40
+#define MSR_DSR 0x20
+#define MSR_CTS 0x10
+#define MSR_DDCD 0x08
+#define MSR_TERI 0x04
+#define MSR_DDSR 0x02
+#define MSR_DCTS 0x01
+
+#define MSR_BITS "\20\1DCTS\2DDSR\3TERI\4DDCD\5CTS\6DSR\7RI\10DCD"
+
+/* 8250 multiplexed registers #[0-1]. Access enabled by LCR[7]. */
+#define com_dll 0 /* divisor latch low (R/W) */
+#define com_dlbl com_dll
+#define com_dlm 1 /* divisor latch high (R/W) */
+#define com_dlbh com_dlm
+#define REG_DLL com_dll
+#define REG_DLH com_dlm
+
+/* 16450 register #7. Not multiplexed. */
+#define com_scr 7 /* scratch register (R/W) */
+
+/* 16550 register #2. Not multiplexed. */
+#define com_fcr 2 /* FIFO control register (W) */
+#define com_fifo com_fcr
+#define REG_FCR com_fcr
+#define FCR_ENABLE 0x01
+#define FIFO_ENABLE FCR_ENABLE
+#define FCR_RCV_RST 0x02
+#define FIFO_RCV_RST FCR_RCV_RST
+#define FCR_XMT_RST 0x04
+#define FIFO_XMT_RST FCR_XMT_RST
+#define FCR_DMA 0x08
+#define FIFO_DMA_MODE FCR_DMA
+#define FCR_RX_LOW 0x00
+#define FIFO_RX_LOW FCR_RX_LOW
+#define FCR_RX_MEDL 0x40
+#define FIFO_RX_MEDL FCR_RX_MEDL
+#define FCR_RX_MEDH 0x80
+#define FIFO_RX_MEDH FCR_RX_MEDH
+#define FCR_RX_HIGH 0xc0
+#define FIFO_RX_HIGH FCR_RX_HIGH
+
+#define FCR_BITS "\20\1ENABLE\2RCV_RST\3XMT_RST\4DMA"
+
+/* 16650 registers #2,[4-7]. Access enabled by LCR_EFR_ENABLE. */
+
+#define com_efr 2 /* enhanced features register (R/W) */
+#define REG_EFR com_efr
+#define EFR_CTS 0x80
+#define EFR_AUTOCTS EFR_CTS
+#define EFR_RTS 0x40
+#define EFR_AUTORTS EFR_RTS
+#define EFR_EFE 0x10 /* enhanced functions enable */
+
+#define com_xon1 4 /* XON 1 character (R/W) */
+#define com_xon2 5 /* XON 2 character (R/W) */
+#define com_xoff1 6 /* XOFF 1 character (R/W) */
+#define com_xoff2 7 /* XOFF 2 character (R/W) */
+
+#define DW_REG_USR 31 /* DesignWare derived Uart Status Reg */
+#define com_usr 39 /* Octeon 16750/16550 Uart Status Reg */
+#define REG_USR com_usr
+#define USR_BUSY 1 /* Uart Busy. Serial transfer in progress */
+#define USR_TXFIFO_NOTFULL 2 /* Uart TX FIFO Not full */
+
+/* 16950 register #1. Access enabled by ACR[7]. Also requires !LCR[7]. */
+#define com_asr 1 /* additional status register (R[0-7]/W[0-1]) */
+
+/* 16950 register #3. R/W access enabled by ACR[7]. */
+#define com_rfl 3 /* receiver fifo level (R) */
+
+/*
+ * 16950 register #4. Access enabled by ACR[7]. Also requires
+ * !LCR_EFR_ENABLE.
+ */
+#define com_tfl 4 /* transmitter fifo level (R) */
+
+/*
+ * 16950 register #5. Accessible if !LCR_EFR_ENABLE. Read access also
+ * requires ACR[6].
+ */
+#define com_icr 5 /* index control register (R/W) */
+
+/*
+ * 16950 register #7. It is the same as com_scr except it has a different
+ * abbreviation in the manufacturer's data sheet and it also serves as an
+ * index into the Indexed Control register set.
+ */
+#define com_spr com_scr /* scratch pad (and index) register (R/W) */
+#define REG_SPR com_scr
+
+/*
+ * 16950 indexed control registers #[0-0x13]. Access is via index in SPR,
+ * data in ICR (if ICR is accessible).
+ */
+
+#define com_acr 0 /* additional control register (R/W) */
+#define ACR_ASE 0x80 /* ASR/RFL/TFL enable */
+#define ACR_ICRE 0x40 /* ICR enable */
+#define ACR_TLE 0x20 /* TTL/RTL enable */
+
+#define com_cpr 1 /* clock prescaler register (R/W) */
+#define com_tcr 2 /* times clock register (R/W) */
+#define com_ttl 4 /* transmitter trigger level (R/W) */
+#define com_rtl 5 /* receiver trigger level (R/W) */
+/* ... */
+
+/* Hardware extension mode register for RSB-2000/3000. */
+#define com_emr com_msr
+#define EMR_EXBUFF 0x04
+#define EMR_CTSFLW 0x08
+#define EMR_DSRFLW 0x10
+#define EMR_RTSFLW 0x20
+#define EMR_DTRFLW 0x40
+#define EMR_EFMODE 0x80
diff --git a/usr/src/contrib/bhyve/dev/io/iodev.h b/usr/src/contrib/bhyve/dev/io/iodev.h
new file mode 100644
index 0000000000..d040fcccf4
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/io/iodev.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2010 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DEV_IODEV_H_
+#define _DEV_IODEV_H_
+
+#define IODEV_PIO_READ 0
+#define IODEV_PIO_WRITE 1
+
+struct iodev_pio_req {
+ u_int access;
+ u_int port;
+ u_int width;
+ u_int val;
+};
+
+#define IODEV_PIO _IOWR('I', 0, struct iodev_pio_req)
+
+#endif /* _DEV_IODEV_H_ */
diff --git a/usr/src/contrib/bhyve/dev/mii/mii.h b/usr/src/contrib/bhyve/dev/mii/mii.h
new file mode 100644
index 0000000000..fa1ec84eaa
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/mii/mii.h
@@ -0,0 +1,239 @@
+/* $NetBSD: mii.h,v 1.18 2014/06/16 14:43:22 msaitoh Exp $ */
+
+/*-
+ * Copyright (c) 1997 Manuel Bouyer. All rights reserved.
+ *
+ * Modification to match BSD/OS 3.0 MII interface by Jason R. Thorpe,
+ * Numerical Aerospace Simulation Facility, NASA Ames Research Center.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DEV_MII_MII_H_
+#define _DEV_MII_MII_H_
+
+/*
+ * Registers common to all PHYs.
+ */
+
+#define MII_NPHY 32 /* max # of PHYs per MII */
+
+/*
+ * MII commands, used if a device must drive the MII lines
+ * manually.
+ */
+#define MII_COMMAND_START 0x01
+#define MII_COMMAND_READ 0x02
+#define MII_COMMAND_WRITE 0x01
+#define MII_COMMAND_ACK 0x02
+
+#define MII_BMCR 0x00 /* Basic mode control register (rw) */
+#define BMCR_RESET 0x8000 /* reset */
+#define BMCR_LOOP 0x4000 /* loopback */
+#define BMCR_SPEED0 0x2000 /* speed selection (LSB) */
+#define BMCR_AUTOEN 0x1000 /* autonegotiation enable */
+#define BMCR_PDOWN 0x0800 /* power down */
+#define BMCR_ISO 0x0400 /* isolate */
+#define BMCR_STARTNEG 0x0200 /* restart autonegotiation */
+#define BMCR_FDX 0x0100 /* Set duplex mode */
+#define BMCR_CTEST 0x0080 /* collision test */
+#define BMCR_SPEED1 0x0040 /* speed selection (MSB) */
+
+#define BMCR_S10 0x0000 /* 10 Mb/s */
+#define BMCR_S100 BMCR_SPEED0 /* 100 Mb/s */
+#define BMCR_S1000 BMCR_SPEED1 /* 1000 Mb/s */
+
+#define BMCR_SPEED(x) ((x) & (BMCR_SPEED0|BMCR_SPEED1))
+
+#define MII_BMSR 0x01 /* Basic mode status register (ro) */
+#define BMSR_100T4 0x8000 /* 100 base T4 capable */
+#define BMSR_100TXFDX 0x4000 /* 100 base Tx full duplex capable */
+#define BMSR_100TXHDX 0x2000 /* 100 base Tx half duplex capable */
+#define BMSR_10TFDX 0x1000 /* 10 base T full duplex capable */
+#define BMSR_10THDX 0x0800 /* 10 base T half duplex capable */
+#define BMSR_100T2FDX 0x0400 /* 100 base T2 full duplex capable */
+#define BMSR_100T2HDX 0x0200 /* 100 base T2 half duplex capable */
+#define BMSR_EXTSTAT 0x0100 /* Extended status in register 15 */
+#define BMSR_MFPS 0x0040 /* MII Frame Preamble Suppression */
+#define BMSR_ACOMP 0x0020 /* Autonegotiation complete */
+#define BMSR_RFAULT 0x0010 /* Link partner fault */
+#define BMSR_ANEG 0x0008 /* Autonegotiation capable */
+#define BMSR_LINK 0x0004 /* Link status */
+#define BMSR_JABBER 0x0002 /* Jabber detected */
+#define BMSR_EXTCAP 0x0001 /* Extended capability */
+
+#define BMSR_DEFCAPMASK 0xffffffff
+
+/*
+ * Note that the EXTSTAT bit indicates that there is extended status
+ * info available in register 15, but 802.3 section 22.2.4.3 also
+ * states that all 1000 Mb/s capable PHYs will set this bit to 1.
+ */
+
+#define BMSR_MEDIAMASK (BMSR_100T4|BMSR_100TXFDX|BMSR_100TXHDX| \
+ BMSR_10TFDX|BMSR_10THDX|BMSR_100T2FDX|BMSR_100T2HDX)
+
+/*
+ * Convert BMSR media capabilities to ANAR bits for autonegotiation.
+ * Note the shift chopps off the BMSR_ANEG bit.
+ */
+#define BMSR_MEDIA_TO_ANAR(x) (((x) & BMSR_MEDIAMASK) >> 6)
+
+#define MII_PHYIDR1 0x02 /* ID register 1 (ro) */
+
+#define MII_PHYIDR2 0x03 /* ID register 2 (ro) */
+#define IDR2_OUILSB 0xfc00 /* OUI LSB */
+#define IDR2_MODEL 0x03f0 /* vendor model */
+#define IDR2_REV 0x000f /* vendor revision */
+
+#define MII_ANAR 0x04 /* Autonegotiation advertisement (rw) */
+ /* section 28.2.4.1 and 37.2.6.1 */
+#define ANAR_NP 0x8000 /* Next page (ro) */
+#define ANAR_ACK 0x4000 /* link partner abilities acknowledged (ro) */
+#define ANAR_RF 0x2000 /* remote fault (ro) */
+ /* Annex 28B.2 */
+#define ANAR_FC 0x0400 /* local device supports PAUSE */
+#define ANAR_T4 0x0200 /* local device supports 100bT4 */
+#define ANAR_TX_FD 0x0100 /* local device supports 100bTx FD */
+#define ANAR_TX 0x0080 /* local device supports 100bTx */
+#define ANAR_10_FD 0x0040 /* local device supports 10bT FD */
+#define ANAR_10 0x0020 /* local device supports 10bT */
+#define ANAR_CSMA 0x0001 /* protocol selector CSMA/CD */
+#define ANAR_PAUSE_NONE (0 << 10)
+#define ANAR_PAUSE_SYM (1 << 10)
+#define ANAR_PAUSE_ASYM (2 << 10)
+#define ANAR_PAUSE_TOWARDS (3 << 10)
+
+ /* Annex 28D */
+#define ANAR_X_FD 0x0020 /* local device supports 1000BASE-X FD */
+#define ANAR_X_HD 0x0040 /* local device supports 1000BASE-X HD */
+#define ANAR_X_PAUSE_NONE (0 << 7)
+#define ANAR_X_PAUSE_SYM (1 << 7)
+#define ANAR_X_PAUSE_ASYM (2 << 7)
+#define ANAR_X_PAUSE_TOWARDS (3 << 7)
+
+#define MII_ANLPAR 0x05 /* Autonegotiation lnk partner abilities (rw) */
+ /* section 28.2.4.1 and 37.2.6.1 */
+#define ANLPAR_NP 0x8000 /* Next page (ro) */
+#define ANLPAR_ACK 0x4000 /* link partner accepted ACK (ro) */
+#define ANLPAR_RF 0x2000 /* remote fault (ro) */
+#define ANLPAR_FC 0x0400 /* link partner supports PAUSE */
+#define ANLPAR_T4 0x0200 /* link partner supports 100bT4 */
+#define ANLPAR_TX_FD 0x0100 /* link partner supports 100bTx FD */
+#define ANLPAR_TX 0x0080 /* link partner supports 100bTx */
+#define ANLPAR_10_FD 0x0040 /* link partner supports 10bT FD */
+#define ANLPAR_10 0x0020 /* link partner supports 10bT */
+#define ANLPAR_CSMA 0x0001 /* protocol selector CSMA/CD */
+#define ANLPAR_PAUSE_MASK (3 << 10)
+#define ANLPAR_PAUSE_NONE (0 << 10)
+#define ANLPAR_PAUSE_SYM (1 << 10)
+#define ANLPAR_PAUSE_ASYM (2 << 10)
+#define ANLPAR_PAUSE_TOWARDS (3 << 10)
+
+#define ANLPAR_X_FD 0x0020 /* local device supports 1000BASE-X FD */
+#define ANLPAR_X_HD 0x0040 /* local device supports 1000BASE-X HD */
+#define ANLPAR_X_PAUSE_MASK (3 << 7)
+#define ANLPAR_X_PAUSE_NONE (0 << 7)
+#define ANLPAR_X_PAUSE_SYM (1 << 7)
+#define ANLPAR_X_PAUSE_ASYM (2 << 7)
+#define ANLPAR_X_PAUSE_TOWARDS (3 << 7)
+
+#define MII_ANER 0x06 /* Autonegotiation expansion (ro) */
+ /* section 28.2.4.1 and 37.2.6.1 */
+#define ANER_MLF 0x0010 /* multiple link detection fault */
+#define ANER_LPNP 0x0008 /* link parter next page-able */
+#define ANER_NP 0x0004 /* next page-able */
+#define ANER_PAGE_RX 0x0002 /* Page received */
+#define ANER_LPAN 0x0001 /* link parter autoneg-able */
+
+#define MII_ANNP 0x07 /* Autonegotiation next page */
+ /* section 28.2.4.1 and 37.2.6.1 */
+
+#define MII_ANLPRNP 0x08 /* Autonegotiation link partner rx next page */
+ /* section 32.5.1 and 37.2.6.1 */
+
+ /* This is also the 1000baseT control register */
+#define MII_100T2CR 0x09 /* 100base-T2 control register */
+#define GTCR_TEST_MASK 0xe000 /* see 802.3ab ss. 40.6.1.1.2 */
+#define GTCR_MAN_MS 0x1000 /* enable manual master/slave control */
+#define GTCR_ADV_MS 0x0800 /* 1 = adv. master, 0 = adv. slave */
+#define GTCR_PORT_TYPE 0x0400 /* 1 = DCE, 0 = DTE (NIC) */
+#define GTCR_ADV_1000TFDX 0x0200 /* adv. 1000baseT FDX */
+#define GTCR_ADV_1000THDX 0x0100 /* adv. 1000baseT HDX */
+
+ /* This is also the 1000baseT status register */
+#define MII_100T2SR 0x0a /* 100base-T2 status register */
+#define GTSR_MAN_MS_FLT 0x8000 /* master/slave config fault */
+#define GTSR_MS_RES 0x4000 /* result: 1 = master, 0 = slave */
+#define GTSR_LRS 0x2000 /* local rx status, 1 = ok */
+#define GTSR_RRS 0x1000 /* remote rx status, 1 = ok */
+#define GTSR_LP_1000TFDX 0x0800 /* link partner 1000baseT FDX capable */
+#define GTSR_LP_1000THDX 0x0400 /* link partner 1000baseT HDX capable */
+#define GTSR_LP_ASM_DIR 0x0200 /* link partner asym. pause dir. capable */
+#define GTSR_IDLE_ERR 0x00ff /* IDLE error count */
+
+#define MII_PSECR 0x0b /* PSE control register */
+#define PSECR_PACTLMASK 0x000c /* pair control mask */
+#define PSECR_PSEENMASK 0x0003 /* PSE enable mask */
+#define PSECR_PINOUTB 0x0008 /* PSE pinout Alternative B */
+#define PSECR_PINOUTA 0x0004 /* PSE pinout Alternative A */
+#define PSECR_FOPOWTST 0x0002 /* Force Power Test Mode */
+#define PSECR_PSEEN 0x0001 /* PSE Enabled */
+#define PSECR_PSEDIS 0x0000 /* PSE Disabled */
+
+#define MII_PSESR 0x0c /* PSE status register */
+#define PSESR_PWRDENIED 0x1000 /* Power Denied */
+#define PSESR_VALSIG 0x0800 /* Valid PD signature detected */
+#define PSESR_INVALSIG 0x0400 /* Invalid PD signature detected */
+#define PSESR_SHORTCIRC 0x0200 /* Short circuit condition detected */
+#define PSESR_OVERLOAD 0x0100 /* Overload condition detected */
+#define PSESR_MPSABSENT 0x0080 /* MPS absent condition detected */
+#define PSESR_PDCLMASK 0x0070 /* PD Class mask */
+#define PSESR_STATMASK 0x000e /* PSE Status mask */
+#define PSESR_PAIRCTABL 0x0001 /* PAIR Control Ability */
+#define PSESR_PDCL_4 (4 << 4) /* Class 4 */
+#define PSESR_PDCL_3 (3 << 4) /* Class 3 */
+#define PSESR_PDCL_2 (2 << 4) /* Class 2 */
+#define PSESR_PDCL_1 (1 << 4) /* Class 1 */
+#define PSESR_PDCL_0 (0 << 4) /* Class 0 */
+
+#define MII_MMDACR 0x0d /* MMD access control register */
+#define MMDACR_FUNCMASK 0xc000 /* function */
+#define MMDACR_DADDRMASK 0x001f /* device address */
+#define MMDACR_FN_ADDRESS (0 << 14) /* address */
+#define MMDACR_FN_DATANPI (1 << 14) /* data, no post increment */
+#define MMDACR_FN_DATAPIRW (2 << 14) /* data, post increment on r/w */
+#define MMDACR_FN_DATAPIW (3 << 14) /* data, post increment on wr only */
+
+#define MII_MMDAADR 0x0e /* MMD access address data register */
+
+#define MII_EXTSR 0x0f /* Extended status register */
+#define EXTSR_1000XFDX 0x8000 /* 1000X full-duplex capable */
+#define EXTSR_1000XHDX 0x4000 /* 1000X half-duplex capable */
+#define EXTSR_1000TFDX 0x2000 /* 1000T full-duplex capable */
+#define EXTSR_1000THDX 0x1000 /* 1000T half-duplex capable */
+
+#define EXTSR_MEDIAMASK (EXTSR_1000XFDX|EXTSR_1000XHDX| \
+ EXTSR_1000TFDX|EXTSR_1000THDX)
+
+#endif /* _DEV_MII_MII_H_ */
diff --git a/usr/src/contrib/bhyve/dev/nvme/nvme.h b/usr/src/contrib/bhyve/dev/nvme/nvme.h
new file mode 100644
index 0000000000..c7f6496426
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/nvme/nvme.h
@@ -0,0 +1,1511 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 2012-2013 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef __NVME_H__
+#define __NVME_H__
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#endif
+
+#include <sys/param.h>
+#include <sys/endian.h>
+
+#define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command)
+#define NVME_RESET_CONTROLLER _IO('n', 1)
+
+#define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test)
+#define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test)
+
+/*
+ * Macros to deal with NVME revisions, as defined VS register
+ */
+#define NVME_REV(x, y) (((x) << 16) | ((y) << 8))
+#define NVME_MAJOR(r) (((r) >> 16) & 0xffff)
+#define NVME_MINOR(r) (((r) >> 8) & 0xff)
+
+/*
+ * Use to mark a command to apply to all namespaces, or to retrieve global
+ * log pages.
+ */
+#define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF)
+
+/* Cap nvme to 1MB transfers driver explodes with larger sizes */
+#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
+
+/* Register field definitions */
+#define NVME_CAP_LO_REG_MQES_SHIFT (0)
+#define NVME_CAP_LO_REG_MQES_MASK (0xFFFF)
+#define NVME_CAP_LO_REG_CQR_SHIFT (16)
+#define NVME_CAP_LO_REG_CQR_MASK (0x1)
+#define NVME_CAP_LO_REG_AMS_SHIFT (17)
+#define NVME_CAP_LO_REG_AMS_MASK (0x3)
+#define NVME_CAP_LO_REG_TO_SHIFT (24)
+#define NVME_CAP_LO_REG_TO_MASK (0xFF)
+
+#define NVME_CAP_HI_REG_DSTRD_SHIFT (0)
+#define NVME_CAP_HI_REG_DSTRD_MASK (0xF)
+#define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5)
+#define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1)
+#define NVME_CAP_HI_REG_MPSMIN_SHIFT (16)
+#define NVME_CAP_HI_REG_MPSMIN_MASK (0xF)
+#define NVME_CAP_HI_REG_MPSMAX_SHIFT (20)
+#define NVME_CAP_HI_REG_MPSMAX_MASK (0xF)
+
+#define NVME_CC_REG_EN_SHIFT (0)
+#define NVME_CC_REG_EN_MASK (0x1)
+#define NVME_CC_REG_CSS_SHIFT (4)
+#define NVME_CC_REG_CSS_MASK (0x7)
+#define NVME_CC_REG_MPS_SHIFT (7)
+#define NVME_CC_REG_MPS_MASK (0xF)
+#define NVME_CC_REG_AMS_SHIFT (11)
+#define NVME_CC_REG_AMS_MASK (0x7)
+#define NVME_CC_REG_SHN_SHIFT (14)
+#define NVME_CC_REG_SHN_MASK (0x3)
+#define NVME_CC_REG_IOSQES_SHIFT (16)
+#define NVME_CC_REG_IOSQES_MASK (0xF)
+#define NVME_CC_REG_IOCQES_SHIFT (20)
+#define NVME_CC_REG_IOCQES_MASK (0xF)
+
+#define NVME_CSTS_REG_RDY_SHIFT (0)
+#define NVME_CSTS_REG_RDY_MASK (0x1)
+#define NVME_CSTS_REG_CFS_SHIFT (1)
+#define NVME_CSTS_REG_CFS_MASK (0x1)
+#define NVME_CSTS_REG_SHST_SHIFT (2)
+#define NVME_CSTS_REG_SHST_MASK (0x3)
+
+#define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK)
+
+#define NVME_AQA_REG_ASQS_SHIFT (0)
+#define NVME_AQA_REG_ASQS_MASK (0xFFF)
+#define NVME_AQA_REG_ACQS_SHIFT (16)
+#define NVME_AQA_REG_ACQS_MASK (0xFFF)
+
+/* Command field definitions */
+
+#define NVME_CMD_FUSE_SHIFT (8)
+#define NVME_CMD_FUSE_MASK (0x3)
+
+#define NVME_STATUS_P_SHIFT (0)
+#define NVME_STATUS_P_MASK (0x1)
+#define NVME_STATUS_SC_SHIFT (1)
+#define NVME_STATUS_SC_MASK (0xFF)
+#define NVME_STATUS_SCT_SHIFT (9)
+#define NVME_STATUS_SCT_MASK (0x7)
+#define NVME_STATUS_M_SHIFT (14)
+#define NVME_STATUS_M_MASK (0x1)
+#define NVME_STATUS_DNR_SHIFT (15)
+#define NVME_STATUS_DNR_MASK (0x1)
+
+#define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK)
+#define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)
+#define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK)
+#define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK)
+#define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK)
+
+#define NVME_PWR_ST_MPS_SHIFT (0)
+#define NVME_PWR_ST_MPS_MASK (0x1)
+#define NVME_PWR_ST_NOPS_SHIFT (1)
+#define NVME_PWR_ST_NOPS_MASK (0x1)
+#define NVME_PWR_ST_RRT_SHIFT (0)
+#define NVME_PWR_ST_RRT_MASK (0x1F)
+#define NVME_PWR_ST_RRL_SHIFT (0)
+#define NVME_PWR_ST_RRL_MASK (0x1F)
+#define NVME_PWR_ST_RWT_SHIFT (0)
+#define NVME_PWR_ST_RWT_MASK (0x1F)
+#define NVME_PWR_ST_RWL_SHIFT (0)
+#define NVME_PWR_ST_RWL_MASK (0x1F)
+#define NVME_PWR_ST_IPS_SHIFT (6)
+#define NVME_PWR_ST_IPS_MASK (0x3)
+#define NVME_PWR_ST_APW_SHIFT (0)
+#define NVME_PWR_ST_APW_MASK (0x7)
+#define NVME_PWR_ST_APS_SHIFT (6)
+#define NVME_PWR_ST_APS_MASK (0x3)
+
+/** Controller Multi-path I/O and Namespace Sharing Capabilities */
+/* More then one port */
+#define NVME_CTRLR_DATA_MIC_MPORTS_SHIFT (0)
+#define NVME_CTRLR_DATA_MIC_MPORTS_MASK (0x1)
+/* More then one controller */
+#define NVME_CTRLR_DATA_MIC_MCTRLRS_SHIFT (1)
+#define NVME_CTRLR_DATA_MIC_MCTRLRS_MASK (0x1)
+/* SR-IOV Virtual Function */
+#define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT (2)
+#define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK (0x1)
+
+/** OACS - optional admin command support */
+/* supports security send/receive commands */
+#define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT (0)
+#define NVME_CTRLR_DATA_OACS_SECURITY_MASK (0x1)
+/* supports format nvm command */
+#define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT (1)
+#define NVME_CTRLR_DATA_OACS_FORMAT_MASK (0x1)
+/* supports firmware activate/download commands */
+#define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT (2)
+#define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK (0x1)
+/* supports namespace management commands */
+#define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT (3)
+#define NVME_CTRLR_DATA_OACS_NSMGMT_MASK (0x1)
+/* supports Device Self-test command */
+#define NVME_CTRLR_DATA_OACS_SELFTEST_SHIFT (4)
+#define NVME_CTRLR_DATA_OACS_SELFTEST_MASK (0x1)
+/* supports Directives */
+#define NVME_CTRLR_DATA_OACS_DIRECTIVES_SHIFT (5)
+#define NVME_CTRLR_DATA_OACS_DIRECTIVES_MASK (0x1)
+/* supports NVMe-MI Send/Receive */
+#define NVME_CTRLR_DATA_OACS_NVMEMI_SHIFT (6)
+#define NVME_CTRLR_DATA_OACS_NVMEMI_MASK (0x1)
+/* supports Virtualization Management */
+#define NVME_CTRLR_DATA_OACS_VM_SHIFT (7)
+#define NVME_CTRLR_DATA_OACS_VM_MASK (0x1)
+/* supports Doorbell Buffer Config */
+#define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT (8)
+#define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK (0x1)
+
+/** firmware updates */
+/* first slot is read-only */
+#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT (0)
+#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK (0x1)
+/* number of firmware slots */
+#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1)
+#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7)
+
+/** log page attributes */
+/* per namespace smart/health log page */
+#define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT (0)
+#define NVME_CTRLR_DATA_LPA_NS_SMART_MASK (0x1)
+
+/** AVSCC - admin vendor specific command configuration */
+/* admin vendor specific commands use spec format */
+#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT (0)
+#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK (0x1)
+
+/** Autonomous Power State Transition Attributes */
+/* Autonomous Power State Transitions supported */
+#define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0)
+#define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1)
+
+/** submission queue entry size */
+#define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0)
+#define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF)
+#define NVME_CTRLR_DATA_SQES_MAX_SHIFT (4)
+#define NVME_CTRLR_DATA_SQES_MAX_MASK (0xF)
+
+/** completion queue entry size */
+#define NVME_CTRLR_DATA_CQES_MIN_SHIFT (0)
+#define NVME_CTRLR_DATA_CQES_MIN_MASK (0xF)
+#define NVME_CTRLR_DATA_CQES_MAX_SHIFT (4)
+#define NVME_CTRLR_DATA_CQES_MAX_MASK (0xF)
+
+/** optional nvm command support */
+#define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT (0)
+#define NVME_CTRLR_DATA_ONCS_COMPARE_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT (1)
+#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_DSM_SHIFT (2)
+#define NVME_CTRLR_DATA_ONCS_DSM_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_WRZERO_SHIFT (3)
+#define NVME_CTRLR_DATA_ONCS_WRZERO_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_SAVEFEAT_SHIFT (4)
+#define NVME_CTRLR_DATA_ONCS_SAVEFEAT_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_RESERV_SHIFT (5)
+#define NVME_CTRLR_DATA_ONCS_RESERV_MASK (0x1)
+#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT (6)
+#define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK (0x1)
+
+/** Fused Operation Support */
+#define NVME_CTRLR_DATA_FUSES_CNW_SHIFT (0)
+#define NVME_CTRLR_DATA_FUSES_CNW_MASK (0x1)
+
+/** Format NVM Attributes */
+#define NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT (0)
+#define NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK (0x1)
+#define NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT (1)
+#define NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK (0x1)
+#define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT (2)
+#define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK (0x1)
+
+/** volatile write cache */
+#define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0)
+#define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1)
+
+/** namespace features */
+/* thin provisioning */
+#define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT (0)
+#define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK (0x1)
+/* NAWUN, NAWUPF, and NACWU fields are valid */
+#define NVME_NS_DATA_NSFEAT_NA_FIELDS_SHIFT (1)
+#define NVME_NS_DATA_NSFEAT_NA_FIELDS_MASK (0x1)
+/* Deallocated or Unwritten Logical Block errors supported */
+#define NVME_NS_DATA_NSFEAT_DEALLOC_SHIFT (2)
+#define NVME_NS_DATA_NSFEAT_DEALLOC_MASK (0x1)
+/* NGUID and EUI64 fields are not reusable */
+#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT (3)
+#define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK (0x1)
+
+/** formatted lba size */
+#define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0)
+#define NVME_NS_DATA_FLBAS_FORMAT_MASK (0xF)
+#define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT (4)
+#define NVME_NS_DATA_FLBAS_EXTENDED_MASK (0x1)
+
+/** metadata capabilities */
+/* metadata can be transferred as part of data prp list */
+#define NVME_NS_DATA_MC_EXTENDED_SHIFT (0)
+#define NVME_NS_DATA_MC_EXTENDED_MASK (0x1)
+/* metadata can be transferred with separate metadata pointer */
+#define NVME_NS_DATA_MC_POINTER_SHIFT (1)
+#define NVME_NS_DATA_MC_POINTER_MASK (0x1)
+
+/** end-to-end data protection capabilities */
+/* protection information type 1 */
+#define NVME_NS_DATA_DPC_PIT1_SHIFT (0)
+#define NVME_NS_DATA_DPC_PIT1_MASK (0x1)
+/* protection information type 2 */
+#define NVME_NS_DATA_DPC_PIT2_SHIFT (1)
+#define NVME_NS_DATA_DPC_PIT2_MASK (0x1)
+/* protection information type 3 */
+#define NVME_NS_DATA_DPC_PIT3_SHIFT (2)
+#define NVME_NS_DATA_DPC_PIT3_MASK (0x1)
+/* first eight bytes of metadata */
+#define NVME_NS_DATA_DPC_MD_START_SHIFT (3)
+#define NVME_NS_DATA_DPC_MD_START_MASK (0x1)
+/* last eight bytes of metadata */
+#define NVME_NS_DATA_DPC_MD_END_SHIFT (4)
+#define NVME_NS_DATA_DPC_MD_END_MASK (0x1)
+
+/** end-to-end data protection type settings */
+/* protection information type */
+#define NVME_NS_DATA_DPS_PIT_SHIFT (0)
+#define NVME_NS_DATA_DPS_PIT_MASK (0x7)
+/* 1 == protection info transferred at start of metadata */
+/* 0 == protection info transferred at end of metadata */
+#define NVME_NS_DATA_DPS_MD_START_SHIFT (3)
+#define NVME_NS_DATA_DPS_MD_START_MASK (0x1)
+
+/** Namespace Multi-path I/O and Namespace Sharing Capabilities */
+/* the namespace may be attached to two or more controllers */
+#define NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT (0)
+#define NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK (0x1)
+
+/** Reservation Capabilities */
+/* Persist Through Power Loss */
+#define NVME_NS_DATA_RESCAP_PTPL_SHIFT (0)
+#define NVME_NS_DATA_RESCAP_PTPL_MASK (0x1)
+/* supports the Write Exclusive */
+#define NVME_NS_DATA_RESCAP_WR_EX_SHIFT (1)
+#define NVME_NS_DATA_RESCAP_WR_EX_MASK (0x1)
+/* supports the Exclusive Access */
+#define NVME_NS_DATA_RESCAP_EX_AC_SHIFT (2)
+#define NVME_NS_DATA_RESCAP_EX_AC_MASK (0x1)
+/* supports the Write Exclusive – Registrants Only */
+#define NVME_NS_DATA_RESCAP_WR_EX_RO_SHIFT (3)
+#define NVME_NS_DATA_RESCAP_WR_EX_RO_MASK (0x1)
+/* supports the Exclusive Access - Registrants Only */
+#define NVME_NS_DATA_RESCAP_EX_AC_RO_SHIFT (4)
+#define NVME_NS_DATA_RESCAP_EX_AC_RO_MASK (0x1)
+/* supports the Write Exclusive – All Registrants */
+#define NVME_NS_DATA_RESCAP_WR_EX_AR_SHIFT (5)
+#define NVME_NS_DATA_RESCAP_WR_EX_AR_MASK (0x1)
+/* supports the Exclusive Access - All Registrants */
+#define NVME_NS_DATA_RESCAP_EX_AC_AR_SHIFT (6)
+#define NVME_NS_DATA_RESCAP_EX_AC_AR_MASK (0x1)
+/* Ignore Existing Key is used as defined in revision 1.3 or later */
+#define NVME_NS_DATA_RESCAP_IEKEY13_SHIFT (7)
+#define NVME_NS_DATA_RESCAP_IEKEY13_MASK (0x1)
+
+/** Format Progress Indicator */
+/* percentage of the Format NVM command that remains to be completed */
+#define NVME_NS_DATA_FPI_PERC_SHIFT (0)
+#define NVME_NS_DATA_FPI_PERC_MASK (0x7f)
+/* namespace supports the Format Progress Indicator */
+#define NVME_NS_DATA_FPI_SUPP_SHIFT (7)
+#define NVME_NS_DATA_FPI_SUPP_MASK (0x1)
+
+/** lba format support */
+/* metadata size */
+#define NVME_NS_DATA_LBAF_MS_SHIFT (0)
+#define NVME_NS_DATA_LBAF_MS_MASK (0xFFFF)
+/* lba data size */
+#define NVME_NS_DATA_LBAF_LBADS_SHIFT (16)
+#define NVME_NS_DATA_LBAF_LBADS_MASK (0xFF)
+/* relative performance */
+#define NVME_NS_DATA_LBAF_RP_SHIFT (24)
+#define NVME_NS_DATA_LBAF_RP_MASK (0x3)
+
+enum nvme_critical_warning_state {
+ NVME_CRIT_WARN_ST_AVAILABLE_SPARE = 0x1,
+ NVME_CRIT_WARN_ST_TEMPERATURE = 0x2,
+ NVME_CRIT_WARN_ST_DEVICE_RELIABILITY = 0x4,
+ NVME_CRIT_WARN_ST_READ_ONLY = 0x8,
+ NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10,
+};
+#define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0)
+
+/* slot for current FW */
+#define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0)
+#define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7)
+
+/* CC register SHN field values */
+enum shn_value {
+ NVME_SHN_NORMAL = 0x1,
+ NVME_SHN_ABRUPT = 0x2,
+};
+
+/* CSTS register SHST field values */
+enum shst_value {
+ NVME_SHST_NORMAL = 0x0,
+ NVME_SHST_OCCURRING = 0x1,
+ NVME_SHST_COMPLETE = 0x2,
+};
+
+struct nvme_registers
+{
+ /** controller capabilities */
+ uint32_t cap_lo;
+ uint32_t cap_hi;
+
+ uint32_t vs; /* version */
+ uint32_t intms; /* interrupt mask set */
+ uint32_t intmc; /* interrupt mask clear */
+
+ /** controller configuration */
+ uint32_t cc;
+
+ uint32_t reserved1;
+
+ /** controller status */
+ uint32_t csts;
+
+ uint32_t reserved2;
+
+ /** admin queue attributes */
+ uint32_t aqa;
+
+ uint64_t asq; /* admin submission queue base addr */
+ uint64_t acq; /* admin completion queue base addr */
+ uint32_t reserved3[0x3f2];
+
+ struct {
+ uint32_t sq_tdbl; /* submission queue tail doorbell */
+ uint32_t cq_hdbl; /* completion queue head doorbell */
+ } doorbell[1] __packed;
+} __packed;
+
+_Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers");
+
+struct nvme_command
+{
+ /* dword 0 */
+ uint8_t opc; /* opcode */
+ uint8_t fuse; /* fused operation */
+ uint16_t cid; /* command identifier */
+
+ /* dword 1 */
+ uint32_t nsid; /* namespace identifier */
+
+ /* dword 2-3 */
+ uint32_t rsvd2;
+ uint32_t rsvd3;
+
+ /* dword 4-5 */
+ uint64_t mptr; /* metadata pointer */
+
+ /* dword 6-7 */
+ uint64_t prp1; /* prp entry 1 */
+
+ /* dword 8-9 */
+ uint64_t prp2; /* prp entry 2 */
+
+ /* dword 10-15 */
+ uint32_t cdw10; /* command-specific */
+ uint32_t cdw11; /* command-specific */
+ uint32_t cdw12; /* command-specific */
+ uint32_t cdw13; /* command-specific */
+ uint32_t cdw14; /* command-specific */
+ uint32_t cdw15; /* command-specific */
+} __packed;
+
+_Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command");
+
+struct nvme_completion {
+
+ /* dword 0 */
+ uint32_t cdw0; /* command-specific */
+
+ /* dword 1 */
+ uint32_t rsvd1;
+
+ /* dword 2 */
+ uint16_t sqhd; /* submission queue head pointer */
+ uint16_t sqid; /* submission queue identifier */
+
+ /* dword 3 */
+ uint16_t cid; /* command identifier */
+ uint16_t status;
+} __packed;
+
+_Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion");
+
+struct nvme_dsm_range {
+ uint32_t attributes;
+ uint32_t length;
+ uint64_t starting_lba;
+} __packed;
+
+/* Largest DSM Trim that can be done */
+#define NVME_MAX_DSM_TRIM 4096
+
+_Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_ranage");
+
+/* status code types */
+enum nvme_status_code_type {
+ NVME_SCT_GENERIC = 0x0,
+ NVME_SCT_COMMAND_SPECIFIC = 0x1,
+ NVME_SCT_MEDIA_ERROR = 0x2,
+ /* 0x3-0x6 - reserved */
+ NVME_SCT_VENDOR_SPECIFIC = 0x7,
+};
+
+/* generic command status codes */
+enum nvme_generic_command_status_code {
+ NVME_SC_SUCCESS = 0x00,
+ NVME_SC_INVALID_OPCODE = 0x01,
+ NVME_SC_INVALID_FIELD = 0x02,
+ NVME_SC_COMMAND_ID_CONFLICT = 0x03,
+ NVME_SC_DATA_TRANSFER_ERROR = 0x04,
+ NVME_SC_ABORTED_POWER_LOSS = 0x05,
+ NVME_SC_INTERNAL_DEVICE_ERROR = 0x06,
+ NVME_SC_ABORTED_BY_REQUEST = 0x07,
+ NVME_SC_ABORTED_SQ_DELETION = 0x08,
+ NVME_SC_ABORTED_FAILED_FUSED = 0x09,
+ NVME_SC_ABORTED_MISSING_FUSED = 0x0a,
+ NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b,
+ NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c,
+ NVME_SC_INVALID_SGL_SEGMENT_DESCR = 0x0d,
+ NVME_SC_INVALID_NUMBER_OF_SGL_DESCR = 0x0e,
+ NVME_SC_DATA_SGL_LENGTH_INVALID = 0x0f,
+ NVME_SC_METADATA_SGL_LENGTH_INVALID = 0x10,
+ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID = 0x11,
+ NVME_SC_INVALID_USE_OF_CMB = 0x12,
+ NVME_SC_PRP_OFFET_INVALID = 0x13,
+ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED = 0x14,
+ NVME_SC_OPERATION_DENIED = 0x15,
+ NVME_SC_SGL_OFFSET_INVALID = 0x16,
+ /* 0x17 - reserved */
+ NVME_SC_HOST_ID_INCONSISTENT_FORMAT = 0x18,
+ NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED = 0x19,
+ NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID = 0x1a,
+ NVME_SC_ABORTED_DUE_TO_PREEMPT = 0x1b,
+ NVME_SC_SANITIZE_FAILED = 0x1c,
+ NVME_SC_SANITIZE_IN_PROGRESS = 0x1d,
+ NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID = 0x1e,
+ NVME_SC_NOT_SUPPORTED_IN_CMB = 0x1f,
+
+ NVME_SC_LBA_OUT_OF_RANGE = 0x80,
+ NVME_SC_CAPACITY_EXCEEDED = 0x81,
+ NVME_SC_NAMESPACE_NOT_READY = 0x82,
+ NVME_SC_RESERVATION_CONFLICT = 0x83,
+ NVME_SC_FORMAT_IN_PROGRESS = 0x84,
+};
+
+/* command specific status codes */
+enum nvme_command_specific_status_code {
+ NVME_SC_COMPLETION_QUEUE_INVALID = 0x00,
+ NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01,
+ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED = 0x02,
+ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03,
+ /* 0x04 - reserved */
+ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
+ NVME_SC_INVALID_FIRMWARE_SLOT = 0x06,
+ NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07,
+ NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08,
+ NVME_SC_INVALID_LOG_PAGE = 0x09,
+ NVME_SC_INVALID_FORMAT = 0x0a,
+ NVME_SC_FIRMWARE_REQUIRES_RESET = 0x0b,
+ NVME_SC_INVALID_QUEUE_DELETION = 0x0c,
+ NVME_SC_FEATURE_NOT_SAVEABLE = 0x0d,
+ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x0e,
+ NVME_SC_FEATURE_NOT_NS_SPECIFIC = 0x0f,
+ NVME_SC_FW_ACT_REQUIRES_NVMS_RESET = 0x10,
+ NVME_SC_FW_ACT_REQUIRES_RESET = 0x11,
+ NVME_SC_FW_ACT_REQUIRES_TIME = 0x12,
+ NVME_SC_FW_ACT_PROHIBITED = 0x13,
+ NVME_SC_OVERLAPPING_RANGE = 0x14,
+ NVME_SC_NS_INSUFFICIENT_CAPACITY = 0x15,
+ NVME_SC_NS_ID_UNAVAILABLE = 0x16,
+ /* 0x17 - reserved */
+ NVME_SC_NS_ALREADY_ATTACHED = 0x18,
+ NVME_SC_NS_IS_PRIVATE = 0x19,
+ NVME_SC_NS_NOT_ATTACHED = 0x1a,
+ NVME_SC_THIN_PROV_NOT_SUPPORTED = 0x1b,
+ NVME_SC_CTRLR_LIST_INVALID = 0x1c,
+ NVME_SC_SELT_TEST_IN_PROGRESS = 0x1d,
+ NVME_SC_BOOT_PART_WRITE_PROHIB = 0x1e,
+ NVME_SC_INVALID_CTRLR_ID = 0x1f,
+ NVME_SC_INVALID_SEC_CTRLR_STATE = 0x20,
+ NVME_SC_INVALID_NUM_OF_CTRLR_RESRC = 0x21,
+ NVME_SC_INVALID_RESOURCE_ID = 0x22,
+
+ NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
+ NVME_SC_INVALID_PROTECTION_INFO = 0x81,
+ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE = 0x82,
+};
+
+/* media error status codes */
+enum nvme_media_error_status_code {
+ NVME_SC_WRITE_FAULTS = 0x80,
+ NVME_SC_UNRECOVERED_READ_ERROR = 0x81,
+ NVME_SC_GUARD_CHECK_ERROR = 0x82,
+ NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83,
+ NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84,
+ NVME_SC_COMPARE_FAILURE = 0x85,
+ NVME_SC_ACCESS_DENIED = 0x86,
+ NVME_SC_DEALLOCATED_OR_UNWRITTEN = 0x87,
+};
+
+/* admin opcodes */
+enum nvme_admin_opcode {
+ NVME_OPC_DELETE_IO_SQ = 0x00,
+ NVME_OPC_CREATE_IO_SQ = 0x01,
+ NVME_OPC_GET_LOG_PAGE = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_DELETE_IO_CQ = 0x04,
+ NVME_OPC_CREATE_IO_CQ = 0x05,
+ NVME_OPC_IDENTIFY = 0x06,
+ /* 0x07 - reserved */
+ NVME_OPC_ABORT = 0x08,
+ NVME_OPC_SET_FEATURES = 0x09,
+ NVME_OPC_GET_FEATURES = 0x0a,
+ /* 0x0b - reserved */
+ NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c,
+ NVME_OPC_NAMESPACE_MANAGEMENT = 0x0d,
+ /* 0x0e-0x0f - reserved */
+ NVME_OPC_FIRMWARE_ACTIVATE = 0x10,
+ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11,
+ NVME_OPC_DEVICE_SELF_TEST = 0x14,
+ NVME_OPC_NAMESPACE_ATTACHMENT = 0x15,
+ NVME_OPC_KEEP_ALIVE = 0x18,
+ NVME_OPC_DIRECTIVE_SEND = 0x19,
+ NVME_OPC_DIRECTIVE_RECEIVE = 0x1a,
+ NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c,
+ NVME_OPC_NVME_MI_SEND = 0x1d,
+ NVME_OPC_NVME_MI_RECEIVE = 0x1e,
+ NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c,
+
+ NVME_OPC_FORMAT_NVM = 0x80,
+ NVME_OPC_SECURITY_SEND = 0x81,
+ NVME_OPC_SECURITY_RECEIVE = 0x82,
+ NVME_OPC_SANITIZE = 0x84,
+};
+
+/* nvme nvm opcodes */
+enum nvme_nvm_opcode {
+ NVME_OPC_FLUSH = 0x00,
+ NVME_OPC_WRITE = 0x01,
+ NVME_OPC_READ = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
+ NVME_OPC_COMPARE = 0x05,
+ /* 0x06 - reserved */
+ NVME_OPC_WRITE_ZEROES = 0x08,
+ /* 0x07 - reserved */
+ NVME_OPC_DATASET_MANAGEMENT = 0x09,
+ /* 0x0a-0x0c - reserved */
+ NVME_OPC_RESERVATION_REGISTER = 0x0d,
+ NVME_OPC_RESERVATION_REPORT = 0x0e,
+ /* 0x0f-0x10 - reserved */
+ NVME_OPC_RESERVATION_ACQUIRE = 0x11,
+ /* 0x12-0x14 - reserved */
+ NVME_OPC_RESERVATION_RELEASE = 0x15,
+};
+
+enum nvme_feature {
+ /* 0x00 - reserved */
+ NVME_FEAT_ARBITRATION = 0x01,
+ NVME_FEAT_POWER_MANAGEMENT = 0x02,
+ NVME_FEAT_LBA_RANGE_TYPE = 0x03,
+ NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04,
+ NVME_FEAT_ERROR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06,
+ NVME_FEAT_NUMBER_OF_QUEUES = 0x07,
+ NVME_FEAT_INTERRUPT_COALESCING = 0x08,
+ NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
+ NVME_FEAT_WRITE_ATOMICITY = 0x0A,
+ NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B,
+ NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
+ NVME_FEAT_HOST_MEMORY_BUFFER = 0x0D,
+ NVME_FEAT_TIMESTAMP = 0x0E,
+ NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F,
+ NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT = 0x10,
+ NVME_FEAT_NON_OP_POWER_STATE_CONFIG = 0x11,
+ /* 0x12-0x77 - reserved */
+ /* 0x78-0x7f - NVMe Management Interface */
+ NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
+ /* 0x81-0xBF - command set specific (reserved) */
+ /* 0xC0-0xFF - vendor specific */
+};
+
+enum nvme_dsm_attribute {
+ NVME_DSM_ATTR_INTEGRAL_READ = 0x1,
+ NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2,
+ NVME_DSM_ATTR_DEALLOCATE = 0x4,
+};
+
+enum nvme_activate_action {
+ NVME_AA_REPLACE_NO_ACTIVATE = 0x0,
+ NVME_AA_REPLACE_ACTIVATE = 0x1,
+ NVME_AA_ACTIVATE = 0x2,
+};
+
+struct nvme_power_state {
+ /** Maximum Power */
+ uint16_t mp; /* Maximum Power */
+ uint8_t ps_rsvd1;
+ uint8_t mps_nops; /* Max Power Scale, Non-Operational State */
+
+ uint32_t enlat; /* Entry Latency */
+ uint32_t exlat; /* Exit Latency */
+
+ uint8_t rrt; /* Relative Read Throughput */
+ uint8_t rrl; /* Relative Read Latency */
+ uint8_t rwt; /* Relative Write Throughput */
+ uint8_t rwl; /* Relative Write Latency */
+
+ uint16_t idlp; /* Idle Power */
+ uint8_t ips; /* Idle Power Scale */
+ uint8_t ps_rsvd8;
+
+ uint16_t actp; /* Active Power */
+ uint8_t apw_aps; /* Active Power Workload, Active Power Scale */
+ uint8_t ps_rsvd10[9];
+} __packed;
+
+_Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state");
+
+#define NVME_SERIAL_NUMBER_LENGTH 20
+#define NVME_MODEL_NUMBER_LENGTH 40
+#define NVME_FIRMWARE_REVISION_LENGTH 8
+
+struct nvme_controller_data {
+
+ /* bytes 0-255: controller capabilities and features */
+
+ /** pci vendor id */
+ uint16_t vid;
+
+ /** pci subsystem vendor id */
+ uint16_t ssvid;
+
+ /** serial number */
+ uint8_t sn[NVME_SERIAL_NUMBER_LENGTH];
+
+ /** model number */
+ uint8_t mn[NVME_MODEL_NUMBER_LENGTH];
+
+ /** firmware revision */
+ uint8_t fr[NVME_FIRMWARE_REVISION_LENGTH];
+
+ /** recommended arbitration burst */
+ uint8_t rab;
+
+ /** ieee oui identifier */
+ uint8_t ieee[3];
+
+ /** multi-interface capabilities */
+ uint8_t mic;
+
+ /** maximum data transfer size */
+ uint8_t mdts;
+
+ /** Controller ID */
+ uint16_t ctrlr_id;
+
+ /** Version */
+ uint32_t ver;
+
+ /** RTD3 Resume Latency */
+ uint32_t rtd3r;
+
+ /** RTD3 Enter Latency */
+ uint32_t rtd3e;
+
+ /** Optional Asynchronous Events Supported */
+ uint32_t oaes; /* bitfield really */
+
+ /** Controller Attributes */
+ uint32_t ctratt; /* bitfield really */
+
+ uint8_t reserved1[12];
+
+ /** FRU Globally Unique Identifier */
+ uint8_t fguid[16];
+
+ uint8_t reserved2[128];
+
+ /* bytes 256-511: admin command set attributes */
+
+ /** optional admin command support */
+ uint16_t oacs;
+
+ /** abort command limit */
+ uint8_t acl;
+
+ /** asynchronous event request limit */
+ uint8_t aerl;
+
+ /** firmware updates */
+ uint8_t frmw;
+
+ /** log page attributes */
+ uint8_t lpa;
+
+ /** error log page entries */
+ uint8_t elpe;
+
+ /** number of power states supported */
+ uint8_t npss;
+
+ /** admin vendor specific command configuration */
+ uint8_t avscc;
+
+ /** Autonomous Power State Transition Attributes */
+ uint8_t apsta;
+
+ /** Warning Composite Temperature Threshold */
+ uint16_t wctemp;
+
+ /** Critical Composite Temperature Threshold */
+ uint16_t cctemp;
+
+ /** Maximum Time for Firmware Activation */
+ uint16_t mtfa;
+
+ /** Host Memory Buffer Preferred Size */
+ uint32_t hmpre;
+
+ /** Host Memory Buffer Minimum Size */
+ uint32_t hmmin;
+
+ /** Name space capabilities */
+ struct {
+ /* if nsmgmt, report tnvmcap and unvmcap */
+ uint8_t tnvmcap[16];
+ uint8_t unvmcap[16];
+ } __packed untncap;
+
+ /** Replay Protected Memory Block Support */
+ uint32_t rpmbs; /* Really a bitfield */
+
+ /** Extended Device Self-test Time */
+ uint16_t edstt;
+
+ /** Device Self-test Options */
+ uint8_t dsto; /* Really a bitfield */
+
+ /** Firmware Update Granularity */
+ uint8_t fwug;
+
+ /** Keep Alive Support */
+ uint16_t kas;
+
+ /** Host Controlled Thermal Management Attributes */
+ uint16_t hctma; /* Really a bitfield */
+
+ /** Minimum Thermal Management Temperature */
+ uint16_t mntmt;
+
+ /** Maximum Thermal Management Temperature */
+ uint16_t mxtmt;
+
+ /** Sanitize Capabilities */
+ uint32_t sanicap; /* Really a bitfield */
+
+ uint8_t reserved3[180];
+ /* bytes 512-703: nvm command set attributes */
+
+ /** submission queue entry size */
+ uint8_t sqes;
+
+ /** completion queue entry size */
+ uint8_t cqes;
+
+ /** Maximum Outstanding Commands */
+ uint16_t maxcmd;
+
+ /** number of namespaces */
+ uint32_t nn;
+
+ /** optional nvm command support */
+ uint16_t oncs;
+
+ /** fused operation support */
+ uint16_t fuses;
+
+ /** format nvm attributes */
+ uint8_t fna;
+
+ /** volatile write cache */
+ uint8_t vwc;
+
+ /** Atomic Write Unit Normal */
+ uint16_t awun;
+
+ /** Atomic Write Unit Power Fail */
+ uint16_t awupf;
+
+ /** NVM Vendor Specific Command Configuration */
+ uint8_t nvscc;
+ uint8_t reserved5;
+
+ /** Atomic Compare & Write Unit */
+ uint16_t acwu;
+ uint16_t reserved6;
+
+ /** SGL Support */
+ uint32_t sgls;
+
+ /* bytes 540-767: Reserved */
+ uint8_t reserved7[228];
+
+ /** NVM Subsystem NVMe Qualified Name */
+ uint8_t subnqn[256];
+
+ /* bytes 1024-1791: Reserved */
+ uint8_t reserved8[768];
+
+ /* bytes 1792-2047: NVMe over Fabrics specification */
+ uint8_t reserved9[256];
+
+ /* bytes 2048-3071: power state descriptors */
+ struct nvme_power_state power_state[32];
+
+ /* bytes 3072-4095: vendor specific */
+ uint8_t vs[1024];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data");
+
+struct nvme_namespace_data {
+
+ /** namespace size */
+ uint64_t nsze;
+
+ /** namespace capacity */
+ uint64_t ncap;
+
+ /** namespace utilization */
+ uint64_t nuse;
+
+ /** namespace features */
+ uint8_t nsfeat;
+
+ /** number of lba formats */
+ uint8_t nlbaf;
+
+ /** formatted lba size */
+ uint8_t flbas;
+
+ /** metadata capabilities */
+ uint8_t mc;
+
+ /** end-to-end data protection capabilities */
+ uint8_t dpc;
+
+ /** end-to-end data protection type settings */
+ uint8_t dps;
+
+ /** Namespace Multi-path I/O and Namespace Sharing Capabilities */
+ uint8_t nmic;
+
+ /** Reservation Capabilities */
+ uint8_t rescap;
+
+ /** Format Progress Indicator */
+ uint8_t fpi;
+
+ /** Deallocate Logical Block Features */
+ uint8_t dlfeat;
+
+ /** Namespace Atomic Write Unit Normal */
+ uint16_t nawun;
+
+ /** Namespace Atomic Write Unit Power Fail */
+ uint16_t nawupf;
+
+ /** Namespace Atomic Compare & Write Unit */
+ uint16_t nacwu;
+
+ /** Namespace Atomic Boundary Size Normal */
+ uint16_t nabsn;
+
+ /** Namespace Atomic Boundary Offset */
+ uint16_t nabo;
+
+ /** Namespace Atomic Boundary Size Power Fail */
+ uint16_t nabspf;
+
+ /** Namespace Optimal IO Boundary */
+ uint16_t noiob;
+
+ /** NVM Capacity */
+ uint8_t nvmcap[16];
+
+ /* bytes 64-103: Reserved */
+ uint8_t reserved5[40];
+
+ /** Namespace Globally Unique Identifier */
+ uint8_t nguid[16];
+
+ /** IEEE Extended Unique Identifier */
+ uint8_t eui64[8];
+
+ /** lba format support */
+ uint32_t lbaf[16];
+
+ uint8_t reserved6[192];
+
+ uint8_t vendor_specific[3712];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namepsace_data");
+
+enum nvme_log_page {
+
+ /* 0x00 - reserved */
+ NVME_LOG_ERROR = 0x01,
+ NVME_LOG_HEALTH_INFORMATION = 0x02,
+ NVME_LOG_FIRMWARE_SLOT = 0x03,
+ NVME_LOG_CHANGED_NAMESPACE = 0x04,
+ NVME_LOG_COMMAND_EFFECT = 0x05,
+ /* 0x06-0x7F - reserved */
+ /* 0x80-0xBF - I/O command set specific */
+ NVME_LOG_RES_NOTIFICATION = 0x80,
+ /* 0xC0-0xFF - vendor specific */
+
+ /*
+ * The following are Intel Specific log pages, but they seem
+ * to be widely implemented.
+ */
+ INTEL_LOG_READ_LAT_LOG = 0xc1,
+ INTEL_LOG_WRITE_LAT_LOG = 0xc2,
+ INTEL_LOG_TEMP_STATS = 0xc5,
+ INTEL_LOG_ADD_SMART = 0xca,
+ INTEL_LOG_DRIVE_MKT_NAME = 0xdd,
+
+ /*
+ * HGST log page, with lots ofs sub pages.
+ */
+ HGST_INFO_LOG = 0xc1,
+};
+
+struct nvme_error_information_entry {
+
+ uint64_t error_count;
+ uint16_t sqid;
+ uint16_t cid;
+ uint16_t status;
+ uint16_t error_location;
+ uint64_t lba;
+ uint32_t nsid;
+ uint8_t vendor_specific;
+ uint8_t reserved[35];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");
+
+struct nvme_health_information_page {
+
+ uint8_t critical_warning;
+ uint16_t temperature;
+ uint8_t available_spare;
+ uint8_t available_spare_threshold;
+ uint8_t percentage_used;
+
+ uint8_t reserved[26];
+
+ /*
+ * Note that the following are 128-bit values, but are
+ * defined as an array of 2 64-bit values.
+ */
+ /* Data Units Read is always in 512-byte units. */
+ uint64_t data_units_read[2];
+ /* Data Units Written is always in 512-byte units. */
+ uint64_t data_units_written[2];
+ /* For NVM command set, this includes Compare commands. */
+ uint64_t host_read_commands[2];
+ uint64_t host_write_commands[2];
+ /* Controller Busy Time is reported in minutes. */
+ uint64_t controller_busy_time[2];
+ uint64_t power_cycles[2];
+ uint64_t power_on_hours[2];
+ uint64_t unsafe_shutdowns[2];
+ uint64_t media_errors[2];
+ uint64_t num_error_info_log_entries[2];
+ uint32_t warning_temp_time;
+ uint32_t error_temp_time;
+ uint16_t temp_sensor[8];
+
+ uint8_t reserved2[296];
+} __packed __aligned(4);
+
+/* Currently sparse/smatch incorrectly packs this struct in some situations. */
+#ifndef __CHECKER__
+_Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");
+#endif
+
+struct nvme_firmware_page {
+
+ uint8_t afi;
+ uint8_t reserved[7];
+ uint64_t revision[7]; /* revisions for 7 slots */
+ uint8_t reserved2[448];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page");
+
+struct nvme_ns_list {
+ uint32_t ns[1024];
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list");
+
+struct intel_log_temp_stats
+{
+ uint64_t current;
+ uint64_t overtemp_flag_last;
+ uint64_t overtemp_flag_life;
+ uint64_t max_temp;
+ uint64_t min_temp;
+ uint64_t _rsvd[5];
+ uint64_t max_oper_temp;
+ uint64_t min_oper_temp;
+ uint64_t est_offset;
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");
+
+#define NVME_TEST_MAX_THREADS 128
+
+struct nvme_io_test {
+
+ enum nvme_nvm_opcode opc;
+ uint32_t size;
+ uint32_t time; /* in seconds */
+ uint32_t num_threads;
+ uint32_t flags;
+ uint64_t io_completed[NVME_TEST_MAX_THREADS];
+};
+
+enum nvme_io_test_flags {
+
+ /*
+ * Specifies whether dev_refthread/dev_relthread should be
+ * called during NVME_BIO_TEST. Ignored for other test
+ * types.
+ */
+ NVME_TEST_FLAG_REFTHREAD = 0x1,
+};
+
+struct nvme_pt_command {
+
+ /*
+ * cmd is used to specify a passthrough command to a controller or
+ * namespace.
+ *
+ * The following fields from cmd may be specified by the caller:
+ * * opc (opcode)
+ * * nsid (namespace id) - for admin commands only
+ * * cdw10-cdw15
+ *
+ * Remaining fields must be set to 0 by the caller.
+ */
+ struct nvme_command cmd;
+
+ /*
+ * cpl returns completion status for the passthrough command
+ * specified by cmd.
+ *
+ * The following fields will be filled out by the driver, for
+ * consumption by the caller:
+ * * cdw0
+ * * status (except for phase)
+ *
+ * Remaining fields will be set to 0 by the driver.
+ */
+ struct nvme_completion cpl;
+
+ /* buf is the data buffer associated with this passthrough command. */
+ void * buf;
+
+ /*
+ * len is the length of the data buffer associated with this
+ * passthrough command.
+ */
+ uint32_t len;
+
+ /*
+ * is_read = 1 if the passthrough command will read data into the
+ * supplied buffer from the controller.
+ *
+ * is_read = 0 if the passthrough command will write data from the
+ * supplied buffer to the controller.
+ */
+ uint32_t is_read;
+
+ /*
+ * driver_lock is used by the driver only. It must be set to 0
+ * by the caller.
+ */
+ struct mtx * driver_lock;
+};
+
+#define nvme_completion_is_error(cpl) \
+ (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
+
+void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
+
+#ifdef _KERNEL
+
+struct bio;
+
+struct nvme_namespace;
+struct nvme_controller;
+struct nvme_consumer;
+
+typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
+
+typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
+typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
+typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
+ uint32_t, void *, uint32_t);
+typedef void (*nvme_cons_fail_fn_t)(void *);
+
+enum nvme_namespace_flags {
+ NVME_NS_DEALLOCATE_SUPPORTED = 0x1,
+ NVME_NS_FLUSH_SUPPORTED = 0x2,
+};
+
+int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
+ struct nvme_pt_command *pt,
+ uint32_t nsid, int is_user_buffer,
+ int is_admin_cmd);
+
+/* Admin functions */
+void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
+ uint8_t log_page, uint32_t nsid,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/* NVM I/O functions */
+int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
+ uint8_t num_ranges, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
+ size_t len);
+
+/* Registration functions */
+struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn,
+ nvme_cons_ctrlr_fn_t ctrlr_fn,
+ nvme_cons_async_fn_t async_fn,
+ nvme_cons_fail_fn_t fail_fn);
+void nvme_unregister_consumer(struct nvme_consumer *consumer);
+
+/* Controller helper functions */
+device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
+const struct nvme_controller_data *
+ nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
+
+/* Namespace helper functions */
+uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
+const char * nvme_ns_get_serial_number(struct nvme_namespace *ns);
+const char * nvme_ns_get_model_number(struct nvme_namespace *ns);
+const struct nvme_namespace_data *
+ nvme_ns_get_data(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns);
+
+int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn);
+
+/*
+ * Command building helper functions -- shared with CAM
+ * These functions assume allocator zeros out cmd structure
+ * CAM's xpt_get_ccb and the request allocator for nvme both
+ * do zero'd allocations.
+ */
+static inline
+void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
+{
+
+ cmd->opc = NVME_OPC_FLUSH;
+ cmd->nsid = htole32(nsid);
+}
+
+static inline
+void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ cmd->opc = rwcmd;
+ cmd->nsid = htole32(nsid);
+ cmd->cdw10 = htole32(lba & 0xffffffffu);
+ cmd->cdw11 = htole32(lba >> 32);
+ cmd->cdw12 = htole32(count-1);
+}
+
+static inline
+void nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
+ uint32_t num_ranges)
+{
+ cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
+ cmd->nsid = htole32(nsid);
+ cmd->cdw10 = htole32(num_ranges - 1);
+ cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE);
+}
+
+extern int nvme_use_nvd;
+
+#endif /* _KERNEL */
+
+/* Endianess conversion functions for NVMe structs */
+static inline
+void nvme_completion_swapbytes(struct nvme_completion *s)
+{
+
+ s->cdw0 = le32toh(s->cdw0);
+ /* omit rsvd1 */
+ s->sqhd = le16toh(s->sqhd);
+ s->sqid = le16toh(s->sqid);
+ /* omit cid */
+ s->status = le16toh(s->status);
+}
+
+static inline
+void nvme_power_state_swapbytes(struct nvme_power_state *s)
+{
+
+ s->mp = le16toh(s->mp);
+ s->enlat = le32toh(s->enlat);
+ s->exlat = le32toh(s->exlat);
+ s->idlp = le16toh(s->idlp);
+ s->actp = le16toh(s->actp);
+}
+
+static inline
+void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
+{
+ int i;
+
+ s->vid = le16toh(s->vid);
+ s->ssvid = le16toh(s->ssvid);
+ s->ctrlr_id = le16toh(s->ctrlr_id);
+ s->ver = le32toh(s->ver);
+ s->rtd3r = le32toh(s->rtd3r);
+ s->rtd3e = le32toh(s->rtd3e);
+ s->oaes = le32toh(s->oaes);
+ s->ctratt = le32toh(s->ctratt);
+ s->oacs = le16toh(s->oacs);
+ s->wctemp = le16toh(s->wctemp);
+ s->cctemp = le16toh(s->cctemp);
+ s->mtfa = le16toh(s->mtfa);
+ s->hmpre = le32toh(s->hmpre);
+ s->hmmin = le32toh(s->hmmin);
+ s->rpmbs = le32toh(s->rpmbs);
+ s->edstt = le16toh(s->edstt);
+ s->kas = le16toh(s->kas);
+ s->hctma = le16toh(s->hctma);
+ s->mntmt = le16toh(s->mntmt);
+ s->mxtmt = le16toh(s->mxtmt);
+ s->sanicap = le32toh(s->sanicap);
+ s->maxcmd = le16toh(s->maxcmd);
+ s->nn = le32toh(s->nn);
+ s->oncs = le16toh(s->oncs);
+ s->fuses = le16toh(s->fuses);
+ s->awun = le16toh(s->awun);
+ s->awupf = le16toh(s->awupf);
+ s->acwu = le16toh(s->acwu);
+ s->sgls = le32toh(s->sgls);
+ for (i = 0; i < 32; i++)
+ nvme_power_state_swapbytes(&s->power_state[i]);
+}
+
+static inline
+void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s)
+{
+ int i;
+
+ s->nsze = le64toh(s->nsze);
+ s->ncap = le64toh(s->ncap);
+ s->nuse = le64toh(s->nuse);
+ s->nawun = le16toh(s->nawun);
+ s->nawupf = le16toh(s->nawupf);
+ s->nacwu = le16toh(s->nacwu);
+ s->nabsn = le16toh(s->nabsn);
+ s->nabo = le16toh(s->nabo);
+ s->nabspf = le16toh(s->nabspf);
+ s->noiob = le16toh(s->noiob);
+ for (i = 0; i < 16; i++)
+ s->lbaf[i] = le32toh(s->lbaf[i]);
+}
+
+static inline
+void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s)
+{
+
+ s->error_count = le64toh(s->error_count);
+ s->sqid = le16toh(s->sqid);
+ s->cid = le16toh(s->cid);
+ s->status = le16toh(s->status);
+ s->error_location = le16toh(s->error_location);
+ s->lba = le64toh(s->lba);
+ s->nsid = le32toh(s->nsid);
+}
+
+static inline
+void nvme_le128toh(void *p)
+{
+ /*
+ * Upstream, this uses the following comparison:
+ * #if _BYTE_ORDER != _LITTLE_ENDIAN
+ *
+ * Rather than keep this file in compat with only that little bit
+ * changed, we'll just float a little patch here for now.
+ */
+#ifndef _LITTLE_ENDIAN
+ /* Swap 16 bytes in place */
+ char *tmp = (char*)p;
+ char b;
+ int i;
+ for (i = 0; i < 8; i++) {
+ b = tmp[i];
+ tmp[i] = tmp[15-i];
+ tmp[15-i] = b;
+ }
+#else
+ (void)p;
+#endif
+}
+
+static inline
+void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s)
+{
+ int i;
+
+ s->temperature = le16toh(s->temperature);
+ nvme_le128toh((void *)s->data_units_read);
+ nvme_le128toh((void *)s->data_units_written);
+ nvme_le128toh((void *)s->host_read_commands);
+ nvme_le128toh((void *)s->host_write_commands);
+ nvme_le128toh((void *)s->controller_busy_time);
+ nvme_le128toh((void *)s->power_cycles);
+ nvme_le128toh((void *)s->power_on_hours);
+ nvme_le128toh((void *)s->unsafe_shutdowns);
+ nvme_le128toh((void *)s->media_errors);
+ nvme_le128toh((void *)s->num_error_info_log_entries);
+ s->warning_temp_time = le32toh(s->warning_temp_time);
+ s->error_temp_time = le32toh(s->error_temp_time);
+ for (i = 0; i < 8; i++)
+ s->temp_sensor[i] = le16toh(s->temp_sensor[i]);
+}
+
+
+static inline
+void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ s->revision[i] = le64toh(s->revision[i]);
+}
+
+static inline
+void nvme_ns_list_swapbytes(struct nvme_ns_list *s)
+{
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ s->ns[i] = le32toh(s->ns[i]);
+}
+
+static inline
+void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s)
+{
+
+ s->current = le64toh(s->current);
+ s->overtemp_flag_last = le64toh(s->overtemp_flag_last);
+ s->overtemp_flag_life = le64toh(s->overtemp_flag_life);
+ s->max_temp = le64toh(s->max_temp);
+ s->min_temp = le64toh(s->min_temp);
+ /* omit _rsvd[] */
+ s->max_oper_temp = le64toh(s->max_oper_temp);
+ s->min_oper_temp = le64toh(s->min_oper_temp);
+ s->est_offset = le64toh(s->est_offset);
+}
+
+#endif /* __NVME_H__ */
diff --git a/usr/src/contrib/bhyve/dev/pci/pcireg.h b/usr/src/contrib/bhyve/dev/pci/pcireg.h
new file mode 100644
index 0000000000..32a569dbd4
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/pci/pcireg.h
@@ -0,0 +1,922 @@
+/*-
+ * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/pci/pcireg.h 266468 2014-05-20 14:39:22Z mav $
+ *
+ */
+
+/*
+ * PCIM_xxx: mask to locate subfield in register
+ * PCIR_xxx: config register offset
+ * PCIC_xxx: device class
+ * PCIS_xxx: device subclass
+ * PCIP_xxx: device programming interface
+ * PCIV_xxx: PCI vendor ID (only required to fixup ancient devices)
+ * PCID_xxx: device ID
+ * PCIY_xxx: capability identification number
+ * PCIZ_xxx: extended capability identification number
+ */
+
+/* some PCI bus constants */
+#define PCI_DOMAINMAX 65535 /* highest supported domain number */
+#define PCI_BUSMAX 255 /* highest supported bus number */
+#define PCI_SLOTMAX 31 /* highest supported slot number */
+#define PCI_FUNCMAX 7 /* highest supported function number */
+#define PCI_REGMAX 255 /* highest supported config register addr. */
+#define PCIE_REGMAX 4095 /* highest supported config register addr. */
+#define PCI_MAXHDRTYPE 2
+
+#define PCIE_ARI_SLOTMAX 0
+#define PCIE_ARI_FUNCMAX 255
+
+#define PCI_RID_BUS_SHIFT 8
+#define PCI_RID_SLOT_SHIFT 3
+#define PCI_RID_FUNC_SHIFT 0
+
+#define PCI_RID(bus, slot, func) \
+ ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+ (((slot) & PCI_SLOTMAX) << PCI_RID_SLOT_SHIFT) | \
+ (((func) & PCI_FUNCMAX) << PCI_RID_FUNC_SHIFT))
+
+#define PCI_ARI_RID(bus, func) \
+ ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+ (((func) & PCIE_ARI_FUNCMAX) << PCI_RID_FUNC_SHIFT))
+
+#define PCI_RID2BUS(rid) (((rid) >> PCI_RID_BUS_SHIFT) & PCI_BUSMAX)
+#define PCI_RID2SLOT(rid) (((rid) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
+#define PCI_RID2FUNC(rid) (((rid) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
+
+#define PCIE_ARI_SLOT(func) (((func) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
+#define PCIE_ARI_FUNC(func) (((func) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
+
+/* PCI config header registers for all devices */
+
+#define PCIR_DEVVENDOR 0x00
+#define PCIR_VENDOR 0x00
+#define PCIR_DEVICE 0x02
+#define PCIR_COMMAND 0x04
+#define PCIM_CMD_PORTEN 0x0001
+#define PCIM_CMD_MEMEN 0x0002
+#define PCIM_CMD_BUSMASTEREN 0x0004
+#define PCIM_CMD_SPECIALEN 0x0008
+#define PCIM_CMD_MWRICEN 0x0010
+#define PCIM_CMD_PERRESPEN 0x0040
+#define PCIM_CMD_SERRESPEN 0x0100
+#define PCIM_CMD_BACKTOBACK 0x0200
+#define PCIM_CMD_INTxDIS 0x0400
+#define PCIR_STATUS 0x06
+#define PCIM_STATUS_INTxSTATE 0x0008
+#define PCIM_STATUS_CAPPRESENT 0x0010
+#define PCIM_STATUS_66CAPABLE 0x0020
+#define PCIM_STATUS_BACKTOBACK 0x0080
+#define PCIM_STATUS_MDPERR 0x0100
+#define PCIM_STATUS_SEL_FAST 0x0000
+#define PCIM_STATUS_SEL_MEDIMUM 0x0200
+#define PCIM_STATUS_SEL_SLOW 0x0400
+#define PCIM_STATUS_SEL_MASK 0x0600
+#define PCIM_STATUS_STABORT 0x0800
+#define PCIM_STATUS_RTABORT 0x1000
+#define PCIM_STATUS_RMABORT 0x2000
+#define PCIM_STATUS_SERR 0x4000
+#define PCIM_STATUS_PERR 0x8000
+#define PCIR_REVID 0x08
+#define PCIR_PROGIF 0x09
+#define PCIR_SUBCLASS 0x0a
+#define PCIR_CLASS 0x0b
+#define PCIR_CACHELNSZ 0x0c
+#define PCIR_LATTIMER 0x0d
+#define PCIR_HDRTYPE 0x0e
+#define PCIM_HDRTYPE 0x7f
+#define PCIM_HDRTYPE_NORMAL 0x00
+#define PCIM_HDRTYPE_BRIDGE 0x01
+#define PCIM_HDRTYPE_CARDBUS 0x02
+#define PCIM_MFDEV 0x80
+#define PCIR_BIST 0x0f
+
+/* Capability Register Offsets */
+
+#define PCICAP_ID 0x0
+#define PCICAP_NEXTPTR 0x1
+
+/* Capability Identification Numbers */
+
+#define PCIY_PMG 0x01 /* PCI Power Management */
+#define PCIY_AGP 0x02 /* AGP */
+#define PCIY_VPD 0x03 /* Vital Product Data */
+#define PCIY_SLOTID 0x04 /* Slot Identification */
+#define PCIY_MSI 0x05 /* Message Signaled Interrupts */
+#define PCIY_CHSWP 0x06 /* CompactPCI Hot Swap */
+#define PCIY_PCIX 0x07 /* PCI-X */
+#define PCIY_HT 0x08 /* HyperTransport */
+#define PCIY_VENDOR 0x09 /* Vendor Unique */
+#define PCIY_DEBUG 0x0a /* Debug port */
+#define PCIY_CRES 0x0b /* CompactPCI central resource control */
+#define PCIY_HOTPLUG 0x0c /* PCI Hot-Plug */
+#define PCIY_SUBVENDOR 0x0d /* PCI-PCI bridge subvendor ID */
+#define PCIY_AGP8X 0x0e /* AGP 8x */
+#define PCIY_SECDEV 0x0f /* Secure Device */
+#define PCIY_EXPRESS 0x10 /* PCI Express */
+#define PCIY_MSIX 0x11 /* MSI-X */
+#define PCIY_SATA 0x12 /* SATA */
+#define PCIY_PCIAF 0x13 /* PCI Advanced Features */
+
+/* Extended Capability Register Fields */
+
+#define PCIR_EXTCAP 0x100
+#define PCIM_EXTCAP_ID 0x0000ffff
+#define PCIM_EXTCAP_VER 0x000f0000
+#define PCIM_EXTCAP_NEXTPTR 0xfff00000
+#define PCI_EXTCAP_ID(ecap) ((ecap) & PCIM_EXTCAP_ID)
+#define PCI_EXTCAP_VER(ecap) (((ecap) & PCIM_EXTCAP_VER) >> 16)
+#define PCI_EXTCAP_NEXTPTR(ecap) (((ecap) & PCIM_EXTCAP_NEXTPTR) >> 20)
+
+/* Extended Capability Identification Numbers */
+
+#define PCIZ_AER 0x0001 /* Advanced Error Reporting */
+#define PCIZ_VC 0x0002 /* Virtual Channel if MFVC Ext Cap not set */
+#define PCIZ_SERNUM 0x0003 /* Device Serial Number */
+#define PCIZ_PWRBDGT 0x0004 /* Power Budgeting */
+#define PCIZ_RCLINK_DCL 0x0005 /* Root Complex Link Declaration */
+#define PCIZ_RCLINK_CTL 0x0006 /* Root Complex Internal Link Control */
+#define PCIZ_RCEC_ASSOC 0x0007 /* Root Complex Event Collector Association */
+#define PCIZ_MFVC 0x0008 /* Multi-Function Virtual Channel */
+#define PCIZ_VC2 0x0009 /* Virtual Channel if MFVC Ext Cap set */
+#define PCIZ_RCRB 0x000a /* RCRB Header */
+#define PCIZ_VENDOR 0x000b /* Vendor Unique */
+#define PCIZ_CAC 0x000c /* Configuration Access Correction -- obsolete */
+#define PCIZ_ACS 0x000d /* Access Control Services */
+#define PCIZ_ARI 0x000e /* Alternative Routing-ID Interpretation */
+#define PCIZ_ATS 0x000f /* Address Translation Services */
+#define PCIZ_SRIOV 0x0010 /* Single Root IO Virtualization */
+#define PCIZ_MRIOV 0x0011 /* Multiple Root IO Virtualization */
+#define PCIZ_MULTICAST 0x0012 /* Multicast */
+#define PCIZ_PAGE_REQ 0x0013 /* Page Request */
+#define PCIZ_AMD 0x0014 /* Reserved for AMD */
+#define PCIZ_RESIZE_BAR 0x0015 /* Resizable BAR */
+#define PCIZ_DPA 0x0016 /* Dynamic Power Allocation */
+#define PCIZ_TPH_REQ 0x0017 /* TPH Requester */
+#define PCIZ_LTR 0x0018 /* Latency Tolerance Reporting */
+#define PCIZ_SEC_PCIE 0x0019 /* Secondary PCI Express */
+#define PCIZ_PMUX 0x001a /* Protocol Multiplexing */
+#define PCIZ_PASID 0x001b /* Process Address Space ID */
+#define PCIZ_LN_REQ 0x001c /* LN Requester */
+#define PCIZ_DPC 0x001d /* Downstream Porto Containment */
+#define PCIZ_L1PM 0x001e /* L1 PM Substates */
+
+/* config registers for header type 0 devices */
+
+#define PCIR_BARS 0x10
+#define PCIR_BAR(x) (PCIR_BARS + (x) * 4)
+#define PCIR_MAX_BAR_0 5
+#define PCI_RID2BAR(rid) (((rid) - PCIR_BARS) / 4)
+#define PCI_BAR_IO(x) (((x) & PCIM_BAR_SPACE) == PCIM_BAR_IO_SPACE)
+#define PCI_BAR_MEM(x) (((x) & PCIM_BAR_SPACE) == PCIM_BAR_MEM_SPACE)
+#define PCIM_BAR_SPACE 0x00000001
+#define PCIM_BAR_MEM_SPACE 0
+#define PCIM_BAR_IO_SPACE 1
+#define PCIM_BAR_MEM_TYPE 0x00000006
+#define PCIM_BAR_MEM_32 0
+#define PCIM_BAR_MEM_1MB 2 /* Locate below 1MB in PCI <= 2.1 */
+#define PCIM_BAR_MEM_64 4
+#define PCIM_BAR_MEM_PREFETCH 0x00000008
+#define PCIM_BAR_MEM_BASE 0xfffffffffffffff0ULL
+#define PCIM_BAR_IO_RESERVED 0x00000002
+#define PCIM_BAR_IO_BASE 0xfffffffc
+#define PCIR_CIS 0x28
+#define PCIM_CIS_ASI_MASK 0x00000007
+#define PCIM_CIS_ASI_CONFIG 0
+#define PCIM_CIS_ASI_BAR0 1
+#define PCIM_CIS_ASI_BAR1 2
+#define PCIM_CIS_ASI_BAR2 3
+#define PCIM_CIS_ASI_BAR3 4
+#define PCIM_CIS_ASI_BAR4 5
+#define PCIM_CIS_ASI_BAR5 6
+#define PCIM_CIS_ASI_ROM 7
+#define PCIM_CIS_ADDR_MASK 0x0ffffff8
+#define PCIM_CIS_ROM_MASK 0xf0000000
+#define PCIM_CIS_CONFIG_MASK 0xff
+#define PCIR_SUBVEND_0 0x2c
+#define PCIR_SUBDEV_0 0x2e
+#define PCIR_BIOS 0x30
+#define PCIM_BIOS_ENABLE 0x01
+#define PCIM_BIOS_ADDR_MASK 0xfffff800
+#define PCIR_CAP_PTR 0x34
+#define PCIR_INTLINE 0x3c
+#define PCIR_INTPIN 0x3d
+#define PCIR_MINGNT 0x3e
+#define PCIR_MAXLAT 0x3f
+
+/* config registers for header type 1 (PCI-to-PCI bridge) devices */
+
+#define PCIR_MAX_BAR_1 1
+#define PCIR_SECSTAT_1 0x1e
+
+#define PCIR_PRIBUS_1 0x18
+#define PCIR_SECBUS_1 0x19
+#define PCIR_SUBBUS_1 0x1a
+#define PCIR_SECLAT_1 0x1b
+
+#define PCIR_IOBASEL_1 0x1c
+#define PCIR_IOLIMITL_1 0x1d
+#define PCIR_IOBASEH_1 0x30
+#define PCIR_IOLIMITH_1 0x32
+#define PCIM_BRIO_16 0x0
+#define PCIM_BRIO_32 0x1
+#define PCIM_BRIO_MASK 0xf
+
+#define PCIR_MEMBASE_1 0x20
+#define PCIR_MEMLIMIT_1 0x22
+
+#define PCIR_PMBASEL_1 0x24
+#define PCIR_PMLIMITL_1 0x26
+#define PCIR_PMBASEH_1 0x28
+#define PCIR_PMLIMITH_1 0x2c
+#define PCIM_BRPM_32 0x0
+#define PCIM_BRPM_64 0x1
+#define PCIM_BRPM_MASK 0xf
+
+#define PCIR_BIOS_1 0x38
+#define PCIR_BRIDGECTL_1 0x3e
+
+/* config registers for header type 2 (CardBus) devices */
+
+#define PCIR_MAX_BAR_2 0
+#define PCIR_CAP_PTR_2 0x14
+#define PCIR_SECSTAT_2 0x16
+
+#define PCIR_PRIBUS_2 0x18
+#define PCIR_SECBUS_2 0x19
+#define PCIR_SUBBUS_2 0x1a
+#define PCIR_SECLAT_2 0x1b
+
+#define PCIR_MEMBASE0_2 0x1c
+#define PCIR_MEMLIMIT0_2 0x20
+#define PCIR_MEMBASE1_2 0x24
+#define PCIR_MEMLIMIT1_2 0x28
+#define PCIR_IOBASE0_2 0x2c
+#define PCIR_IOLIMIT0_2 0x30
+#define PCIR_IOBASE1_2 0x34
+#define PCIR_IOLIMIT1_2 0x38
+
+#define PCIR_BRIDGECTL_2 0x3e
+
+#define PCIR_SUBVEND_2 0x40
+#define PCIR_SUBDEV_2 0x42
+
+#define PCIR_PCCARDIF_2 0x44
+
+/* PCI device class, subclass and programming interface definitions */
+
+#define PCIC_OLD 0x00
+#define PCIS_OLD_NONVGA 0x00
+#define PCIS_OLD_VGA 0x01
+
+#define PCIC_STORAGE 0x01
+#define PCIS_STORAGE_SCSI 0x00
+#define PCIS_STORAGE_IDE 0x01
+#define PCIP_STORAGE_IDE_MODEPRIM 0x01
+#define PCIP_STORAGE_IDE_PROGINDPRIM 0x02
+#define PCIP_STORAGE_IDE_MODESEC 0x04
+#define PCIP_STORAGE_IDE_PROGINDSEC 0x08
+#define PCIP_STORAGE_IDE_MASTERDEV 0x80
+#define PCIS_STORAGE_FLOPPY 0x02
+#define PCIS_STORAGE_IPI 0x03
+#define PCIS_STORAGE_RAID 0x04
+#define PCIS_STORAGE_ATA_ADMA 0x05
+#define PCIS_STORAGE_SATA 0x06
+#define PCIP_STORAGE_SATA_AHCI_1_0 0x01
+#define PCIS_STORAGE_SAS 0x07
+#define PCIS_STORAGE_NVM 0x08
+#define PCIP_STORAGE_NVM_NVMHCI_1_0 0x01
+#define PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0 0x02
+#define PCIS_STORAGE_OTHER 0x80
+
+#define PCIC_NETWORK 0x02
+#define PCIS_NETWORK_ETHERNET 0x00
+#define PCIS_NETWORK_TOKENRING 0x01
+#define PCIS_NETWORK_FDDI 0x02
+#define PCIS_NETWORK_ATM 0x03
+#define PCIS_NETWORK_ISDN 0x04
+#define PCIS_NETWORK_WORLDFIP 0x05
+#define PCIS_NETWORK_PICMG 0x06
+#define PCIS_NETWORK_OTHER 0x80
+
+#define PCIC_DISPLAY 0x03
+#define PCIS_DISPLAY_VGA 0x00
+#define PCIS_DISPLAY_XGA 0x01
+#define PCIS_DISPLAY_3D 0x02
+#define PCIS_DISPLAY_OTHER 0x80
+
+#define PCIC_MULTIMEDIA 0x04
+#define PCIS_MULTIMEDIA_VIDEO 0x00
+#define PCIS_MULTIMEDIA_AUDIO 0x01
+#define PCIS_MULTIMEDIA_TELE 0x02
+#define PCIS_MULTIMEDIA_HDA 0x03
+#define PCIS_MULTIMEDIA_OTHER 0x80
+
+#define PCIC_MEMORY 0x05
+#define PCIS_MEMORY_RAM 0x00
+#define PCIS_MEMORY_FLASH 0x01
+#define PCIS_MEMORY_OTHER 0x80
+
+#define PCIC_BRIDGE 0x06
+#define PCIS_BRIDGE_HOST 0x00
+#define PCIS_BRIDGE_ISA 0x01
+#define PCIS_BRIDGE_EISA 0x02
+#define PCIS_BRIDGE_MCA 0x03
+#define PCIS_BRIDGE_PCI 0x04
+#define PCIP_BRIDGE_PCI_SUBTRACTIVE 0x01
+#define PCIS_BRIDGE_PCMCIA 0x05
+#define PCIS_BRIDGE_NUBUS 0x06
+#define PCIS_BRIDGE_CARDBUS 0x07
+#define PCIS_BRIDGE_RACEWAY 0x08
+#define PCIS_BRIDGE_PCI_TRANSPARENT 0x09
+#define PCIS_BRIDGE_INFINIBAND 0x0a
+#define PCIS_BRIDGE_OTHER 0x80
+
+#define PCIC_SIMPLECOMM 0x07
+#define PCIS_SIMPLECOMM_UART 0x00
+#define PCIP_SIMPLECOMM_UART_8250 0x00
+#define PCIP_SIMPLECOMM_UART_16450A 0x01
+#define PCIP_SIMPLECOMM_UART_16550A 0x02
+#define PCIP_SIMPLECOMM_UART_16650A 0x03
+#define PCIP_SIMPLECOMM_UART_16750A 0x04
+#define PCIP_SIMPLECOMM_UART_16850A 0x05
+#define PCIP_SIMPLECOMM_UART_16950A 0x06
+#define PCIS_SIMPLECOMM_PAR 0x01
+#define PCIS_SIMPLECOMM_MULSER 0x02
+#define PCIS_SIMPLECOMM_MODEM 0x03
+#define PCIS_SIMPLECOMM_GPIB 0x04
+#define PCIS_SIMPLECOMM_SMART_CARD 0x05
+#define PCIS_SIMPLECOMM_OTHER 0x80
+
+#define PCIC_BASEPERIPH 0x08
+#define PCIS_BASEPERIPH_PIC 0x00
+#define PCIP_BASEPERIPH_PIC_8259A 0x00
+#define PCIP_BASEPERIPH_PIC_ISA 0x01
+#define PCIP_BASEPERIPH_PIC_EISA 0x02
+#define PCIP_BASEPERIPH_PIC_IO_APIC 0x10
+#define PCIP_BASEPERIPH_PIC_IOX_APIC 0x20
+#define PCIS_BASEPERIPH_DMA 0x01
+#define PCIS_BASEPERIPH_TIMER 0x02
+#define PCIS_BASEPERIPH_RTC 0x03
+#define PCIS_BASEPERIPH_PCIHOT 0x04
+#define PCIS_BASEPERIPH_SDHC 0x05
+#define PCIS_BASEPERIPH_IOMMU 0x06
+#define PCIS_BASEPERIPH_OTHER 0x80
+
+#define PCIC_INPUTDEV 0x09
+#define PCIS_INPUTDEV_KEYBOARD 0x00
+#define PCIS_INPUTDEV_DIGITIZER 0x01
+#define PCIS_INPUTDEV_MOUSE 0x02
+#define PCIS_INPUTDEV_SCANNER 0x03
+#define PCIS_INPUTDEV_GAMEPORT 0x04
+#define PCIS_INPUTDEV_OTHER 0x80
+
+#define PCIC_DOCKING 0x0a
+#define PCIS_DOCKING_GENERIC 0x00
+#define PCIS_DOCKING_OTHER 0x80
+
+#define PCIC_PROCESSOR 0x0b
+#define PCIS_PROCESSOR_386 0x00
+#define PCIS_PROCESSOR_486 0x01
+#define PCIS_PROCESSOR_PENTIUM 0x02
+#define PCIS_PROCESSOR_ALPHA 0x10
+#define PCIS_PROCESSOR_POWERPC 0x20
+#define PCIS_PROCESSOR_MIPS 0x30
+#define PCIS_PROCESSOR_COPROC 0x40
+
+#define PCIC_SERIALBUS 0x0c
+#define PCIS_SERIALBUS_FW 0x00
+#define PCIS_SERIALBUS_ACCESS 0x01
+#define PCIS_SERIALBUS_SSA 0x02
+#define PCIS_SERIALBUS_USB 0x03
+#define PCIP_SERIALBUS_USB_UHCI 0x00
+#define PCIP_SERIALBUS_USB_OHCI 0x10
+#define PCIP_SERIALBUS_USB_EHCI 0x20
+#define PCIP_SERIALBUS_USB_XHCI 0x30
+#define PCIP_SERIALBUS_USB_DEVICE 0xfe
+#define PCIS_SERIALBUS_FC 0x04
+#define PCIS_SERIALBUS_SMBUS 0x05
+#define PCIS_SERIALBUS_INFINIBAND 0x06
+#define PCIS_SERIALBUS_IPMI 0x07
+#define PCIP_SERIALBUS_IPMI_SMIC 0x00
+#define PCIP_SERIALBUS_IPMI_KCS 0x01
+#define PCIP_SERIALBUS_IPMI_BT 0x02
+#define PCIS_SERIALBUS_SERCOS 0x08
+#define PCIS_SERIALBUS_CANBUS 0x09
+
+#define PCIC_WIRELESS 0x0d
+#define PCIS_WIRELESS_IRDA 0x00
+#define PCIS_WIRELESS_IR 0x01
+#define PCIS_WIRELESS_RF 0x10
+#define PCIS_WIRELESS_BLUETOOTH 0x11
+#define PCIS_WIRELESS_BROADBAND 0x12
+#define PCIS_WIRELESS_80211A 0x20
+#define PCIS_WIRELESS_80211B 0x21
+#define PCIS_WIRELESS_OTHER 0x80
+
+#define PCIC_INTELLIIO 0x0e
+#define PCIS_INTELLIIO_I2O 0x00
+
+#define PCIC_SATCOM 0x0f
+#define PCIS_SATCOM_TV 0x01
+#define PCIS_SATCOM_AUDIO 0x02
+#define PCIS_SATCOM_VOICE 0x03
+#define PCIS_SATCOM_DATA 0x04
+
+#define PCIC_CRYPTO 0x10
+#define PCIS_CRYPTO_NETCOMP 0x00
+#define PCIS_CRYPTO_ENTERTAIN 0x10
+#define PCIS_CRYPTO_OTHER 0x80
+
+#define PCIC_DASP 0x11
+#define PCIS_DASP_DPIO 0x00
+#define PCIS_DASP_PERFCNTRS 0x01
+#define PCIS_DASP_COMM_SYNC 0x10
+#define PCIS_DASP_MGMT_CARD 0x20
+#define PCIS_DASP_OTHER 0x80
+
+#define PCIC_OTHER 0xff
+
+/* Bridge Control Values. */
+#define PCIB_BCR_PERR_ENABLE 0x0001
+#define PCIB_BCR_SERR_ENABLE 0x0002
+#define PCIB_BCR_ISA_ENABLE 0x0004
+#define PCIB_BCR_VGA_ENABLE 0x0008
+#define PCIB_BCR_MASTER_ABORT_MODE 0x0020
+#define PCIB_BCR_SECBUS_RESET 0x0040
+#define PCIB_BCR_SECBUS_BACKTOBACK 0x0080
+#define PCIB_BCR_PRI_DISCARD_TIMEOUT 0x0100
+#define PCIB_BCR_SEC_DISCARD_TIMEOUT 0x0200
+#define PCIB_BCR_DISCARD_TIMER_STATUS 0x0400
+#define PCIB_BCR_DISCARD_TIMER_SERREN 0x0800
+
+/* PCI power manangement */
+#define PCIR_POWER_CAP 0x2
+#define PCIM_PCAP_SPEC 0x0007
+#define PCIM_PCAP_PMEREQCLK 0x0008
+#define PCIM_PCAP_DEVSPECINIT 0x0020
+#define PCIM_PCAP_AUXPWR_0 0x0000
+#define PCIM_PCAP_AUXPWR_55 0x0040
+#define PCIM_PCAP_AUXPWR_100 0x0080
+#define PCIM_PCAP_AUXPWR_160 0x00c0
+#define PCIM_PCAP_AUXPWR_220 0x0100
+#define PCIM_PCAP_AUXPWR_270 0x0140
+#define PCIM_PCAP_AUXPWR_320 0x0180
+#define PCIM_PCAP_AUXPWR_375 0x01c0
+#define PCIM_PCAP_AUXPWRMASK 0x01c0
+#define PCIM_PCAP_D1SUPP 0x0200
+#define PCIM_PCAP_D2SUPP 0x0400
+#define PCIM_PCAP_D0PME 0x0800
+#define PCIM_PCAP_D1PME 0x1000
+#define PCIM_PCAP_D2PME 0x2000
+#define PCIM_PCAP_D3PME_HOT 0x4000
+#define PCIM_PCAP_D3PME_COLD 0x8000
+
+#define PCIR_POWER_STATUS 0x4
+#define PCIM_PSTAT_D0 0x0000
+#define PCIM_PSTAT_D1 0x0001
+#define PCIM_PSTAT_D2 0x0002
+#define PCIM_PSTAT_D3 0x0003
+#define PCIM_PSTAT_DMASK 0x0003
+#define PCIM_PSTAT_NOSOFTRESET 0x0008
+#define PCIM_PSTAT_PMEENABLE 0x0100
+#define PCIM_PSTAT_D0POWER 0x0000
+#define PCIM_PSTAT_D1POWER 0x0200
+#define PCIM_PSTAT_D2POWER 0x0400
+#define PCIM_PSTAT_D3POWER 0x0600
+#define PCIM_PSTAT_D0HEAT 0x0800
+#define PCIM_PSTAT_D1HEAT 0x0a00
+#define PCIM_PSTAT_D2HEAT 0x0c00
+#define PCIM_PSTAT_D3HEAT 0x0e00
+#define PCIM_PSTAT_DATASELMASK 0x1e00
+#define PCIM_PSTAT_DATAUNKN 0x0000
+#define PCIM_PSTAT_DATADIV10 0x2000
+#define PCIM_PSTAT_DATADIV100 0x4000
+#define PCIM_PSTAT_DATADIV1000 0x6000
+#define PCIM_PSTAT_DATADIVMASK 0x6000
+#define PCIM_PSTAT_PME 0x8000
+
+#define PCIR_POWER_BSE 0x6
+#define PCIM_PMCSR_BSE_D3B3 0x00
+#define PCIM_PMCSR_BSE_D3B2 0x40
+#define PCIM_PMCSR_BSE_BPCCE 0x80
+
+#define PCIR_POWER_DATA 0x7
+
+/* VPD capability registers */
+#define PCIR_VPD_ADDR 0x2
+#define PCIR_VPD_DATA 0x4
+
+/* PCI Message Signalled Interrupts (MSI) */
+#define PCIR_MSI_CTRL 0x2
+#define PCIM_MSICTRL_VECTOR 0x0100
+#define PCIM_MSICTRL_64BIT 0x0080
+#define PCIM_MSICTRL_MME_MASK 0x0070
+#define PCIM_MSICTRL_MME_1 0x0000
+#define PCIM_MSICTRL_MME_2 0x0010
+#define PCIM_MSICTRL_MME_4 0x0020
+#define PCIM_MSICTRL_MME_8 0x0030
+#define PCIM_MSICTRL_MME_16 0x0040
+#define PCIM_MSICTRL_MME_32 0x0050
+#define PCIM_MSICTRL_MMC_MASK 0x000E
+#define PCIM_MSICTRL_MMC_1 0x0000
+#define PCIM_MSICTRL_MMC_2 0x0002
+#define PCIM_MSICTRL_MMC_4 0x0004
+#define PCIM_MSICTRL_MMC_8 0x0006
+#define PCIM_MSICTRL_MMC_16 0x0008
+#define PCIM_MSICTRL_MMC_32 0x000A
+#define PCIM_MSICTRL_MSI_ENABLE 0x0001
+#define PCIR_MSI_ADDR 0x4
+#define PCIR_MSI_ADDR_HIGH 0x8
+#define PCIR_MSI_DATA 0x8
+#define PCIR_MSI_DATA_64BIT 0xc
+#define PCIR_MSI_MASK 0x10
+#define PCIR_MSI_PENDING 0x14
+
+/* PCI-X definitions */
+
+/* For header type 0 devices */
+#define PCIXR_COMMAND 0x2
+#define PCIXM_COMMAND_DPERR_E 0x0001 /* Data Parity Error Recovery */
+#define PCIXM_COMMAND_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCIXM_COMMAND_MAX_READ 0x000c /* Maximum Burst Read Count */
+#define PCIXM_COMMAND_MAX_READ_512 0x0000
+#define PCIXM_COMMAND_MAX_READ_1024 0x0004
+#define PCIXM_COMMAND_MAX_READ_2048 0x0008
+#define PCIXM_COMMAND_MAX_READ_4096 0x000c
+#define PCIXM_COMMAND_MAX_SPLITS 0x0070 /* Maximum Split Transactions */
+#define PCIXM_COMMAND_MAX_SPLITS_1 0x0000
+#define PCIXM_COMMAND_MAX_SPLITS_2 0x0010
+#define PCIXM_COMMAND_MAX_SPLITS_3 0x0020
+#define PCIXM_COMMAND_MAX_SPLITS_4 0x0030
+#define PCIXM_COMMAND_MAX_SPLITS_8 0x0040
+#define PCIXM_COMMAND_MAX_SPLITS_12 0x0050
+#define PCIXM_COMMAND_MAX_SPLITS_16 0x0060
+#define PCIXM_COMMAND_MAX_SPLITS_32 0x0070
+#define PCIXM_COMMAND_VERSION 0x3000
+#define PCIXR_STATUS 0x4
+#define PCIXM_STATUS_DEVFN 0x000000FF
+#define PCIXM_STATUS_BUS 0x0000FF00
+#define PCIXM_STATUS_64BIT 0x00010000
+#define PCIXM_STATUS_133CAP 0x00020000
+#define PCIXM_STATUS_SC_DISCARDED 0x00040000
+#define PCIXM_STATUS_UNEXP_SC 0x00080000
+#define PCIXM_STATUS_COMPLEX_DEV 0x00100000
+#define PCIXM_STATUS_MAX_READ 0x00600000
+#define PCIXM_STATUS_MAX_READ_512 0x00000000
+#define PCIXM_STATUS_MAX_READ_1024 0x00200000
+#define PCIXM_STATUS_MAX_READ_2048 0x00400000
+#define PCIXM_STATUS_MAX_READ_4096 0x00600000
+#define PCIXM_STATUS_MAX_SPLITS 0x03800000
+#define PCIXM_STATUS_MAX_SPLITS_1 0x00000000
+#define PCIXM_STATUS_MAX_SPLITS_2 0x00800000
+#define PCIXM_STATUS_MAX_SPLITS_3 0x01000000
+#define PCIXM_STATUS_MAX_SPLITS_4 0x01800000
+#define PCIXM_STATUS_MAX_SPLITS_8 0x02000000
+#define PCIXM_STATUS_MAX_SPLITS_12 0x02800000
+#define PCIXM_STATUS_MAX_SPLITS_16 0x03000000
+#define PCIXM_STATUS_MAX_SPLITS_32 0x03800000
+#define PCIXM_STATUS_MAX_CUM_READ 0x1C000000
+#define PCIXM_STATUS_RCVD_SC_ERR 0x20000000
+#define PCIXM_STATUS_266CAP 0x40000000
+#define PCIXM_STATUS_533CAP 0x80000000
+
+/* For header type 1 devices (PCI-X bridges) */
+#define PCIXR_SEC_STATUS 0x2
+#define PCIXM_SEC_STATUS_64BIT 0x0001
+#define PCIXM_SEC_STATUS_133CAP 0x0002
+#define PCIXM_SEC_STATUS_SC_DISC 0x0004
+#define PCIXM_SEC_STATUS_UNEXP_SC 0x0008
+#define PCIXM_SEC_STATUS_SC_OVERRUN 0x0010
+#define PCIXM_SEC_STATUS_SR_DELAYED 0x0020
+#define PCIXM_SEC_STATUS_BUS_MODE 0x03c0
+#define PCIXM_SEC_STATUS_VERSION 0x3000
+#define PCIXM_SEC_STATUS_266CAP 0x4000
+#define PCIXM_SEC_STATUS_533CAP 0x8000
+#define PCIXR_BRIDGE_STATUS 0x4
+#define PCIXM_BRIDGE_STATUS_DEVFN 0x000000FF
+#define PCIXM_BRIDGE_STATUS_BUS 0x0000FF00
+#define PCIXM_BRIDGE_STATUS_64BIT 0x00010000
+#define PCIXM_BRIDGE_STATUS_133CAP 0x00020000
+#define PCIXM_BRIDGE_STATUS_SC_DISCARDED 0x00040000
+#define PCIXM_BRIDGE_STATUS_UNEXP_SC 0x00080000
+#define PCIXM_BRIDGE_STATUS_SC_OVERRUN 0x00100000
+#define PCIXM_BRIDGE_STATUS_SR_DELAYED 0x00200000
+#define PCIXM_BRIDGE_STATUS_DEVID_MSGCAP 0x20000000
+#define PCIXM_BRIDGE_STATUS_266CAP 0x40000000
+#define PCIXM_BRIDGE_STATUS_533CAP 0x80000000
+
+/* HT (HyperTransport) Capability definitions */
+#define PCIR_HT_COMMAND 0x2
+#define PCIM_HTCMD_CAP_MASK 0xf800 /* Capability type. */
+#define PCIM_HTCAP_SLAVE 0x0000 /* 000xx */
+#define PCIM_HTCAP_HOST 0x2000 /* 001xx */
+#define PCIM_HTCAP_SWITCH 0x4000 /* 01000 */
+#define PCIM_HTCAP_INTERRUPT 0x8000 /* 10000 */
+#define PCIM_HTCAP_REVISION_ID 0x8800 /* 10001 */
+#define PCIM_HTCAP_UNITID_CLUMPING 0x9000 /* 10010 */
+#define PCIM_HTCAP_EXT_CONFIG_SPACE 0x9800 /* 10011 */
+#define PCIM_HTCAP_ADDRESS_MAPPING 0xa000 /* 10100 */
+#define PCIM_HTCAP_MSI_MAPPING 0xa800 /* 10101 */
+#define PCIM_HTCAP_DIRECT_ROUTE 0xb000 /* 10110 */
+#define PCIM_HTCAP_VCSET 0xb800 /* 10111 */
+#define PCIM_HTCAP_RETRY_MODE 0xc000 /* 11000 */
+#define PCIM_HTCAP_X86_ENCODING 0xc800 /* 11001 */
+#define PCIM_HTCAP_GEN3 0xd000 /* 11010 */
+#define PCIM_HTCAP_FLE 0xd800 /* 11011 */
+#define PCIM_HTCAP_PM 0xe000 /* 11100 */
+#define PCIM_HTCAP_HIGH_NODE_COUNT 0xe800 /* 11101 */
+
+/* HT MSI Mapping Capability definitions. */
+#define PCIM_HTCMD_MSI_ENABLE 0x0001
+#define PCIM_HTCMD_MSI_FIXED 0x0002
+#define PCIR_HTMSI_ADDRESS_LO 0x4
+#define PCIR_HTMSI_ADDRESS_HI 0x8
+
+/* PCI Vendor capability definitions */
+#define PCIR_VENDOR_LENGTH 0x2
+#define PCIR_VENDOR_DATA 0x3
+
+/* PCI EHCI Debug Port definitions */
+#define PCIR_DEBUG_PORT 0x2
+#define PCIM_DEBUG_PORT_OFFSET 0x1FFF
+#define PCIM_DEBUG_PORT_BAR 0xe000
+
+/* PCI-PCI Bridge Subvendor definitions */
+#define PCIR_SUBVENDCAP_ID 0x4
+
+/* PCI Express definitions */
+#define PCIER_FLAGS 0x2
+#define PCIEM_FLAGS_VERSION 0x000F
+#define PCIEM_FLAGS_TYPE 0x00F0
+#define PCIEM_TYPE_ENDPOINT 0x0000
+#define PCIEM_TYPE_LEGACY_ENDPOINT 0x0010
+#define PCIEM_TYPE_ROOT_PORT 0x0040
+#define PCIEM_TYPE_UPSTREAM_PORT 0x0050
+#define PCIEM_TYPE_DOWNSTREAM_PORT 0x0060
+#define PCIEM_TYPE_PCI_BRIDGE 0x0070
+#define PCIEM_TYPE_PCIE_BRIDGE 0x0080
+#define PCIEM_TYPE_ROOT_INT_EP 0x0090
+#define PCIEM_TYPE_ROOT_EC 0x00a0
+#define PCIEM_FLAGS_SLOT 0x0100
+#define PCIEM_FLAGS_IRQ 0x3e00
+#define PCIER_DEVICE_CAP 0x4
+#define PCIEM_CAP_MAX_PAYLOAD 0x00000007
+#define PCIEM_CAP_PHANTHOM_FUNCS 0x00000018
+#define PCIEM_CAP_EXT_TAG_FIELD 0x00000020
+#define PCIEM_CAP_L0S_LATENCY 0x000001c0
+#define PCIEM_CAP_L1_LATENCY 0x00000e00
+#define PCIEM_CAP_ROLE_ERR_RPT 0x00008000
+#define PCIEM_CAP_SLOT_PWR_LIM_VAL 0x03fc0000
+#define PCIEM_CAP_SLOT_PWR_LIM_SCALE 0x0c000000
+#define PCIEM_CAP_FLR 0x10000000
+#define PCIER_DEVICE_CTL 0x8
+#define PCIEM_CTL_COR_ENABLE 0x0001
+#define PCIEM_CTL_NFER_ENABLE 0x0002
+#define PCIEM_CTL_FER_ENABLE 0x0004
+#define PCIEM_CTL_URR_ENABLE 0x0008
+#define PCIEM_CTL_RELAXED_ORD_ENABLE 0x0010
+#define PCIEM_CTL_MAX_PAYLOAD 0x00e0
+#define PCIEM_CTL_EXT_TAG_FIELD 0x0100
+#define PCIEM_CTL_PHANTHOM_FUNCS 0x0200
+#define PCIEM_CTL_AUX_POWER_PM 0x0400
+#define PCIEM_CTL_NOSNOOP_ENABLE 0x0800
+#define PCIEM_CTL_MAX_READ_REQUEST 0x7000
+#define PCIEM_CTL_BRDG_CFG_RETRY 0x8000 /* PCI-E - PCI/PCI-X bridges */
+#define PCIEM_CTL_INITIATE_FLR 0x8000 /* FLR capable endpoints */
+#define PCIER_DEVICE_STA 0xa
+#define PCIEM_STA_CORRECTABLE_ERROR 0x0001
+#define PCIEM_STA_NON_FATAL_ERROR 0x0002
+#define PCIEM_STA_FATAL_ERROR 0x0004
+#define PCIEM_STA_UNSUPPORTED_REQ 0x0008
+#define PCIEM_STA_AUX_POWER 0x0010
+#define PCIEM_STA_TRANSACTION_PND 0x0020
+#define PCIER_LINK_CAP 0xc
+#define PCIEM_LINK_CAP_MAX_SPEED 0x0000000f
+#define PCIEM_LINK_CAP_MAX_WIDTH 0x000003f0
+#define PCIEM_LINK_CAP_ASPM 0x00000c00
+#define PCIEM_LINK_CAP_L0S_EXIT 0x00007000
+#define PCIEM_LINK_CAP_L1_EXIT 0x00038000
+#define PCIEM_LINK_CAP_CLOCK_PM 0x00040000
+#define PCIEM_LINK_CAP_SURPRISE_DOWN 0x00080000
+#define PCIEM_LINK_CAP_DL_ACTIVE 0x00100000
+#define PCIEM_LINK_CAP_LINK_BW_NOTIFY 0x00200000
+#define PCIEM_LINK_CAP_ASPM_COMPLIANCE 0x00400000
+#define PCIEM_LINK_CAP_PORT 0xff000000
+#define PCIER_LINK_CTL 0x10
+#define PCIEM_LINK_CTL_ASPMC_DIS 0x0000
+#define PCIEM_LINK_CTL_ASPMC_L0S 0x0001
+#define PCIEM_LINK_CTL_ASPMC_L1 0x0002
+#define PCIEM_LINK_CTL_ASPMC 0x0003
+#define PCIEM_LINK_CTL_RCB 0x0008
+#define PCIEM_LINK_CTL_LINK_DIS 0x0010
+#define PCIEM_LINK_CTL_RETRAIN_LINK 0x0020
+#define PCIEM_LINK_CTL_COMMON_CLOCK 0x0040
+#define PCIEM_LINK_CTL_EXTENDED_SYNC 0x0080
+#define PCIEM_LINK_CTL_ECPM 0x0100
+#define PCIEM_LINK_CTL_HAWD 0x0200
+#define PCIEM_LINK_CTL_LBMIE 0x0400
+#define PCIEM_LINK_CTL_LABIE 0x0800
+#define PCIER_LINK_STA 0x12
+#define PCIEM_LINK_STA_SPEED 0x000f
+#define PCIEM_LINK_STA_WIDTH 0x03f0
+#define PCIEM_LINK_STA_TRAINING_ERROR 0x0400
+#define PCIEM_LINK_STA_TRAINING 0x0800
+#define PCIEM_LINK_STA_SLOT_CLOCK 0x1000
+#define PCIEM_LINK_STA_DL_ACTIVE 0x2000
+#define PCIEM_LINK_STA_LINK_BW_MGMT 0x4000
+#define PCIEM_LINK_STA_LINK_AUTO_BW 0x8000
+#define PCIER_SLOT_CAP 0x14
+#define PCIEM_SLOT_CAP_APB 0x00000001
+#define PCIEM_SLOT_CAP_PCP 0x00000002
+#define PCIEM_SLOT_CAP_MRLSP 0x00000004
+#define PCIEM_SLOT_CAP_AIP 0x00000008
+#define PCIEM_SLOT_CAP_PIP 0x00000010
+#define PCIEM_SLOT_CAP_HPS 0x00000020
+#define PCIEM_SLOT_CAP_HPC 0x00000040
+#define PCIEM_SLOT_CAP_SPLV 0x00007f80
+#define PCIEM_SLOT_CAP_SPLS 0x00018000
+#define PCIEM_SLOT_CAP_EIP 0x00020000
+#define PCIEM_SLOT_CAP_NCCS 0x00040000
+#define PCIEM_SLOT_CAP_PSN 0xfff80000
+#define PCIER_SLOT_CTL 0x18
+#define PCIEM_SLOT_CTL_ABPE 0x0001
+#define PCIEM_SLOT_CTL_PFDE 0x0002
+#define PCIEM_SLOT_CTL_MRLSCE 0x0004
+#define PCIEM_SLOT_CTL_PDCE 0x0008
+#define PCIEM_SLOT_CTL_CCIE 0x0010
+#define PCIEM_SLOT_CTL_HPIE 0x0020
+#define PCIEM_SLOT_CTL_AIC 0x00c0
+#define PCIEM_SLOT_CTL_PIC 0x0300
+#define PCIEM_SLOT_CTL_PCC 0x0400
+#define PCIEM_SLOT_CTL_EIC 0x0800
+#define PCIEM_SLOT_CTL_DLLSCE 0x1000
+#define PCIER_SLOT_STA 0x1a
+#define PCIEM_SLOT_STA_ABP 0x0001
+#define PCIEM_SLOT_STA_PFD 0x0002
+#define PCIEM_SLOT_STA_MRLSC 0x0004
+#define PCIEM_SLOT_STA_PDC 0x0008
+#define PCIEM_SLOT_STA_CC 0x0010
+#define PCIEM_SLOT_STA_MRLSS 0x0020
+#define PCIEM_SLOT_STA_PDS 0x0040
+#define PCIEM_SLOT_STA_EIS 0x0080
+#define PCIEM_SLOT_STA_DLLSC 0x0100
+#define PCIER_ROOT_CTL 0x1c
+#define PCIEM_ROOT_CTL_SERR_CORR 0x0001
+#define PCIEM_ROOT_CTL_SERR_NONFATAL 0x0002
+#define PCIEM_ROOT_CTL_SERR_FATAL 0x0004
+#define PCIEM_ROOT_CTL_PME 0x0008
+#define PCIEM_ROOT_CTL_CRS_VIS 0x0010
+#define PCIER_ROOT_CAP 0x1e
+#define PCIEM_ROOT_CAP_CRS_VIS 0x0001
+#define PCIER_ROOT_STA 0x20
+#define PCIEM_ROOT_STA_PME_REQID_MASK 0x0000ffff
+#define PCIEM_ROOT_STA_PME_STATUS 0x00010000
+#define PCIEM_ROOT_STA_PME_PEND 0x00020000
+#define PCIER_DEVICE_CAP2 0x24
+#define PCIEM_CAP2_ARI 0x20
+#define PCIER_DEVICE_CTL2 0x28
+#define PCIEM_CTL2_COMP_TIMEOUT_VAL 0x000f
+#define PCIEM_CTL2_COMP_TIMEOUT_DIS 0x0010
+#define PCIEM_CTL2_ARI 0x0020
+#define PCIEM_CTL2_ATOMIC_REQ_ENABLE 0x0040
+#define PCIEM_CTL2_ATOMIC_EGR_BLOCK 0x0080
+#define PCIEM_CTL2_ID_ORDERED_REQ_EN 0x0100
+#define PCIEM_CTL2_ID_ORDERED_CMP_EN 0x0200
+#define PCIEM_CTL2_LTR_ENABLE 0x0400
+#define PCIEM_CTL2_OBFF 0x6000
+#define PCIEM_OBFF_DISABLE 0x0000
+#define PCIEM_OBFF_MSGA_ENABLE 0x2000
+#define PCIEM_OBFF_MSGB_ENABLE 0x4000
+#define PCIEM_OBFF_WAKE_ENABLE 0x6000
+#define PCIEM_CTL2_END2END_TLP 0x8000
+#define PCIER_DEVICE_STA2 0x2a
+#define PCIER_LINK_CAP2 0x2c
+#define PCIER_LINK_CTL2 0x30
+#define PCIER_LINK_STA2 0x32
+#define PCIER_SLOT_CAP2 0x34
+#define PCIER_SLOT_CTL2 0x38
+#define PCIER_SLOT_STA2 0x3a
+
+/* MSI-X definitions */
+#define PCIR_MSIX_CTRL 0x2
+#define PCIM_MSIXCTRL_MSIX_ENABLE 0x8000
+#define PCIM_MSIXCTRL_FUNCTION_MASK 0x4000
+#define PCIM_MSIXCTRL_TABLE_SIZE 0x07FF
+#define PCIR_MSIX_TABLE 0x4
+#define PCIR_MSIX_PBA 0x8
+#define PCIM_MSIX_BIR_MASK 0x7
+#define PCIM_MSIX_BIR_BAR_10 0
+#define PCIM_MSIX_BIR_BAR_14 1
+#define PCIM_MSIX_BIR_BAR_18 2
+#define PCIM_MSIX_BIR_BAR_1C 3
+#define PCIM_MSIX_BIR_BAR_20 4
+#define PCIM_MSIX_BIR_BAR_24 5
+#define PCIM_MSIX_VCTRL_MASK 0x1
+
+/* PCI Advanced Features definitions */
+#define PCIR_PCIAF_CAP 0x3
+#define PCIM_PCIAFCAP_TP 0x01
+#define PCIM_PCIAFCAP_FLR 0x02
+#define PCIR_PCIAF_CTRL 0x4
+#define PCIR_PCIAFCTRL_FLR 0x01
+#define PCIR_PCIAF_STATUS 0x5
+#define PCIR_PCIAFSTATUS_TP 0x01
+
+/* Advanced Error Reporting */
+#define PCIR_AER_UC_STATUS 0x04
+#define PCIM_AER_UC_TRAINING_ERROR 0x00000001
+#define PCIM_AER_UC_DL_PROTOCOL_ERROR 0x00000010
+#define PCIM_AER_UC_SURPRISE_LINK_DOWN 0x00000020
+#define PCIM_AER_UC_POISONED_TLP 0x00001000
+#define PCIM_AER_UC_FC_PROTOCOL_ERROR 0x00002000
+#define PCIM_AER_UC_COMPLETION_TIMEOUT 0x00004000
+#define PCIM_AER_UC_COMPLETER_ABORT 0x00008000
+#define PCIM_AER_UC_UNEXPECTED_COMPLETION 0x00010000
+#define PCIM_AER_UC_RECEIVER_OVERFLOW 0x00020000
+#define PCIM_AER_UC_MALFORMED_TLP 0x00040000
+#define PCIM_AER_UC_ECRC_ERROR 0x00080000
+#define PCIM_AER_UC_UNSUPPORTED_REQUEST 0x00100000
+#define PCIM_AER_UC_ACS_VIOLATION 0x00200000
+#define PCIM_AER_UC_INTERNAL_ERROR 0x00400000
+#define PCIM_AER_UC_MC_BLOCKED_TLP 0x00800000
+#define PCIM_AER_UC_ATOMIC_EGRESS_BLK 0x01000000
+#define PCIM_AER_UC_TLP_PREFIX_BLOCKED 0x02000000
+#define PCIR_AER_UC_MASK 0x08 /* Shares bits with UC_STATUS */
+#define PCIR_AER_UC_SEVERITY 0x0c /* Shares bits with UC_STATUS */
+#define PCIR_AER_COR_STATUS 0x10
+#define PCIM_AER_COR_RECEIVER_ERROR 0x00000001
+#define PCIM_AER_COR_BAD_TLP 0x00000040
+#define PCIM_AER_COR_BAD_DLLP 0x00000080
+#define PCIM_AER_COR_REPLAY_ROLLOVER 0x00000100
+#define PCIM_AER_COR_REPLAY_TIMEOUT 0x00001000
+#define PCIM_AER_COR_ADVISORY_NF_ERROR 0x00002000
+#define PCIM_AER_COR_INTERNAL_ERROR 0x00004000
+#define PCIM_AER_COR_HEADER_LOG_OVFLOW 0x00008000
+#define PCIR_AER_COR_MASK 0x14 /* Shares bits with COR_STATUS */
+#define PCIR_AER_CAP_CONTROL 0x18
+#define PCIM_AER_FIRST_ERROR_PTR 0x0000001f
+#define PCIM_AER_ECRC_GEN_CAPABLE 0x00000020
+#define PCIM_AER_ECRC_GEN_ENABLE 0x00000040
+#define PCIM_AER_ECRC_CHECK_CAPABLE 0x00000080
+#define PCIM_AER_ECRC_CHECK_ENABLE 0x00000100
+#define PCIM_AER_MULT_HDR_CAPABLE 0x00000200
+#define PCIM_AER_MULT_HDR_ENABLE 0x00000400
+#define PCIM_AER_TLP_PREFIX_LOG_PRESENT 0x00000800
+#define PCIR_AER_HEADER_LOG 0x1c
+#define PCIR_AER_ROOTERR_CMD 0x2c /* Only for root complex ports */
+#define PCIM_AER_ROOTERR_COR_ENABLE 0x00000001
+#define PCIM_AER_ROOTERR_NF_ENABLE 0x00000002
+#define PCIM_AER_ROOTERR_F_ENABLE 0x00000004
+#define PCIR_AER_ROOTERR_STATUS 0x30 /* Only for root complex ports */
+#define PCIM_AER_ROOTERR_COR_ERR 0x00000001
+#define PCIM_AER_ROOTERR_MULTI_COR_ERR 0x00000002
+#define PCIM_AER_ROOTERR_UC_ERR 0x00000004
+#define PCIM_AER_ROOTERR_MULTI_UC_ERR 0x00000008
+#define PCIM_AER_ROOTERR_FIRST_UC_FATAL 0x00000010
+#define PCIM_AER_ROOTERR_NF_ERR 0x00000020
+#define PCIM_AER_ROOTERR_F_ERR 0x00000040
+#define PCIM_AER_ROOTERR_INT_MESSAGE 0xf8000000
+#define PCIR_AER_COR_SOURCE_ID 0x34 /* Only for root complex ports */
+#define PCIR_AER_ERR_SOURCE_ID 0x36 /* Only for root complex ports */
+#define PCIR_AER_TLP_PREFIX_LOG 0x38 /* Only for TLP prefix functions */
+
+/* Virtual Channel definitions */
+#define PCIR_VC_CAP1 0x04
+#define PCIM_VC_CAP1_EXT_COUNT 0x00000007
+#define PCIM_VC_CAP1_LOWPRI_EXT_COUNT 0x00000070
+#define PCIR_VC_CAP2 0x08
+#define PCIR_VC_CONTROL 0x0C
+#define PCIR_VC_STATUS 0x0E
+#define PCIR_VC_RESOURCE_CAP(n) (0x10 + (n) * 0x0C)
+#define PCIR_VC_RESOURCE_CTL(n) (0x14 + (n) * 0x0C)
+#define PCIR_VC_RESOURCE_STA(n) (0x18 + (n) * 0x0C)
+
+/* Serial Number definitions */
+#define PCIR_SERIAL_LOW 0x04
+#define PCIR_SERIAL_HIGH 0x08
+
diff --git a/usr/src/contrib/bhyve/dev/usb/controller/xhcireg.h b/usr/src/contrib/bhyve/dev/usb/controller/xhcireg.h
new file mode 100644
index 0000000000..0e588ecba3
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/usb/controller/xhcireg.h
@@ -0,0 +1,224 @@
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2010 Hans Petter Selasky. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _XHCIREG_H_
+#define _XHCIREG_H_
+
+/* XHCI PCI config registers */
+#define PCI_XHCI_CBMEM 0x10 /* configuration base MEM */
+#define PCI_XHCI_USBREV 0x60 /* RO USB protocol revision */
+#define PCI_USB_REV_3_0 0x30 /* USB 3.0 */
+#define PCI_XHCI_FLADJ 0x61 /* RW frame length adjust */
+
+#define PCI_XHCI_INTEL_XUSB2PR 0xD0 /* Intel USB2 Port Routing */
+#define PCI_XHCI_INTEL_USB2PRM 0xD4 /* Intel USB2 Port Routing Mask */
+#define PCI_XHCI_INTEL_USB3_PSSEN 0xD8 /* Intel USB3 Port SuperSpeed Enable */
+#define PCI_XHCI_INTEL_USB3PRM 0xDC /* Intel USB3 Port Routing Mask */
+
+/* XHCI capability registers */
+#define XHCI_CAPLENGTH 0x00 /* RO capability */
+#define XHCI_RESERVED 0x01 /* Reserved */
+#define XHCI_HCIVERSION 0x02 /* RO Interface version number */
+#define XHCI_HCIVERSION_0_9 0x0090 /* xHCI version 0.9 */
+#define XHCI_HCIVERSION_1_0 0x0100 /* xHCI version 1.0 */
+#define XHCI_HCSPARAMS1 0x04 /* RO structural parameters 1 */
+#define XHCI_HCS1_DEVSLOT_MAX(x)((x) & 0xFF)
+#define XHCI_HCS1_IRQ_MAX(x) (((x) >> 8) & 0x3FF)
+#define XHCI_HCS1_N_PORTS(x) (((x) >> 24) & 0xFF)
+#define XHCI_HCSPARAMS2 0x08 /* RO structural parameters 2 */
+#define XHCI_HCS2_IST(x) ((x) & 0xF)
+#define XHCI_HCS2_ERST_MAX(x) (((x) >> 4) & 0xF)
+#define XHCI_HCS2_SPR(x) (((x) >> 26) & 0x1)
+#define XHCI_HCS2_SPB_MAX(x) ((((x) >> 16) & 0x3E0) | (((x) >> 27) & 0x1F))
+#define XHCI_HCSPARAMS3 0x0C /* RO structural parameters 3 */
+#define XHCI_HCS3_U1_DEL(x) ((x) & 0xFF)
+#define XHCI_HCS3_U2_DEL(x) (((x) >> 16) & 0xFFFF)
+#define XHCI_HCSPARAMS0 0x10 /* RO capability parameters */
+#define XHCI_HCS0_AC64(x) ((x) & 0x1) /* 64-bit capable */
+#define XHCI_HCS0_BNC(x) (((x) >> 1) & 0x1) /* BW negotiation */
+#define XHCI_HCS0_CSZ(x) (((x) >> 2) & 0x1) /* context size */
+#define XHCI_HCS0_PPC(x) (((x) >> 3) & 0x1) /* port power control */
+#define XHCI_HCS0_PIND(x) (((x) >> 4) & 0x1) /* port indicators */
+#define XHCI_HCS0_LHRC(x) (((x) >> 5) & 0x1) /* light HC reset */
+#define XHCI_HCS0_LTC(x) (((x) >> 6) & 0x1) /* latency tolerance msg */
+#define XHCI_HCS0_NSS(x) (((x) >> 7) & 0x1) /* no secondary sid */
+#define XHCI_HCS0_PSA_SZ_MAX(x) (((x) >> 12) & 0xF) /* max pri. stream array size */
+#define XHCI_HCS0_XECP(x) (((x) >> 16) & 0xFFFF) /* extended capabilities pointer */
+#define XHCI_DBOFF 0x14 /* RO doorbell offset */
+#define XHCI_RTSOFF 0x18 /* RO runtime register space offset */
+
+/* XHCI operational registers. Offset given by XHCI_CAPLENGTH register */
+#define XHCI_USBCMD 0x00 /* XHCI command */
+#define XHCI_CMD_RS 0x00000001 /* RW Run/Stop */
+#define XHCI_CMD_HCRST 0x00000002 /* RW Host Controller Reset */
+#define XHCI_CMD_INTE 0x00000004 /* RW Interrupter Enable */
+#define XHCI_CMD_HSEE 0x00000008 /* RW Host System Error Enable */
+#define XHCI_CMD_LHCRST 0x00000080 /* RO/RW Light Host Controller Reset */
+#define XHCI_CMD_CSS 0x00000100 /* RW Controller Save State */
+#define XHCI_CMD_CRS 0x00000200 /* RW Controller Restore State */
+#define XHCI_CMD_EWE 0x00000400 /* RW Enable Wrap Event */
+#define XHCI_CMD_EU3S 0x00000800 /* RW Enable U3 MFINDEX Stop */
+#define XHCI_USBSTS 0x04 /* XHCI status */
+#define XHCI_STS_HCH 0x00000001 /* RO - Host Controller Halted */
+#define XHCI_STS_HSE 0x00000004 /* RW - Host System Error */
+#define XHCI_STS_EINT 0x00000008 /* RW - Event Interrupt */
+#define XHCI_STS_PCD 0x00000010 /* RW - Port Change Detect */
+#define XHCI_STS_SSS 0x00000100 /* RO - Save State Status */
+#define XHCI_STS_RSS 0x00000200 /* RO - Restore State Status */
+#define XHCI_STS_SRE 0x00000400 /* RW - Save/Restore Error */
+#define XHCI_STS_CNR 0x00000800 /* RO - Controller Not Ready */
+#define XHCI_STS_HCE 0x00001000 /* RO - Host Controller Error */
+#define XHCI_PAGESIZE 0x08 /* XHCI page size mask */
+#define XHCI_PAGESIZE_4K 0x00000001 /* 4K Page Size */
+#define XHCI_PAGESIZE_8K 0x00000002 /* 8K Page Size */
+#define XHCI_PAGESIZE_16K 0x00000004 /* 16K Page Size */
+#define XHCI_PAGESIZE_32K 0x00000008 /* 32K Page Size */
+#define XHCI_PAGESIZE_64K 0x00000010 /* 64K Page Size */
+#define XHCI_DNCTRL 0x14 /* XHCI device notification control */
+#define XHCI_DNCTRL_MASK(n) (1U << (n))
+#define XHCI_CRCR_LO 0x18 /* XHCI command ring control */
+#define XHCI_CRCR_LO_RCS 0x00000001 /* RW - consumer cycle state */
+#define XHCI_CRCR_LO_CS 0x00000002 /* RW - command stop */
+#define XHCI_CRCR_LO_CA 0x00000004 /* RW - command abort */
+#define XHCI_CRCR_LO_CRR 0x00000008 /* RW - command ring running */
+#define XHCI_CRCR_LO_MASK 0x0000000F
+#define XHCI_CRCR_HI 0x1C /* XHCI command ring control */
+#define XHCI_DCBAAP_LO 0x30 /* XHCI dev context BA pointer */
+#define XHCI_DCBAAP_HI 0x34 /* XHCI dev context BA pointer */
+#define XHCI_CONFIG 0x38
+#define XHCI_CONFIG_SLOTS_MASK 0x000000FF /* RW - number of device slots enabled */
+
+/* XHCI port status registers */
+#define XHCI_PORTSC(n) (0x3F0 + (0x10 * (n))) /* XHCI port status */
+#define XHCI_PS_CCS 0x00000001 /* RO - current connect status */
+#define XHCI_PS_PED 0x00000002 /* RW - port enabled / disabled */
+#define XHCI_PS_OCA 0x00000008 /* RO - over current active */
+#define XHCI_PS_PR 0x00000010 /* RW - port reset */
+#define XHCI_PS_PLS_GET(x) (((x) >> 5) & 0xF) /* RW - port link state */
+#define XHCI_PS_PLS_SET(x) (((x) & 0xF) << 5) /* RW - port link state */
+#define XHCI_PS_PP 0x00000200 /* RW - port power */
+#define XHCI_PS_SPEED_GET(x) (((x) >> 10) & 0xF) /* RO - port speed */
+#define XHCI_PS_PIC_GET(x) (((x) >> 14) & 0x3) /* RW - port indicator */
+#define XHCI_PS_PIC_SET(x) (((x) & 0x3) << 14) /* RW - port indicator */
+#define XHCI_PS_LWS 0x00010000 /* RW - port link state write strobe */
+#define XHCI_PS_CSC 0x00020000 /* RW - connect status change */
+#define XHCI_PS_PEC 0x00040000 /* RW - port enable/disable change */
+#define XHCI_PS_WRC 0x00080000 /* RW - warm port reset change */
+#define XHCI_PS_OCC 0x00100000 /* RW - over-current change */
+#define XHCI_PS_PRC 0x00200000 /* RW - port reset change */
+#define XHCI_PS_PLC 0x00400000 /* RW - port link state change */
+#define XHCI_PS_CEC 0x00800000 /* RW - config error change */
+#define XHCI_PS_CAS 0x01000000 /* RO - cold attach status */
+#define XHCI_PS_WCE 0x02000000 /* RW - wake on connect enable */
+#define XHCI_PS_WDE 0x04000000 /* RW - wake on disconnect enable */
+#define XHCI_PS_WOE 0x08000000 /* RW - wake on over-current enable */
+#define XHCI_PS_DR 0x40000000 /* RO - device removable */
+#define XHCI_PS_WPR 0x80000000U /* RW - warm port reset */
+#define XHCI_PS_CLEAR 0x80FF01FFU /* command bits */
+
+#define XHCI_PORTPMSC(n) (0x3F4 + (0x10 * (n))) /* XHCI status and control */
+#define XHCI_PM3_U1TO_GET(x) (((x) >> 0) & 0xFF) /* RW - U1 timeout */
+#define XHCI_PM3_U1TO_SET(x) (((x) & 0xFF) << 0) /* RW - U1 timeout */
+#define XHCI_PM3_U2TO_GET(x) (((x) >> 8) & 0xFF) /* RW - U2 timeout */
+#define XHCI_PM3_U2TO_SET(x) (((x) & 0xFF) << 8) /* RW - U2 timeout */
+#define XHCI_PM3_FLA 0x00010000 /* RW - Force Link PM Accept */
+#define XHCI_PM2_L1S_GET(x) (((x) >> 0) & 0x7) /* RO - L1 status */
+#define XHCI_PM2_RWE 0x00000008 /* RW - remote wakup enable */
+#define XHCI_PM2_HIRD_GET(x) (((x) >> 4) & 0xF) /* RW - host initiated resume duration */
+#define XHCI_PM2_HIRD_SET(x) (((x) & 0xF) << 4) /* RW - host initiated resume duration */
+#define XHCI_PM2_L1SLOT_GET(x) (((x) >> 8) & 0xFF) /* RW - L1 device slot */
+#define XHCI_PM2_L1SLOT_SET(x) (((x) & 0xFF) << 8) /* RW - L1 device slot */
+#define XHCI_PM2_HLE 0x00010000 /* RW - hardware LPM enable */
+#define XHCI_PORTLI(n) (0x3F8 + (0x10 * (n))) /* XHCI port link info */
+#define XHCI_PLI3_ERR_GET(x) (((x) >> 0) & 0xFFFF) /* RO - port link errors */
+#define XHCI_PORTRSV(n) (0x3FC + (0x10 * (n))) /* XHCI port reserved */
+
+/* XHCI runtime registers. Offset given by XHCI_CAPLENGTH + XHCI_RTSOFF registers */
+#define XHCI_MFINDEX 0x0000 /* RO - microframe index */
+#define XHCI_MFINDEX_GET(x) ((x) & 0x3FFF)
+#define XHCI_IMAN(n) (0x0020 + (0x20 * (n))) /* XHCI interrupt management */
+#define XHCI_IMAN_INTR_PEND 0x00000001 /* RW - interrupt pending */
+#define XHCI_IMAN_INTR_ENA 0x00000002 /* RW - interrupt enable */
+#define XHCI_IMOD(n) (0x0024 + (0x20 * (n))) /* XHCI interrupt moderation */
+#define XHCI_IMOD_IVAL_GET(x) (((x) >> 0) & 0xFFFF) /* 250ns unit */
+#define XHCI_IMOD_IVAL_SET(x) (((x) & 0xFFFF) << 0) /* 250ns unit */
+#define XHCI_IMOD_ICNT_GET(x) (((x) >> 16) & 0xFFFF) /* 250ns unit */
+#define XHCI_IMOD_ICNT_SET(x) (((x) & 0xFFFF) << 16) /* 250ns unit */
+#define XHCI_IMOD_DEFAULT 0x000001F4U /* 8000 IRQs/second */
+#define XHCI_IMOD_DEFAULT_LP 0x000003F8U /* 4000 IRQs/second - LynxPoint */
+#define XHCI_ERSTSZ(n) (0x0028 + (0x20 * (n))) /* XHCI event ring segment table size */
+#define XHCI_ERSTS_GET(x) ((x) & 0xFFFF)
+#define XHCI_ERSTS_SET(x) ((x) & 0xFFFF)
+#define XHCI_ERSTBA_LO(n) (0x0030 + (0x20 * (n))) /* XHCI event ring segment table BA */
+#define XHCI_ERSTBA_HI(n) (0x0034 + (0x20 * (n))) /* XHCI event ring segment table BA */
+#define XHCI_ERDP_LO(n) (0x0038 + (0x20 * (n))) /* XHCI event ring dequeue pointer */
+#define XHCI_ERDP_LO_SINDEX(x) ((x) & 0x7) /* RO - dequeue segment index */
+#define XHCI_ERDP_LO_BUSY 0x00000008 /* RW - event handler busy */
+#define XHCI_ERDP_HI(n) (0x003C + (0x20 * (n))) /* XHCI event ring dequeue pointer */
+
+/* XHCI doorbell registers. Offset given by XHCI_CAPLENGTH + XHCI_DBOFF registers */
+#define XHCI_DOORBELL(n) (0x0000 + (4 * (n)))
+#define XHCI_DB_TARGET_GET(x) ((x) & 0xFF) /* RW - doorbell target */
+#define XHCI_DB_TARGET_SET(x) ((x) & 0xFF) /* RW - doorbell target */
+#define XHCI_DB_SID_GET(x) (((x) >> 16) & 0xFFFF) /* RW - doorbell stream ID */
+#define XHCI_DB_SID_SET(x) (((x) & 0xFFFF) << 16) /* RW - doorbell stream ID */
+
+/* XHCI legacy support */
+#define XHCI_XECP_ID(x) ((x) & 0xFF)
+#define XHCI_XECP_NEXT(x) (((x) >> 8) & 0xFF)
+#define XHCI_XECP_BIOS_SEM 0x0002
+#define XHCI_XECP_OS_SEM 0x0003
+
+/* XHCI capability ID's */
+#define XHCI_ID_USB_LEGACY 0x0001
+#define XHCI_ID_PROTOCOLS 0x0002
+#define XHCI_ID_POWER_MGMT 0x0003
+#define XHCI_ID_VIRTUALIZATION 0x0004
+#define XHCI_ID_MSG_IRQ 0x0005
+#define XHCI_ID_USB_LOCAL_MEM 0x0006
+
+/* XHCI register R/W wrappers */
+#define XREAD1(sc, what, a) \
+ bus_space_read_1((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XREAD2(sc, what, a) \
+ bus_space_read_2((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XREAD4(sc, what, a) \
+ bus_space_read_4((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off)
+#define XWRITE1(sc, what, a, x) \
+ bus_space_write_1((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+#define XWRITE2(sc, what, a, x) \
+ bus_space_write_2((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+#define XWRITE4(sc, what, a, x) \
+ bus_space_write_4((sc)->sc_io_tag, (sc)->sc_io_hdl, \
+ (a) + (sc)->sc_##what##_off, (x))
+
+#endif /* _XHCIREG_H_ */
diff --git a/usr/src/contrib/bhyve/dev/usb/usb.h b/usr/src/contrib/bhyve/dev/usb/usb.h
new file mode 100644
index 0000000000..bcea2ac8bd
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/usb/usb.h
@@ -0,0 +1,801 @@
+/* $FreeBSD$ */
+/*-
+ * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
+ * Copyright (c) 1998 The NetBSD Foundation, Inc. All rights reserved.
+ * Copyright (c) 1998 Lennart Augustsson. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file contains standard definitions for the following USB
+ * protocol versions:
+ *
+ * USB v1.0
+ * USB v1.1
+ * USB v2.0
+ * USB v3.0
+ */
+
+#ifndef _USB_STANDARD_H_
+#define _USB_STANDARD_H_
+
+#if defined(_KERNEL)
+#ifndef USB_GLOBAL_INCLUDE_FILE
+#include "opt_usb.h"
+#endif
+
+/* Declare parent SYSCTL USB node. */
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_hw_usb);
+#endif
+
+#ifndef USB_GLOBAL_INCLUDE_FILE
+#include <sys/malloc.h>
+#endif
+
+MALLOC_DECLARE(M_USB);
+MALLOC_DECLARE(M_USBDEV);
+#endif /* _KERNEL */
+
+#ifndef USB_GLOBAL_INCLUDE_FILE
+#include <dev/usb/usb_endian.h>
+#include <dev/usb/usb_freebsd.h>
+#endif
+
+#define USB_STACK_VERSION 2000 /* 2.0 */
+
+/* Definition of some hardcoded USB constants. */
+
+#define USB_MAX_IPACKET 8 /* initial USB packet size */
+#define USB_EP_MAX (2*16) /* hardcoded */
+#define USB_ROOT_HUB_ADDR 1 /* index */
+#define USB_MIN_DEVICES 2 /* unused + root HUB */
+#define USB_UNCONFIG_INDEX 0xFF /* internal use only */
+#define USB_IFACE_INDEX_ANY 0xFF /* internal use only */
+#define USB_START_ADDR 0 /* default USB device BUS address
+ * after USB bus reset */
+#define USB_CONTROL_ENDPOINT 0 /* default control endpoint */
+
+#define USB_FRAMES_PER_SECOND_FS 1000 /* full speed */
+#define USB_FRAMES_PER_SECOND_HS 8000 /* high speed */
+
+#define USB_FS_BYTES_PER_HS_UFRAME 188 /* bytes */
+#define USB_HS_MICRO_FRAMES_MAX 8 /* units */
+
+#define USB_ISOC_TIME_MAX 128 /* ms */
+
+/*
+ * Minimum time a device needs to be powered down to go through a
+ * power cycle. These values are not in the USB specification.
+ */
+#define USB_POWER_DOWN_TIME 200 /* ms */
+#define USB_PORT_POWER_DOWN_TIME 100 /* ms */
+
+/* Definition of software USB power modes */
+#define USB_POWER_MODE_OFF 0 /* turn off device */
+#define USB_POWER_MODE_ON 1 /* always on */
+#define USB_POWER_MODE_SAVE 2 /* automatic suspend and resume */
+#define USB_POWER_MODE_SUSPEND 3 /* force suspend */
+#define USB_POWER_MODE_RESUME 4 /* force resume */
+
+/* These are the values from the USB specification. */
+#define USB_PORT_RESET_DELAY_SPEC 10 /* ms */
+#define USB_PORT_ROOT_RESET_DELAY_SPEC 50 /* ms */
+#define USB_PORT_RESET_RECOVERY_SPEC 10 /* ms */
+#define USB_PORT_POWERUP_DELAY_SPEC 100 /* ms */
+#define USB_PORT_RESUME_DELAY_SPEC 20 /* ms */
+#define USB_SET_ADDRESS_SETTLE_SPEC 2 /* ms */
+#define USB_RESUME_DELAY_SPEC (20*5) /* ms */
+#define USB_RESUME_WAIT_SPEC 10 /* ms */
+#define USB_RESUME_RECOVERY_SPEC 10 /* ms */
+#define USB_EXTRA_POWER_UP_TIME_SPEC 0 /* ms */
+
+/* Allow for marginal and non-conforming devices. */
+#define USB_PORT_RESET_DELAY 50 /* ms */
+#define USB_PORT_ROOT_RESET_DELAY 200 /* ms */
+#define USB_PORT_RESET_RECOVERY 250 /* ms */
+#define USB_PORT_POWERUP_DELAY 300 /* ms */
+#define USB_PORT_RESUME_DELAY (20*2) /* ms */
+#define USB_SET_ADDRESS_SETTLE 10 /* ms */
+#define USB_RESUME_DELAY (50*5) /* ms */
+#define USB_RESUME_WAIT 50 /* ms */
+#define USB_RESUME_RECOVERY 50 /* ms */
+#define USB_EXTRA_POWER_UP_TIME 20 /* ms */
+
+#define USB_MIN_POWER 100 /* mA */
+#define USB_MAX_POWER 500 /* mA */
+
+#define USB_BUS_RESET_DELAY 100 /* ms */
+
+/*
+ * USB record layout in memory:
+ *
+ * - USB config 0
+ * - USB interfaces
+ * - USB alternative interfaces
+ * - USB endpoints
+ *
+ * - USB config 1
+ * - USB interfaces
+ * - USB alternative interfaces
+ * - USB endpoints
+ */
+
+/* Declaration of USB records */
+
+struct usb_device_request {
+ uByte bmRequestType;
+ uByte bRequest;
+ uWord wValue;
+ uWord wIndex;
+ uWord wLength;
+} __packed;
+typedef struct usb_device_request usb_device_request_t;
+
+#define UT_WRITE 0x00
+#define UT_READ 0x80
+#define UT_STANDARD 0x00
+#define UT_CLASS 0x20
+#define UT_VENDOR 0x40
+#define UT_DEVICE 0x00
+#define UT_INTERFACE 0x01
+#define UT_ENDPOINT 0x02
+#define UT_OTHER 0x03
+
+#define UT_READ_DEVICE (UT_READ | UT_STANDARD | UT_DEVICE)
+#define UT_READ_INTERFACE (UT_READ | UT_STANDARD | UT_INTERFACE)
+#define UT_READ_ENDPOINT (UT_READ | UT_STANDARD | UT_ENDPOINT)
+#define UT_WRITE_DEVICE (UT_WRITE | UT_STANDARD | UT_DEVICE)
+#define UT_WRITE_INTERFACE (UT_WRITE | UT_STANDARD | UT_INTERFACE)
+#define UT_WRITE_ENDPOINT (UT_WRITE | UT_STANDARD | UT_ENDPOINT)
+#define UT_READ_CLASS_DEVICE (UT_READ | UT_CLASS | UT_DEVICE)
+#define UT_READ_CLASS_INTERFACE (UT_READ | UT_CLASS | UT_INTERFACE)
+#define UT_READ_CLASS_OTHER (UT_READ | UT_CLASS | UT_OTHER)
+#define UT_READ_CLASS_ENDPOINT (UT_READ | UT_CLASS | UT_ENDPOINT)
+#define UT_WRITE_CLASS_DEVICE (UT_WRITE | UT_CLASS | UT_DEVICE)
+#define UT_WRITE_CLASS_INTERFACE (UT_WRITE | UT_CLASS | UT_INTERFACE)
+#define UT_WRITE_CLASS_OTHER (UT_WRITE | UT_CLASS | UT_OTHER)
+#define UT_WRITE_CLASS_ENDPOINT (UT_WRITE | UT_CLASS | UT_ENDPOINT)
+#define UT_READ_VENDOR_DEVICE (UT_READ | UT_VENDOR | UT_DEVICE)
+#define UT_READ_VENDOR_INTERFACE (UT_READ | UT_VENDOR | UT_INTERFACE)
+#define UT_READ_VENDOR_OTHER (UT_READ | UT_VENDOR | UT_OTHER)
+#define UT_READ_VENDOR_ENDPOINT (UT_READ | UT_VENDOR | UT_ENDPOINT)
+#define UT_WRITE_VENDOR_DEVICE (UT_WRITE | UT_VENDOR | UT_DEVICE)
+#define UT_WRITE_VENDOR_INTERFACE (UT_WRITE | UT_VENDOR | UT_INTERFACE)
+#define UT_WRITE_VENDOR_OTHER (UT_WRITE | UT_VENDOR | UT_OTHER)
+#define UT_WRITE_VENDOR_ENDPOINT (UT_WRITE | UT_VENDOR | UT_ENDPOINT)
+
+/* Requests */
+#define UR_GET_STATUS 0x00
+#define UR_CLEAR_FEATURE 0x01
+#define UR_SET_FEATURE 0x03
+#define UR_SET_ADDRESS 0x05
+#define UR_GET_DESCRIPTOR 0x06
+#define UDESC_DEVICE 0x01
+#define UDESC_CONFIG 0x02
+#define UDESC_STRING 0x03
+#define USB_LANGUAGE_TABLE 0x00 /* language ID string index */
+#define UDESC_INTERFACE 0x04
+#define UDESC_ENDPOINT 0x05
+#define UDESC_DEVICE_QUALIFIER 0x06
+#define UDESC_OTHER_SPEED_CONFIGURATION 0x07
+#define UDESC_INTERFACE_POWER 0x08
+#define UDESC_OTG 0x09
+#define UDESC_DEBUG 0x0A
+#define UDESC_IFACE_ASSOC 0x0B /* interface association */
+#define UDESC_BOS 0x0F /* binary object store */
+#define UDESC_DEVICE_CAPABILITY 0x10
+#define UDESC_CS_DEVICE 0x21 /* class specific */
+#define UDESC_CS_CONFIG 0x22
+#define UDESC_CS_STRING 0x23
+#define UDESC_CS_INTERFACE 0x24
+#define UDESC_CS_ENDPOINT 0x25
+#define UDESC_HUB 0x29
+#define UDESC_SS_HUB 0x2A /* super speed */
+#define UDESC_ENDPOINT_SS_COMP 0x30 /* super speed */
+#define UR_SET_DESCRIPTOR 0x07
+#define UR_GET_CONFIG 0x08
+#define UR_SET_CONFIG 0x09
+#define UR_GET_INTERFACE 0x0a
+#define UR_SET_INTERFACE 0x0b
+#define UR_SYNCH_FRAME 0x0c
+#define UR_SET_SEL 0x30
+#define UR_ISOCH_DELAY 0x31
+
+/* HUB specific request */
+#define UR_GET_BUS_STATE 0x02
+#define UR_CLEAR_TT_BUFFER 0x08
+#define UR_RESET_TT 0x09
+#define UR_GET_TT_STATE 0x0a
+#define UR_STOP_TT 0x0b
+#define UR_SET_AND_TEST 0x0c /* USB 2.0 only */
+#define UR_SET_HUB_DEPTH 0x0c /* USB 3.0 only */
+#define USB_SS_HUB_DEPTH_MAX 5
+#define UR_GET_PORT_ERR_COUNT 0x0d
+
+/* Feature numbers */
+#define UF_ENDPOINT_HALT 0
+#define UF_DEVICE_REMOTE_WAKEUP 1
+#define UF_TEST_MODE 2
+#define UF_U1_ENABLE 0x30
+#define UF_U2_ENABLE 0x31
+#define UF_LTM_ENABLE 0x32
+
+/* HUB specific features */
+#define UHF_C_HUB_LOCAL_POWER 0
+#define UHF_C_HUB_OVER_CURRENT 1
+#define UHF_PORT_CONNECTION 0
+#define UHF_PORT_ENABLE 1
+#define UHF_PORT_SUSPEND 2
+#define UHF_PORT_OVER_CURRENT 3
+#define UHF_PORT_RESET 4
+#define UHF_PORT_LINK_STATE 5
+#define UHF_PORT_POWER 8
+#define UHF_PORT_LOW_SPEED 9
+#define UHF_PORT_L1 10
+#define UHF_C_PORT_CONNECTION 16
+#define UHF_C_PORT_ENABLE 17
+#define UHF_C_PORT_SUSPEND 18
+#define UHF_C_PORT_OVER_CURRENT 19
+#define UHF_C_PORT_RESET 20
+#define UHF_PORT_TEST 21
+#define UHF_PORT_INDICATOR 22
+#define UHF_C_PORT_L1 23
+
+/* SuperSpeed HUB specific features */
+#define UHF_PORT_U1_TIMEOUT 23
+#define UHF_PORT_U2_TIMEOUT 24
+#define UHF_C_PORT_LINK_STATE 25
+#define UHF_C_PORT_CONFIG_ERROR 26
+#define UHF_PORT_REMOTE_WAKE_MASK 27
+#define UHF_BH_PORT_RESET 28
+#define UHF_C_BH_PORT_RESET 29
+#define UHF_FORCE_LINKPM_ACCEPT 30
+
+struct usb_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bDescriptorSubtype;
+} __packed;
+typedef struct usb_descriptor usb_descriptor_t;
+
+struct usb_device_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uWord bcdUSB;
+#define UD_USB_2_0 0x0200
+#define UD_USB_3_0 0x0300
+#define UD_IS_USB2(d) ((d)->bcdUSB[1] == 0x02)
+#define UD_IS_USB3(d) ((d)->bcdUSB[1] == 0x03)
+ uByte bDeviceClass;
+ uByte bDeviceSubClass;
+ uByte bDeviceProtocol;
+ uByte bMaxPacketSize;
+ /* The fields below are not part of the initial descriptor. */
+ uWord idVendor;
+ uWord idProduct;
+ uWord bcdDevice;
+ uByte iManufacturer;
+ uByte iProduct;
+ uByte iSerialNumber;
+ uByte bNumConfigurations;
+} __packed;
+typedef struct usb_device_descriptor usb_device_descriptor_t;
+
+/* Binary Device Object Store (BOS) */
+struct usb_bos_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uWord wTotalLength;
+ uByte bNumDeviceCaps;
+} __packed;
+typedef struct usb_bos_descriptor usb_bos_descriptor_t;
+
+/* Binary Device Object Store Capability */
+struct usb_bos_cap_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bDevCapabilityType;
+#define USB_DEVCAP_RESERVED 0x00
+#define USB_DEVCAP_WUSB 0x01
+#define USB_DEVCAP_USB2EXT 0x02
+#define USB_DEVCAP_SUPER_SPEED 0x03
+#define USB_DEVCAP_CONTAINER_ID 0x04
+ /* data ... */
+} __packed;
+typedef struct usb_bos_cap_descriptor usb_bos_cap_descriptor_t;
+
+struct usb_devcap_usb2ext_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bDevCapabilityType;
+ uDWord bmAttributes;
+#define USB_V2EXT_LPM (1U << 1)
+#define USB_V2EXT_BESL_SUPPORTED (1U << 2)
+#define USB_V2EXT_BESL_BASELINE_VALID (1U << 3)
+#define USB_V2EXT_BESL_DEEP_VALID (1U << 4)
+#define USB_V2EXT_BESL_BASELINE_GET(x) (((x) >> 8) & 0xF)
+#define USB_V2EXT_BESL_DEEP_GET(x) (((x) >> 12) & 0xF)
+} __packed;
+typedef struct usb_devcap_usb2ext_descriptor usb_devcap_usb2ext_descriptor_t;
+
+struct usb_devcap_ss_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bDevCapabilityType;
+ uByte bmAttributes;
+ uWord wSpeedsSupported;
+ uByte bFunctionalitySupport;
+ uByte bU1DevExitLat;
+ uWord wU2DevExitLat;
+} __packed;
+typedef struct usb_devcap_ss_descriptor usb_devcap_ss_descriptor_t;
+
+struct usb_devcap_container_id_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bDevCapabilityType;
+ uByte bReserved;
+ uByte bContainerID;
+} __packed;
+typedef struct usb_devcap_container_id_descriptor
+ usb_devcap_container_id_descriptor_t;
+
+/* Device class codes */
+#define UDCLASS_IN_INTERFACE 0x00
+#define UDCLASS_COMM 0x02
+#define UDCLASS_HUB 0x09
+#define UDSUBCLASS_HUB 0x00
+#define UDPROTO_FSHUB 0x00
+#define UDPROTO_HSHUBSTT 0x01
+#define UDPROTO_HSHUBMTT 0x02
+#define UDPROTO_SSHUB 0x03
+#define UDCLASS_DIAGNOSTIC 0xdc
+#define UDCLASS_WIRELESS 0xe0
+#define UDSUBCLASS_RF 0x01
+#define UDPROTO_BLUETOOTH 0x01
+#define UDCLASS_VENDOR 0xff
+
+struct usb_config_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uWord wTotalLength;
+ uByte bNumInterface;
+ uByte bConfigurationValue;
+#define USB_UNCONFIG_NO 0
+ uByte iConfiguration;
+ uByte bmAttributes;
+#define UC_BUS_POWERED 0x80
+#define UC_SELF_POWERED 0x40
+#define UC_REMOTE_WAKEUP 0x20
+ uByte bMaxPower; /* max current in 2 mA units */
+#define UC_POWER_FACTOR 2
+} __packed;
+typedef struct usb_config_descriptor usb_config_descriptor_t;
+
+struct usb_interface_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bInterfaceNumber;
+ uByte bAlternateSetting;
+ uByte bNumEndpoints;
+ uByte bInterfaceClass;
+ uByte bInterfaceSubClass;
+ uByte bInterfaceProtocol;
+ uByte iInterface;
+} __packed;
+typedef struct usb_interface_descriptor usb_interface_descriptor_t;
+
+struct usb_interface_assoc_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bFirstInterface;
+ uByte bInterfaceCount;
+ uByte bFunctionClass;
+ uByte bFunctionSubClass;
+ uByte bFunctionProtocol;
+ uByte iFunction;
+} __packed;
+typedef struct usb_interface_assoc_descriptor usb_interface_assoc_descriptor_t;
+
+/* Interface class codes */
+#define UICLASS_UNSPEC 0x00
+#define UICLASS_AUDIO 0x01 /* audio */
+#define UISUBCLASS_AUDIOCONTROL 1
+#define UISUBCLASS_AUDIOSTREAM 2
+#define UISUBCLASS_MIDISTREAM 3
+
+#define UICLASS_CDC 0x02 /* communication */
+#define UISUBCLASS_DIRECT_LINE_CONTROL_MODEL 1
+#define UISUBCLASS_ABSTRACT_CONTROL_MODEL 2
+#define UISUBCLASS_TELEPHONE_CONTROL_MODEL 3
+#define UISUBCLASS_MULTICHANNEL_CONTROL_MODEL 4
+#define UISUBCLASS_CAPI_CONTROLMODEL 5
+#define UISUBCLASS_ETHERNET_NETWORKING_CONTROL_MODEL 6
+#define UISUBCLASS_ATM_NETWORKING_CONTROL_MODEL 7
+#define UISUBCLASS_WIRELESS_HANDSET_CM 8
+#define UISUBCLASS_DEVICE_MGMT 9
+#define UISUBCLASS_MOBILE_DIRECT_LINE_MODEL 10
+#define UISUBCLASS_OBEX 11
+#define UISUBCLASS_ETHERNET_EMULATION_MODEL 12
+#define UISUBCLASS_NETWORK_CONTROL_MODEL 13
+
+#define UIPROTO_CDC_NONE 0
+#define UIPROTO_CDC_AT 1
+
+#define UICLASS_HID 0x03
+#define UISUBCLASS_BOOT 1
+#define UIPROTO_BOOT_KEYBOARD 1
+#define UIPROTO_MOUSE 2
+
+#define UICLASS_PHYSICAL 0x05
+#define UICLASS_IMAGE 0x06
+#define UISUBCLASS_SIC 1 /* still image class */
+#define UICLASS_PRINTER 0x07
+#define UISUBCLASS_PRINTER 1
+#define UIPROTO_PRINTER_UNI 1
+#define UIPROTO_PRINTER_BI 2
+#define UIPROTO_PRINTER_1284 3
+
+#define UICLASS_MASS 0x08
+#define UISUBCLASS_RBC 1
+#define UISUBCLASS_SFF8020I 2
+#define UISUBCLASS_QIC157 3
+#define UISUBCLASS_UFI 4
+#define UISUBCLASS_SFF8070I 5
+#define UISUBCLASS_SCSI 6
+#define UIPROTO_MASS_CBI_I 0
+#define UIPROTO_MASS_CBI 1
+#define UIPROTO_MASS_BBB_OLD 2 /* Not in the spec anymore */
+#define UIPROTO_MASS_BBB 80 /* 'P' for the Iomega Zip drive */
+
+#define UICLASS_HUB 0x09
+#define UISUBCLASS_HUB 0
+#define UIPROTO_FSHUB 0
+#define UIPROTO_HSHUBSTT 0 /* Yes, same as previous */
+#define UIPROTO_HSHUBMTT 1
+
+#define UICLASS_CDC_DATA 0x0a
+#define UISUBCLASS_DATA 0x00
+#define UIPROTO_DATA_ISDNBRI 0x30 /* Physical iface */
+#define UIPROTO_DATA_HDLC 0x31 /* HDLC */
+#define UIPROTO_DATA_TRANSPARENT 0x32 /* Transparent */
+#define UIPROTO_DATA_Q921M 0x50 /* Management for Q921 */
+#define UIPROTO_DATA_Q921 0x51 /* Data for Q921 */
+#define UIPROTO_DATA_Q921TM 0x52 /* TEI multiplexer for Q921 */
+#define UIPROTO_DATA_V42BIS 0x90 /* Data compression */
+#define UIPROTO_DATA_Q931 0x91 /* Euro-ISDN */
+#define UIPROTO_DATA_V120 0x92 /* V.24 rate adaption */
+#define UIPROTO_DATA_CAPI 0x93 /* CAPI 2.0 commands */
+#define UIPROTO_DATA_HOST_BASED 0xfd /* Host based driver */
+#define UIPROTO_DATA_PUF 0xfe /* see Prot. Unit Func. Desc. */
+#define UIPROTO_DATA_VENDOR 0xff /* Vendor specific */
+#define UIPROTO_DATA_NCM 0x01 /* Network Control Model */
+
+#define UICLASS_SMARTCARD 0x0b
+#define UICLASS_FIRM_UPD 0x0c
+#define UICLASS_SECURITY 0x0d
+#define UICLASS_DIAGNOSTIC 0xdc
+#define UICLASS_WIRELESS 0xe0
+#define UISUBCLASS_RF 0x01
+#define UIPROTO_BLUETOOTH 0x01
+#define UIPROTO_RNDIS 0x03
+
+#define UICLASS_IAD 0xEF /* Interface Association Descriptor */
+#define UISUBCLASS_SYNC 0x01
+#define UIPROTO_ACTIVESYNC 0x01
+
+#define UICLASS_APPL_SPEC 0xfe
+#define UISUBCLASS_FIRMWARE_DOWNLOAD 1
+#define UISUBCLASS_IRDA 2
+#define UIPROTO_IRDA 0
+
+#define UICLASS_VENDOR 0xff
+#define UISUBCLASS_XBOX360_CONTROLLER 0x5d
+#define UIPROTO_XBOX360_GAMEPAD 0x01
+
+struct usb_endpoint_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bEndpointAddress;
+#define UE_GET_DIR(a) ((a) & 0x80)
+#define UE_SET_DIR(a,d) ((a) | (((d)&1) << 7))
+#define UE_DIR_IN 0x80 /* IN-token endpoint, fixed */
+#define UE_DIR_OUT 0x00 /* OUT-token endpoint, fixed */
+#define UE_DIR_RX 0xfd /* for internal use only! */
+#define UE_DIR_TX 0xfe /* for internal use only! */
+#define UE_DIR_ANY 0xff /* for internal use only! */
+#define UE_ADDR 0x0f
+#define UE_ADDR_ANY 0xff /* for internal use only! */
+#define UE_GET_ADDR(a) ((a) & UE_ADDR)
+ uByte bmAttributes;
+#define UE_XFERTYPE 0x03
+#define UE_CONTROL 0x00
+#define UE_ISOCHRONOUS 0x01
+#define UE_BULK 0x02
+#define UE_INTERRUPT 0x03
+#define UE_BULK_INTR 0xfe /* for internal use only! */
+#define UE_TYPE_ANY 0xff /* for internal use only! */
+#define UE_GET_XFERTYPE(a) ((a) & UE_XFERTYPE)
+#define UE_ISO_TYPE 0x0c
+#define UE_ISO_ASYNC 0x04
+#define UE_ISO_ADAPT 0x08
+#define UE_ISO_SYNC 0x0c
+#define UE_GET_ISO_TYPE(a) ((a) & UE_ISO_TYPE)
+#define UE_ISO_USAGE 0x30
+#define UE_ISO_USAGE_DATA 0x00
+#define UE_ISO_USAGE_FEEDBACK 0x10
+#define UE_ISO_USAGE_IMPLICT_FB 0x20
+#define UE_GET_ISO_USAGE(a) ((a) & UE_ISO_USAGE)
+ uWord wMaxPacketSize;
+#define UE_ZERO_MPS 0xFFFF /* for internal use only */
+ uByte bInterval;
+} __packed;
+typedef struct usb_endpoint_descriptor usb_endpoint_descriptor_t;
+
+struct usb_endpoint_ss_comp_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bMaxBurst;
+ uByte bmAttributes;
+#define UE_GET_BULK_STREAMS(x) ((x) & 0x0F)
+#define UE_GET_SS_ISO_MULT(x) ((x) & 0x03)
+ uWord wBytesPerInterval;
+} __packed;
+typedef struct usb_endpoint_ss_comp_descriptor
+ usb_endpoint_ss_comp_descriptor_t;
+
+struct usb_string_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uWord bString[126];
+ uByte bUnused;
+} __packed;
+typedef struct usb_string_descriptor usb_string_descriptor_t;
+
+#define USB_MAKE_STRING_DESC(m,name) \
+static const struct { \
+ uByte bLength; \
+ uByte bDescriptorType; \
+ uByte bData[sizeof((uint8_t []){m})]; \
+} __packed name = { \
+ .bLength = sizeof(name), \
+ .bDescriptorType = UDESC_STRING, \
+ .bData = { m }, \
+}
+
+struct usb_string_lang {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bData[2];
+} __packed;
+typedef struct usb_string_lang usb_string_lang_t;
+
+struct usb_hub_descriptor {
+ uByte bDescLength;
+ uByte bDescriptorType;
+ uByte bNbrPorts;
+ uWord wHubCharacteristics;
+#define UHD_PWR 0x0003
+#define UHD_PWR_GANGED 0x0000
+#define UHD_PWR_INDIVIDUAL 0x0001
+#define UHD_PWR_NO_SWITCH 0x0002
+#define UHD_COMPOUND 0x0004
+#define UHD_OC 0x0018
+#define UHD_OC_GLOBAL 0x0000
+#define UHD_OC_INDIVIDUAL 0x0008
+#define UHD_OC_NONE 0x0010
+#define UHD_TT_THINK 0x0060
+#define UHD_TT_THINK_8 0x0000
+#define UHD_TT_THINK_16 0x0020
+#define UHD_TT_THINK_24 0x0040
+#define UHD_TT_THINK_32 0x0060
+#define UHD_PORT_IND 0x0080
+ uByte bPwrOn2PwrGood; /* delay in 2 ms units */
+#define UHD_PWRON_FACTOR 2
+ uByte bHubContrCurrent;
+ uByte DeviceRemovable[32]; /* max 255 ports */
+#define UHD_NOT_REMOV(desc, i) \
+ (((desc)->DeviceRemovable[(i)/8] >> ((i) % 8)) & 1)
+ uByte PortPowerCtrlMask[1]; /* deprecated */
+} __packed;
+typedef struct usb_hub_descriptor usb_hub_descriptor_t;
+
+struct usb_hub_ss_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bNbrPorts;
+ uWord wHubCharacteristics;
+ uByte bPwrOn2PwrGood; /* delay in 2 ms units */
+ uByte bHubContrCurrent;
+ uByte bHubHdrDecLat;
+ uWord wHubDelay;
+ uByte DeviceRemovable[32]; /* max 255 ports */
+} __packed;
+typedef struct usb_hub_ss_descriptor usb_hub_ss_descriptor_t;
+
+/* minimum HUB descriptor (8-ports maximum) */
+struct usb_hub_descriptor_min {
+ uByte bDescLength;
+ uByte bDescriptorType;
+ uByte bNbrPorts;
+ uWord wHubCharacteristics;
+ uByte bPwrOn2PwrGood;
+ uByte bHubContrCurrent;
+ uByte DeviceRemovable[1];
+ uByte PortPowerCtrlMask[1];
+} __packed;
+typedef struct usb_hub_descriptor_min usb_hub_descriptor_min_t;
+
+struct usb_device_qualifier {
+ uByte bLength;
+ uByte bDescriptorType;
+ uWord bcdUSB;
+ uByte bDeviceClass;
+ uByte bDeviceSubClass;
+ uByte bDeviceProtocol;
+ uByte bMaxPacketSize0;
+ uByte bNumConfigurations;
+ uByte bReserved;
+} __packed;
+typedef struct usb_device_qualifier usb_device_qualifier_t;
+
+struct usb_otg_descriptor {
+ uByte bLength;
+ uByte bDescriptorType;
+ uByte bmAttributes;
+#define UOTG_SRP 0x01
+#define UOTG_HNP 0x02
+} __packed;
+typedef struct usb_otg_descriptor usb_otg_descriptor_t;
+
+/* OTG feature selectors */
+#define UOTG_B_HNP_ENABLE 3
+#define UOTG_A_HNP_SUPPORT 4
+#define UOTG_A_ALT_HNP_SUPPORT 5
+
+struct usb_status {
+ uWord wStatus;
+/* Device status flags */
+#define UDS_SELF_POWERED 0x0001
+#define UDS_REMOTE_WAKEUP 0x0002
+/* Endpoint status flags */
+#define UES_HALT 0x0001
+} __packed;
+typedef struct usb_status usb_status_t;
+
+struct usb_hub_status {
+ uWord wHubStatus;
+#define UHS_LOCAL_POWER 0x0001
+#define UHS_OVER_CURRENT 0x0002
+ uWord wHubChange;
+} __packed;
+typedef struct usb_hub_status usb_hub_status_t;
+
+struct usb_port_status {
+ uWord wPortStatus;
+#define UPS_CURRENT_CONNECT_STATUS 0x0001
+#define UPS_PORT_ENABLED 0x0002
+#define UPS_SUSPEND 0x0004
+#define UPS_OVERCURRENT_INDICATOR 0x0008
+#define UPS_RESET 0x0010
+#define UPS_PORT_L1 0x0020 /* USB 2.0 only */
+/* The link-state bits are valid for Super-Speed USB HUBs */
+#define UPS_PORT_LINK_STATE_GET(x) (((x) >> 5) & 0xF)
+#define UPS_PORT_LINK_STATE_SET(x) (((x) & 0xF) << 5)
+#define UPS_PORT_LS_U0 0x00
+#define UPS_PORT_LS_U1 0x01
+#define UPS_PORT_LS_U2 0x02
+#define UPS_PORT_LS_U3 0x03
+#define UPS_PORT_LS_SS_DIS 0x04
+#define UPS_PORT_LS_RX_DET 0x05
+#define UPS_PORT_LS_SS_INA 0x06
+#define UPS_PORT_LS_POLL 0x07
+#define UPS_PORT_LS_RECOVER 0x08
+#define UPS_PORT_LS_HOT_RST 0x09
+#define UPS_PORT_LS_COMP_MODE 0x0A
+#define UPS_PORT_LS_LOOPBACK 0x0B
+#define UPS_PORT_LS_RESUME 0x0F
+#define UPS_PORT_POWER 0x0100
+#define UPS_PORT_POWER_SS 0x0200 /* super-speed only */
+#define UPS_LOW_SPEED 0x0200
+#define UPS_HIGH_SPEED 0x0400
+#define UPS_OTHER_SPEED 0x0600 /* currently FreeBSD specific */
+#define UPS_PORT_TEST 0x0800
+#define UPS_PORT_INDICATOR 0x1000
+#define UPS_PORT_MODE_DEVICE 0x8000 /* currently FreeBSD specific */
+ uWord wPortChange;
+#define UPS_C_CONNECT_STATUS 0x0001
+#define UPS_C_PORT_ENABLED 0x0002
+#define UPS_C_SUSPEND 0x0004
+#define UPS_C_OVERCURRENT_INDICATOR 0x0008
+#define UPS_C_PORT_RESET 0x0010
+#define UPS_C_PORT_L1 0x0020 /* USB 2.0 only */
+#define UPS_C_BH_PORT_RESET 0x0020 /* USB 3.0 only */
+#define UPS_C_PORT_LINK_STATE 0x0040
+#define UPS_C_PORT_CONFIG_ERROR 0x0080
+} __packed;
+typedef struct usb_port_status usb_port_status_t;
+
+/*
+ * The "USB_SPEED" macros defines all the supported USB speeds.
+ */
+enum usb_dev_speed {
+ USB_SPEED_VARIABLE,
+ USB_SPEED_LOW,
+ USB_SPEED_FULL,
+ USB_SPEED_HIGH,
+ USB_SPEED_SUPER,
+};
+#define USB_SPEED_MAX (USB_SPEED_SUPER+1)
+
+/*
+ * The "USB_REV" macros defines all the supported USB revisions.
+ */
+enum usb_revision {
+ USB_REV_UNKNOWN,
+ USB_REV_PRE_1_0,
+ USB_REV_1_0,
+ USB_REV_1_1,
+ USB_REV_2_0,
+ USB_REV_2_5,
+ USB_REV_3_0
+};
+#define USB_REV_MAX (USB_REV_3_0+1)
+
+/*
+ * Supported host controller modes.
+ */
+enum usb_hc_mode {
+ USB_MODE_HOST, /* initiates transfers */
+ USB_MODE_DEVICE, /* bus transfer target */
+ USB_MODE_DUAL /* can be host or device */
+};
+#define USB_MODE_MAX (USB_MODE_DUAL+1)
+
+/*
+ * The "USB_STATE" enums define all the supported device states.
+ */
+enum usb_dev_state {
+ USB_STATE_DETACHED,
+ USB_STATE_ATTACHED,
+ USB_STATE_POWERED,
+ USB_STATE_ADDRESSED,
+ USB_STATE_CONFIGURED,
+};
+#define USB_STATE_MAX (USB_STATE_CONFIGURED+1)
+
+/*
+ * The "USB_EP_MODE" macros define all the currently supported
+ * endpoint modes.
+ */
+enum usb_ep_mode {
+ USB_EP_MODE_DEFAULT,
+ USB_EP_MODE_STREAMS, /* USB3.0 specific */
+ USB_EP_MODE_HW_MASS_STORAGE,
+ USB_EP_MODE_HW_SERIAL,
+ USB_EP_MODE_HW_ETHERNET_CDC,
+ USB_EP_MODE_HW_ETHERNET_NCM,
+ USB_EP_MODE_MAX
+};
+#endif /* _USB_STANDARD_H_ */
diff --git a/usr/src/contrib/bhyve/dev/usb/usb_endian.h b/usr/src/contrib/bhyve/dev/usb/usb_endian.h
new file mode 100644
index 0000000000..0bbcb9bf82
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/usb/usb_endian.h
@@ -0,0 +1,121 @@
+/* $FreeBSD$ */
+/*
+ * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _USB_ENDIAN_H_
+#define _USB_ENDIAN_H_
+
+#ifndef USB_GLOBAL_INCLUDE_FILE
+#include <sys/stdint.h>
+#include <sys/endian.h>
+#endif
+
+/*
+ * Declare the basic USB record types. USB records have an alignment
+ * of 1 byte and are always packed.
+ */
+typedef uint8_t uByte;
+typedef uint8_t uWord[2];
+typedef uint8_t uDWord[4];
+typedef uint8_t uQWord[8];
+
+/*
+ * Define a set of macros that can get and set data independent of
+ * CPU endianness and CPU alignment requirements:
+ */
+#define UGETB(w) \
+ ((w)[0])
+
+#define UGETW(w) \
+ ((w)[0] | \
+ (((uint16_t)((w)[1])) << 8))
+
+#define UGETDW(w) \
+ ((w)[0] | \
+ (((uint16_t)((w)[1])) << 8) | \
+ (((uint32_t)((w)[2])) << 16) | \
+ (((uint32_t)((w)[3])) << 24))
+
+#define UGETQW(w) \
+ ((w)[0] | \
+ (((uint16_t)((w)[1])) << 8) | \
+ (((uint32_t)((w)[2])) << 16) | \
+ (((uint32_t)((w)[3])) << 24) | \
+ (((uint64_t)((w)[4])) << 32) | \
+ (((uint64_t)((w)[5])) << 40) | \
+ (((uint64_t)((w)[6])) << 48) | \
+ (((uint64_t)((w)[7])) << 56))
+
+#define USETB(w,v) do { \
+ (w)[0] = (uint8_t)(v); \
+} while (0)
+
+#define USETW(w,v) do { \
+ (w)[0] = (uint8_t)(v); \
+ (w)[1] = (uint8_t)((v) >> 8); \
+} while (0)
+
+#define USETDW(w,v) do { \
+ (w)[0] = (uint8_t)(v); \
+ (w)[1] = (uint8_t)((v) >> 8); \
+ (w)[2] = (uint8_t)((v) >> 16); \
+ (w)[3] = (uint8_t)((v) >> 24); \
+} while (0)
+
+#define USETQW(w,v) do { \
+ (w)[0] = (uint8_t)(v); \
+ (w)[1] = (uint8_t)((v) >> 8); \
+ (w)[2] = (uint8_t)((v) >> 16); \
+ (w)[3] = (uint8_t)((v) >> 24); \
+ (w)[4] = (uint8_t)((v) >> 32); \
+ (w)[5] = (uint8_t)((v) >> 40); \
+ (w)[6] = (uint8_t)((v) >> 48); \
+ (w)[7] = (uint8_t)((v) >> 56); \
+} while (0)
+
+#define USETW2(w,b1,b0) do { \
+ (w)[0] = (uint8_t)(b0); \
+ (w)[1] = (uint8_t)(b1); \
+} while (0)
+
+#define USETW4(w,b3,b2,b1,b0) do { \
+ (w)[0] = (uint8_t)(b0); \
+ (w)[1] = (uint8_t)(b1); \
+ (w)[2] = (uint8_t)(b2); \
+ (w)[3] = (uint8_t)(b3); \
+} while (0)
+
+#define USETW8(w,b7,b6,b5,b4,b3,b2,b1,b0) do { \
+ (w)[0] = (uint8_t)(b0); \
+ (w)[1] = (uint8_t)(b1); \
+ (w)[2] = (uint8_t)(b2); \
+ (w)[3] = (uint8_t)(b3); \
+ (w)[4] = (uint8_t)(b4); \
+ (w)[5] = (uint8_t)(b5); \
+ (w)[6] = (uint8_t)(b6); \
+ (w)[7] = (uint8_t)(b7); \
+} while (0)
+
+#endif /* _USB_ENDIAN_H_ */
diff --git a/usr/src/contrib/bhyve/dev/usb/usb_freebsd.h b/usr/src/contrib/bhyve/dev/usb/usb_freebsd.h
new file mode 100644
index 0000000000..3bc9d2c1eb
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/usb/usb_freebsd.h
@@ -0,0 +1,101 @@
+/* $FreeBSD$ */
+/*-
+ * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Including this file is mandatory for all USB related c-files in the kernel.
+ */
+
+#ifndef _USB_FREEBSD_H_
+#define _USB_FREEBSD_H_
+
+/* Default USB configuration */
+#define USB_HAVE_UGEN 1
+#define USB_HAVE_DEVCTL 1
+#define USB_HAVE_BUSDMA 1
+#define USB_HAVE_COMPAT_LINUX 1
+#define USB_HAVE_USER_IO 1
+#define USB_HAVE_MBUF 1
+#define USB_HAVE_TT_SUPPORT 1
+#define USB_HAVE_POWERD 1
+#define USB_HAVE_MSCTEST 1
+#define USB_HAVE_MSCTEST_DETACH 1
+#define USB_HAVE_PF 1
+#define USB_HAVE_ROOT_MOUNT_HOLD 1
+#define USB_HAVE_ID_SECTION 1
+#define USB_HAVE_PER_BUS_PROCESS 1
+#define USB_HAVE_FIXED_ENDPOINT 0
+#define USB_HAVE_FIXED_IFACE 0
+#define USB_HAVE_FIXED_CONFIG 0
+#define USB_HAVE_FIXED_PORT 0
+#define USB_HAVE_DISABLE_ENUM 1
+
+/* define zero ticks callout value */
+#define USB_CALLOUT_ZERO_TICKS 1
+
+#define USB_TD_GET_PROC(td) (td)->td_proc
+#define USB_PROC_GET_GID(td) (td)->p_pgid
+
+#if (!defined(USB_HOST_ALIGN)) || (USB_HOST_ALIGN <= 0)
+/* Use default value. */
+#undef USB_HOST_ALIGN
+#if defined(__arm__) || defined(__mips__) || defined(__powerpc__)
+#define USB_HOST_ALIGN 32 /* Arm and MIPS need at least this much, if not more */
+#else
+#define USB_HOST_ALIGN 8 /* bytes, must be power of two */
+#endif
+#endif
+/* Sanity check for USB_HOST_ALIGN: Verify power of two. */
+#if ((-USB_HOST_ALIGN) & USB_HOST_ALIGN) != USB_HOST_ALIGN
+#error "USB_HOST_ALIGN is not power of two."
+#endif
+#define USB_FS_ISOC_UFRAME_MAX 4 /* exclusive unit */
+#define USB_BUS_MAX 256 /* units */
+#define USB_MAX_DEVICES 128 /* units */
+#define USB_CONFIG_MAX 65535 /* bytes */
+#define USB_IFACE_MAX 32 /* units */
+#define USB_FIFO_MAX 128 /* units */
+#define USB_MAX_EP_STREAMS 8 /* units */
+#define USB_MAX_EP_UNITS 32 /* units */
+#define USB_MAX_PORTS 255 /* units */
+
+#define USB_MAX_FS_ISOC_FRAMES_PER_XFER (120) /* units */
+#define USB_MAX_HS_ISOC_FRAMES_PER_XFER (8*120) /* units */
+
+#define USB_HUB_MAX_DEPTH 5
+#define USB_EP0_BUFSIZE 1024 /* bytes */
+#define USB_CS_RESET_LIMIT 20 /* failures = 20 * 50 ms = 1sec */
+
+#define USB_MAX_AUTO_QUIRK 8 /* maximum number of dynamic quirks */
+
+typedef uint32_t usb_timeout_t; /* milliseconds */
+typedef uint32_t usb_frlength_t; /* bytes */
+typedef uint32_t usb_frcount_t; /* units */
+typedef uint32_t usb_size_t; /* bytes */
+typedef uint32_t usb_ticks_t; /* system defined */
+typedef uint16_t usb_power_mask_t; /* see "USB_HW_POWER_XXX" */
+typedef uint16_t usb_stream_t; /* stream ID */
+
+#endif /* _USB_FREEBSD_H_ */
diff --git a/usr/src/contrib/bhyve/dev/usb/usbdi.h b/usr/src/contrib/bhyve/dev/usb/usbdi.h
new file mode 100644
index 0000000000..202ad89fa7
--- /dev/null
+++ b/usr/src/contrib/bhyve/dev/usb/usbdi.h
@@ -0,0 +1,657 @@
+/*-
+ * Copyright (c) 2009 Andrew Thompson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _USB_USBDI_H_
+#define _USB_USBDI_H_
+
+struct usb_fifo;
+struct usb_xfer;
+struct usb_device;
+struct usb_attach_arg;
+struct usb_interface;
+struct usb_endpoint;
+struct usb_page_cache;
+struct usb_page_search;
+struct usb_process;
+struct usb_proc_msg;
+struct usb_mbuf;
+struct usb_fs_privdata;
+struct mbuf;
+
+typedef enum { /* keep in sync with usb_errstr_table */
+ USB_ERR_NORMAL_COMPLETION = 0,
+ USB_ERR_PENDING_REQUESTS, /* 1 */
+ USB_ERR_NOT_STARTED, /* 2 */
+ USB_ERR_INVAL, /* 3 */
+ USB_ERR_NOMEM, /* 4 */
+ USB_ERR_CANCELLED, /* 5 */
+ USB_ERR_BAD_ADDRESS, /* 6 */
+ USB_ERR_BAD_BUFSIZE, /* 7 */
+ USB_ERR_BAD_FLAG, /* 8 */
+ USB_ERR_NO_CALLBACK, /* 9 */
+ USB_ERR_IN_USE, /* 10 */
+ USB_ERR_NO_ADDR, /* 11 */
+ USB_ERR_NO_PIPE, /* 12 */
+ USB_ERR_ZERO_NFRAMES, /* 13 */
+ USB_ERR_ZERO_MAXP, /* 14 */
+ USB_ERR_SET_ADDR_FAILED, /* 15 */
+ USB_ERR_NO_POWER, /* 16 */
+ USB_ERR_TOO_DEEP, /* 17 */
+ USB_ERR_IOERROR, /* 18 */
+ USB_ERR_NOT_CONFIGURED, /* 19 */
+ USB_ERR_TIMEOUT, /* 20 */
+ USB_ERR_SHORT_XFER, /* 21 */
+ USB_ERR_STALLED, /* 22 */
+ USB_ERR_INTERRUPTED, /* 23 */
+ USB_ERR_DMA_LOAD_FAILED, /* 24 */
+ USB_ERR_BAD_CONTEXT, /* 25 */
+ USB_ERR_NO_ROOT_HUB, /* 26 */
+ USB_ERR_NO_INTR_THREAD, /* 27 */
+ USB_ERR_NOT_LOCKED, /* 28 */
+ USB_ERR_MAX
+} usb_error_t;
+
+/*
+ * Flags for transfers
+ */
+#define USB_FORCE_SHORT_XFER 0x0001 /* force a short transmit last */
+#define USB_SHORT_XFER_OK 0x0004 /* allow short reads */
+#define USB_DELAY_STATUS_STAGE 0x0010 /* insert delay before STATUS stage */
+#define USB_USER_DATA_PTR 0x0020 /* internal flag */
+#define USB_MULTI_SHORT_OK 0x0040 /* allow multiple short frames */
+#define USB_MANUAL_STATUS 0x0080 /* manual ctrl status */
+
+#define USB_NO_TIMEOUT 0
+#define USB_DEFAULT_TIMEOUT 5000 /* 5000 ms = 5 seconds */
+
+#if defined(_KERNEL)
+/* typedefs */
+
+typedef void (usb_callback_t)(struct usb_xfer *, usb_error_t);
+typedef void (usb_proc_callback_t)(struct usb_proc_msg *);
+typedef usb_error_t (usb_handle_req_t)(struct usb_device *,
+ struct usb_device_request *, const void **, uint16_t *);
+
+typedef int (usb_fifo_open_t)(struct usb_fifo *fifo, int fflags);
+typedef void (usb_fifo_close_t)(struct usb_fifo *fifo, int fflags);
+typedef int (usb_fifo_ioctl_t)(struct usb_fifo *fifo, u_long cmd, void *addr, int fflags);
+typedef void (usb_fifo_cmd_t)(struct usb_fifo *fifo);
+typedef void (usb_fifo_filter_t)(struct usb_fifo *fifo, struct usb_mbuf *m);
+
+
+/* USB events */
+#ifndef USB_GLOBAL_INCLUDE_FILE
+#include <sys/eventhandler.h>
+#endif
+typedef void (*usb_dev_configured_t)(void *, struct usb_device *,
+ struct usb_attach_arg *);
+EVENTHANDLER_DECLARE(usb_dev_configured, usb_dev_configured_t);
+
+/*
+ * The following macros are used used to convert milliseconds into
+ * HZ. We use 1024 instead of 1000 milliseconds per second to save a
+ * full division.
+ */
+#define USB_MS_HZ 1024
+
+#define USB_MS_TO_TICKS(ms) \
+ (((uint32_t)((((uint32_t)(ms)) * ((uint32_t)(hz))) + USB_MS_HZ - 1)) / USB_MS_HZ)
+
+/*
+ * Common queue structure for USB transfers.
+ */
+struct usb_xfer_queue {
+ TAILQ_HEAD(, usb_xfer) head;
+ struct usb_xfer *curr; /* current USB transfer processed */
+ void (*command) (struct usb_xfer_queue *pq);
+ uint8_t recurse_1:1;
+ uint8_t recurse_2:1;
+ uint8_t recurse_3:1;
+ uint8_t reserved:5;
+};
+
+/*
+ * The following structure defines an USB endpoint
+ * USB endpoint.
+ */
+struct usb_endpoint {
+ /* queue of USB transfers */
+ struct usb_xfer_queue endpoint_q[USB_MAX_EP_STREAMS];
+
+ struct usb_endpoint_descriptor *edesc;
+ struct usb_endpoint_ss_comp_descriptor *ecomp;
+ const struct usb_pipe_methods *methods; /* set by HC driver */
+
+ uint16_t isoc_next;
+
+ uint8_t toggle_next:1; /* next data toggle value */
+ uint8_t is_stalled:1; /* set if endpoint is stalled */
+ uint8_t is_synced:1; /* set if we a synchronised */
+ uint8_t unused:5;
+ uint8_t iface_index; /* not used by "default endpoint" */
+
+ uint8_t refcount_alloc; /* allocation refcount */
+ uint8_t refcount_bw; /* bandwidth refcount */
+#define USB_EP_REF_MAX 0x3f
+
+ /* High-Speed resource allocation (valid if "refcount_bw" > 0) */
+
+ uint8_t usb_smask; /* USB start mask */
+ uint8_t usb_cmask; /* USB complete mask */
+ uint8_t usb_uframe; /* USB microframe */
+
+ /* USB endpoint mode, see USB_EP_MODE_XXX */
+
+ uint8_t ep_mode;
+};
+
+/*
+ * The following structure defines an USB interface.
+ */
+struct usb_interface {
+ struct usb_interface_descriptor *idesc;
+ device_t subdev;
+ uint8_t alt_index;
+ uint8_t parent_iface_index;
+
+ /* Linux compat */
+ struct usb_host_interface *altsetting;
+ struct usb_host_interface *cur_altsetting;
+ struct usb_device *linux_udev;
+ void *bsd_priv_sc; /* device specific information */
+ char *pnpinfo; /* additional PnP-info for this interface */
+ uint8_t num_altsetting; /* number of alternate settings */
+ uint8_t bsd_iface_index;
+};
+
+/*
+ * The following structure defines a set of USB transfer flags.
+ */
+struct usb_xfer_flags {
+ uint8_t force_short_xfer:1; /* force a short transmit transfer
+ * last */
+ uint8_t short_xfer_ok:1; /* allow short receive transfers */
+ uint8_t short_frames_ok:1; /* allow short frames */
+ uint8_t pipe_bof:1; /* block pipe on failure */
+ uint8_t proxy_buffer:1; /* makes buffer size a factor of
+ * "max_frame_size" */
+ uint8_t ext_buffer:1; /* uses external DMA buffer */
+ uint8_t manual_status:1; /* non automatic status stage on
+ * control transfers */
+ uint8_t no_pipe_ok:1; /* set if "USB_ERR_NO_PIPE" error can
+ * be ignored */
+ uint8_t stall_pipe:1; /* set if the endpoint belonging to
+ * this USB transfer should be stalled
+ * before starting this transfer! */
+ uint8_t pre_scale_frames:1; /* "usb_config->frames" is
+ * assumed to give the
+ * buffering time in
+ * milliseconds and is
+ * converted into the nearest
+ * number of frames when the
+ * USB transfer is setup. This
+ * option only has effect for
+ * ISOCHRONOUS transfers.
+ */
+};
+
+/*
+ * The following structure define an USB configuration, that basically
+ * is used when setting up an USB transfer.
+ */
+struct usb_config {
+ usb_callback_t *callback; /* USB transfer callback */
+ usb_frlength_t bufsize; /* total pipe buffer size in bytes */
+ usb_frcount_t frames; /* maximum number of USB frames */
+ usb_timeout_t interval; /* interval in milliseconds */
+#define USB_DEFAULT_INTERVAL 0
+ usb_timeout_t timeout; /* transfer timeout in milliseconds */
+ struct usb_xfer_flags flags; /* transfer flags */
+ usb_stream_t stream_id; /* USB3.0 specific */
+ enum usb_hc_mode usb_mode; /* host or device mode */
+ uint8_t type; /* pipe type */
+ uint8_t endpoint; /* pipe number */
+ uint8_t direction; /* pipe direction */
+ uint8_t ep_index; /* pipe index match to use */
+ uint8_t if_index; /* "ifaces" index to use */
+};
+
+/*
+ * Use these macro when defining USB device ID arrays if you want to
+ * have your driver module automatically loaded in host, device or
+ * both modes respectively:
+ */
+#if USB_HAVE_ID_SECTION
+#define STRUCT_USB_HOST_ID \
+ struct usb_device_id __section("usb_host_id")
+#define STRUCT_USB_DEVICE_ID \
+ struct usb_device_id __section("usb_device_id")
+#define STRUCT_USB_DUAL_ID \
+ struct usb_device_id __section("usb_dual_id")
+#else
+#define STRUCT_USB_HOST_ID \
+ struct usb_device_id
+#define STRUCT_USB_DEVICE_ID \
+ struct usb_device_id
+#define STRUCT_USB_DUAL_ID \
+ struct usb_device_id
+#endif /* USB_HAVE_ID_SECTION */
+
+/*
+ * The following structure is used when looking up an USB driver for
+ * an USB device. It is inspired by the Linux structure called
+ * "usb_device_id".
+ */
+struct usb_device_id {
+
+ /* Select which fields to match against */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint16_t
+ match_flag_vendor:1,
+ match_flag_product:1,
+ match_flag_dev_lo:1,
+ match_flag_dev_hi:1,
+
+ match_flag_dev_class:1,
+ match_flag_dev_subclass:1,
+ match_flag_dev_protocol:1,
+ match_flag_int_class:1,
+
+ match_flag_int_subclass:1,
+ match_flag_int_protocol:1,
+ match_flag_unused:6;
+#else
+ uint16_t
+ match_flag_unused:6,
+ match_flag_int_protocol:1,
+ match_flag_int_subclass:1,
+
+ match_flag_int_class:1,
+ match_flag_dev_protocol:1,
+ match_flag_dev_subclass:1,
+ match_flag_dev_class:1,
+
+ match_flag_dev_hi:1,
+ match_flag_dev_lo:1,
+ match_flag_product:1,
+ match_flag_vendor:1;
+#endif
+
+ /* Used for product specific matches; the BCD range is inclusive */
+ uint16_t idVendor;
+ uint16_t idProduct;
+ uint16_t bcdDevice_lo;
+ uint16_t bcdDevice_hi;
+
+ /* Used for device class matches */
+ uint8_t bDeviceClass;
+ uint8_t bDeviceSubClass;
+ uint8_t bDeviceProtocol;
+
+ /* Used for interface class matches */
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+
+#if USB_HAVE_COMPAT_LINUX
+ /* which fields to match against */
+ uint16_t match_flags;
+#define USB_DEVICE_ID_MATCH_VENDOR 0x0001
+#define USB_DEVICE_ID_MATCH_PRODUCT 0x0002
+#define USB_DEVICE_ID_MATCH_DEV_LO 0x0004
+#define USB_DEVICE_ID_MATCH_DEV_HI 0x0008
+#define USB_DEVICE_ID_MATCH_DEV_CLASS 0x0010
+#define USB_DEVICE_ID_MATCH_DEV_SUBCLASS 0x0020
+#define USB_DEVICE_ID_MATCH_DEV_PROTOCOL 0x0040
+#define USB_DEVICE_ID_MATCH_INT_CLASS 0x0080
+#define USB_DEVICE_ID_MATCH_INT_SUBCLASS 0x0100
+#define USB_DEVICE_ID_MATCH_INT_PROTOCOL 0x0200
+#endif
+
+ /* Hook for driver specific information */
+ unsigned long driver_info;
+} __aligned(32);
+
+#define USB_STD_PNP_INFO "M16:mask;U16:vendor;U16:product;L16:product;G16:product;" \
+ "U8:devclass;U8:devsubclass;U8:devprotocol;" \
+ "U8:intclass;U8:intsubclass;U8:intprotocol;"
+#define USB_STD_PNP_HOST_INFO USB_STD_PNP_INFO "T:mode=host;"
+#define USB_STD_PNP_DEVICE_INFO USB_STD_PNP_INFO "T:mode=device;"
+#define USB_PNP_HOST_INFO(table) \
+ MODULE_PNP_INFO(USB_STD_PNP_HOST_INFO, usb, table, table, sizeof(table[0]), \
+ sizeof(table) / sizeof(table[0]))
+#define USB_PNP_DEVICE_INFO(table) \
+ MODULE_PNP_INFO(USB_STD_PNP_DEVICE_INFO, usb, table, table, sizeof(table[0]), \
+ sizeof(table) / sizeof(table[0]))
+#define USB_PNP_DUAL_INFO(table) \
+ MODULE_PNP_INFO(USB_STD_PNP_INFO, usb, table, table, sizeof(table[0]), \
+ sizeof(table) / sizeof(table[0]))
+
+/* check that the size of the structure above is correct */
+extern char usb_device_id_assert[(sizeof(struct usb_device_id) == 32) ? 1 : -1];
+
+#define USB_VENDOR(vend) \
+ .match_flag_vendor = 1, .idVendor = (vend)
+
+#define USB_PRODUCT(prod) \
+ .match_flag_product = 1, .idProduct = (prod)
+
+#define USB_VP(vend,prod) \
+ USB_VENDOR(vend), USB_PRODUCT(prod)
+
+#define USB_VPI(vend,prod,info) \
+ USB_VENDOR(vend), USB_PRODUCT(prod), USB_DRIVER_INFO(info)
+
+#define USB_DEV_BCD_GTEQ(lo) /* greater than or equal */ \
+ .match_flag_dev_lo = 1, .bcdDevice_lo = (lo)
+
+#define USB_DEV_BCD_LTEQ(hi) /* less than or equal */ \
+ .match_flag_dev_hi = 1, .bcdDevice_hi = (hi)
+
+#define USB_DEV_CLASS(dc) \
+ .match_flag_dev_class = 1, .bDeviceClass = (dc)
+
+#define USB_DEV_SUBCLASS(dsc) \
+ .match_flag_dev_subclass = 1, .bDeviceSubClass = (dsc)
+
+#define USB_DEV_PROTOCOL(dp) \
+ .match_flag_dev_protocol = 1, .bDeviceProtocol = (dp)
+
+#define USB_IFACE_CLASS(ic) \
+ .match_flag_int_class = 1, .bInterfaceClass = (ic)
+
+#define USB_IFACE_SUBCLASS(isc) \
+ .match_flag_int_subclass = 1, .bInterfaceSubClass = (isc)
+
+#define USB_IFACE_PROTOCOL(ip) \
+ .match_flag_int_protocol = 1, .bInterfaceProtocol = (ip)
+
+#define USB_IF_CSI(class,subclass,info) \
+ USB_IFACE_CLASS(class), USB_IFACE_SUBCLASS(subclass), USB_DRIVER_INFO(info)
+
+#define USB_DRIVER_INFO(n) \
+ .driver_info = (n)
+
+#define USB_GET_DRIVER_INFO(did) \
+ (did)->driver_info
+
+/*
+ * The following structure keeps information that is used to match
+ * against an array of "usb_device_id" elements.
+ */
+struct usbd_lookup_info {
+ uint16_t idVendor;
+ uint16_t idProduct;
+ uint16_t bcdDevice;
+ uint8_t bDeviceClass;
+ uint8_t bDeviceSubClass;
+ uint8_t bDeviceProtocol;
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+ uint8_t bIfaceIndex;
+ uint8_t bIfaceNum;
+ uint8_t bConfigIndex;
+ uint8_t bConfigNum;
+};
+
+/* Structure used by probe and attach */
+
+struct usb_attach_arg {
+ struct usbd_lookup_info info;
+ device_t temp_dev; /* for internal use */
+ unsigned long driver_info; /* for internal use */
+ void *driver_ivar;
+ struct usb_device *device; /* current device */
+ struct usb_interface *iface; /* current interface */
+ enum usb_hc_mode usb_mode; /* host or device mode */
+ uint8_t port;
+ uint8_t dev_state;
+#define UAA_DEV_READY 0
+#define UAA_DEV_DISABLED 1
+#define UAA_DEV_EJECTING 2
+};
+
+/*
+ * The following is a wrapper for the callout structure to ease
+ * porting the code to other platforms.
+ */
+struct usb_callout {
+ struct callout co;
+};
+#define usb_callout_init_mtx(c,m,f) callout_init_mtx(&(c)->co,m,f)
+#define usb_callout_reset(c,t,f,d) callout_reset(&(c)->co,t,f,d)
+#define usb_callout_stop(c) callout_stop(&(c)->co)
+#define usb_callout_drain(c) callout_drain(&(c)->co)
+#define usb_callout_pending(c) callout_pending(&(c)->co)
+
+/* USB transfer states */
+
+#define USB_ST_SETUP 0
+#define USB_ST_TRANSFERRED 1
+#define USB_ST_ERROR 2
+
+/* USB handle request states */
+#define USB_HR_NOT_COMPLETE 0
+#define USB_HR_COMPLETE_OK 1
+#define USB_HR_COMPLETE_ERR 2
+
+/*
+ * The following macro will return the current state of an USB
+ * transfer like defined by the "USB_ST_XXX" enums.
+ */
+#define USB_GET_STATE(xfer) (usbd_xfer_state(xfer))
+
+/*
+ * The following structure defines the USB process message header.
+ */
+struct usb_proc_msg {
+ TAILQ_ENTRY(usb_proc_msg) pm_qentry;
+ usb_proc_callback_t *pm_callback;
+ usb_size_t pm_num;
+};
+
+#define USB_FIFO_TX 0
+#define USB_FIFO_RX 1
+
+/*
+ * Locking note for the following functions. All the
+ * "usb_fifo_cmd_t" and "usb_fifo_filter_t" functions are called
+ * locked. The others are called unlocked.
+ */
+struct usb_fifo_methods {
+ usb_fifo_open_t *f_open;
+ usb_fifo_close_t *f_close;
+ usb_fifo_ioctl_t *f_ioctl;
+ /*
+ * NOTE: The post-ioctl callback is called after the USB reference
+ * gets locked in the IOCTL handler:
+ */
+ usb_fifo_ioctl_t *f_ioctl_post;
+ usb_fifo_cmd_t *f_start_read;
+ usb_fifo_cmd_t *f_stop_read;
+ usb_fifo_cmd_t *f_start_write;
+ usb_fifo_cmd_t *f_stop_write;
+ usb_fifo_filter_t *f_filter_read;
+ usb_fifo_filter_t *f_filter_write;
+ const char *basename[4];
+ const char *postfix[4];
+};
+
+struct usb_fifo_sc {
+ struct usb_fifo *fp[2];
+ struct usb_fs_privdata *dev;
+};
+
+const char *usbd_errstr(usb_error_t error);
+void *usbd_find_descriptor(struct usb_device *udev, void *id,
+ uint8_t iface_index, uint8_t type, uint8_t type_mask,
+ uint8_t subtype, uint8_t subtype_mask);
+struct usb_config_descriptor *usbd_get_config_descriptor(
+ struct usb_device *udev);
+struct usb_device_descriptor *usbd_get_device_descriptor(
+ struct usb_device *udev);
+struct usb_interface *usbd_get_iface(struct usb_device *udev,
+ uint8_t iface_index);
+struct usb_interface_descriptor *usbd_get_interface_descriptor(
+ struct usb_interface *iface);
+struct usb_endpoint *usbd_get_endpoint(struct usb_device *udev, uint8_t iface_index,
+ const struct usb_config *setup);
+struct usb_endpoint *usbd_get_ep_by_addr(struct usb_device *udev, uint8_t ea_val);
+usb_error_t usbd_interface_count(struct usb_device *udev, uint8_t *count);
+enum usb_hc_mode usbd_get_mode(struct usb_device *udev);
+enum usb_dev_speed usbd_get_speed(struct usb_device *udev);
+void device_set_usb_desc(device_t dev);
+void usb_pause_mtx(struct mtx *mtx, int _ticks);
+usb_error_t usbd_set_pnpinfo(struct usb_device *udev,
+ uint8_t iface_index, const char *pnpinfo);
+usb_error_t usbd_add_dynamic_quirk(struct usb_device *udev,
+ uint16_t quirk);
+usb_error_t usbd_set_endpoint_mode(struct usb_device *udev,
+ struct usb_endpoint *ep, uint8_t ep_mode);
+uint8_t usbd_get_endpoint_mode(struct usb_device *udev,
+ struct usb_endpoint *ep);
+
+const struct usb_device_id *usbd_lookup_id_by_info(
+ const struct usb_device_id *id, usb_size_t sizeof_id,
+ const struct usbd_lookup_info *info);
+int usbd_lookup_id_by_uaa(const struct usb_device_id *id,
+ usb_size_t sizeof_id, struct usb_attach_arg *uaa);
+
+usb_error_t usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx,
+ struct usb_device_request *req, void *data, uint16_t flags,
+ uint16_t *actlen, usb_timeout_t timeout);
+#define usbd_do_request(u,m,r,d) \
+ usbd_do_request_flags(u,m,r,d,0,NULL,USB_DEFAULT_TIMEOUT)
+
+uint8_t usbd_clear_stall_callback(struct usb_xfer *xfer1,
+ struct usb_xfer *xfer2);
+uint8_t usbd_get_interface_altindex(struct usb_interface *iface);
+usb_error_t usbd_set_alt_interface_index(struct usb_device *udev,
+ uint8_t iface_index, uint8_t alt_index);
+uint32_t usbd_get_isoc_fps(struct usb_device *udev);
+usb_error_t usbd_transfer_setup(struct usb_device *udev,
+ const uint8_t *ifaces, struct usb_xfer **pxfer,
+ const struct usb_config *setup_start, uint16_t n_setup,
+ void *priv_sc, struct mtx *priv_mtx);
+void usbd_transfer_submit(struct usb_xfer *xfer);
+void usbd_transfer_clear_stall(struct usb_xfer *xfer);
+void usbd_transfer_drain(struct usb_xfer *xfer);
+uint8_t usbd_transfer_pending(struct usb_xfer *xfer);
+void usbd_transfer_start(struct usb_xfer *xfer);
+void usbd_transfer_stop(struct usb_xfer *xfer);
+void usbd_transfer_unsetup(struct usb_xfer **pxfer, uint16_t n_setup);
+void usbd_transfer_poll(struct usb_xfer **ppxfer, uint16_t max);
+void usbd_set_parent_iface(struct usb_device *udev, uint8_t iface_index,
+ uint8_t parent_index);
+uint8_t usbd_get_bus_index(struct usb_device *udev);
+uint8_t usbd_get_device_index(struct usb_device *udev);
+void usbd_set_power_mode(struct usb_device *udev, uint8_t power_mode);
+uint8_t usbd_filter_power_mode(struct usb_device *udev, uint8_t power_mode);
+uint8_t usbd_device_attached(struct usb_device *udev);
+
+usb_frlength_t
+ usbd_xfer_old_frame_length(struct usb_xfer *xfer, usb_frcount_t frindex);
+void usbd_xfer_status(struct usb_xfer *xfer, int *actlen, int *sumlen,
+ int *aframes, int *nframes);
+struct usb_page_cache *usbd_xfer_get_frame(struct usb_xfer *, usb_frcount_t);
+void *usbd_xfer_get_frame_buffer(struct usb_xfer *, usb_frcount_t);
+void *usbd_xfer_softc(struct usb_xfer *xfer);
+void *usbd_xfer_get_priv(struct usb_xfer *xfer);
+void usbd_xfer_set_priv(struct usb_xfer *xfer, void *);
+void usbd_xfer_set_interval(struct usb_xfer *xfer, int);
+uint8_t usbd_xfer_state(struct usb_xfer *xfer);
+void usbd_xfer_set_frame_data(struct usb_xfer *xfer, usb_frcount_t frindex,
+ void *ptr, usb_frlength_t len);
+void usbd_xfer_frame_data(struct usb_xfer *xfer, usb_frcount_t frindex,
+ void **ptr, int *len);
+void usbd_xfer_set_frame_offset(struct usb_xfer *xfer, usb_frlength_t offset,
+ usb_frcount_t frindex);
+usb_frlength_t usbd_xfer_max_len(struct usb_xfer *xfer);
+usb_frlength_t usbd_xfer_max_framelen(struct usb_xfer *xfer);
+usb_frcount_t usbd_xfer_max_frames(struct usb_xfer *xfer);
+uint8_t usbd_xfer_get_fps_shift(struct usb_xfer *xfer);
+usb_frlength_t usbd_xfer_frame_len(struct usb_xfer *xfer,
+ usb_frcount_t frindex);
+void usbd_xfer_set_frame_len(struct usb_xfer *xfer, usb_frcount_t frindex,
+ usb_frlength_t len);
+void usbd_xfer_set_timeout(struct usb_xfer *xfer, int timeout);
+void usbd_xfer_set_frames(struct usb_xfer *xfer, usb_frcount_t n);
+void usbd_xfer_set_stall(struct usb_xfer *xfer);
+int usbd_xfer_is_stalled(struct usb_xfer *xfer);
+void usbd_xfer_set_flag(struct usb_xfer *xfer, int flag);
+void usbd_xfer_clr_flag(struct usb_xfer *xfer, int flag);
+uint16_t usbd_xfer_get_timestamp(struct usb_xfer *xfer);
+uint8_t usbd_xfer_maxp_was_clamped(struct usb_xfer *xfer);
+
+void usbd_copy_in(struct usb_page_cache *cache, usb_frlength_t offset,
+ const void *ptr, usb_frlength_t len);
+int usbd_copy_in_user(struct usb_page_cache *cache, usb_frlength_t offset,
+ const void *ptr, usb_frlength_t len);
+void usbd_copy_out(struct usb_page_cache *cache, usb_frlength_t offset,
+ void *ptr, usb_frlength_t len);
+int usbd_copy_out_user(struct usb_page_cache *cache, usb_frlength_t offset,
+ void *ptr, usb_frlength_t len);
+void usbd_get_page(struct usb_page_cache *pc, usb_frlength_t offset,
+ struct usb_page_search *res);
+void usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset,
+ struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len);
+void usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset,
+ usb_frlength_t len);
+void usbd_start_re_enumerate(struct usb_device *udev);
+usb_error_t
+ usbd_start_set_config(struct usb_device *, uint8_t);
+
+int usb_fifo_attach(struct usb_device *udev, void *priv_sc,
+ struct mtx *priv_mtx, struct usb_fifo_methods *pm,
+ struct usb_fifo_sc *f_sc, uint16_t unit, int16_t subunit,
+ uint8_t iface_index, uid_t uid, gid_t gid, int mode);
+void usb_fifo_detach(struct usb_fifo_sc *f_sc);
+int usb_fifo_alloc_buffer(struct usb_fifo *f, uint32_t bufsize,
+ uint16_t nbuf);
+void usb_fifo_free_buffer(struct usb_fifo *f);
+uint32_t usb_fifo_put_bytes_max(struct usb_fifo *fifo);
+void usb_fifo_put_data(struct usb_fifo *fifo, struct usb_page_cache *pc,
+ usb_frlength_t offset, usb_frlength_t len, uint8_t what);
+void usb_fifo_put_data_linear(struct usb_fifo *fifo, void *ptr,
+ usb_size_t len, uint8_t what);
+uint8_t usb_fifo_put_data_buffer(struct usb_fifo *f, void *ptr, usb_size_t len);
+void usb_fifo_put_data_error(struct usb_fifo *fifo);
+uint8_t usb_fifo_get_data(struct usb_fifo *fifo, struct usb_page_cache *pc,
+ usb_frlength_t offset, usb_frlength_t len, usb_frlength_t *actlen,
+ uint8_t what);
+uint8_t usb_fifo_get_data_linear(struct usb_fifo *fifo, void *ptr,
+ usb_size_t len, usb_size_t *actlen, uint8_t what);
+uint8_t usb_fifo_get_data_buffer(struct usb_fifo *f, void **pptr,
+ usb_size_t *plen);
+void usb_fifo_reset(struct usb_fifo *f);
+void usb_fifo_wakeup(struct usb_fifo *f);
+void usb_fifo_get_data_error(struct usb_fifo *fifo);
+void *usb_fifo_softc(struct usb_fifo *fifo);
+void usb_fifo_set_close_zlp(struct usb_fifo *, uint8_t);
+void usb_fifo_set_write_defrag(struct usb_fifo *, uint8_t);
+void usb_fifo_free(struct usb_fifo *f);
+#endif /* _KERNEL */
+#endif /* _USB_USBDI_H_ */
diff --git a/usr/src/contrib/bhyve/isa/isareg.h b/usr/src/contrib/bhyve/isa/isareg.h
new file mode 100644
index 0000000000..e83e34674f
--- /dev/null
+++ b/usr/src/contrib/bhyve/isa/isareg.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)isa.h 5.7 (Berkeley) 5/9/91
+ * $FreeBSD: head/sys/isa/isareg.h 263379 2014-03-19 21:03:04Z imp $
+ */
+
+#ifdef PC98
+#error isareg.h is included from PC-9801 source
+#endif
+
+#ifndef _ISA_ISA_H_
+#define _ISA_ISA_H_
+
+/*
+ * ISA Bus conventions
+ */
+
+/*
+ * Input / Output Port Assignments
+ */
+#ifndef IO_ISABEGIN
+#define IO_ISABEGIN 0x000 /* 0x000 - Beginning of I/O Registers */
+#define IO_ICU1 0x020 /* 8259A Interrupt Controller #1 */
+#define IO_KBD 0x060 /* 8042 Keyboard */
+#define IO_RTC 0x070 /* RTC */
+#define IO_ICU2 0x0A0 /* 8259A Interrupt Controller #2 */
+
+#define IO_MDA 0x3B0 /* Monochome Adapter */
+#define IO_VGA 0x3C0 /* E/VGA Ports */
+#define IO_CGA 0x3D0 /* CGA Ports */
+
+#endif /* !IO_ISABEGIN */
+
+/*
+ * Input / Output Port Sizes
+ */
+#define IO_CGASIZE 12 /* CGA controllers */
+#define IO_MDASIZE 12 /* Monochrome display controllers */
+#define IO_VGASIZE 16 /* VGA controllers */
+
+#endif /* !_ISA_ISA_H_ */
diff --git a/usr/src/contrib/bhyve/isa/rtc.h b/usr/src/contrib/bhyve/isa/rtc.h
new file mode 100644
index 0000000000..bb964ddf6a
--- /dev/null
+++ b/usr/src/contrib/bhyve/isa/rtc.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)rtc.h 7.1 (Berkeley) 5/12/91
+ * $FreeBSD$
+ */
+
+#ifndef _I386_ISA_RTC_H_
+#define _I386_ISA_RTC_H_ 1
+
+/*
+ * MC146818 RTC Register locations
+ */
+
+#define RTC_SEC 0x00 /* seconds */
+#define RTC_SECALRM 0x01 /* seconds alarm */
+#define RTC_MIN 0x02 /* minutes */
+#define RTC_MINALRM 0x03 /* minutes alarm */
+#define RTC_HRS 0x04 /* hours */
+#define RTC_HRSALRM 0x05 /* hours alarm */
+#define RTC_WDAY 0x06 /* week day */
+#define RTC_DAY 0x07 /* day of month */
+#define RTC_MONTH 0x08 /* month of year */
+#define RTC_YEAR 0x09 /* month of year */
+
+#define RTC_STATUSA 0x0a /* status register A */
+#define RTCSA_TUP 0x80 /* time update, don't look now */
+#define RTCSA_RESET 0x70 /* reset divider */
+#define RTCSA_DIVIDER 0x20 /* divider correct for 32768 Hz */
+#define RTCSA_8192 0x03 /* 8192 Hz interrupt */
+#define RTCSA_4096 0x04
+#define RTCSA_2048 0x05
+#define RTCSA_1024 0x06 /* default for profiling */
+#define RTCSA_PROF RTCSA_1024
+#define RTC_PROFRATE 1024
+#define RTCSA_512 0x07
+#define RTCSA_256 0x08
+#define RTCSA_128 0x09
+#define RTCSA_NOPROF RTCSA_128
+#define RTC_NOPROFRATE 128
+#define RTCSA_64 0x0a
+#define RTCSA_32 0x0b /* 32 Hz interrupt */
+
+#define RTC_STATUSB 0x0b /* status register B */
+#define RTCSB_DST 0x01 /* USA Daylight Savings Time enable */
+#define RTCSB_24HR 0x02 /* 0 = 12 hours, 1 = 24 hours */
+#define RTCSB_BCD 0x04 /* 0 = BCD, 1 = Binary coded time */
+#define RTCSB_SQWE 0x08 /* 1 = output sqare wave at SQW pin */
+#define RTCSB_UINTR 0x10 /* 1 = enable update-ended interrupt */
+#define RTCSB_AINTR 0x20 /* 1 = enable alarm interrupt */
+#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */
+#define RTCSB_HALT 0x80 /* stop clock updates */
+
+#define RTC_INTR 0x0c /* status register C (R) interrupt source */
+#define RTCIR_UPDATE 0x10 /* update intr */
+#define RTCIR_ALARM 0x20 /* alarm intr */
+#define RTCIR_PERIOD 0x40 /* periodic intr */
+#define RTCIR_INT 0x80 /* interrupt output signal */
+
+#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */
+#define RTCSD_PWR 0x80 /* clock power OK */
+
+#define RTC_DIAG 0x0e /* status register E - bios diagnostic */
+#define RTCDG_BITS "\020\010clock_battery\007ROM_cksum\006config_unit\005memory_size\004fixed_disk\003invalid_time"
+
+#define RTC_RESET 0x0f /* status register F - reset code byte */
+#define RTCRS_RST 0x00 /* normal reset */
+#define RTCRS_LOAD 0x04 /* load system */
+
+#define RTC_FDISKETTE 0x10 /* diskette drive type in upper/lower nibble */
+#define RTCFDT_NONE 0 /* none present */
+#define RTCFDT_360K 0x10 /* 360K */
+#define RTCFDT_12M 0x20 /* 1.2M */
+#define RTCFDT_720K 0x30 /* 720K */
+#define RTCFDT_144M 0x40 /* 1.44M */
+#define RTCFDT_288M_1 0x50 /* 2.88M, some BIOSes */
+#define RTCFDT_288M 0x60 /* 2.88M */
+
+#define RTC_BASELO 0x15 /* low byte of basemem size */
+#define RTC_BASEHI 0x16 /* high byte of basemem size */
+#define RTC_EXTLO 0x17 /* low byte of extended mem size */
+#define RTC_EXTHI 0x18 /* low byte of extended mem size */
+
+#define RTC_CENTURY 0x32 /* current century */
+
+#ifdef __FreeBSD__
+#ifdef _KERNEL
+extern struct mtx clock_lock;
+extern int atrtcclock_disable;
+int rtcin(int reg);
+void atrtc_restore(void);
+void writertc(int reg, u_char val);
+void atrtc_set(struct timespec *ts);
+#endif
+#endif
+
+#endif /* _I386_ISA_RTC_H_ */
diff --git a/usr/src/contrib/bhyve/lib/libutil/expand_number.c b/usr/src/contrib/bhyve/lib/libutil/expand_number.c
new file mode 100644
index 0000000000..f3b4da89f9
--- /dev/null
+++ b/usr/src/contrib/bhyve/lib/libutil/expand_number.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2007 Eric Anderson <anderson@FreeBSD.org>
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/lib/libutil/expand_number.c 255069 2013-08-30 11:21:52Z pluknet $");
+
+#include <sys/types.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <libutil.h>
+#include <stdint.h>
+
+int
+expand_number(const char *buf, uint64_t *num)
+{
+ char *endptr;
+ uintmax_t umaxval;
+ uint64_t number;
+ unsigned shift;
+ int serrno;
+
+ serrno = errno;
+ errno = 0;
+ umaxval = strtoumax(buf, &endptr, 0);
+ if (umaxval > UINT64_MAX)
+ errno = ERANGE;
+ if (errno != 0)
+ return (-1);
+ errno = serrno;
+ number = umaxval;
+
+ switch (tolower((unsigned char)*endptr)) {
+ case 'e':
+ shift = 60;
+ break;
+ case 'p':
+ shift = 50;
+ break;
+ case 't':
+ shift = 40;
+ break;
+ case 'g':
+ shift = 30;
+ break;
+ case 'm':
+ shift = 20;
+ break;
+ case 'k':
+ shift = 10;
+ break;
+ case 'b':
+ case '\0': /* No unit. */
+ *num = number;
+ return (0);
+ default:
+ /* Unrecognized unit. */
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if ((number << shift) >> shift != number) {
+ /* Overflow */
+ errno = ERANGE;
+ return (-1);
+ }
+ *num = number << shift;
+ return (0);
+}
diff --git a/usr/src/contrib/bhyve/lib/libutil/humanize_number.c b/usr/src/contrib/bhyve/lib/libutil/humanize_number.c
new file mode 100644
index 0000000000..675a969aaa
--- /dev/null
+++ b/usr/src/contrib/bhyve/lib/libutil/humanize_number.c
@@ -0,0 +1,179 @@
+/* $NetBSD: humanize_number.c,v 1.14 2008/04/28 20:22:59 martin Exp $ */
+
+/*
+ * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
+ * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center, by Luke Mewburn and by Tomas Svensson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <libutil.h>
+
+static const int maxscale = 6;
+
+int
+humanize_number(char *buf, size_t len, int64_t quotient,
+ const char *suffix, int scale, int flags)
+{
+ const char *prefixes, *sep;
+ int i, r, remainder, s1, s2, sign;
+ int divisordeccut;
+ int64_t divisor, max;
+ size_t baselen;
+
+ /* Since so many callers don't check -1, NUL terminate the buffer */
+ if (len > 0)
+ buf[0] = '\0';
+
+ /* validate args */
+ if (buf == NULL || suffix == NULL)
+ return (-1);
+ if (scale < 0)
+ return (-1);
+ else if (scale > maxscale &&
+ ((scale & ~(HN_AUTOSCALE|HN_GETSCALE)) != 0))
+ return (-1);
+ if ((flags & HN_DIVISOR_1000) && (flags & HN_IEC_PREFIXES))
+ return (-1);
+
+ /* setup parameters */
+ remainder = 0;
+
+ if (flags & HN_IEC_PREFIXES) {
+ baselen = 2;
+ /*
+ * Use the prefixes for power of two recommended by
+ * the International Electrotechnical Commission
+ * (IEC) in IEC 80000-3 (i.e. Ki, Mi, Gi...).
+ *
+ * HN_IEC_PREFIXES implies a divisor of 1024 here
+ * (use of HN_DIVISOR_1000 would have triggered
+ * an assertion earlier).
+ */
+ divisor = 1024;
+ divisordeccut = 973; /* ceil(.95 * 1024) */
+ if (flags & HN_B)
+ prefixes = "B\0\0Ki\0Mi\0Gi\0Ti\0Pi\0Ei";
+ else
+ prefixes = "\0\0\0Ki\0Mi\0Gi\0Ti\0Pi\0Ei";
+ } else {
+ baselen = 1;
+ if (flags & HN_DIVISOR_1000) {
+ divisor = 1000;
+ divisordeccut = 950;
+ if (flags & HN_B)
+ prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+ else
+ prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+ } else {
+ divisor = 1024;
+ divisordeccut = 973; /* ceil(.95 * 1024) */
+ if (flags & HN_B)
+ prefixes = "B\0\0K\0\0M\0\0G\0\0T\0\0P\0\0E";
+ else
+ prefixes = "\0\0\0K\0\0M\0\0G\0\0T\0\0P\0\0E";
+ }
+ }
+
+#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3])
+
+ if (quotient < 0) {
+ sign = -1;
+ quotient = -quotient;
+ baselen += 2; /* sign, digit */
+ } else {
+ sign = 1;
+ baselen += 1; /* digit */
+ }
+ if (flags & HN_NOSPACE)
+ sep = "";
+ else {
+ sep = " ";
+ baselen++;
+ }
+ baselen += strlen(suffix);
+
+ /* Check if enough room for `x y' + suffix + `\0' */
+ if (len < baselen + 1)
+ return (-1);
+
+ if (scale & (HN_AUTOSCALE | HN_GETSCALE)) {
+ /* See if there is additional columns can be used. */
+ for (max = 1, i = len - baselen; i-- > 0;)
+ max *= 10;
+
+ /*
+ * Divide the number until it fits the given column.
+ * If there will be an overflow by the rounding below,
+ * divide once more.
+ */
+ for (i = 0;
+ (quotient >= max || (quotient == max - 1 &&
+ remainder >= divisordeccut)) && i < maxscale; i++) {
+ remainder = quotient % divisor;
+ quotient /= divisor;
+ }
+
+ if (scale & HN_GETSCALE)
+ return (i);
+ } else {
+ for (i = 0; i < scale && i < maxscale; i++) {
+ remainder = quotient % divisor;
+ quotient /= divisor;
+ }
+ }
+
+ /* If a value <= 9.9 after rounding and ... */
+ /*
+ * XXX - should we make sure there is enough space for the decimal
+ * place and if not, don't do HN_DECIMAL?
+ */
+ if (((quotient == 9 && remainder < divisordeccut) || quotient < 9) &&
+ i > 0 && flags & HN_DECIMAL) {
+ s1 = (int)quotient + ((remainder * 10 + divisor / 2) /
+ divisor / 10);
+ s2 = ((remainder * 10 + divisor / 2) / divisor) % 10;
+ r = snprintf(buf, len, "%d%s%d%s%s%s",
+ sign * s1, localeconv()->decimal_point, s2,
+ sep, SCALE2PREFIX(i), suffix);
+ } else
+ r = snprintf(buf, len, "%" PRId64 "%s%s%s",
+ sign * (quotient + (remainder + divisor / 2) / divisor),
+ sep, SCALE2PREFIX(i), suffix);
+
+ return (r);
+}
diff --git a/usr/src/contrib/bhyve/sys/ata.h b/usr/src/contrib/bhyve/sys/ata.h
new file mode 100644
index 0000000000..223bd7b3eb
--- /dev/null
+++ b/usr/src/contrib/bhyve/sys/ata.h
@@ -0,0 +1,1015 @@
+/*-
+ * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_ATA_H_
+#define _SYS_ATA_H_
+
+#include <sys/ioccom.h>
+
+/* ATA/ATAPI device parameters */
+struct ata_params {
+/*000*/ u_int16_t config; /* configuration info */
+#define ATA_PROTO_MASK 0x8003
+#define ATA_PROTO_ATAPI 0x8000
+#define ATA_PROTO_ATAPI_12 0x8000
+#define ATA_PROTO_ATAPI_16 0x8001
+#define ATA_PROTO_CFA 0x848a
+#define ATA_ATAPI_TYPE_MASK 0x1f00
+#define ATA_ATAPI_TYPE_DIRECT 0x0000 /* disk/floppy */
+#define ATA_ATAPI_TYPE_TAPE 0x0100 /* streaming tape */
+#define ATA_ATAPI_TYPE_CDROM 0x0500 /* CD-ROM device */
+#define ATA_ATAPI_TYPE_OPTICAL 0x0700 /* optical disk */
+#define ATA_DRQ_MASK 0x0060
+#define ATA_DRQ_SLOW 0x0000 /* cpu 3 ms delay */
+#define ATA_DRQ_INTR 0x0020 /* interrupt 10 ms delay */
+#define ATA_DRQ_FAST 0x0040 /* accel 50 us delay */
+#define ATA_RESP_INCOMPLETE 0x0004
+
+/*001*/ u_int16_t cylinders; /* # of cylinders */
+/*002*/ u_int16_t specconf; /* specific configuration */
+/*003*/ u_int16_t heads; /* # heads */
+ u_int16_t obsolete4;
+ u_int16_t obsolete5;
+/*006*/ u_int16_t sectors; /* # sectors/track */
+/*007*/ u_int16_t vendor7[3];
+/*010*/ u_int8_t serial[20]; /* serial number */
+/*020*/ u_int16_t retired20;
+ u_int16_t retired21;
+ u_int16_t obsolete22;
+/*023*/ u_int8_t revision[8]; /* firmware revision */
+/*027*/ u_int8_t model[40]; /* model name */
+/*047*/ u_int16_t sectors_intr; /* sectors per interrupt */
+/*048*/ u_int16_t usedmovsd; /* double word read/write? */
+/*049*/ u_int16_t capabilities1;
+#define ATA_SUPPORT_DMA 0x0100
+#define ATA_SUPPORT_LBA 0x0200
+#define ATA_SUPPORT_IORDY 0x0400
+#define ATA_SUPPORT_IORDYDIS 0x0800
+#define ATA_SUPPORT_OVERLAP 0x4000
+
+/*050*/ u_int16_t capabilities2;
+/*051*/ u_int16_t retired_piomode; /* PIO modes 0-2 */
+#define ATA_RETIRED_PIO_MASK 0x0300
+
+/*052*/ u_int16_t retired_dmamode; /* DMA modes */
+#define ATA_RETIRED_DMA_MASK 0x0003
+
+/*053*/ u_int16_t atavalid; /* fields valid */
+#define ATA_FLAG_54_58 0x0001 /* words 54-58 valid */
+#define ATA_FLAG_64_70 0x0002 /* words 64-70 valid */
+#define ATA_FLAG_88 0x0004 /* word 88 valid */
+
+/*054*/ u_int16_t current_cylinders;
+/*055*/ u_int16_t current_heads;
+/*056*/ u_int16_t current_sectors;
+/*057*/ u_int16_t current_size_1;
+/*058*/ u_int16_t current_size_2;
+/*059*/ u_int16_t multi;
+#define ATA_MULTI_VALID 0x0100
+
+/*060*/ u_int16_t lba_size_1;
+ u_int16_t lba_size_2;
+ u_int16_t obsolete62;
+/*063*/ u_int16_t mwdmamodes; /* multiword DMA modes */
+/*064*/ u_int16_t apiomodes; /* advanced PIO modes */
+
+/*065*/ u_int16_t mwdmamin; /* min. M/W DMA time/word ns */
+/*066*/ u_int16_t mwdmarec; /* rec. M/W DMA time ns */
+/*067*/ u_int16_t pioblind; /* min. PIO cycle w/o flow */
+/*068*/ u_int16_t pioiordy; /* min. PIO cycle IORDY flow */
+/*069*/ u_int16_t support3;
+#define ATA_SUPPORT_RZAT 0x0020
+#define ATA_SUPPORT_DRAT 0x4000
+#define ATA_SUPPORT_ZONE_MASK 0x0003
+#define ATA_SUPPORT_ZONE_NR 0x0000
+#define ATA_SUPPORT_ZONE_HOST_AWARE 0x0001
+#define ATA_SUPPORT_ZONE_DEV_MANAGED 0x0002
+ u_int16_t reserved70;
+/*071*/ u_int16_t rlsovlap; /* rel time (us) for overlap */
+/*072*/ u_int16_t rlsservice; /* rel time (us) for service */
+ u_int16_t reserved73;
+ u_int16_t reserved74;
+/*075*/ u_int16_t queue;
+#define ATA_QUEUE_LEN(x) ((x) & 0x001f)
+
+/*76*/ u_int16_t satacapabilities;
+#define ATA_SATA_GEN1 0x0002
+#define ATA_SATA_GEN2 0x0004
+#define ATA_SATA_GEN3 0x0008
+#define ATA_SUPPORT_NCQ 0x0100
+#define ATA_SUPPORT_IFPWRMNGTRCV 0x0200
+#define ATA_SUPPORT_PHYEVENTCNT 0x0400
+#define ATA_SUPPORT_NCQ_UNLOAD 0x0800
+#define ATA_SUPPORT_NCQ_PRIO 0x1000
+#define ATA_SUPPORT_HAPST 0x2000
+#define ATA_SUPPORT_DAPST 0x4000
+#define ATA_SUPPORT_READLOGDMAEXT 0x8000
+
+/*77*/ u_int16_t satacapabilities2;
+#define ATA_SATA_CURR_GEN_MASK 0x0006
+#define ATA_SUPPORT_NCQ_STREAM 0x0010
+#define ATA_SUPPORT_NCQ_QMANAGEMENT 0x0020
+#define ATA_SUPPORT_RCVSND_FPDMA_QUEUED 0x0040
+/*78*/ u_int16_t satasupport;
+#define ATA_SUPPORT_NONZERO 0x0002
+#define ATA_SUPPORT_AUTOACTIVATE 0x0004
+#define ATA_SUPPORT_IFPWRMNGT 0x0008
+#define ATA_SUPPORT_INORDERDATA 0x0010
+#define ATA_SUPPORT_ASYNCNOTIF 0x0020
+#define ATA_SUPPORT_SOFTSETPRESERVE 0x0040
+/*79*/ u_int16_t sataenabled;
+#define ATA_ENABLED_DAPST 0x0080
+
+/*080*/ u_int16_t version_major;
+/*081*/ u_int16_t version_minor;
+
+ struct {
+/*082/085*/ u_int16_t command1;
+#define ATA_SUPPORT_SMART 0x0001
+#define ATA_SUPPORT_SECURITY 0x0002
+#define ATA_SUPPORT_REMOVABLE 0x0004
+#define ATA_SUPPORT_POWERMGT 0x0008
+#define ATA_SUPPORT_PACKET 0x0010
+#define ATA_SUPPORT_WRITECACHE 0x0020
+#define ATA_SUPPORT_LOOKAHEAD 0x0040
+#define ATA_SUPPORT_RELEASEIRQ 0x0080
+#define ATA_SUPPORT_SERVICEIRQ 0x0100
+#define ATA_SUPPORT_RESET 0x0200
+#define ATA_SUPPORT_PROTECTED 0x0400
+#define ATA_SUPPORT_WRITEBUFFER 0x1000
+#define ATA_SUPPORT_READBUFFER 0x2000
+#define ATA_SUPPORT_NOP 0x4000
+
+/*083/086*/ u_int16_t command2;
+#define ATA_SUPPORT_MICROCODE 0x0001
+#define ATA_SUPPORT_QUEUED 0x0002
+#define ATA_SUPPORT_CFA 0x0004
+#define ATA_SUPPORT_APM 0x0008
+#define ATA_SUPPORT_NOTIFY 0x0010
+#define ATA_SUPPORT_STANDBY 0x0020
+#define ATA_SUPPORT_SPINUP 0x0040
+#define ATA_SUPPORT_MAXSECURITY 0x0100
+#define ATA_SUPPORT_AUTOACOUSTIC 0x0200
+#define ATA_SUPPORT_ADDRESS48 0x0400
+#define ATA_SUPPORT_OVERLAY 0x0800
+#define ATA_SUPPORT_FLUSHCACHE 0x1000
+#define ATA_SUPPORT_FLUSHCACHE48 0x2000
+
+/*084/087*/ u_int16_t extension;
+#define ATA_SUPPORT_SMARTLOG 0x0001
+#define ATA_SUPPORT_SMARTTEST 0x0002
+#define ATA_SUPPORT_MEDIASN 0x0004
+#define ATA_SUPPORT_MEDIAPASS 0x0008
+#define ATA_SUPPORT_STREAMING 0x0010
+#define ATA_SUPPORT_GENLOG 0x0020
+#define ATA_SUPPORT_WRITEDMAFUAEXT 0x0040
+#define ATA_SUPPORT_WRITEDMAQFUAEXT 0x0080
+#define ATA_SUPPORT_64BITWWN 0x0100
+#define ATA_SUPPORT_UNLOAD 0x2000
+ } __packed support, enabled;
+
+/*088*/ u_int16_t udmamodes; /* UltraDMA modes */
+/*089*/ u_int16_t erase_time; /* time req'd in 2min units */
+/*090*/ u_int16_t enhanced_erase_time; /* time req'd in 2min units */
+/*091*/ u_int16_t apm_value;
+/*092*/ u_int16_t master_passwd_revision; /* password revision code */
+/*093*/ u_int16_t hwres;
+#define ATA_CABLE_ID 0x2000
+
+/*094*/ u_int16_t acoustic;
+#define ATA_ACOUSTIC_CURRENT(x) ((x) & 0x00ff)
+#define ATA_ACOUSTIC_VENDOR(x) (((x) & 0xff00) >> 8)
+
+/*095*/ u_int16_t stream_min_req_size;
+/*096*/ u_int16_t stream_transfer_time;
+/*097*/ u_int16_t stream_access_latency;
+/*098*/ u_int32_t stream_granularity;
+/*100*/ u_int16_t lba_size48_1;
+ u_int16_t lba_size48_2;
+ u_int16_t lba_size48_3;
+ u_int16_t lba_size48_4;
+ u_int16_t reserved104;
+/*105*/ u_int16_t max_dsm_blocks;
+/*106*/ u_int16_t pss;
+#define ATA_PSS_LSPPS 0x000F
+#define ATA_PSS_LSSABOVE512 0x1000
+#define ATA_PSS_MULTLS 0x2000
+#define ATA_PSS_VALID_MASK 0xC000
+#define ATA_PSS_VALID_VALUE 0x4000
+/*107*/ u_int16_t isd;
+/*108*/ u_int16_t wwn[4];
+ u_int16_t reserved112[5];
+/*117*/ u_int16_t lss_1;
+/*118*/ u_int16_t lss_2;
+/*119*/ u_int16_t support2;
+#define ATA_SUPPORT_WRITEREADVERIFY 0x0002
+#define ATA_SUPPORT_WRITEUNCORREXT 0x0004
+#define ATA_SUPPORT_RWLOGDMAEXT 0x0008
+#define ATA_SUPPORT_MICROCODE3 0x0010
+#define ATA_SUPPORT_FREEFALL 0x0020
+#define ATA_SUPPORT_SENSE_REPORT 0x0040
+#define ATA_SUPPORT_EPC 0x0080
+/*120*/ u_int16_t enabled2;
+#define ATA_ENABLED_WRITEREADVERIFY 0x0002
+#define ATA_ENABLED_WRITEUNCORREXT 0x0004
+#define ATA_ENABLED_FREEFALL 0x0020
+#define ATA_ENABLED_SENSE_REPORT 0x0040
+#define ATA_ENABLED_EPC 0x0080
+ u_int16_t reserved121[6];
+/*127*/ u_int16_t removable_status;
+/*128*/ u_int16_t security_status;
+#define ATA_SECURITY_LEVEL 0x0100 /* 0: high, 1: maximum */
+#define ATA_SECURITY_ENH_SUPP 0x0020 /* enhanced erase supported */
+#define ATA_SECURITY_COUNT_EXP 0x0010 /* count expired */
+#define ATA_SECURITY_FROZEN 0x0008 /* security config is frozen */
+#define ATA_SECURITY_LOCKED 0x0004 /* drive is locked */
+#define ATA_SECURITY_ENABLED 0x0002 /* ATA Security is enabled */
+#define ATA_SECURITY_SUPPORTED 0x0001 /* ATA Security is supported */
+
+ u_int16_t reserved129[31];
+/*160*/ u_int16_t cfa_powermode1;
+ u_int16_t reserved161;
+/*162*/ u_int16_t cfa_kms_support;
+/*163*/ u_int16_t cfa_trueide_modes;
+/*164*/ u_int16_t cfa_memory_modes;
+ u_int16_t reserved165[4];
+/*169*/ u_int16_t support_dsm;
+#define ATA_SUPPORT_DSM_TRIM 0x0001
+ u_int16_t reserved170[6];
+/*176*/ u_int8_t media_serial[60];
+/*206*/ u_int16_t sct;
+ u_int16_t reserved207[2];
+/*209*/ u_int16_t lsalign;
+/*210*/ u_int16_t wrv_sectors_m3_1;
+ u_int16_t wrv_sectors_m3_2;
+/*212*/ u_int16_t wrv_sectors_m2_1;
+ u_int16_t wrv_sectors_m2_2;
+/*214*/ u_int16_t nv_cache_caps;
+/*215*/ u_int16_t nv_cache_size_1;
+ u_int16_t nv_cache_size_2;
+/*217*/ u_int16_t media_rotation_rate;
+#define ATA_RATE_NOT_REPORTED 0x0000
+#define ATA_RATE_NON_ROTATING 0x0001
+ u_int16_t reserved218;
+/*219*/ u_int16_t nv_cache_opt;
+/*220*/ u_int16_t wrv_mode;
+ u_int16_t reserved221;
+/*222*/ u_int16_t transport_major;
+/*223*/ u_int16_t transport_minor;
+ u_int16_t reserved224[31];
+/*255*/ u_int16_t integrity;
+} __packed;
+
+/* ATA Dataset Management */
+#define ATA_DSM_BLK_SIZE 512
+#define ATA_DSM_BLK_RANGES 64
+#define ATA_DSM_RANGE_SIZE 8
+#define ATA_DSM_RANGE_MAX 65535
+
+/*
+ * ATA Device Register
+ *
+ * bit 7 Obsolete (was 1 in early ATA specs)
+ * bit 6 Sets LBA/CHS mode. 1=LBA, 0=CHS
+ * bit 5 Obsolete (was 1 in early ATA specs)
+ * bit 4 1 = Slave Drive, 0 = Master Drive
+ * bit 3-0 In LBA mode, 27-24 of address. In CHS mode, head number
+*/
+
+#define ATA_DEV_MASTER 0x00
+#define ATA_DEV_SLAVE 0x10
+#define ATA_DEV_LBA 0x40
+
+/* ATA limits */
+#define ATA_MAX_28BIT_LBA 268435455UL
+
+/* ATA Status Register */
+#define ATA_STATUS_ERROR 0x01
+#define ATA_STATUS_SENSE_AVAIL 0x02
+#define ATA_STATUS_ALIGN_ERR 0x04
+#define ATA_STATUS_DATA_REQ 0x08
+#define ATA_STATUS_DEF_WRITE_ERR 0x10
+#define ATA_STATUS_DEVICE_FAULT 0x20
+#define ATA_STATUS_DEVICE_READY 0x40
+#define ATA_STATUS_BUSY 0x80
+
+/* ATA Error Register */
+#define ATA_ERROR_ABORT 0x04
+#define ATA_ERROR_ID_NOT_FOUND 0x10
+
+/* ATA HPA Features */
+#define ATA_HPA_FEAT_MAX_ADDR 0x00
+#define ATA_HPA_FEAT_SET_PWD 0x01
+#define ATA_HPA_FEAT_LOCK 0x02
+#define ATA_HPA_FEAT_UNLOCK 0x03
+#define ATA_HPA_FEAT_FREEZE 0x04
+
+/* ATA transfer modes */
+#define ATA_MODE_MASK 0x0f
+#define ATA_DMA_MASK 0xf0
+#define ATA_PIO 0x00
+#define ATA_PIO0 0x08
+#define ATA_PIO1 0x09
+#define ATA_PIO2 0x0a
+#define ATA_PIO3 0x0b
+#define ATA_PIO4 0x0c
+#define ATA_PIO_MAX 0x0f
+#define ATA_DMA 0x10
+#define ATA_WDMA0 0x20
+#define ATA_WDMA1 0x21
+#define ATA_WDMA2 0x22
+#define ATA_UDMA0 0x40
+#define ATA_UDMA1 0x41
+#define ATA_UDMA2 0x42
+#define ATA_UDMA3 0x43
+#define ATA_UDMA4 0x44
+#define ATA_UDMA5 0x45
+#define ATA_UDMA6 0x46
+#define ATA_SA150 0x47
+#define ATA_SA300 0x48
+#define ATA_SA600 0x49
+#define ATA_DMA_MAX 0x4f
+
+
+/* ATA commands */
+#define ATA_NOP 0x00 /* NOP */
+#define ATA_NF_FLUSHQUEUE 0x00 /* flush queued cmd's */
+#define ATA_NF_AUTOPOLL 0x01 /* start autopoll function */
+#define ATA_DATA_SET_MANAGEMENT 0x06
+#define ATA_DSM_TRIM 0x01
+#define ATA_DEVICE_RESET 0x08 /* reset device */
+#define ATA_READ 0x20 /* read */
+#define ATA_READ48 0x24 /* read 48bit LBA */
+#define ATA_READ_DMA48 0x25 /* read DMA 48bit LBA */
+#define ATA_READ_DMA_QUEUED48 0x26 /* read DMA QUEUED 48bit LBA */
+#define ATA_READ_NATIVE_MAX_ADDRESS48 0x27 /* read native max addr 48bit */
+#define ATA_READ_MUL48 0x29 /* read multi 48bit LBA */
+#define ATA_READ_STREAM_DMA48 0x2a /* read DMA stream 48bit LBA */
+#define ATA_READ_LOG_EXT 0x2f /* read log ext - PIO Data-In */
+#define ATA_READ_STREAM48 0x2b /* read stream 48bit LBA */
+#define ATA_WRITE 0x30 /* write */
+#define ATA_WRITE48 0x34 /* write 48bit LBA */
+#define ATA_WRITE_DMA48 0x35 /* write DMA 48bit LBA */
+#define ATA_WRITE_DMA_QUEUED48 0x36 /* write DMA QUEUED 48bit LBA*/
+#define ATA_SET_MAX_ADDRESS48 0x37 /* set max address 48bit */
+#define ATA_WRITE_MUL48 0x39 /* write multi 48bit LBA */
+#define ATA_WRITE_STREAM_DMA48 0x3a
+#define ATA_WRITE_STREAM48 0x3b
+#define ATA_WRITE_DMA_FUA48 0x3d
+#define ATA_WRITE_DMA_QUEUED_FUA48 0x3e
+#define ATA_WRITE_LOG_EXT 0x3f
+#define ATA_READ_VERIFY 0x40
+#define ATA_READ_VERIFY48 0x42
+#define ATA_WRITE_UNCORRECTABLE48 0x45 /* write uncorrectable 48bit LBA */
+#define ATA_WU_PSEUDO 0x55 /* pseudo-uncorrectable error */
+#define ATA_WU_FLAGGED 0xaa /* flagged-uncorrectable error */
+#define ATA_READ_LOG_DMA_EXT 0x47 /* read log DMA ext - PIO Data-In */
+#define ATA_ZAC_MANAGEMENT_IN 0x4a /* ZAC management in */
+#define ATA_ZM_REPORT_ZONES 0x00 /* report zones */
+#define ATA_READ_FPDMA_QUEUED 0x60 /* read DMA NCQ */
+#define ATA_WRITE_FPDMA_QUEUED 0x61 /* write DMA NCQ */
+#define ATA_NCQ_NON_DATA 0x63 /* NCQ non-data command */
+#define ATA_ABORT_NCQ_QUEUE 0x00 /* abort NCQ queue */
+#define ATA_DEADLINE_HANDLING 0x01 /* deadline handling */
+#define ATA_SET_FEATURES 0x05 /* set features */
+#define ATA_ZERO_EXT 0x06 /* zero ext */
+#define ATA_NCQ_ZAC_MGMT_OUT 0x07 /* NCQ ZAC mgmt out no data */
+#define ATA_SEND_FPDMA_QUEUED 0x64 /* send DMA NCQ */
+#define ATA_SFPDMA_DSM 0x00 /* Data set management */
+#define ATA_SFPDMA_DSM_TRIM 0x01 /* Set trim bit in auxiliary */
+#define ATA_SFPDMA_HYBRID_EVICT 0x01 /* Hybrid Evict */
+#define ATA_SFPDMA_WLDMA 0x02 /* Write Log DMA EXT */
+#define ATA_SFPDMA_ZAC_MGMT_OUT 0x03 /* NCQ ZAC mgmt out w/data */
+#define ATA_RECV_FPDMA_QUEUED 0x65 /* receive DMA NCQ */
+#define ATA_RFPDMA_RL_DMA_EXT 0x00 /* Read Log DMA EXT */
+#define ATA_RFPDMA_ZAC_MGMT_IN 0x02 /* NCQ ZAC mgmt in w/data */
+#define ATA_SEP_ATTN 0x67 /* SEP request */
+#define ATA_SEEK 0x70 /* seek */
+#define ATA_ZAC_MANAGEMENT_OUT 0x9f /* ZAC management out */
+#define ATA_ZM_CLOSE_ZONE 0x01 /* close zone */
+#define ATA_ZM_FINISH_ZONE 0x02 /* finish zone */
+#define ATA_ZM_OPEN_ZONE 0x03 /* open zone */
+#define ATA_ZM_RWP 0x04 /* reset write pointer */
+#define ATA_PACKET_CMD 0xa0 /* packet command */
+#define ATA_ATAPI_IDENTIFY 0xa1 /* get ATAPI params*/
+#define ATA_SERVICE 0xa2 /* service command */
+#define ATA_SMART_CMD 0xb0 /* SMART command */
+#define ATA_CFA_ERASE 0xc0 /* CFA erase */
+#define ATA_READ_MUL 0xc4 /* read multi */
+#define ATA_WRITE_MUL 0xc5 /* write multi */
+#define ATA_SET_MULTI 0xc6 /* set multi size */
+#define ATA_READ_DMA_QUEUED 0xc7 /* read DMA QUEUED */
+#define ATA_READ_DMA 0xc8 /* read DMA */
+#define ATA_WRITE_DMA 0xca /* write DMA */
+#define ATA_WRITE_DMA_QUEUED 0xcc /* write DMA QUEUED */
+#define ATA_WRITE_MUL_FUA48 0xce
+#define ATA_STANDBY_IMMEDIATE 0xe0 /* standby immediate */
+#define ATA_IDLE_IMMEDIATE 0xe1 /* idle immediate */
+#define ATA_STANDBY_CMD 0xe2 /* standby */
+#define ATA_IDLE_CMD 0xe3 /* idle */
+#define ATA_READ_BUFFER 0xe4 /* read buffer */
+#define ATA_READ_PM 0xe4 /* read portmultiplier */
+#define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */
+#define ATA_SLEEP 0xe6 /* sleep */
+#define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */
+#define ATA_WRITE_PM 0xe8 /* write portmultiplier */
+#define ATA_FLUSHCACHE48 0xea /* flush cache to disk */
+#define ATA_ATA_IDENTIFY 0xec /* get ATA params */
+#define ATA_SETFEATURES 0xef /* features command */
+#define ATA_SF_ENAB_WCACHE 0x02 /* enable write cache */
+#define ATA_SF_DIS_WCACHE 0x82 /* disable write cache */
+#define ATA_SF_SETXFER 0x03 /* set transfer mode */
+#define ATA_SF_APM 0x05 /* Enable APM feature set */
+#define ATA_SF_ENAB_PUIS 0x06 /* enable PUIS */
+#define ATA_SF_DIS_PUIS 0x86 /* disable PUIS */
+#define ATA_SF_PUIS_SPINUP 0x07 /* PUIS spin-up */
+#define ATA_SF_WRV 0x0b /* Enable Write-Read-Verify */
+#define ATA_SF_DLC 0x0c /* Enable device life control */
+#define ATA_SF_SATA 0x10 /* Enable use of SATA feature */
+#define ATA_SF_FFC 0x41 /* Free-fall Control */
+#define ATA_SF_MHIST 0x43 /* Set Max Host Sect. Times */
+#define ATA_SF_RATE 0x45 /* Set Rate Basis */
+#define ATA_SF_EPC 0x4A /* Extended Power Conditions */
+#define ATA_SF_ENAB_RCACHE 0xaa /* enable readahead cache */
+#define ATA_SF_DIS_RCACHE 0x55 /* disable readahead cache */
+#define ATA_SF_ENAB_RELIRQ 0x5d /* enable release interrupt */
+#define ATA_SF_DIS_RELIRQ 0xdd /* disable release interrupt */
+#define ATA_SF_ENAB_SRVIRQ 0x5e /* enable service interrupt */
+#define ATA_SF_DIS_SRVIRQ 0xde /* disable service interrupt */
+#define ATA_SF_LPSAERC 0x62 /* Long Phys Sect Align ErrRep*/
+#define ATA_SF_DSN 0x63 /* Device Stats Notification */
+#define ATA_CHECK_POWER_MODE 0xe5 /* Check Power Mode */
+#define ATA_SECURITY_SET_PASSWORD 0xf1 /* set drive password */
+#define ATA_SECURITY_UNLOCK 0xf2 /* unlock drive using passwd */
+#define ATA_SECURITY_ERASE_PREPARE 0xf3 /* prepare to erase drive */
+#define ATA_SECURITY_ERASE_UNIT 0xf4 /* erase all blocks on drive */
+#define ATA_SECURITY_FREEZE_LOCK 0xf5 /* freeze security config */
+#define ATA_SECURITY_DISABLE_PASSWORD 0xf6 /* disable drive password */
+#define ATA_READ_NATIVE_MAX_ADDRESS 0xf8 /* read native max address */
+#define ATA_SET_MAX_ADDRESS 0xf9 /* set max address */
+
+
+/* ATAPI commands */
+#define ATAPI_TEST_UNIT_READY 0x00 /* check if device is ready */
+#define ATAPI_REZERO 0x01 /* rewind */
+#define ATAPI_REQUEST_SENSE 0x03 /* get sense data */
+#define ATAPI_FORMAT 0x04 /* format unit */
+#define ATAPI_READ 0x08 /* read data */
+#define ATAPI_WRITE 0x0a /* write data */
+#define ATAPI_WEOF 0x10 /* write filemark */
+#define ATAPI_WF_WRITE 0x01
+#define ATAPI_SPACE 0x11 /* space command */
+#define ATAPI_SP_FM 0x01
+#define ATAPI_SP_EOD 0x03
+#define ATAPI_INQUIRY 0x12 /* get inquiry data */
+#define ATAPI_MODE_SELECT 0x15 /* mode select */
+#define ATAPI_ERASE 0x19 /* erase */
+#define ATAPI_MODE_SENSE 0x1a /* mode sense */
+#define ATAPI_START_STOP 0x1b /* start/stop unit */
+#define ATAPI_SS_LOAD 0x01
+#define ATAPI_SS_RETENSION 0x02
+#define ATAPI_SS_EJECT 0x04
+#define ATAPI_PREVENT_ALLOW 0x1e /* media removal */
+#define ATAPI_READ_FORMAT_CAPACITIES 0x23 /* get format capacities */
+#define ATAPI_READ_CAPACITY 0x25 /* get volume capacity */
+#define ATAPI_READ_BIG 0x28 /* read data */
+#define ATAPI_WRITE_BIG 0x2a /* write data */
+#define ATAPI_LOCATE 0x2b /* locate to position */
+#define ATAPI_READ_POSITION 0x34 /* read position */
+#define ATAPI_SYNCHRONIZE_CACHE 0x35 /* flush buf, close channel */
+#define ATAPI_WRITE_BUFFER 0x3b /* write device buffer */
+#define ATAPI_READ_BUFFER 0x3c /* read device buffer */
+#define ATAPI_READ_SUBCHANNEL 0x42 /* get subchannel info */
+#define ATAPI_READ_TOC 0x43 /* get table of contents */
+#define ATAPI_PLAY_10 0x45 /* play by lba */
+#define ATAPI_PLAY_MSF 0x47 /* play by MSF address */
+#define ATAPI_PLAY_TRACK 0x48 /* play by track number */
+#define ATAPI_PAUSE 0x4b /* pause audio operation */
+#define ATAPI_READ_DISK_INFO 0x51 /* get disk info structure */
+#define ATAPI_READ_TRACK_INFO 0x52 /* get track info structure */
+#define ATAPI_RESERVE_TRACK 0x53 /* reserve track */
+#define ATAPI_SEND_OPC_INFO 0x54 /* send OPC structurek */
+#define ATAPI_MODE_SELECT_BIG 0x55 /* set device parameters */
+#define ATAPI_REPAIR_TRACK 0x58 /* repair track */
+#define ATAPI_READ_MASTER_CUE 0x59 /* read master CUE info */
+#define ATAPI_MODE_SENSE_BIG 0x5a /* get device parameters */
+#define ATAPI_CLOSE_TRACK 0x5b /* close track/session */
+#define ATAPI_READ_BUFFER_CAPACITY 0x5c /* get buffer capicity */
+#define ATAPI_SEND_CUE_SHEET 0x5d /* send CUE sheet */
+#define ATAPI_SERVICE_ACTION_IN 0x96 /* get service data */
+#define ATAPI_BLANK 0xa1 /* blank the media */
+#define ATAPI_SEND_KEY 0xa3 /* send DVD key structure */
+#define ATAPI_REPORT_KEY 0xa4 /* get DVD key structure */
+#define ATAPI_PLAY_12 0xa5 /* play by lba */
+#define ATAPI_LOAD_UNLOAD 0xa6 /* changer control command */
+#define ATAPI_READ_STRUCTURE 0xad /* get DVD structure */
+#define ATAPI_PLAY_CD 0xb4 /* universal play command */
+#define ATAPI_SET_SPEED 0xbb /* set drive speed */
+#define ATAPI_MECH_STATUS 0xbd /* get changer status */
+#define ATAPI_READ_CD 0xbe /* read data */
+#define ATAPI_POLL_DSC 0xff /* poll DSC status bit */
+
+
+struct ata_ioc_devices {
+ int channel;
+ char name[2][32];
+ struct ata_params params[2];
+};
+
+/* pr channel ATA ioctl calls */
+#define IOCATAGMAXCHANNEL _IOR('a', 1, int)
+#define IOCATAREINIT _IOW('a', 2, int)
+#define IOCATAATTACH _IOW('a', 3, int)
+#define IOCATADETACH _IOW('a', 4, int)
+#define IOCATADEVICES _IOWR('a', 5, struct ata_ioc_devices)
+
+/* ATAPI request sense structure */
+struct atapi_sense {
+ u_int8_t error; /* current or deferred errors */
+#define ATA_SENSE_VALID 0x80
+
+ u_int8_t segment; /* segment number */
+ u_int8_t key; /* sense key */
+#define ATA_SENSE_KEY_MASK 0x0f /* sense key mask */
+#define ATA_SENSE_NO_SENSE 0x00 /* no specific sense key info */
+#define ATA_SENSE_RECOVERED_ERROR 0x01 /* command OK, data recovered */
+#define ATA_SENSE_NOT_READY 0x02 /* no access to drive */
+#define ATA_SENSE_MEDIUM_ERROR 0x03 /* non-recovered data error */
+#define ATA_SENSE_HARDWARE_ERROR 0x04 /* non-recoverable HW failure */
+#define ATA_SENSE_ILLEGAL_REQUEST 0x05 /* invalid command param(s) */
+#define ATA_SENSE_UNIT_ATTENTION 0x06 /* media changed */
+#define ATA_SENSE_DATA_PROTECT 0x07 /* write protect */
+#define ATA_SENSE_BLANK_CHECK 0x08 /* blank check */
+#define ATA_SENSE_VENDOR_SPECIFIC 0x09 /* vendor specific skey */
+#define ATA_SENSE_COPY_ABORTED 0x0a /* copy aborted */
+#define ATA_SENSE_ABORTED_COMMAND 0x0b /* command aborted, try again */
+#define ATA_SENSE_EQUAL 0x0c /* equal */
+#define ATA_SENSE_VOLUME_OVERFLOW 0x0d /* volume overflow */
+#define ATA_SENSE_MISCOMPARE 0x0e /* data dont match the medium */
+#define ATA_SENSE_RESERVED 0x0f
+#define ATA_SENSE_ILI 0x20;
+#define ATA_SENSE_EOM 0x40;
+#define ATA_SENSE_FILEMARK 0x80;
+
+ u_int32_t cmd_info; /* cmd information */
+ u_int8_t sense_length; /* additional sense len (n-7) */
+ u_int32_t cmd_specific_info; /* additional cmd spec info */
+ u_int8_t asc; /* additional sense code */
+ u_int8_t ascq; /* additional sense code qual */
+ u_int8_t replaceable_unit_code; /* replaceable unit code */
+ u_int8_t specific; /* sense key specific */
+#define ATA_SENSE_SPEC_VALID 0x80
+#define ATA_SENSE_SPEC_MASK 0x7f
+
+ u_int8_t specific1; /* sense key specific */
+ u_int8_t specific2; /* sense key specific */
+} __packed;
+
+/*
+ * SET FEATURES subcommands
+ */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * These values go in the LBA 3:0.
+ */
+#define ATA_SF_EPC_RESTORE 0x00 /* Restore Power Condition Settings */
+#define ATA_SF_EPC_GOTO 0x01 /* Go To Power Condition */
+#define ATA_SF_EPC_SET_TIMER 0x02 /* Set Power Condition Timer */
+#define ATA_SF_EPC_SET_STATE 0x03 /* Set Power Condition State */
+#define ATA_SF_EPC_ENABLE 0x04 /* Enable the EPC feature set */
+#define ATA_SF_EPC_DISABLE 0x05 /* Disable the EPC feature set */
+#define ATA_SF_EPC_SET_SOURCE 0x06 /* Set EPC Power Source */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Power Condition ID field
+ * These values go in the count register.
+ */
+#define ATA_EPC_STANDBY_Z 0x00 /* Substate of PM2:Standby */
+#define ATA_EPC_STANDBY_Y 0x01 /* Substate of PM2:Standby */
+#define ATA_EPC_IDLE_A 0x81 /* Substate of PM1:Idle */
+#define ATA_EPC_IDLE_B 0x82 /* Substate of PM1:Idle */
+#define ATA_EPC_IDLE_C 0x83 /* Substate of PM1:Idle */
+#define ATA_EPC_ALL 0xff /* All supported power conditions */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Restore Power Conditions Settings subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_RST_DFLT 0x40 /* 1=Rst from Default, 0= from Saved */
+#define ATA_SF_EPC_RST_SAVE 0x10 /* 1=Save on completion */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Got To Power Condition subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_GOTO_DELAY 0x02000000 /* Delayed entry bit */
+#define ATA_SF_EPC_GOTO_HOLD 0x01000000 /* Hold Power Cond bit */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set Power Condition Timer subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_TIMER_MASK 0x00ffff00 /* Timer field */
+#define ATA_SF_EPC_TIMER_SHIFT 8
+#define ATA_SF_EPC_TIMER_SEC 0x00000080 /* Timer units, 1=sec, 0=.1s */
+#define ATA_SF_EPC_TIMER_EN 0x00000020 /* Enable/disable cond. */
+#define ATA_SF_EPC_TIMER_SAVE 0x00000010 /* Save settings on comp. */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set Power Condition State subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_SETCON_EN 0x00000020 /* Enable power cond. */
+#define ATA_SF_EPC_SETCON_SAVE 0x00000010 /* Save settings on comp */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set EPC Power Source subcommand
+ * These values go in the count register.
+ */
+#define ATA_SF_EPC_SRC_UNKNOWN 0x0000 /* Unknown source */
+#define ATA_SF_EPC_SRC_BAT 0x0001 /* battery source */
+#define ATA_SF_EPC_SRC_NOT_BAT 0x0002 /* not battery source */
+
+#define ATA_LOG_DIRECTORY 0x00 /* Directory of all logs */
+#define ATA_POWER_COND_LOG 0x08 /* Power Conditions Log */
+#define ATA_PCL_IDLE 0x00 /* Idle Power Conditions Page */
+#define ATA_PCL_STANDBY 0x01 /* Standby Power Conditions Page */
+#define ATA_IDENTIFY_DATA_LOG 0x30 /* Identify Device Data Log */
+#define ATA_IDL_PAGE_LIST 0x00 /* List of supported pages */
+#define ATA_IDL_IDENTIFY_DATA 0x01 /* Copy of Identify Device data */
+#define ATA_IDL_CAPACITY 0x02 /* Capacity */
+#define ATA_IDL_SUP_CAP 0x03 /* Supported Capabilities */
+#define ATA_IDL_CUR_SETTINGS 0x04 /* Current Settings */
+#define ATA_IDL_ATA_STRINGS 0x05 /* ATA Strings */
+#define ATA_IDL_SECURITY 0x06 /* Security */
+#define ATA_IDL_PARALLEL_ATA 0x07 /* Parallel ATA */
+#define ATA_IDL_SERIAL_ATA 0x08 /* Seiral ATA */
+#define ATA_IDL_ZDI 0x09 /* Zoned Device Information */
+
+struct ata_gp_log_dir {
+ uint8_t header[2];
+#define ATA_GP_LOG_DIR_VERSION 0x0001
+ uint8_t num_pages[255*2]; /* Number of log pages at address */
+};
+
+/*
+ * ATA Power Conditions log descriptor
+ */
+struct ata_power_cond_log_desc {
+ uint8_t reserved1;
+ uint8_t flags;
+#define ATA_PCL_COND_SUPPORTED 0x80
+#define ATA_PCL_COND_SAVEABLE 0x40
+#define ATA_PCL_COND_CHANGEABLE 0x20
+#define ATA_PCL_DEFAULT_TIMER_EN 0x10
+#define ATA_PCL_SAVED_TIMER_EN 0x08
+#define ATA_PCL_CURRENT_TIMER_EN 0x04
+#define ATA_PCL_HOLD_PC_NOT_SUP 0x02
+ uint8_t reserved2[2];
+ uint8_t default_timer[4];
+ uint8_t saved_timer[4];
+ uint8_t current_timer[4];
+ uint8_t nom_time_to_active[4];
+ uint8_t min_timer[4];
+ uint8_t max_timer[4];
+ uint8_t num_transitions_to_pc[4];
+ uint8_t hours_in_pc[4];
+ uint8_t reserved3[28];
+};
+
+/*
+ * ATA Power Conditions Log (0x08), Idle power conditions page (0x00)
+ */
+struct ata_power_cond_log_idle {
+ struct ata_power_cond_log_desc idle_a_desc;
+ struct ata_power_cond_log_desc idle_b_desc;
+ struct ata_power_cond_log_desc idle_c_desc;
+ uint8_t reserved[320];
+};
+
+/*
+ * ATA Power Conditions Log (0x08), Standby power conditions page (0x01)
+ */
+struct ata_power_cond_log_standby {
+ uint8_t reserved[384];
+ struct ata_power_cond_log_desc standby_y_desc;
+ struct ata_power_cond_log_desc standby_z_desc;
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30) page 0x00
+ * List of Supported IDENTIFY DEVICE data pages.
+ */
+struct ata_identify_log_pages {
+ uint8_t header[8];
+#define ATA_IDLOG_REVISION 0x0000000000000001
+ uint8_t entry_count;
+ uint8_t entries[503];
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30)
+ * Capacity (Page 0x02).
+ */
+struct ata_identify_log_capacity {
+ uint8_t header[8];
+#define ATA_CAP_HEADER_VALID 0x8000000000000000
+#define ATA_CAP_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_CAP_PAGE_NUM_SHIFT 16
+#define ATA_CAP_REV_MASK 0x00000000000000ff
+ uint8_t capacity[8];
+#define ATA_CAP_CAPACITY_VALID 0x8000000000000000
+#define ATA_CAP_ACCESSIBLE_CAP 0x0000ffffffffffff
+ uint8_t phys_logical_sect_size[8];
+#define ATA_CAP_PL_VALID 0x8000000000000000
+#define ATA_CAP_LTOP_REL_SUP 0x4000000000000000
+#define ATA_CAP_LOG_SECT_SUP 0x2000000000000000
+#define ATA_CAP_ALIGN_ERR_MASK 0x0000000000300000
+#define ATA_CAP_LTOP_MASK 0x00000000000f0000
+#define ATA_CAP_LOG_SECT_OFF 0x000000000000ffff
+ uint8_t logical_sect_size[8];
+#define ATA_CAP_LOG_SECT_VALID 0x8000000000000000
+#define ATA_CAP_LOG_SECT_SIZE 0x00000000ffffffff
+ uint8_t nominal_buffer_size[8];
+#define ATA_CAP_NOM_BUF_VALID 0x8000000000000000
+#define ATA_CAP_NOM_BUF_SIZE 0x7fffffffffffffff
+ uint8_t reserved[472];
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30)
+ * Supported Capabilities (Page 0x03).
+ */
+
+struct ata_identify_log_sup_cap {
+ uint8_t header[8];
+#define ATA_SUP_CAP_HEADER_VALID 0x8000000000000000
+#define ATA_SUP_CAP_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_SUP_CAP_PAGE_NUM_SHIFT 16
+#define ATA_SUP_CAP_REV_MASK 0x00000000000000ff
+ uint8_t sup_cap[8];
+#define ATA_SUP_CAP_VALID 0x8000000000000000
+#define ATA_SC_SET_SECT_CONFIG_SUP 0x0002000000000000 /* Set Sect Conf*/
+#define ATA_SC_ZERO_EXT_SUP 0x0001000000000000 /* Zero EXT */
+#define ATA_SC_SUCC_NCQ_SENSE_SUP 0x0000800000000000 /* Succ. NCQ Sns */
+#define ATA_SC_DLC_SUP 0x0000400000000000 /* DLC */
+#define ATA_SC_RQSN_DEV_FAULT_SUP 0x0000200000000000 /* Req Sns Dev Flt*/
+#define ATA_SC_DSN_SUP 0x0000100000000000 /* DSN */
+#define ATA_SC_LP_STANDBY_SUP 0x0000080000000000 /* LP Standby */
+#define ATA_SC_SET_EPC_PS_SUP 0x0000040000000000 /* Set EPC PS */
+#define ATA_SC_AMAX_ADDR_SUP 0x0000020000000000 /* AMAX Addr */
+#define ATA_SC_DRAT_SUP 0x0000008000000000 /* DRAT */
+#define ATA_SC_LPS_MISALGN_SUP 0x0000004000000000 /* LPS Misalign */
+#define ATA_SC_RB_DMA_SUP 0x0000001000000000 /* Read Buf DMA */
+#define ATA_SC_WB_DMA_SUP 0x0000000800000000 /* Write Buf DMA */
+#define ATA_SC_DNLD_MC_DMA_SUP 0x0000000200000000 /* DL MCode DMA */
+#define ATA_SC_28BIT_SUP 0x0000000100000000 /* 28-bit */
+#define ATA_SC_RZAT_SUP 0x0000000080000000 /* RZAT */
+#define ATA_SC_NOP_SUP 0x0000000020000000 /* NOP */
+#define ATA_SC_READ_BUFFER_SUP 0x0000000010000000 /* Read Buffer */
+#define ATA_SC_WRITE_BUFFER_SUP 0x0000000008000000 /* Write Buffer */
+#define ATA_SC_READ_LOOK_AHEAD_SUP 0x0000000002000000 /* Read Look-Ahead*/
+#define ATA_SC_VOLATILE_WC_SUP 0x0000000001000000 /* Volatile WC */
+#define ATA_SC_SMART_SUP 0x0000000000800000 /* SMART */
+#define ATA_SC_FLUSH_CACHE_EXT_SUP 0x0000000000400000 /* Flush Cache Ext */
+#define ATA_SC_48BIT_SUP 0x0000000000100000 /* 48-Bit */
+#define ATA_SC_SPINUP_SUP 0x0000000000040000 /* Spin-Up */
+#define ATA_SC_PUIS_SUP 0x0000000000020000 /* PUIS */
+#define ATA_SC_APM_SUP 0x0000000000010000 /* APM */
+#define ATA_SC_DL_MICROCODE_SUP 0x0000000000004000 /* DL Microcode */
+#define ATA_SC_UNLOAD_SUP 0x0000000000002000 /* Unload */
+#define ATA_SC_WRITE_FUA_EXT_SUP 0x0000000000001000 /* Write FUA EXT */
+#define ATA_SC_GPL_SUP 0x0000000000000800 /* GPL */
+#define ATA_SC_STREAMING_SUP 0x0000000000000400 /* Streaming */
+#define ATA_SC_SMART_SELFTEST_SUP 0x0000000000000100 /* SMART self-test */
+#define ATA_SC_SMART_ERR_LOG_SUP 0x0000000000000080 /* SMART Err Log */
+#define ATA_SC_EPC_SUP 0x0000000000000040 /* EPC */
+#define ATA_SC_SENSE_SUP 0x0000000000000020 /* Sense data */
+#define ATA_SC_FREEFALL_SUP 0x0000000000000010 /* Free-Fall */
+#define ATA_SC_DM_MODE3_SUP 0x0000000000000008 /* DM Mode 3 */
+#define ATA_SC_GPL_DMA_SUP 0x0000000000000004 /* GPL DMA */
+#define ATA_SC_WRITE_UNCOR_SUP 0x0000000000000002 /* Write uncorr. */
+#define ATA_SC_WRV_SUP 0x0000000000000001 /* WRV */
+ uint8_t download_code_cap[8];
+#define ATA_DL_CODE_VALID 0x8000000000000000
+#define ATA_DLC_DM_OFFSETS_DEFER_SUP 0x0000000400000000
+#define ATA_DLC_DM_IMMED_SUP 0x0000000200000000
+#define ATA_DLC_DM_OFF_IMMED_SUP 0x0000000100000000
+#define ATA_DLC_DM_MAX_XFER_SIZE_MASK 0x00000000ffff0000
+#define ATA_DLC_DM_MAX_XFER_SIZE_SHIFT 16
+#define ATA_DLC_DM_MIN_XFER_SIZE_MASK 0x000000000000ffff
+ uint8_t nom_media_rotation_rate[8];
+#define ATA_NOM_MEDIA_ROTATION_VALID 0x8000000000000000
+#define ATA_ROTATION_MASK 0x000000000000ffff
+ uint8_t form_factor[8];
+#define ATA_FORM_FACTOR_VALID 0x8000000000000000
+#define ATA_FF_MASK 0x000000000000000f
+#define ATA_FF_NOT_REPORTED 0x0000000000000000 /* Not reported */
+#define ATA_FF_525_IN 0x0000000000000001 /* 5.25 inch */
+#define ATA_FF_35_IN 0x0000000000000002 /* 3.5 inch */
+#define ATA_FF_25_IN 0x0000000000000003 /* 2.5 inch */
+#define ATA_FF_18_IN 0x0000000000000004 /* 1.8 inch */
+#define ATA_FF_LT_18_IN 0x0000000000000005 /* < 1.8 inch */
+#define ATA_FF_MSATA 0x0000000000000006 /* mSATA */
+#define ATA_FF_M2 0x0000000000000007 /* M.2 */
+#define ATA_FF_MICROSSD 0x0000000000000008 /* MicroSSD */
+#define ATA_FF_CFAST 0x0000000000000009 /* CFast */
+ uint8_t wrv_sec_cnt_mode3[8];
+#define ATA_WRV_MODE3_VALID 0x8000000000000000
+#define ATA_WRV_MODE3_COUNT 0x00000000ffffffff
+ uint8_t wrv_sec_cnt_mode2[8];
+#define ATA_WRV_MODE2_VALID 0x8000000000000000
+#define ATA_WRV_MODE2_COUNT 0x00000000ffffffff
+ uint8_t wwn[16];
+ /* XXX KDM need to figure out how to handle 128-bit fields */
+ uint8_t dsm[8];
+#define ATA_DSM_VALID 0x8000000000000000
+#define ATA_LB_MARKUP_SUP 0x000000000000ff00
+#define ATA_TRIM_SUP 0x0000000000000001
+ uint8_t util_per_unit_time[16];
+ /* XXX KDM need to figure out how to handle 128-bit fields */
+ uint8_t util_usage_rate_sup[8];
+#define ATA_UTIL_USAGE_RATE_VALID 0x8000000000000000
+#define ATA_SETTING_RATE_SUP 0x0000000000800000
+#define ATA_SINCE_POWERON_SUP 0x0000000000000100
+#define ATA_POH_RATE_SUP 0x0000000000000010
+#define ATA_DATE_TIME_RATE_SUP 0x0000000000000001
+ uint8_t zoned_cap[8];
+#define ATA_ZONED_VALID 0x8000000000000000
+#define ATA_ZONED_MASK 0x0000000000000003
+ uint8_t sup_zac_cap[8];
+#define ATA_SUP_ZAC_CAP_VALID 0x8000000000000000
+#define ATA_ND_RWP_SUP 0x0000000000000010 /* Reset Write Ptr*/
+#define ATA_ND_FINISH_ZONE_SUP 0x0000000000000008 /* Finish Zone */
+#define ATA_ND_CLOSE_ZONE_SUP 0x0000000000000004 /* Close Zone */
+#define ATA_ND_OPEN_ZONE_SUP 0x0000000000000002 /* Open Zone */
+#define ATA_REPORT_ZONES_SUP 0x0000000000000001 /* Report Zones */
+ uint8_t reserved[392];
+};
+
+/*
+ * ATA Identify Device Data Log Zoned Device Information Page (0x09).
+ * Current as of ZAC r04a, August 25, 2015.
+ */
+struct ata_zoned_info_log {
+ uint8_t header[8];
+#define ATA_ZDI_HEADER_VALID 0x8000000000000000
+#define ATA_ZDI_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_ZDI_PAGE_NUM_SHIFT 16
+#define ATA_ZDI_REV_MASK 0x00000000000000ff
+ uint8_t zoned_cap[8];
+#define ATA_ZDI_CAP_VALID 0x8000000000000000
+#define ATA_ZDI_CAP_URSWRZ 0x0000000000000001
+ uint8_t zoned_settings[8];
+#define ATA_ZDI_SETTINGS_VALID 0x8000000000000000
+ uint8_t optimal_seq_zones[8];
+#define ATA_ZDI_OPT_SEQ_VALID 0x8000000000000000
+#define ATA_ZDI_OPT_SEQ_MASK 0x00000000ffffffff
+ uint8_t optimal_nonseq_zones[8];
+#define ATA_ZDI_OPT_NS_VALID 0x8000000000000000
+#define ATA_ZDI_OPT_NS_MASK 0x00000000ffffffff
+ uint8_t max_seq_req_zones[8];
+#define ATA_ZDI_MAX_SEQ_VALID 0x8000000000000000
+#define ATA_ZDI_MAX_SEQ_MASK 0x00000000ffffffff
+ uint8_t version_info[8];
+#define ATA_ZDI_VER_VALID 0x8000000000000000
+#define ATA_ZDI_VER_ZAC_SUP 0x0100000000000000
+#define ATA_ZDI_VER_ZAC_MASK 0x00000000000000ff
+ uint8_t reserved[456];
+};
+
+struct ata_ioc_request {
+ union {
+ struct {
+ u_int8_t command;
+ u_int8_t feature;
+ u_int64_t lba;
+ u_int16_t count;
+ } ata;
+ struct {
+ char ccb[16];
+ struct atapi_sense sense;
+ } atapi;
+ } u;
+ caddr_t data;
+ int count;
+ int flags;
+#define ATA_CMD_CONTROL 0x01
+#define ATA_CMD_READ 0x02
+#define ATA_CMD_WRITE 0x04
+#define ATA_CMD_ATAPI 0x08
+
+ int timeout;
+ int error;
+};
+
+struct ata_security_password {
+ u_int16_t ctrl;
+#define ATA_SECURITY_PASSWORD_USER 0x0000
+#define ATA_SECURITY_PASSWORD_MASTER 0x0001
+#define ATA_SECURITY_ERASE_NORMAL 0x0000
+#define ATA_SECURITY_ERASE_ENHANCED 0x0002
+#define ATA_SECURITY_LEVEL_HIGH 0x0000
+#define ATA_SECURITY_LEVEL_MAXIMUM 0x0100
+
+ u_int8_t password[32];
+ u_int16_t revision;
+ u_int16_t reserved[238];
+};
+
+/* pr device ATA ioctl calls */
+#define IOCATAREQUEST _IOWR('a', 100, struct ata_ioc_request)
+#define IOCATAGPARM _IOR('a', 101, struct ata_params)
+#define IOCATAGMODE _IOR('a', 102, int)
+#define IOCATASMODE _IOW('a', 103, int)
+
+#define IOCATAGSPINDOWN _IOR('a', 104, int)
+#define IOCATASSPINDOWN _IOW('a', 105, int)
+
+
+struct ata_ioc_raid_config {
+ int lun;
+ int type;
+#define AR_JBOD 0x0001
+#define AR_SPAN 0x0002
+#define AR_RAID0 0x0004
+#define AR_RAID1 0x0008
+#define AR_RAID01 0x0010
+#define AR_RAID3 0x0020
+#define AR_RAID4 0x0040
+#define AR_RAID5 0x0080
+
+ int interleave;
+ int status;
+#define AR_READY 1
+#define AR_DEGRADED 2
+#define AR_REBUILDING 4
+
+ int progress;
+ int total_disks;
+ int disks[16];
+};
+
+struct ata_ioc_raid_status {
+ int lun;
+ int type;
+ int interleave;
+ int status;
+ int progress;
+ int total_disks;
+ struct {
+ int state;
+#define AR_DISK_ONLINE 0x01
+#define AR_DISK_PRESENT 0x02
+#define AR_DISK_SPARE 0x04
+ int lun;
+ } disks[16];
+};
+
+/* ATA RAID ioctl calls */
+#define IOCATARAIDCREATE _IOWR('a', 200, struct ata_ioc_raid_config)
+#define IOCATARAIDDELETE _IOW('a', 201, int)
+#define IOCATARAIDSTATUS _IOWR('a', 202, struct ata_ioc_raid_status)
+#define IOCATARAIDADDSPARE _IOW('a', 203, struct ata_ioc_raid_config)
+#define IOCATARAIDREBUILD _IOW('a', 204, int)
+
+#endif /* _SYS_ATA_H_ */
diff --git a/usr/src/contrib/bhyve/sys/pciio.h b/usr/src/contrib/bhyve/sys/pciio.h
new file mode 100644
index 0000000000..d70bfbcf6f
--- /dev/null
+++ b/usr/src/contrib/bhyve/sys/pciio.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1997, Stefan Esser <se@FreeBSD.ORG>
+ * Copyright (c) 1997, 1998, 1999, Kenneth D. Merry <ken@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _SYS_PCIIO_H_
+#define _SYS_PCIIO_H_
+
+#include <sys/ioccom.h>
+
+#define PCI_MAXNAMELEN 16
+
+typedef enum {
+ PCI_GETCONF_LAST_DEVICE,
+ PCI_GETCONF_LIST_CHANGED,
+ PCI_GETCONF_MORE_DEVS,
+ PCI_GETCONF_ERROR
+} pci_getconf_status;
+
+typedef enum {
+ PCI_GETCONF_NO_MATCH = 0x0000,
+ PCI_GETCONF_MATCH_DOMAIN = 0x0001,
+ PCI_GETCONF_MATCH_BUS = 0x0002,
+ PCI_GETCONF_MATCH_DEV = 0x0004,
+ PCI_GETCONF_MATCH_FUNC = 0x0008,
+ PCI_GETCONF_MATCH_NAME = 0x0010,
+ PCI_GETCONF_MATCH_UNIT = 0x0020,
+ PCI_GETCONF_MATCH_VENDOR = 0x0040,
+ PCI_GETCONF_MATCH_DEVICE = 0x0080,
+ PCI_GETCONF_MATCH_CLASS = 0x0100
+} pci_getconf_flags;
+
+struct pcisel {
+ u_int32_t pc_domain; /* domain number */
+ u_int8_t pc_bus; /* bus number */
+ u_int8_t pc_dev; /* device on this bus */
+ u_int8_t pc_func; /* function on this device */
+};
+
+struct pci_conf {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ u_int8_t pc_hdr; /* PCI header type */
+ u_int16_t pc_subvendor; /* card vendor ID */
+ u_int16_t pc_subdevice; /* card device ID, assigned by
+ card vendor */
+ u_int16_t pc_vendor; /* chip vendor ID */
+ u_int16_t pc_device; /* chip device ID, assigned by
+ chip vendor */
+ u_int8_t pc_class; /* chip PCI class */
+ u_int8_t pc_subclass; /* chip PCI subclass */
+ u_int8_t pc_progif; /* chip PCI programming interface */
+ u_int8_t pc_revid; /* chip revision ID */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_long pd_unit; /* device unit number */
+};
+
+struct pci_match_conf {
+ struct pcisel pc_sel; /* domain+bus+slot+function */
+ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */
+ u_long pd_unit; /* Unit number */
+ u_int16_t pc_vendor; /* PCI Vendor ID */
+ u_int16_t pc_device; /* PCI Device ID */
+ u_int8_t pc_class; /* PCI class */
+ pci_getconf_flags flags; /* Matching expression */
+};
+
+struct pci_conf_io {
+ u_int32_t pat_buf_len; /* pattern buffer length */
+ u_int32_t num_patterns; /* number of patterns */
+ struct pci_match_conf *patterns; /* pattern buffer */
+ u_int32_t match_buf_len; /* match buffer length */
+ u_int32_t num_matches; /* number of matches returned */
+ struct pci_conf *matches; /* match buffer */
+ u_int32_t offset; /* offset into device list */
+ u_int32_t generation; /* device list generation */
+ pci_getconf_status status; /* request status */
+};
+
+struct pci_io {
+ struct pcisel pi_sel; /* device to operate on */
+ int pi_reg; /* configuration register to examine */
+ int pi_width; /* width (in bytes) of read or write */
+ u_int32_t pi_data; /* data to write or result of read */
+};
+
+struct pci_bar_io {
+ struct pcisel pbi_sel; /* device to operate on */
+ int pbi_reg; /* starting address of BAR */
+ int pbi_enabled; /* decoding enabled */
+ uint64_t pbi_base; /* current value of BAR */
+ uint64_t pbi_length; /* length of BAR */
+};
+
+struct pci_vpd_element {
+ char pve_keyword[2];
+ uint8_t pve_flags;
+ uint8_t pve_datalen;
+ uint8_t pve_data[0];
+};
+
+#define PVE_FLAG_IDENT 0x01 /* Element is the string identifier */
+#define PVE_FLAG_RW 0x02 /* Element is read/write */
+
+#define PVE_NEXT(pve) \
+ ((struct pci_vpd_element *)((char *)(pve) + \
+ sizeof(struct pci_vpd_element) + (pve)->pve_datalen))
+
+struct pci_list_vpd_io {
+ struct pcisel plvi_sel; /* device to operate on */
+ size_t plvi_len; /* size of the data area */
+ struct pci_vpd_element *plvi_data;
+};
+
+#define PCIOCGETCONF _IOWR('p', 5, struct pci_conf_io)
+#define PCIOCREAD _IOWR('p', 2, struct pci_io)
+#define PCIOCWRITE _IOWR('p', 3, struct pci_io)
+#define PCIOCATTACHED _IOWR('p', 4, struct pci_io)
+#define PCIOCGETBAR _IOWR('p', 6, struct pci_bar_io)
+#define PCIOCLISTVPD _IOWR('p', 7, struct pci_list_vpd_io)
+
+#endif /* !_SYS_PCIIO_H_ */
diff --git a/usr/src/contrib/bhyve/sys/queue.h b/usr/src/contrib/bhyve/sys/queue.h
new file mode 100644
index 0000000000..f26c492af1
--- /dev/null
+++ b/usr/src/contrib/bhyve/sys/queue.h
@@ -0,0 +1,787 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * singly-linked tail queues, lists and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A singly-linked tail queue is headed by a pair of pointers, one to the
+ * head of the list and the other to the tail of the list. The elements are
+ * singly linked for minimum space and pointer manipulation overhead at the
+ * expense of O(n) removal for arbitrary elements. New elements can be added
+ * to the list after an existing element, at the head of the list, or at the
+ * end of the list. Elements being removed from the head of the tail queue
+ * should use the explicit macro for this purpose for optimum efficiency.
+ * A singly-linked tail queue may only be traversed in the forward direction.
+ * Singly-linked tail queues are ideal for applications with large datasets
+ * and few or no removals or for implementing a FIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may be traversed in either direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ *
+ * Below is a summary of implemented functions where:
+ * + means the macro is available
+ * - means the macro is not available
+ * s means the macro is available but is slow (runs in O(n) time)
+ *
+ * SLIST LIST STAILQ TAILQ
+ * _HEAD + + + +
+ * _CLASS_HEAD + + + +
+ * _HEAD_INITIALIZER + + + +
+ * _ENTRY + + + +
+ * _CLASS_ENTRY + + + +
+ * _INIT + + + +
+ * _EMPTY + + + +
+ * _FIRST + + + +
+ * _NEXT + + + +
+ * _PREV - + - +
+ * _LAST - - + +
+ * _FOREACH + + + +
+ * _FOREACH_FROM + + + +
+ * _FOREACH_SAFE + + + +
+ * _FOREACH_FROM_SAFE + + + +
+ * _FOREACH_REVERSE - - - +
+ * _FOREACH_REVERSE_FROM - - - +
+ * _FOREACH_REVERSE_SAFE - - - +
+ * _FOREACH_REVERSE_FROM_SAFE - - - +
+ * _INSERT_HEAD + + + +
+ * _INSERT_BEFORE - + - +
+ * _INSERT_AFTER + + + +
+ * _INSERT_TAIL - - + +
+ * _CONCAT s s + +
+ * _REMOVE_AFTER + - + -
+ * _REMOVE_HEAD + - + -
+ * _REMOVE s + s +
+ * _SWAP + + + +
+ *
+ */
+#ifdef QUEUE_MACRO_DEBUG
+/* Store the last 2 places the queue element or head was altered */
+struct qm_trace {
+ unsigned long lastline;
+ unsigned long prevline;
+ const char *lastfile;
+ const char *prevfile;
+};
+
+#define TRACEBUF struct qm_trace trace;
+#define TRACEBUF_INITIALIZER { __LINE__, 0, __FILE__, NULL } ,
+#define TRASHIT(x) do {(x) = (void *)-1;} while (0)
+#define QMD_SAVELINK(name, link) void **name = (void *)&(link)
+
+#define QMD_TRACE_HEAD(head) do { \
+ (head)->trace.prevline = (head)->trace.lastline; \
+ (head)->trace.prevfile = (head)->trace.lastfile; \
+ (head)->trace.lastline = __LINE__; \
+ (head)->trace.lastfile = __FILE__; \
+} while (0)
+
+#define QMD_TRACE_ELEM(elem) do { \
+ (elem)->trace.prevline = (elem)->trace.lastline; \
+ (elem)->trace.prevfile = (elem)->trace.lastfile; \
+ (elem)->trace.lastline = __LINE__; \
+ (elem)->trace.lastfile = __FILE__; \
+} while (0)
+
+#else
+#define QMD_TRACE_ELEM(elem)
+#define QMD_TRACE_HEAD(head)
+#define QMD_SAVELINK(name, link)
+#define TRACEBUF
+#define TRACEBUF_INITIALIZER
+#define TRASHIT(x)
+#endif /* QUEUE_MACRO_DEBUG */
+
+#ifdef __cplusplus
+/*
+ * In C++ there can be structure lists and class lists:
+ */
+#define QUEUE_TYPEOF(type) type
+#else
+#define QUEUE_TYPEOF(type) struct type
+#endif
+
+/*
+ * Singly-linked List declarations.
+ */
+#define SLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define SLIST_CLASS_HEAD(name, type) \
+struct name { \
+ class type *slh_first; /* first element */ \
+}
+
+#define SLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define SLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+
+#define SLIST_CLASS_ENTRY(type) \
+struct { \
+ class type *sle_next; /* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define SLIST_CONCAT(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *curelm = SLIST_FIRST(head1); \
+ if (curelm == NULL) { \
+ if ((SLIST_FIRST(head1) = SLIST_FIRST(head2)) != NULL) \
+ SLIST_INIT(head2); \
+ } else if (SLIST_FIRST(head2) != NULL) { \
+ while (SLIST_NEXT(curelm, field) != NULL) \
+ curelm = SLIST_NEXT(curelm, field); \
+ SLIST_NEXT(curelm, field) = SLIST_FIRST(head2); \
+ SLIST_INIT(head2); \
+ } \
+} while (0)
+
+#define SLIST_EMPTY(head) ((head)->slh_first == NULL)
+
+#define SLIST_FIRST(head) ((head)->slh_first)
+
+#define SLIST_FOREACH(var, head, field) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var); \
+ (var) = SLIST_NEXT((var), field))
+
+#define SLIST_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : SLIST_FIRST((head))); \
+ (var); \
+ (var) = SLIST_NEXT((var), field))
+
+#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define SLIST_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : SLIST_FIRST((head))); \
+ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \
+ for ((varp) = &SLIST_FIRST((head)); \
+ ((var) = *(varp)) != NULL; \
+ (varp) = &SLIST_NEXT((var), field))
+
+#define SLIST_INIT(head) do { \
+ SLIST_FIRST((head)) = NULL; \
+} while (0)
+
+#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \
+ SLIST_NEXT((slistelm), field) = (elm); \
+} while (0)
+
+#define SLIST_INSERT_HEAD(head, elm, field) do { \
+ SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \
+ SLIST_FIRST((head)) = (elm); \
+} while (0)
+
+#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+#define SLIST_REMOVE(head, elm, type, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.sle_next); \
+ if (SLIST_FIRST((head)) == (elm)) { \
+ SLIST_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ QUEUE_TYPEOF(type) *curelm = SLIST_FIRST(head); \
+ while (SLIST_NEXT(curelm, field) != (elm)) \
+ curelm = SLIST_NEXT(curelm, field); \
+ SLIST_REMOVE_AFTER(curelm, field); \
+ } \
+ TRASHIT(*oldnext); \
+} while (0)
+
+#define SLIST_REMOVE_AFTER(elm, field) do { \
+ SLIST_NEXT(elm, field) = \
+ SLIST_NEXT(SLIST_NEXT(elm, field), field); \
+} while (0)
+
+#define SLIST_REMOVE_HEAD(head, field) do { \
+ SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \
+} while (0)
+
+#define SLIST_SWAP(head1, head2, type) do { \
+ QUEUE_TYPEOF(type) *swap_first = SLIST_FIRST(head1); \
+ SLIST_FIRST(head1) = SLIST_FIRST(head2); \
+ SLIST_FIRST(head2) = swap_first; \
+} while (0)
+
+/*
+ * Singly-linked Tail queue declarations.
+ */
+#define STAILQ_HEAD(name, type) \
+struct name { \
+ struct type *stqh_first;/* first element */ \
+ struct type **stqh_last;/* addr of last next element */ \
+}
+
+#define STAILQ_CLASS_HEAD(name, type) \
+struct name { \
+ class type *stqh_first; /* first element */ \
+ class type **stqh_last; /* addr of last next element */ \
+}
+
+#define STAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).stqh_first }
+
+#define STAILQ_ENTRY(type) \
+struct { \
+ struct type *stqe_next; /* next element */ \
+}
+
+#define STAILQ_CLASS_ENTRY(type) \
+struct { \
+ class type *stqe_next; /* next element */ \
+}
+
+/*
+ * Singly-linked Tail queue functions.
+ */
+#define STAILQ_CONCAT(head1, head2) do { \
+ if (!STAILQ_EMPTY((head2))) { \
+ *(head1)->stqh_last = (head2)->stqh_first; \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_INIT((head2)); \
+ } \
+} while (0)
+
+#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL)
+
+#define STAILQ_FIRST(head) ((head)->stqh_first)
+
+#define STAILQ_FOREACH(var, head, field) \
+ for((var) = STAILQ_FIRST((head)); \
+ (var); \
+ (var) = STAILQ_NEXT((var), field))
+
+#define STAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \
+ (var); \
+ (var) = STAILQ_NEXT((var), field))
+
+#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = STAILQ_FIRST((head)); \
+ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \
+ (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define STAILQ_INIT(head) do { \
+ STAILQ_FIRST((head)) = NULL; \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+
+#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_NEXT((tqelm), field) = (elm); \
+} while (0)
+
+#define STAILQ_INSERT_HEAD(head, elm, field) do { \
+ if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+ STAILQ_FIRST((head)) = (elm); \
+} while (0)
+
+#define STAILQ_INSERT_TAIL(head, elm, field) do { \
+ STAILQ_NEXT((elm), field) = NULL; \
+ *(head)->stqh_last = (elm); \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+
+#define STAILQ_LAST(head, type, field) \
+ (STAILQ_EMPTY((head)) ? NULL : \
+ __containerof((head)->stqh_last, \
+ QUEUE_TYPEOF(type), field.stqe_next))
+
+#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
+
+#define STAILQ_REMOVE(head, elm, type, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.stqe_next); \
+ if (STAILQ_FIRST((head)) == (elm)) { \
+ STAILQ_REMOVE_HEAD((head), field); \
+ } \
+ else { \
+ QUEUE_TYPEOF(type) *curelm = STAILQ_FIRST(head); \
+ while (STAILQ_NEXT(curelm, field) != (elm)) \
+ curelm = STAILQ_NEXT(curelm, field); \
+ STAILQ_REMOVE_AFTER(head, curelm, field); \
+ } \
+ TRASHIT(*oldnext); \
+} while (0)
+
+#define STAILQ_REMOVE_AFTER(head, elm, field) do { \
+ if ((STAILQ_NEXT(elm, field) = \
+ STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \
+ (head)->stqh_last = &STAILQ_NEXT((elm), field); \
+} while (0)
+
+#define STAILQ_REMOVE_HEAD(head, field) do { \
+ if ((STAILQ_FIRST((head)) = \
+ STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \
+ (head)->stqh_last = &STAILQ_FIRST((head)); \
+} while (0)
+
+#define STAILQ_SWAP(head1, head2, type) do { \
+ QUEUE_TYPEOF(type) *swap_first = STAILQ_FIRST(head1); \
+ QUEUE_TYPEOF(type) **swap_last = (head1)->stqh_last; \
+ STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \
+ (head1)->stqh_last = (head2)->stqh_last; \
+ STAILQ_FIRST(head2) = swap_first; \
+ (head2)->stqh_last = swap_last; \
+ if (STAILQ_EMPTY(head1)) \
+ (head1)->stqh_last = &STAILQ_FIRST(head1); \
+ if (STAILQ_EMPTY(head2)) \
+ (head2)->stqh_last = &STAILQ_FIRST(head2); \
+} while (0)
+
+
+/*
+ * List declarations.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_CLASS_HEAD(name, type) \
+struct name { \
+ class type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+#define LIST_CLASS_ENTRY(type) \
+struct { \
+ class type *le_next; /* next element */ \
+ class type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define QMD_LIST_CHECK_HEAD(head, field) do { \
+ if (LIST_FIRST((head)) != NULL && \
+ LIST_FIRST((head))->field.le_prev != \
+ &LIST_FIRST((head))) \
+ panic("Bad list head %p first->prev != head", (head)); \
+} while (0)
+
+#define QMD_LIST_CHECK_NEXT(elm, field) do { \
+ if (LIST_NEXT((elm), field) != NULL && \
+ LIST_NEXT((elm), field)->field.le_prev != \
+ &((elm)->field.le_next)) \
+ panic("Bad link elm %p next->prev != elm", (elm)); \
+} while (0)
+
+#define QMD_LIST_CHECK_PREV(elm, field) do { \
+ if (*(elm)->field.le_prev != (elm)) \
+ panic("Bad link elm %p prev->next != elm", (elm)); \
+} while (0)
+#else
+#define QMD_LIST_CHECK_HEAD(head, field)
+#define QMD_LIST_CHECK_NEXT(elm, field)
+#define QMD_LIST_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define LIST_CONCAT(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *curelm = LIST_FIRST(head1); \
+ if (curelm == NULL) { \
+ if ((LIST_FIRST(head1) = LIST_FIRST(head2)) != NULL) { \
+ LIST_FIRST(head2)->field.le_prev = \
+ &LIST_FIRST((head1)); \
+ LIST_INIT(head2); \
+ } \
+ } else if (LIST_FIRST(head2) != NULL) { \
+ while (LIST_NEXT(curelm, field) != NULL) \
+ curelm = LIST_NEXT(curelm, field); \
+ LIST_NEXT(curelm, field) = LIST_FIRST(head2); \
+ LIST_FIRST(head2)->field.le_prev = &LIST_NEXT(curelm, field); \
+ LIST_INIT(head2); \
+ } \
+} while (0)
+
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+
+#define LIST_FIRST(head) ((head)->lh_first)
+
+#define LIST_FOREACH(var, head, field) \
+ for ((var) = LIST_FIRST((head)); \
+ (var); \
+ (var) = LIST_NEXT((var), field))
+
+#define LIST_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : LIST_FIRST((head))); \
+ (var); \
+ (var) = LIST_NEXT((var), field))
+
+#define LIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = LIST_FIRST((head)); \
+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define LIST_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : LIST_FIRST((head))); \
+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define LIST_INIT(head) do { \
+ LIST_FIRST((head)) = NULL; \
+} while (0)
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ QMD_LIST_CHECK_NEXT(listelm, field); \
+ if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\
+ LIST_NEXT((listelm), field)->field.le_prev = \
+ &LIST_NEXT((elm), field); \
+ LIST_NEXT((listelm), field) = (elm); \
+ (elm)->field.le_prev = &LIST_NEXT((listelm), field); \
+} while (0)
+
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ QMD_LIST_CHECK_PREV(listelm, field); \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ LIST_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &LIST_NEXT((elm), field); \
+} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ QMD_LIST_CHECK_HEAD((head), field); \
+ if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \
+ LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\
+ LIST_FIRST((head)) = (elm); \
+ (elm)->field.le_prev = &LIST_FIRST((head)); \
+} while (0)
+
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_PREV(elm, head, type, field) \
+ ((elm)->field.le_prev == &LIST_FIRST((head)) ? NULL : \
+ __containerof((elm)->field.le_prev, \
+ QUEUE_TYPEOF(type), field.le_next))
+
+#define LIST_REMOVE(elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.le_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.le_prev); \
+ QMD_LIST_CHECK_NEXT(elm, field); \
+ QMD_LIST_CHECK_PREV(elm, field); \
+ if (LIST_NEXT((elm), field) != NULL) \
+ LIST_NEXT((elm), field)->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = LIST_NEXT((elm), field); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
+} while (0)
+
+#define LIST_SWAP(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *swap_tmp = LIST_FIRST(head1); \
+ LIST_FIRST((head1)) = LIST_FIRST((head2)); \
+ LIST_FIRST((head2)) = swap_tmp; \
+ if ((swap_tmp = LIST_FIRST((head1))) != NULL) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head1)); \
+ if ((swap_tmp = LIST_FIRST((head2))) != NULL) \
+ swap_tmp->field.le_prev = &LIST_FIRST((head2)); \
+} while (0)
+
+/*
+ * Tail queue declarations.
+ */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_CLASS_HEAD(name, type) \
+struct name { \
+ class type *tqh_first; /* first element */ \
+ class type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER }
+
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_CLASS_ENTRY(type) \
+struct { \
+ class type *tqe_next; /* next element */ \
+ class type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+/*
+ * Tail queue functions.
+ */
+#if (defined(_KERNEL) && defined(INVARIANTS))
+#define QMD_TAILQ_CHECK_HEAD(head, field) do { \
+ if (!TAILQ_EMPTY(head) && \
+ TAILQ_FIRST((head))->field.tqe_prev != \
+ &TAILQ_FIRST((head))) \
+ panic("Bad tailq head %p first->prev != head", (head)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_TAIL(head, field) do { \
+ if (*(head)->tqh_last != NULL) \
+ panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \
+ if (TAILQ_NEXT((elm), field) != NULL && \
+ TAILQ_NEXT((elm), field)->field.tqe_prev != \
+ &((elm)->field.tqe_next)) \
+ panic("Bad link elm %p next->prev != elm", (elm)); \
+} while (0)
+
+#define QMD_TAILQ_CHECK_PREV(elm, field) do { \
+ if (*(elm)->field.tqe_prev != (elm)) \
+ panic("Bad link elm %p prev->next != elm", (elm)); \
+} while (0)
+#else
+#define QMD_TAILQ_CHECK_HEAD(head, field)
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+#endif /* (_KERNEL && INVARIANTS) */
+
+#define TAILQ_CONCAT(head1, head2, field) do { \
+ if (!TAILQ_EMPTY(head2)) { \
+ *(head1)->tqh_last = (head2)->tqh_first; \
+ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ TAILQ_INIT((head2)); \
+ QMD_TRACE_HEAD(head1); \
+ QMD_TRACE_HEAD(head2); \
+ } \
+} while (0)
+
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+
+#define TAILQ_FOREACH(var, head, field) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_INIT(head) do { \
+ TAILQ_FIRST((head)) = NULL; \
+ (head)->tqh_last = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+} while (0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_NEXT(listelm, field); \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else { \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ } \
+ TAILQ_NEXT((listelm), field) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_PREV(listelm, field); \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ TAILQ_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ QMD_TAILQ_CHECK_HEAD(head, field); \
+ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \
+ TAILQ_FIRST((head))->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ TAILQ_FIRST((head)) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ QMD_TAILQ_CHECK_TAIL(head, field); \
+ TAILQ_NEXT((elm), field) = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define TAILQ_REMOVE(head, elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \
+ QMD_TAILQ_CHECK_NEXT(elm, field); \
+ QMD_TAILQ_CHECK_PREV(elm, field); \
+ if ((TAILQ_NEXT((elm), field)) != NULL) \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else { \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ QMD_TRACE_HEAD(head); \
+ } \
+ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \
+ QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ if ((swap_first = (head1)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ if ((swap_first = (head2)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
+
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/usr/src/contrib/bhyve/sys/tree.h b/usr/src/contrib/bhyve/sys/tree.h
new file mode 100644
index 0000000000..6b47e247bb
--- /dev/null
+++ b/usr/src/contrib/bhyve/sys/tree.h
@@ -0,0 +1,765 @@
+/* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/* $FreeBSD: head/sys/sys/tree.h 189204 2009-03-01 04:57:23Z bms $ */
+
+/*-
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+#include <sys/cdefs.h>
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) \
+struct name { \
+ struct type *sph_root; /* root of the tree */ \
+}
+
+#define SPLAY_INITIALIZER(root) \
+ { NULL }
+
+#define SPLAY_INIT(root) do { \
+ (root)->sph_root = NULL; \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type) \
+struct { \
+ struct type *spe_left; /* left element */ \
+ struct type *spe_right; /* right element */ \
+}
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) \
+ return(NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) \
+ return (head->sph_root); \
+ return (NULL); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+ name##_SPLAY(head, elm); \
+ if (SPLAY_RIGHT(elm, field) != NULL) { \
+ elm = SPLAY_RIGHT(elm, field); \
+ while (SPLAY_LEFT(elm, field) != NULL) { \
+ elm = SPLAY_LEFT(elm, field); \
+ } \
+ } else \
+ elm = NULL; \
+ return (elm); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+ name##_SPLAY_MINMAX(head, val); \
+ return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) { \
+ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+ } else { \
+ int __comp; \
+ name##_SPLAY(head, elm); \
+ __comp = (cmp)(elm, (head)->sph_root); \
+ if(__comp < 0) { \
+ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+ SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+ SPLAY_LEFT((head)->sph_root, field) = NULL; \
+ } else if (__comp > 0) { \
+ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT(elm, field) = (head)->sph_root; \
+ SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+ } else \
+ return ((head)->sph_root); \
+ } \
+ (head)->sph_root = (elm); \
+ return (NULL); \
+} \
+ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *__tmp; \
+ if (SPLAY_EMPTY(head)) \
+ return (NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) { \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+ } else { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+ name##_SPLAY(head, elm); \
+ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+ } \
+ return (elm); \
+ } \
+ return (NULL); \
+} \
+ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+ int __comp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) > 0){ \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while (1) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp > 0) { \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) \
+ for ((x) = SPLAY_MIN(name, head); \
+ (x) != NULL; \
+ (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+ struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+ { NULL }
+
+#define RB_INIT(root) do { \
+ (root)->rbh_root = NULL; \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+ struct type *rbe_left; /* left element */ \
+ struct type *rbe_right; /* right element */ \
+ struct type *rbe_parent; /* parent element */ \
+ int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do { \
+ RB_PARENT(elm, field) = parent; \
+ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+ RB_COLOR(elm, field) = RB_RED; \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) do { \
+ RB_COLOR(black, field) = RB_BLACK; \
+ RB_COLOR(red, field) = RB_RED; \
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x) do {} while (0)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+ (tmp) = RB_RIGHT(elm, field); \
+ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \
+ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_LEFT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+ (tmp) = RB_LEFT(elm, field); \
+ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \
+ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_RIGHT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
+#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \
+ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
+attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+attr struct type *name##_RB_REMOVE(struct name *, struct type *); \
+attr struct type *name##_RB_INSERT(struct name *, struct type *); \
+attr struct type *name##_RB_FIND(struct name *, struct type *); \
+attr struct type *name##_RB_NFIND(struct name *, struct type *); \
+attr struct type *name##_RB_NEXT(struct type *); \
+attr struct type *name##_RB_PREV(struct type *); \
+attr struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+ RB_GENERATE_INTERNAL(name, type, field, cmp,)
+#define RB_GENERATE_STATIC(name, type, field, cmp) \
+ RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static)
+#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \
+attr void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+ struct type *parent, *gparent, *tmp; \
+ while ((parent = RB_PARENT(elm, field)) != NULL && \
+ RB_COLOR(parent, field) == RB_RED) { \
+ gparent = RB_PARENT(parent, field); \
+ if (parent == RB_LEFT(gparent, field)) { \
+ tmp = RB_RIGHT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_RIGHT(parent, field) == elm) { \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+ } else { \
+ tmp = RB_LEFT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_LEFT(parent, field) == elm) { \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_LEFT(head, gparent, tmp, field); \
+ } \
+ } \
+ RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+attr void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+ struct type *tmp; \
+ while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+ elm != RB_ROOT(head)) { \
+ if (RB_LEFT(parent, field) == elm) { \
+ tmp = RB_RIGHT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+ struct type *oleft; \
+ if ((oleft = RB_LEFT(tmp, field)) \
+ != NULL) \
+ RB_COLOR(oleft, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_RIGHT(tmp, field)) \
+ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } else { \
+ tmp = RB_LEFT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+ struct type *oright; \
+ if ((oright = RB_RIGHT(tmp, field)) \
+ != NULL) \
+ RB_COLOR(oright, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_LEFT(head, tmp, oright, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_LEFT(tmp, field)) \
+ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } \
+ } \
+ if (elm) \
+ RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+attr struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *child, *parent, *old = elm; \
+ int color; \
+ if (RB_LEFT(elm, field) == NULL) \
+ child = RB_RIGHT(elm, field); \
+ else if (RB_RIGHT(elm, field) == NULL) \
+ child = RB_LEFT(elm, field); \
+ else { \
+ struct type *left; \
+ elm = RB_RIGHT(elm, field); \
+ while ((left = RB_LEFT(elm, field)) != NULL) \
+ elm = left; \
+ child = RB_RIGHT(elm, field); \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+ if (RB_PARENT(elm, field) == old) \
+ parent = elm; \
+ (elm)->field = (old)->field; \
+ if (RB_PARENT(old, field)) { \
+ if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+ RB_LEFT(RB_PARENT(old, field), field) = elm;\
+ else \
+ RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+ RB_AUGMENT(RB_PARENT(old, field)); \
+ } else \
+ RB_ROOT(head) = elm; \
+ RB_PARENT(RB_LEFT(old, field), field) = elm; \
+ if (RB_RIGHT(old, field)) \
+ RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+ if (parent) { \
+ left = parent; \
+ do { \
+ RB_AUGMENT(left); \
+ } while ((left = RB_PARENT(left, field)) != NULL); \
+ } \
+ goto color; \
+ } \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+color: \
+ if (color == RB_BLACK) \
+ name##_RB_REMOVE_COLOR(head, parent, child); \
+ return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+attr struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp; \
+ struct type *parent = NULL; \
+ int comp = 0; \
+ tmp = RB_ROOT(head); \
+ while (tmp) { \
+ parent = tmp; \
+ comp = (cmp)(elm, parent); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ RB_SET(elm, parent, field); \
+ if (parent != NULL) { \
+ if (comp < 0) \
+ RB_LEFT(parent, field) = elm; \
+ else \
+ RB_RIGHT(parent, field) = elm; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = elm; \
+ name##_RB_INSERT_COLOR(head, elm); \
+ return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+attr struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (NULL); \
+} \
+ \
+/* Finds the first node greater than or equal to the search key */ \
+attr struct type * \
+name##_RB_NFIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *res = NULL; \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) { \
+ res = tmp; \
+ tmp = RB_LEFT(tmp, field); \
+ } \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (res); \
+} \
+ \
+/* ARGSUSED */ \
+attr struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+ if (RB_RIGHT(elm, field)) { \
+ elm = RB_RIGHT(elm, field); \
+ while (RB_LEFT(elm, field)) \
+ elm = RB_LEFT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && \
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+/* ARGSUSED */ \
+attr struct type * \
+name##_RB_PREV(struct type *elm) \
+{ \
+ if (RB_LEFT(elm, field)) { \
+ elm = RB_LEFT(elm, field); \
+ while (RB_RIGHT(elm, field)) \
+ elm = RB_RIGHT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && \
+ (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+attr struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *parent = NULL; \
+ while (tmp) { \
+ parent = tmp; \
+ if (val < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else \
+ tmp = RB_RIGHT(tmp, field); \
+ } \
+ return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NFIND(name, x, y) name##_RB_NFIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_PREV(name, x, y) name##_RB_PREV(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head) \
+ for ((x) = RB_MIN(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_FROM(x, name, y) \
+ for ((x) = (y); \
+ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
+ (x) = (y))
+
+#define RB_FOREACH_SAFE(x, name, head, y) \
+ for ((x) = RB_MIN(name, head); \
+ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
+ (x) = (y))
+
+#define RB_FOREACH_REVERSE(x, name, head) \
+ for ((x) = RB_MAX(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_PREV(x))
+
+#define RB_FOREACH_REVERSE_FROM(x, name, y) \
+ for ((x) = (y); \
+ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
+ (x) = (y))
+
+#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
+ for ((x) = RB_MAX(name, head); \
+ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
+ (x) = (y))
+
+#endif /* _SYS_TREE_H_ */
diff --git a/usr/src/contrib/bhyve/x86/apicreg.h b/usr/src/contrib/bhyve/x86/apicreg.h
new file mode 100644
index 0000000000..24006e2733
--- /dev/null
+++ b/usr/src/contrib/bhyve/x86/apicreg.h
@@ -0,0 +1,455 @@
+/*-
+ * Copyright (c) 1996, by Peter Wemm and Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/x86/include/apicreg.h 259140 2013-12-09 21:08:52Z jhb $
+ */
+
+#ifndef _X86_APICREG_H_
+#define _X86_APICREG_H_
+
+/*
+ * Local && I/O APIC definitions.
+ */
+
+/*
+ * Pentium P54C+ Built-in APIC
+ * (Advanced programmable Interrupt Controller)
+ *
+ * Base Address of Built-in APIC in memory location
+ * is 0xfee00000.
+ *
+ * Map of APIC Registers:
+ *
+ * Offset (hex) Description Read/Write state
+ * 000 Reserved
+ * 010 Reserved
+ * 020 ID Local APIC ID R/W
+ * 030 VER Local APIC Version R
+ * 040 Reserved
+ * 050 Reserved
+ * 060 Reserved
+ * 070 Reserved
+ * 080 Task Priority Register R/W
+ * 090 Arbitration Priority Register R
+ * 0A0 Processor Priority Register R
+ * 0B0 EOI Register W
+ * 0C0 RRR Remote read R
+ * 0D0 Logical Destination R/W
+ * 0E0 Destination Format Register 0..27 R; 28..31 R/W
+ * 0F0 SVR Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W
+ * 100 ISR 000-031 R
+ * 110 ISR 032-063 R
+ * 120 ISR 064-095 R
+ * 130 ISR 095-128 R
+ * 140 ISR 128-159 R
+ * 150 ISR 160-191 R
+ * 160 ISR 192-223 R
+ * 170 ISR 224-255 R
+ * 180 TMR 000-031 R
+ * 190 TMR 032-063 R
+ * 1A0 TMR 064-095 R
+ * 1B0 TMR 095-128 R
+ * 1C0 TMR 128-159 R
+ * 1D0 TMR 160-191 R
+ * 1E0 TMR 192-223 R
+ * 1F0 TMR 224-255 R
+ * 200 IRR 000-031 R
+ * 210 IRR 032-063 R
+ * 220 IRR 064-095 R
+ * 230 IRR 095-128 R
+ * 240 IRR 128-159 R
+ * 250 IRR 160-191 R
+ * 260 IRR 192-223 R
+ * 270 IRR 224-255 R
+ * 280 Error Status Register R
+ * 290 Reserved
+ * 2A0 Reserved
+ * 2B0 Reserved
+ * 2C0 Reserved
+ * 2D0 Reserved
+ * 2E0 Reserved
+ * 2F0 Local Vector Table (CMCI) R/W
+ * 300 ICR_LOW Interrupt Command Reg. (0-31) R/W
+ * 310 ICR_HI Interrupt Command Reg. (32-63) R/W
+ * 320 Local Vector Table (Timer) R/W
+ * 330 Local Vector Table (Thermal) R/W (PIV+)
+ * 340 Local Vector Table (Performance) R/W (P6+)
+ * 350 LVT1 Local Vector Table (LINT0) R/W
+ * 360 LVT2 Local Vector Table (LINT1) R/W
+ * 370 LVT3 Local Vector Table (ERROR) R/W
+ * 380 Initial Count Reg. for Timer R/W
+ * 390 Current Count of Timer R
+ * 3A0 Reserved
+ * 3B0 Reserved
+ * 3C0 Reserved
+ * 3D0 Reserved
+ * 3E0 Timer Divide Configuration Reg. R/W
+ * 3F0 Reserved
+ */
+
+
+/******************************************************************************
+ * global defines, etc.
+ */
+
+
+/******************************************************************************
+ * LOCAL APIC structure
+ */
+
+#ifndef LOCORE
+#include <sys/types.h>
+
+#define PAD3 int : 32; int : 32; int : 32
+#define PAD4 int : 32; int : 32; int : 32; int : 32
+
+struct LAPIC {
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ u_int32_t id; PAD3;
+ u_int32_t version; PAD3;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ u_int32_t tpr; PAD3;
+ u_int32_t apr; PAD3;
+ u_int32_t ppr; PAD3;
+ u_int32_t eoi; PAD3;
+ /* reserved */ PAD4;
+ u_int32_t ldr; PAD3;
+ u_int32_t dfr; PAD3;
+ u_int32_t svr; PAD3;
+ u_int32_t isr0; PAD3;
+ u_int32_t isr1; PAD3;
+ u_int32_t isr2; PAD3;
+ u_int32_t isr3; PAD3;
+ u_int32_t isr4; PAD3;
+ u_int32_t isr5; PAD3;
+ u_int32_t isr6; PAD3;
+ u_int32_t isr7; PAD3;
+ u_int32_t tmr0; PAD3;
+ u_int32_t tmr1; PAD3;
+ u_int32_t tmr2; PAD3;
+ u_int32_t tmr3; PAD3;
+ u_int32_t tmr4; PAD3;
+ u_int32_t tmr5; PAD3;
+ u_int32_t tmr6; PAD3;
+ u_int32_t tmr7; PAD3;
+ u_int32_t irr0; PAD3;
+ u_int32_t irr1; PAD3;
+ u_int32_t irr2; PAD3;
+ u_int32_t irr3; PAD3;
+ u_int32_t irr4; PAD3;
+ u_int32_t irr5; PAD3;
+ u_int32_t irr6; PAD3;
+ u_int32_t irr7; PAD3;
+ u_int32_t esr; PAD3;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ u_int32_t lvt_cmci; PAD3;
+ u_int32_t icr_lo; PAD3;
+ u_int32_t icr_hi; PAD3;
+ u_int32_t lvt_timer; PAD3;
+ u_int32_t lvt_thermal; PAD3;
+ u_int32_t lvt_pcint; PAD3;
+ u_int32_t lvt_lint0; PAD3;
+ u_int32_t lvt_lint1; PAD3;
+ u_int32_t lvt_error; PAD3;
+ u_int32_t icr_timer; PAD3;
+ u_int32_t ccr_timer; PAD3;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ /* reserved */ PAD4;
+ u_int32_t dcr_timer; PAD3;
+ /* reserved */ PAD4;
+};
+
+typedef struct LAPIC lapic_t;
+
+/******************************************************************************
+ * I/O APIC structure
+ */
+
+struct IOAPIC {
+ u_int32_t ioregsel; PAD3;
+ u_int32_t iowin; PAD3;
+};
+
+typedef struct IOAPIC ioapic_t;
+
+#undef PAD4
+#undef PAD3
+
+#endif /* !LOCORE */
+
+
+/******************************************************************************
+ * various code 'logical' values
+ */
+
+/******************************************************************************
+ * LOCAL APIC defines
+ */
+
+/* default physical locations of LOCAL (CPU) APICs */
+#define DEFAULT_APIC_BASE 0xfee00000
+
+/* constants relating to APIC ID registers */
+#define APIC_ID_MASK 0xff000000
+#define APIC_ID_SHIFT 24
+#define APIC_ID_CLUSTER 0xf0
+#define APIC_ID_CLUSTER_ID 0x0f
+#define APIC_MAX_CLUSTER 0xe
+#define APIC_MAX_INTRACLUSTER_ID 3
+#define APIC_ID_CLUSTER_SHIFT 4
+
+/* fields in VER */
+#define APIC_VER_VERSION 0x000000ff
+#define APIC_VER_MAXLVT 0x00ff0000
+#define MAXLVTSHIFT 16
+#define APIC_VER_EOI_SUPPRESSION 0x01000000
+
+/* fields in LDR */
+#define APIC_LDR_RESERVED 0x00ffffff
+
+/* fields in DFR */
+#define APIC_DFR_RESERVED 0x0fffffff
+#define APIC_DFR_MODEL_MASK 0xf0000000
+#define APIC_DFR_MODEL_FLAT 0xf0000000
+#define APIC_DFR_MODEL_CLUSTER 0x00000000
+
+/* fields in SVR */
+#define APIC_SVR_VECTOR 0x000000ff
+#define APIC_SVR_VEC_PROG 0x000000f0
+#define APIC_SVR_VEC_FIX 0x0000000f
+#define APIC_SVR_ENABLE 0x00000100
+# define APIC_SVR_SWDIS 0x00000000
+# define APIC_SVR_SWEN 0x00000100
+#define APIC_SVR_FOCUS 0x00000200
+# define APIC_SVR_FEN 0x00000000
+# define APIC_SVR_FDIS 0x00000200
+#define APIC_SVR_EOI_SUPPRESSION 0x00001000
+
+/* fields in TPR */
+#define APIC_TPR_PRIO 0x000000ff
+# define APIC_TPR_INT 0x000000f0
+# define APIC_TPR_SUB 0x0000000f
+
+/* fields in ESR */
+#define APIC_ESR_SEND_CS_ERROR 0x00000001
+#define APIC_ESR_RECEIVE_CS_ERROR 0x00000002
+#define APIC_ESR_SEND_ACCEPT 0x00000004
+#define APIC_ESR_RECEIVE_ACCEPT 0x00000008
+#define APIC_ESR_SEND_ILLEGAL_VECTOR 0x00000020
+#define APIC_ESR_RECEIVE_ILLEGAL_VECTOR 0x00000040
+#define APIC_ESR_ILLEGAL_REGISTER 0x00000080
+
+/* fields in ICR_LOW */
+#define APIC_VECTOR_MASK 0x000000ff
+
+#define APIC_DELMODE_MASK 0x00000700
+# define APIC_DELMODE_FIXED 0x00000000
+# define APIC_DELMODE_LOWPRIO 0x00000100
+# define APIC_DELMODE_SMI 0x00000200
+# define APIC_DELMODE_RR 0x00000300
+# define APIC_DELMODE_NMI 0x00000400
+# define APIC_DELMODE_INIT 0x00000500
+# define APIC_DELMODE_STARTUP 0x00000600
+# define APIC_DELMODE_RESV 0x00000700
+
+#define APIC_DESTMODE_MASK 0x00000800
+# define APIC_DESTMODE_PHY 0x00000000
+# define APIC_DESTMODE_LOG 0x00000800
+
+#define APIC_DELSTAT_MASK 0x00001000
+# define APIC_DELSTAT_IDLE 0x00000000
+# define APIC_DELSTAT_PEND 0x00001000
+
+#define APIC_RESV1_MASK 0x00002000
+
+#define APIC_LEVEL_MASK 0x00004000
+# define APIC_LEVEL_DEASSERT 0x00000000
+# define APIC_LEVEL_ASSERT 0x00004000
+
+#define APIC_TRIGMOD_MASK 0x00008000
+# define APIC_TRIGMOD_EDGE 0x00000000
+# define APIC_TRIGMOD_LEVEL 0x00008000
+
+#define APIC_RRSTAT_MASK 0x00030000
+# define APIC_RRSTAT_INVALID 0x00000000
+# define APIC_RRSTAT_INPROG 0x00010000
+# define APIC_RRSTAT_VALID 0x00020000
+# define APIC_RRSTAT_RESV 0x00030000
+
+#define APIC_DEST_MASK 0x000c0000
+# define APIC_DEST_DESTFLD 0x00000000
+# define APIC_DEST_SELF 0x00040000
+# define APIC_DEST_ALLISELF 0x00080000
+# define APIC_DEST_ALLESELF 0x000c0000
+
+#define APIC_RESV2_MASK 0xfff00000
+
+#define APIC_ICRLO_RESV_MASK (APIC_RESV1_MASK | APIC_RESV2_MASK)
+
+/* fields in LVT1/2 */
+#define APIC_LVT_VECTOR 0x000000ff
+#define APIC_LVT_DM 0x00000700
+# define APIC_LVT_DM_FIXED 0x00000000
+# define APIC_LVT_DM_SMI 0x00000200
+# define APIC_LVT_DM_NMI 0x00000400
+# define APIC_LVT_DM_INIT 0x00000500
+# define APIC_LVT_DM_EXTINT 0x00000700
+#define APIC_LVT_DS 0x00001000
+#define APIC_LVT_IIPP 0x00002000
+#define APIC_LVT_IIPP_INTALO 0x00002000
+#define APIC_LVT_IIPP_INTAHI 0x00000000
+#define APIC_LVT_RIRR 0x00004000
+#define APIC_LVT_TM 0x00008000
+#define APIC_LVT_M 0x00010000
+
+
+/* fields in LVT Timer */
+#define APIC_LVTT_VECTOR 0x000000ff
+#define APIC_LVTT_DS 0x00001000
+#define APIC_LVTT_M 0x00010000
+#define APIC_LVTT_TM 0x00020000
+# define APIC_LVTT_TM_ONE_SHOT 0x00000000
+# define APIC_LVTT_TM_PERIODIC 0x00020000
+
+
+/* APIC timer current count */
+#define APIC_TIMER_MAX_COUNT 0xffffffff
+
+/* fields in TDCR */
+#define APIC_TDCR_2 0x00
+#define APIC_TDCR_4 0x01
+#define APIC_TDCR_8 0x02
+#define APIC_TDCR_16 0x03
+#define APIC_TDCR_32 0x08
+#define APIC_TDCR_64 0x09
+#define APIC_TDCR_128 0x0a
+#define APIC_TDCR_1 0x0b
+
+/* LVT table indices */
+#define APIC_LVT_LINT0 0
+#define APIC_LVT_LINT1 1
+#define APIC_LVT_TIMER 2
+#define APIC_LVT_ERROR 3
+#define APIC_LVT_PMC 4
+#define APIC_LVT_THERMAL 5
+#define APIC_LVT_CMCI 6
+#define APIC_LVT_MAX APIC_LVT_CMCI
+
+/******************************************************************************
+ * I/O APIC defines
+ */
+
+/* default physical locations of an IO APIC */
+#define DEFAULT_IO_APIC_BASE 0xfec00000
+
+/* window register offset */
+#define IOAPIC_WINDOW 0x10
+#define IOAPIC_EOIR 0x40
+
+/* indexes into IO APIC */
+#define IOAPIC_ID 0x00
+#define IOAPIC_VER 0x01
+#define IOAPIC_ARB 0x02
+#define IOAPIC_REDTBL 0x10
+#define IOAPIC_REDTBL0 IOAPIC_REDTBL
+#define IOAPIC_REDTBL1 (IOAPIC_REDTBL+0x02)
+#define IOAPIC_REDTBL2 (IOAPIC_REDTBL+0x04)
+#define IOAPIC_REDTBL3 (IOAPIC_REDTBL+0x06)
+#define IOAPIC_REDTBL4 (IOAPIC_REDTBL+0x08)
+#define IOAPIC_REDTBL5 (IOAPIC_REDTBL+0x0a)
+#define IOAPIC_REDTBL6 (IOAPIC_REDTBL+0x0c)
+#define IOAPIC_REDTBL7 (IOAPIC_REDTBL+0x0e)
+#define IOAPIC_REDTBL8 (IOAPIC_REDTBL+0x10)
+#define IOAPIC_REDTBL9 (IOAPIC_REDTBL+0x12)
+#define IOAPIC_REDTBL10 (IOAPIC_REDTBL+0x14)
+#define IOAPIC_REDTBL11 (IOAPIC_REDTBL+0x16)
+#define IOAPIC_REDTBL12 (IOAPIC_REDTBL+0x18)
+#define IOAPIC_REDTBL13 (IOAPIC_REDTBL+0x1a)
+#define IOAPIC_REDTBL14 (IOAPIC_REDTBL+0x1c)
+#define IOAPIC_REDTBL15 (IOAPIC_REDTBL+0x1e)
+#define IOAPIC_REDTBL16 (IOAPIC_REDTBL+0x20)
+#define IOAPIC_REDTBL17 (IOAPIC_REDTBL+0x22)
+#define IOAPIC_REDTBL18 (IOAPIC_REDTBL+0x24)
+#define IOAPIC_REDTBL19 (IOAPIC_REDTBL+0x26)
+#define IOAPIC_REDTBL20 (IOAPIC_REDTBL+0x28)
+#define IOAPIC_REDTBL21 (IOAPIC_REDTBL+0x2a)
+#define IOAPIC_REDTBL22 (IOAPIC_REDTBL+0x2c)
+#define IOAPIC_REDTBL23 (IOAPIC_REDTBL+0x2e)
+
+/* fields in VER */
+#define IOART_VER_VERSION 0x000000ff
+#define IOART_VER_MAXREDIR 0x00ff0000
+#define MAXREDIRSHIFT 16
+
+/*
+ * fields in the IO APIC's redirection table entries
+ */
+#define IOART_DEST APIC_ID_MASK /* broadcast addr: all APICs */
+
+#define IOART_RESV 0x00fe0000 /* reserved */
+
+#define IOART_INTMASK 0x00010000 /* R/W: INTerrupt mask */
+# define IOART_INTMCLR 0x00000000 /* clear, allow INTs */
+# define IOART_INTMSET 0x00010000 /* set, inhibit INTs */
+
+#define IOART_TRGRMOD 0x00008000 /* R/W: trigger mode */
+# define IOART_TRGREDG 0x00000000 /* edge */
+# define IOART_TRGRLVL 0x00008000 /* level */
+
+#define IOART_REM_IRR 0x00004000 /* RO: remote IRR */
+
+#define IOART_INTPOL 0x00002000 /* R/W: INT input pin polarity */
+# define IOART_INTAHI 0x00000000 /* active high */
+# define IOART_INTALO 0x00002000 /* active low */
+
+#define IOART_DELIVS 0x00001000 /* RO: delivery status */
+
+#define IOART_DESTMOD 0x00000800 /* R/W: destination mode */
+# define IOART_DESTPHY 0x00000000 /* physical */
+# define IOART_DESTLOG 0x00000800 /* logical */
+
+#define IOART_DELMOD 0x00000700 /* R/W: delivery mode */
+# define IOART_DELFIXED 0x00000000 /* fixed */
+# define IOART_DELLOPRI 0x00000100 /* lowest priority */
+# define IOART_DELSMI 0x00000200 /* System Management INT */
+# define IOART_DELRSV1 0x00000300 /* reserved */
+# define IOART_DELNMI 0x00000400 /* NMI signal */
+# define IOART_DELINIT 0x00000500 /* INIT signal */
+# define IOART_DELRSV2 0x00000600 /* reserved */
+# define IOART_DELEXINT 0x00000700 /* External INTerrupt */
+
+#define IOART_INTVEC 0x000000ff /* R/W: INTerrupt vector field */
+
+#endif /* _X86_APICREG_H_ */
diff --git a/usr/src/contrib/bhyve/x86/mptable.h b/usr/src/contrib/bhyve/x86/mptable.h
new file mode 100644
index 0000000000..8f3c62a295
--- /dev/null
+++ b/usr/src/contrib/bhyve/x86/mptable.h
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/x86/include/mptable.h 259228 2013-12-11 21:19:04Z jhb $
+ */
+
+#ifndef __MACHINE_MPTABLE_H__
+#define __MACHINE_MPTABLE_H__
+
+enum busTypes {
+ NOBUS = 0,
+ CBUS = 1,
+ CBUSII = 2,
+ EISA = 3,
+ ISA = 6,
+ MCA = 9,
+ PCI = 13,
+ XPRESS = 18,
+ MAX_BUSTYPE = 18,
+ UNKNOWN_BUSTYPE = 0xff
+};
+
+/* MP Floating Pointer Structure */
+typedef struct MPFPS {
+ uint8_t signature[4];
+ uint32_t pap;
+ uint8_t length;
+ uint8_t spec_rev;
+ uint8_t checksum;
+ uint8_t config_type;
+ uint8_t mpfb2;
+ uint8_t mpfb3;
+ uint8_t mpfb4;
+ uint8_t mpfb5;
+} __packed *mpfps_t;
+
+#define MPFB2_IMCR_PRESENT 0x80
+#define MPFB2_MUL_CLK_SRCS 0x40
+
+/* MP Configuration Table Header */
+typedef struct MPCTH {
+ uint8_t signature[4];
+ uint16_t base_table_length;
+ uint8_t spec_rev;
+ uint8_t checksum;
+ uint8_t oem_id[8];
+ uint8_t product_id[12];
+ uint32_t oem_table_pointer;
+ uint16_t oem_table_size;
+ uint16_t entry_count;
+ uint32_t apic_address;
+ uint16_t extended_table_length;
+ uint8_t extended_table_checksum;
+ uint8_t reserved;
+} __packed *mpcth_t;
+
+/* Base table entries */
+
+#define MPCT_ENTRY_PROCESSOR 0
+#define MPCT_ENTRY_BUS 1
+#define MPCT_ENTRY_IOAPIC 2
+#define MPCT_ENTRY_INT 3
+#define MPCT_ENTRY_LOCAL_INT 4
+
+typedef struct PROCENTRY {
+ uint8_t type;
+ uint8_t apic_id;
+ uint8_t apic_version;
+ uint8_t cpu_flags;
+ uint32_t cpu_signature;
+ uint32_t feature_flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+} __packed *proc_entry_ptr;
+
+#define PROCENTRY_FLAG_EN 0x01
+#define PROCENTRY_FLAG_BP 0x02
+
+typedef struct BUSENTRY {
+ uint8_t type;
+ uint8_t bus_id;
+ uint8_t bus_type[6];
+} __packed *bus_entry_ptr;
+
+typedef struct IOAPICENTRY {
+ uint8_t type;
+ uint8_t apic_id;
+ uint8_t apic_version;
+ uint8_t apic_flags;
+ uint32_t apic_address;
+} __packed *io_apic_entry_ptr;
+
+#define IOAPICENTRY_FLAG_EN 0x01
+
+typedef struct INTENTRY {
+ uint8_t type;
+ uint8_t int_type;
+ uint16_t int_flags;
+ uint8_t src_bus_id;
+ uint8_t src_bus_irq;
+ uint8_t dst_apic_id;
+ uint8_t dst_apic_int;
+} __packed *int_entry_ptr;
+
+#define INTENTRY_TYPE_INT 0
+#define INTENTRY_TYPE_NMI 1
+#define INTENTRY_TYPE_SMI 2
+#define INTENTRY_TYPE_EXTINT 3
+
+#define INTENTRY_FLAGS_POLARITY 0x3
+#define INTENTRY_FLAGS_POLARITY_CONFORM 0x0
+#define INTENTRY_FLAGS_POLARITY_ACTIVEHI 0x1
+#define INTENTRY_FLAGS_POLARITY_ACTIVELO 0x3
+#define INTENTRY_FLAGS_TRIGGER 0xc
+#define INTENTRY_FLAGS_TRIGGER_CONFORM 0x0
+#define INTENTRY_FLAGS_TRIGGER_EDGE 0x4
+#define INTENTRY_FLAGS_TRIGGER_LEVEL 0xc
+
+/* Extended table entries */
+
+typedef struct EXTENTRY {
+ uint8_t type;
+ uint8_t length;
+} __packed *ext_entry_ptr;
+
+#define MPCT_EXTENTRY_SAS 0x80
+#define MPCT_EXTENTRY_BHD 0x81
+#define MPCT_EXTENTRY_CBASM 0x82
+
+typedef struct SASENTRY {
+ uint8_t type;
+ uint8_t length;
+ uint8_t bus_id;
+ uint8_t address_type;
+ uint64_t address_base;
+ uint64_t address_length;
+} __packed *sas_entry_ptr;
+
+#define SASENTRY_TYPE_IO 0
+#define SASENTRY_TYPE_MEMORY 1
+#define SASENTRY_TYPE_PREFETCH 2
+
+typedef struct BHDENTRY {
+ uint8_t type;
+ uint8_t length;
+ uint8_t bus_id;
+ uint8_t bus_info;
+ uint8_t parent_bus;
+ uint8_t reserved[3];
+} __packed *bhd_entry_ptr;
+
+#define BHDENTRY_INFO_SUBTRACTIVE_DECODE 0x1
+
+typedef struct CBASMENTRY {
+ uint8_t type;
+ uint8_t length;
+ uint8_t bus_id;
+ uint8_t address_mod;
+ uint32_t predefined_range;
+} __packed *cbasm_entry_ptr;
+
+#define CBASMENTRY_ADDRESS_MOD_ADD 0x0
+#define CBASMENTRY_ADDRESS_MOD_SUBTRACT 0x1
+
+#define CBASMENTRY_RANGE_ISA_IO 0
+#define CBASMENTRY_RANGE_VGA_IO 1
+
+#ifdef _KERNEL
+struct mptable_hostb_softc {
+#ifdef NEW_PCIB
+ struct pcib_host_resources sc_host_res;
+ int sc_decodes_vga_io;
+ int sc_decodes_isa_io;
+#endif
+};
+
+#ifdef NEW_PCIB
+void mptable_pci_host_res_init(device_t pcib);
+#endif
+int mptable_pci_probe_table(int bus);
+int mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin);
+#endif
+#endif /* !__MACHINE_MPTABLE_H__ */
diff --git a/usr/src/contrib/bhyve/x86/psl.h b/usr/src/contrib/bhyve/x86/psl.h
new file mode 100644
index 0000000000..6934b4feb7
--- /dev/null
+++ b/usr/src/contrib/bhyve/x86/psl.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)psl.h 5.2 (Berkeley) 1/18/91
+ * $FreeBSD: head/sys/x86/include/psl.h 258135 2013-11-14 15:37:20Z emaste $
+ */
+
+#ifndef _MACHINE_PSL_H_
+#define _MACHINE_PSL_H_
+
+/*
+ * 386 processor status longword.
+ */
+#define PSL_C 0x00000001 /* carry bit */
+#define PSL_PF 0x00000004 /* parity bit */
+#define PSL_AF 0x00000010 /* bcd carry bit */
+#define PSL_Z 0x00000040 /* zero bit */
+#define PSL_N 0x00000080 /* negative bit */
+#define PSL_T 0x00000100 /* trace enable bit */
+#define PSL_I 0x00000200 /* interrupt enable bit */
+#define PSL_D 0x00000400 /* string instruction direction bit */
+#define PSL_V 0x00000800 /* overflow bit */
+#define PSL_IOPL 0x00003000 /* i/o privilege level */
+#define PSL_NT 0x00004000 /* nested task bit */
+#define PSL_RF 0x00010000 /* resume flag bit */
+#define PSL_VM 0x00020000 /* virtual 8086 mode bit */
+#define PSL_AC 0x00040000 /* alignment checking */
+#define PSL_VIF 0x00080000 /* virtual interrupt enable */
+#define PSL_VIP 0x00100000 /* virtual interrupt pending */
+#define PSL_ID 0x00200000 /* identification bit */
+
+/*
+ * The i486 manual says that we are not supposed to change reserved flags,
+ * but this is too much trouble since the reserved flags depend on the cpu
+ * and setting them to their historical values works in practice.
+ */
+#define PSL_RESERVED_DEFAULT 0x00000002
+
+/*
+ * Initial flags for kernel and user mode. The kernel later inherits
+ * PSL_I and some other flags from user mode.
+ */
+#define PSL_KERNEL PSL_RESERVED_DEFAULT
+#define PSL_USER (PSL_RESERVED_DEFAULT | PSL_I)
+
+/*
+ * Bits that can be changed in user mode on 486's. We allow these bits
+ * to be changed using ptrace(), sigreturn() and procfs. Setting PS_NT
+ * is undesirable but it may as well be allowed since users can inflict
+ * it on the kernel directly. Changes to PSL_AC are silently ignored on
+ * 386's.
+ *
+ * Users are allowed to change the privileged flag PSL_RF. The cpu sets PSL_RF
+ * in tf_eflags for faults. Debuggers should sometimes set it there too.
+ * tf_eflags is kept in the signal context during signal handling and there is
+ * no other place to remember it, so the PSL_RF bit may be corrupted by the
+ * signal handler without us knowing. Corruption of the PSL_RF bit at worst
+ * causes one more or one less debugger trap, so allowing it is fairly
+ * harmless.
+ */
+#define PSL_USERCHANGE (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_T \
+ | PSL_D | PSL_V | PSL_NT | PSL_RF | PSL_AC | PSL_ID)
+
+#endif /* !_MACHINE_PSL_H_ */
diff --git a/usr/src/contrib/bhyve/x86/segments.h b/usr/src/contrib/bhyve/x86/segments.h
new file mode 100644
index 0000000000..1b8c4a3c1c
--- /dev/null
+++ b/usr/src/contrib/bhyve/x86/segments.h
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)segments.h 7.1 (Berkeley) 5/9/91
+ * $FreeBSD$
+ */
+
+#ifndef _X86_SEGMENTS_H_
+#define _X86_SEGMENTS_H_
+
+/*
+ * X86 Segmentation Data Structures and definitions
+ */
+
+/*
+ * Selectors
+ */
+#define SEL_RPL_MASK 3 /* requester priv level */
+#define ISPL(s) ((s)&3) /* priority level of a selector */
+#define SEL_KPL 0 /* kernel priority level */
+#define SEL_UPL 3 /* user priority level */
+#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
+#define SEL_LDT 4 /* local descriptor table */
+#define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */
+#define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */
+#define GSEL(s,r) (((s)<<3) | r) /* a global selector */
+
+/*
+ * User segment descriptors (%cs, %ds etc for i386 apps. 64 bit wide)
+ * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl,
+ * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p.
+ */
+struct segment_descriptor {
+ unsigned sd_lolimit:16; /* segment extent (lsb) */
+ unsigned sd_lobase:24; /* segment base address (lsb) */
+ unsigned sd_type:5; /* segment type */
+ unsigned sd_dpl:2; /* segment descriptor priority level */
+ unsigned sd_p:1; /* segment descriptor present */
+ unsigned sd_hilimit:4; /* segment extent (msb) */
+ unsigned sd_xx:2; /* unused */
+ unsigned sd_def32:1; /* default 32 vs 16 bit size */
+ unsigned sd_gran:1; /* limit granularity (byte/page units)*/
+ unsigned sd_hibase:8; /* segment base address (msb) */
+} __packed;
+
+struct user_segment_descriptor {
+ unsigned sd_lolimit:16; /* segment extent (lsb) */
+ unsigned sd_lobase:24; /* segment base address (lsb) */
+ unsigned sd_type:5; /* segment type */
+ unsigned sd_dpl:2; /* segment descriptor priority level */
+ unsigned sd_p:1; /* segment descriptor present */
+ unsigned sd_hilimit:4; /* segment extent (msb) */
+ unsigned sd_xx:1; /* unused */
+ unsigned sd_long:1; /* long mode (cs only) */
+ unsigned sd_def32:1; /* default 32 vs 16 bit size */
+ unsigned sd_gran:1; /* limit granularity (byte/page units)*/
+ unsigned sd_hibase:8; /* segment base address (msb) */
+} __packed;
+
+#define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24)
+#define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \
+ (sd)->sd_hibase = ((b) >> 24);
+#define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16)
+#define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \
+ (sd)->sd_hilimit = ((l) >> 16);
+
+#ifdef __i386__
+/*
+ * Gate descriptors (e.g. indirect descriptors)
+ */
+struct gate_descriptor {
+ unsigned gd_looffset:16; /* gate offset (lsb) */
+ unsigned gd_selector:16; /* gate segment selector */
+ unsigned gd_stkcpy:5; /* number of stack wds to cpy */
+ unsigned gd_xx:3; /* unused */
+ unsigned gd_type:5; /* segment type */
+ unsigned gd_dpl:2; /* segment descriptor priority level */
+ unsigned gd_p:1; /* segment descriptor present */
+ unsigned gd_hioffset:16; /* gate offset (msb) */
+} __packed;
+
+/*
+ * Generic descriptor
+ */
+union descriptor {
+ struct segment_descriptor sd;
+ struct gate_descriptor gd;
+};
+#else
+/*
+ * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit)
+ * Only interrupt and trap gates have gd_ist.
+ */
+struct gate_descriptor {
+ uint64_t gd_looffset:16; /* gate offset (lsb) */
+ uint64_t gd_selector:16; /* gate segment selector */
+ uint64_t gd_ist:3; /* IST table index */
+ uint64_t gd_xx:5; /* unused */
+ uint64_t gd_type:5; /* segment type */
+ uint64_t gd_dpl:2; /* segment descriptor priority level */
+ uint64_t gd_p:1; /* segment descriptor present */
+ uint64_t gd_hioffset:48; /* gate offset (msb) */
+ uint64_t sd_xx1:32;
+} __packed;
+
+/*
+ * Generic descriptor
+ */
+union descriptor {
+ struct user_segment_descriptor sd;
+ struct gate_descriptor gd;
+};
+#endif
+
+ /* system segments and gate types */
+#define SDT_SYSNULL 0 /* system null */
+#define SDT_SYS286TSS 1 /* system 286 TSS available */
+#define SDT_SYSLDT 2 /* system local descriptor table */
+#define SDT_SYS286BSY 3 /* system 286 TSS busy */
+#define SDT_SYS286CGT 4 /* system 286 call gate */
+#define SDT_SYSTASKGT 5 /* system task gate */
+#define SDT_SYS286IGT 6 /* system 286 interrupt gate */
+#define SDT_SYS286TGT 7 /* system 286 trap gate */
+#define SDT_SYSNULL2 8 /* system null again */
+#define SDT_SYS386TSS 9 /* system 386 TSS available */
+#define SDT_SYSTSS 9 /* system available 64 bit TSS */
+#define SDT_SYSNULL3 10 /* system null again */
+#define SDT_SYS386BSY 11 /* system 386 TSS busy */
+#define SDT_SYSBSY 11 /* system busy 64 bit TSS */
+#define SDT_SYS386CGT 12 /* system 386 call gate */
+#define SDT_SYSCGT 12 /* system 64 bit call gate */
+#define SDT_SYSNULL4 13 /* system null again */
+#define SDT_SYS386IGT 14 /* system 386 interrupt gate */
+#define SDT_SYSIGT 14 /* system 64 bit interrupt gate */
+#define SDT_SYS386TGT 15 /* system 386 trap gate */
+#define SDT_SYSTGT 15 /* system 64 bit trap gate */
+
+ /* memory segment types */
+#define SDT_MEMRO 16 /* memory read only */
+#define SDT_MEMROA 17 /* memory read only accessed */
+#define SDT_MEMRW 18 /* memory read write */
+#define SDT_MEMRWA 19 /* memory read write accessed */
+#define SDT_MEMROD 20 /* memory read only expand dwn limit */
+#define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */
+#define SDT_MEMRWD 22 /* memory read write expand dwn limit */
+#define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed*/
+#define SDT_MEME 24 /* memory execute only */
+#define SDT_MEMEA 25 /* memory execute only accessed */
+#define SDT_MEMER 26 /* memory execute read */
+#define SDT_MEMERA 27 /* memory execute read accessed */
+#define SDT_MEMEC 28 /* memory execute only conforming */
+#define SDT_MEMEAC 29 /* memory execute only accessed conforming */
+#define SDT_MEMERC 30 /* memory execute read conforming */
+#define SDT_MEMERAC 31 /* memory execute read accessed conforming */
+
+/*
+ * Size of IDT table
+ */
+#define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */
+#define NRSVIDT 32 /* reserved entries for cpu exceptions */
+
+/*
+ * Entries in the Interrupt Descriptor Table (IDT)
+ */
+#define IDT_DE 0 /* #DE: Divide Error */
+#define IDT_DB 1 /* #DB: Debug */
+#define IDT_NMI 2 /* Nonmaskable External Interrupt */
+#define IDT_BP 3 /* #BP: Breakpoint */
+#define IDT_OF 4 /* #OF: Overflow */
+#define IDT_BR 5 /* #BR: Bound Range Exceeded */
+#define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */
+#define IDT_NM 7 /* #NM: No Math Coprocessor */
+#define IDT_DF 8 /* #DF: Double Fault */
+#define IDT_FPUGP 9 /* Coprocessor Segment Overrun */
+#define IDT_TS 10 /* #TS: Invalid TSS */
+#define IDT_NP 11 /* #NP: Segment Not Present */
+#define IDT_SS 12 /* #SS: Stack Segment Fault */
+#define IDT_GP 13 /* #GP: General Protection Fault */
+#define IDT_PF 14 /* #PF: Page Fault */
+#define IDT_MF 16 /* #MF: FPU Floating-Point Error */
+#define IDT_AC 17 /* #AC: Alignment Check */
+#define IDT_MC 18 /* #MC: Machine Check */
+#define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */
+#define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */
+#define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */
+#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */
+#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */
+
+#if defined(__i386__)
+/*
+ * Entries in the Global Descriptor Table (GDT)
+ * Note that each 4 entries share a single 32 byte L1 cache line.
+ * Some of the fast syscall instructions require a specific order here.
+ */
+#define GNULL_SEL 0 /* Null Descriptor */
+#define GPRIV_SEL 1 /* SMP Per-Processor Private Data */
+#define GUFS_SEL 2 /* User %fs Descriptor (order critical: 1) */
+#define GUGS_SEL 3 /* User %gs Descriptor (order critical: 2) */
+#define GCODE_SEL 4 /* Kernel Code Descriptor (order critical: 1) */
+#define GDATA_SEL 5 /* Kernel Data Descriptor (order critical: 2) */
+#define GUCODE_SEL 6 /* User Code Descriptor (order critical: 3) */
+#define GUDATA_SEL 7 /* User Data Descriptor (order critical: 4) */
+#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */
+#define GPROC0_SEL 9 /* Task state process slot zero and up */
+#define GLDT_SEL 10 /* Default User LDT */
+#define GUSERLDT_SEL 11 /* User LDT */
+#define GPANIC_SEL 12 /* Task state to consider panic from */
+#define GBIOSCODE32_SEL 13 /* BIOS interface (32bit Code) */
+#define GBIOSCODE16_SEL 14 /* BIOS interface (16bit Code) */
+#define GBIOSDATA_SEL 15 /* BIOS interface (Data) */
+#define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */
+#define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */
+#define GNDIS_SEL 18 /* For the NDIS layer */
+#define NGDT 19
+
+/*
+ * Entries in the Local Descriptor Table (LDT)
+ */
+#define LSYS5CALLS_SEL 0 /* forced by intel BCS */
+#define LSYS5SIGR_SEL 1
+#define LUCODE_SEL 3
+#define LUDATA_SEL 5
+#define NLDT (LUDATA_SEL + 1)
+
+#else /* !__i386__ */
+/*
+ * Entries in the Global Descriptor Table (GDT)
+ */
+#define GNULL_SEL 0 /* Null Descriptor */
+#define GNULL2_SEL 1 /* Null Descriptor */
+#define GUFS32_SEL 2 /* User 32 bit %fs Descriptor */
+#define GUGS32_SEL 3 /* User 32 bit %gs Descriptor */
+#define GCODE_SEL 4 /* Kernel Code Descriptor */
+#define GDATA_SEL 5 /* Kernel Data Descriptor */
+#define GUCODE32_SEL 6 /* User 32 bit code Descriptor */
+#define GUDATA_SEL 7 /* User 32/64 bit Data Descriptor */
+#define GUCODE_SEL 8 /* User 64 bit Code Descriptor */
+#define GPROC0_SEL 9 /* TSS for entering kernel etc */
+/* slot 10 is second half of GPROC0_SEL */
+#define GUSERLDT_SEL 11 /* LDT */
+/* slot 12 is second half of GUSERLDT_SEL */
+#define NGDT 13
+#endif /* __i386__ */
+
+#endif /* !_X86_SEGMENTS_H_ */
diff --git a/usr/src/contrib/bhyve/x86/specialreg.h b/usr/src/contrib/bhyve/x86/specialreg.h
new file mode 100644
index 0000000000..f528bad55c
--- /dev/null
+++ b/usr/src/contrib/bhyve/x86/specialreg.h
@@ -0,0 +1,1143 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)specialreg.h 7.1 (Berkeley) 5/9/91
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_SPECIALREG_H_
+#define _MACHINE_SPECIALREG_H_
+
+/*
+ * Bits in 386 special registers:
+ */
+#define CR0_PE 0x00000001 /* Protected mode Enable */
+#define CR0_MP 0x00000002 /* "Math" (fpu) Present */
+#define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */
+#define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */
+#define CR0_PG 0x80000000 /* PaGing enable */
+
+/*
+ * Bits in 486 special registers:
+ */
+#define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */
+#define CR0_WP 0x00010000 /* Write Protect (honor page protect in
+ all modes) */
+#define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */
+#define CR0_NW 0x20000000 /* Not Write-through */
+#define CR0_CD 0x40000000 /* Cache Disable */
+
+#define CR3_PCID_SAVE 0x8000000000000000
+#define CR3_PCID_MASK 0xfff
+
+/*
+ * Bits in PPro special registers
+ */
+#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
+#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
+#define CR4_TSD 0x00000004 /* Time stamp disable */
+#define CR4_DE 0x00000008 /* Debugging extensions */
+#define CR4_PSE 0x00000010 /* Page size extensions */
+#define CR4_PAE 0x00000020 /* Physical address extension */
+#define CR4_MCE 0x00000040 /* Machine check enable */
+#define CR4_PGE 0x00000080 /* Page global enable */
+#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
+#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
+#define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */
+#define CR4_VMXE 0x00002000 /* enable VMX operation (Intel-specific) */
+#define CR4_FSGSBASE 0x00010000 /* Enable FS/GS BASE accessing instructions */
+#define CR4_PCIDE 0x00020000 /* Enable Context ID */
+#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */
+#define CR4_SMEP 0x00100000 /* Supervisor-Mode Execution Prevention */
+#define CR4_SMAP 0x00200000 /* Supervisor-Mode Access Prevention */
+#define CR4_PKE 0x00400000 /* Protection Keys Enable */
+
+/*
+ * Bits in AMD64 special registers. EFER is 64 bits wide.
+ */
+#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */
+#define EFER_LME 0x000000100 /* Long mode enable (R/W) */
+#define EFER_LMA 0x000000400 /* Long mode active (R) */
+#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
+#define EFER_SVM 0x000001000 /* SVM enable bit for AMD, reserved for Intel */
+#define EFER_LMSLE 0x000002000 /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR 0x000004000 /* Fast FXSAVE/FSRSTOR */
+#define EFER_TCE 0x000008000 /* Translation Cache Extension */
+
+/*
+ * Intel Extended Features registers
+ */
+#define XCR0 0 /* XFEATURE_ENABLED_MASK register */
+
+#define XFEATURE_ENABLED_X87 0x00000001
+#define XFEATURE_ENABLED_SSE 0x00000002
+#define XFEATURE_ENABLED_YMM_HI128 0x00000004
+#define XFEATURE_ENABLED_AVX XFEATURE_ENABLED_YMM_HI128
+#define XFEATURE_ENABLED_BNDREGS 0x00000008
+#define XFEATURE_ENABLED_BNDCSR 0x00000010
+#define XFEATURE_ENABLED_OPMASK 0x00000020
+#define XFEATURE_ENABLED_ZMM_HI256 0x00000040
+#define XFEATURE_ENABLED_HI16_ZMM 0x00000080
+
+#define XFEATURE_AVX \
+ (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
+#define XFEATURE_AVX512 \
+ (XFEATURE_ENABLED_OPMASK | XFEATURE_ENABLED_ZMM_HI256 | \
+ XFEATURE_ENABLED_HI16_ZMM)
+#define XFEATURE_MPX \
+ (XFEATURE_ENABLED_BNDREGS | XFEATURE_ENABLED_BNDCSR)
+
+/*
+ * CPUID instruction features register
+ */
+#define CPUID_FPU 0x00000001
+#define CPUID_VME 0x00000002
+#define CPUID_DE 0x00000004
+#define CPUID_PSE 0x00000008
+#define CPUID_TSC 0x00000010
+#define CPUID_MSR 0x00000020
+#define CPUID_PAE 0x00000040
+#define CPUID_MCE 0x00000080
+#define CPUID_CX8 0x00000100
+#define CPUID_APIC 0x00000200
+#define CPUID_B10 0x00000400
+#define CPUID_SEP 0x00000800
+#define CPUID_MTRR 0x00001000
+#define CPUID_PGE 0x00002000
+#define CPUID_MCA 0x00004000
+#define CPUID_CMOV 0x00008000
+#define CPUID_PAT 0x00010000
+#define CPUID_PSE36 0x00020000
+#define CPUID_PSN 0x00040000
+#define CPUID_CLFSH 0x00080000
+#define CPUID_B20 0x00100000
+#define CPUID_DS 0x00200000
+#define CPUID_ACPI 0x00400000
+#define CPUID_MMX 0x00800000
+#define CPUID_FXSR 0x01000000
+#define CPUID_SSE 0x02000000
+#define CPUID_XMM 0x02000000
+#define CPUID_SSE2 0x04000000
+#define CPUID_SS 0x08000000
+#define CPUID_HTT 0x10000000
+#define CPUID_TM 0x20000000
+#define CPUID_IA64 0x40000000
+#define CPUID_PBE 0x80000000
+
+#define CPUID2_SSE3 0x00000001
+#define CPUID2_PCLMULQDQ 0x00000002
+#define CPUID2_DTES64 0x00000004
+#define CPUID2_MON 0x00000008
+#define CPUID2_DS_CPL 0x00000010
+#define CPUID2_VMX 0x00000020
+#define CPUID2_SMX 0x00000040
+#define CPUID2_EST 0x00000080
+#define CPUID2_TM2 0x00000100
+#define CPUID2_SSSE3 0x00000200
+#define CPUID2_CNXTID 0x00000400
+#define CPUID2_SDBG 0x00000800
+#define CPUID2_FMA 0x00001000
+#define CPUID2_CX16 0x00002000
+#define CPUID2_XTPR 0x00004000
+#define CPUID2_PDCM 0x00008000
+#define CPUID2_PCID 0x00020000
+#define CPUID2_DCA 0x00040000
+#define CPUID2_SSE41 0x00080000
+#define CPUID2_SSE42 0x00100000
+#define CPUID2_X2APIC 0x00200000
+#define CPUID2_MOVBE 0x00400000
+#define CPUID2_POPCNT 0x00800000
+#define CPUID2_TSCDLT 0x01000000
+#define CPUID2_AESNI 0x02000000
+#define CPUID2_XSAVE 0x04000000
+#define CPUID2_OSXSAVE 0x08000000
+#define CPUID2_AVX 0x10000000
+#define CPUID2_F16C 0x20000000
+#define CPUID2_RDRAND 0x40000000
+#define CPUID2_HV 0x80000000
+
+/*
+ * Important bits in the Thermal and Power Management flags
+ * CPUID.6 EAX and ECX.
+ */
+#define CPUTPM1_SENSOR 0x00000001
+#define CPUTPM1_TURBO 0x00000002
+#define CPUTPM1_ARAT 0x00000004
+#define CPUTPM1_HWP 0x00000080
+#define CPUTPM1_HWP_NOTIFICATION 0x00000100
+#define CPUTPM1_HWP_ACTIVITY_WINDOW 0x00000200
+#define CPUTPM1_HWP_PERF_PREF 0x00000400
+#define CPUTPM1_HWP_PKG 0x00000800
+#define CPUTPM1_HWP_FLEXIBLE 0x00020000
+#define CPUTPM2_EFFREQ 0x00000001
+
+/* Intel Processor Trace CPUID. */
+
+/* Leaf 0 ebx. */
+#define CPUPT_CR3 (1 << 0) /* CR3 Filtering Support */
+#define CPUPT_PSB (1 << 1) /* Configurable PSB and Cycle-Accurate Mode Supported */
+#define CPUPT_IPF (1 << 2) /* IP Filtering and TraceStop supported */
+#define CPUPT_MTC (1 << 3) /* MTC Supported */
+#define CPUPT_PRW (1 << 4) /* PTWRITE Supported */
+#define CPUPT_PWR (1 << 5) /* Power Event Trace Supported */
+
+/* Leaf 0 ecx. */
+#define CPUPT_TOPA (1 << 0) /* ToPA Output Supported */
+#define CPUPT_TOPA_MULTI (1 << 1) /* ToPA Tables Allow Multiple Output Entries */
+#define CPUPT_SINGLE (1 << 2) /* Single-Range Output Supported */
+#define CPUPT_TT_OUT (1 << 3) /* Output to Trace Transport Subsystem Supported */
+#define CPUPT_LINEAR_IP (1 << 31) /* IP Payloads are Linear IP, otherwise IP is effective */
+
+/* Leaf 1 eax. */
+#define CPUPT_NADDR_S 0 /* Number of Address Ranges */
+#define CPUPT_NADDR_M (0x7 << CPUPT_NADDR_S)
+#define CPUPT_MTC_BITMAP_S 16 /* Bitmap of supported MTC Period Encodings */
+#define CPUPT_MTC_BITMAP_M (0xffff << CPUPT_MTC_BITMAP_S)
+
+/* Leaf 1 ebx. */
+#define CPUPT_CT_BITMAP_S 0 /* Bitmap of supported Cycle Threshold values */
+#define CPUPT_CT_BITMAP_M (0xffff << CPUPT_CT_BITMAP_S)
+#define CPUPT_PFE_BITMAP_S 16 /* Bitmap of supported Configurable PSB Frequency encoding */
+#define CPUPT_PFE_BITMAP_M (0xffff << CPUPT_PFE_BITMAP_S)
+
+/*
+ * Important bits in the AMD extended cpuid flags
+ */
+#define AMDID_SYSCALL 0x00000800
+#define AMDID_MP 0x00080000
+#define AMDID_NX 0x00100000
+#define AMDID_EXT_MMX 0x00400000
+#define AMDID_FFXSR 0x02000000
+#define AMDID_PAGE1GB 0x04000000
+#define AMDID_RDTSCP 0x08000000
+#define AMDID_LM 0x20000000
+#define AMDID_EXT_3DNOW 0x40000000
+#define AMDID_3DNOW 0x80000000
+
+#define AMDID2_LAHF 0x00000001
+#define AMDID2_CMP 0x00000002
+#define AMDID2_SVM 0x00000004
+#define AMDID2_EXT_APIC 0x00000008
+#define AMDID2_CR8 0x00000010
+#define AMDID2_ABM 0x00000020
+#define AMDID2_SSE4A 0x00000040
+#define AMDID2_MAS 0x00000080
+#define AMDID2_PREFETCH 0x00000100
+#define AMDID2_OSVW 0x00000200
+#define AMDID2_IBS 0x00000400
+#define AMDID2_XOP 0x00000800
+#define AMDID2_SKINIT 0x00001000
+#define AMDID2_WDT 0x00002000
+#define AMDID2_LWP 0x00008000
+#define AMDID2_FMA4 0x00010000
+#define AMDID2_TCE 0x00020000
+#define AMDID2_NODE_ID 0x00080000
+#define AMDID2_TBM 0x00200000
+#define AMDID2_TOPOLOGY 0x00400000
+#define AMDID2_PCXC 0x00800000
+#define AMDID2_PNXC 0x01000000
+#define AMDID2_DBE 0x04000000
+#define AMDID2_PTSC 0x08000000
+#define AMDID2_PTSCEL2I 0x10000000
+#define AMDID2_MWAITX 0x20000000
+
+/*
+ * CPUID instruction 1 eax info
+ */
+#define CPUID_STEPPING 0x0000000f
+#define CPUID_MODEL 0x000000f0
+#define CPUID_FAMILY 0x00000f00
+#define CPUID_EXT_MODEL 0x000f0000
+#define CPUID_EXT_FAMILY 0x0ff00000
+#ifdef __i386__
+#define CPUID_TO_MODEL(id) \
+ ((((id) & CPUID_MODEL) >> 4) | \
+ ((((id) & CPUID_FAMILY) >= 0x600) ? \
+ (((id) & CPUID_EXT_MODEL) >> 12) : 0))
+#define CPUID_TO_FAMILY(id) \
+ ((((id) & CPUID_FAMILY) >> 8) + \
+ ((((id) & CPUID_FAMILY) == 0xf00) ? \
+ (((id) & CPUID_EXT_FAMILY) >> 20) : 0))
+#else
+#define CPUID_TO_MODEL(id) \
+ ((((id) & CPUID_MODEL) >> 4) | \
+ (((id) & CPUID_EXT_MODEL) >> 12))
+#define CPUID_TO_FAMILY(id) \
+ ((((id) & CPUID_FAMILY) >> 8) + \
+ (((id) & CPUID_EXT_FAMILY) >> 20))
+#endif
+
+/*
+ * CPUID instruction 1 ebx info
+ */
+#define CPUID_BRAND_INDEX 0x000000ff
+#define CPUID_CLFUSH_SIZE 0x0000ff00
+#define CPUID_HTT_CORES 0x00ff0000
+#define CPUID_LOCAL_APIC_ID 0xff000000
+
+/*
+ * CPUID instruction 5 info
+ */
+#define CPUID5_MON_MIN_SIZE 0x0000ffff /* eax */
+#define CPUID5_MON_MAX_SIZE 0x0000ffff /* ebx */
+#define CPUID5_MON_MWAIT_EXT 0x00000001 /* ecx */
+#define CPUID5_MWAIT_INTRBREAK 0x00000002 /* ecx */
+
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
+
+/*
+ * MWAIT extensions.
+ */
+/* Interrupt breaks MWAIT even when masked. */
+#define MWAIT_INTRBREAK 0x00000001
+
+/*
+ * CPUID instruction 6 ecx info
+ */
+#define CPUID_PERF_STAT 0x00000001
+#define CPUID_PERF_BIAS 0x00000008
+
+/*
+ * CPUID instruction 0xb ebx info.
+ */
+#define CPUID_TYPE_INVAL 0
+#define CPUID_TYPE_SMT 1
+#define CPUID_TYPE_CORE 2
+
+/*
+ * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
+ */
+#define CPUID_EXTSTATE_XSAVEOPT 0x00000001
+#define CPUID_EXTSTATE_XSAVEC 0x00000002
+#define CPUID_EXTSTATE_XINUSE 0x00000004
+#define CPUID_EXTSTATE_XSAVES 0x00000008
+
+/*
+ * AMD extended function 8000_0007h ebx info
+ */
+#define AMDRAS_MCA_OF_RECOV 0x00000001
+#define AMDRAS_SUCCOR 0x00000002
+#define AMDRAS_HW_ASSERT 0x00000004
+#define AMDRAS_SCALABLE_MCA 0x00000008
+#define AMDRAS_PFEH_SUPPORT 0x00000010
+
+/*
+ * AMD extended function 8000_0007h edx info
+ */
+#define AMDPM_TS 0x00000001
+#define AMDPM_FID 0x00000002
+#define AMDPM_VID 0x00000004
+#define AMDPM_TTP 0x00000008
+#define AMDPM_TM 0x00000010
+#define AMDPM_STC 0x00000020
+#define AMDPM_100MHZ_STEPS 0x00000040
+#define AMDPM_HW_PSTATE 0x00000080
+#define AMDPM_TSC_INVARIANT 0x00000100
+#define AMDPM_CPB 0x00000200
+
+/*
+ * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)
+ */
+#define AMDFEID_CLZERO 0x00000001
+#define AMDFEID_IRPERF 0x00000002
+#define AMDFEID_XSAVEERPTR 0x00000004
+#define AMDFEID_IBPB 0x00001000
+#define AMDFEID_IBRS 0x00004000
+#define AMDFEID_STIBP 0x00008000
+/* The below are only defined if the corresponding base feature above exists. */
+#define AMDFEID_IBRS_ALWAYSON 0x00010000
+#define AMDFEID_STIBP_ALWAYSON 0x00020000
+#define AMDFEID_PREFER_IBRS 0x00040000
+#define AMDFEID_SSBD 0x01000000
+/* SSBD via MSRC001_011F instead of MSR 0x48: */
+#define AMDFEID_VIRT_SSBD 0x02000000
+#define AMDFEID_SSB_NO 0x04000000
+
+/*
+ * AMD extended function 8000_0008h ecx info
+ */
+#define AMDID_CMP_CORES 0x000000ff
+#define AMDID_COREID_SIZE 0x0000f000
+#define AMDID_COREID_SIZE_SHIFT 12
+
+/*
+ * CPUID instruction 7 Structured Extended Features, leaf 0 ebx info
+ */
+#define CPUID_STDEXT_FSGSBASE 0x00000001
+#define CPUID_STDEXT_TSC_ADJUST 0x00000002
+#define CPUID_STDEXT_SGX 0x00000004
+#define CPUID_STDEXT_BMI1 0x00000008
+#define CPUID_STDEXT_HLE 0x00000010
+#define CPUID_STDEXT_AVX2 0x00000020
+#define CPUID_STDEXT_FDP_EXC 0x00000040
+#define CPUID_STDEXT_SMEP 0x00000080
+#define CPUID_STDEXT_BMI2 0x00000100
+#define CPUID_STDEXT_ERMS 0x00000200
+#define CPUID_STDEXT_INVPCID 0x00000400
+#define CPUID_STDEXT_RTM 0x00000800
+#define CPUID_STDEXT_PQM 0x00001000
+#define CPUID_STDEXT_NFPUSG 0x00002000
+#define CPUID_STDEXT_MPX 0x00004000
+#define CPUID_STDEXT_PQE 0x00008000
+#define CPUID_STDEXT_AVX512F 0x00010000
+#define CPUID_STDEXT_AVX512DQ 0x00020000
+#define CPUID_STDEXT_RDSEED 0x00040000
+#define CPUID_STDEXT_ADX 0x00080000
+#define CPUID_STDEXT_SMAP 0x00100000
+#define CPUID_STDEXT_AVX512IFMA 0x00200000
+#define CPUID_STDEXT_PCOMMIT 0x00400000
+#define CPUID_STDEXT_CLFLUSHOPT 0x00800000
+#define CPUID_STDEXT_CLWB 0x01000000
+#define CPUID_STDEXT_PROCTRACE 0x02000000
+#define CPUID_STDEXT_AVX512PF 0x04000000
+#define CPUID_STDEXT_AVX512ER 0x08000000
+#define CPUID_STDEXT_AVX512CD 0x10000000
+#define CPUID_STDEXT_SHA 0x20000000
+#define CPUID_STDEXT_AVX512BW 0x40000000
+#define CPUID_STDEXT_AVX512VL 0x80000000
+
+/*
+ * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info
+ */
+#define CPUID_STDEXT2_PREFETCHWT1 0x00000001
+#define CPUID_STDEXT2_AVX512VBMI 0x00000002
+#define CPUID_STDEXT2_UMIP 0x00000004
+#define CPUID_STDEXT2_PKU 0x00000008
+#define CPUID_STDEXT2_OSPKE 0x00000010
+#define CPUID_STDEXT2_WAITPKG 0x00000020
+#define CPUID_STDEXT2_AVX512VBMI2 0x00000040
+#define CPUID_STDEXT2_GFNI 0x00000100
+#define CPUID_STDEXT2_VAES 0x00000200
+#define CPUID_STDEXT2_VPCLMULQDQ 0x00000400
+#define CPUID_STDEXT2_AVX512VNNI 0x00000800
+#define CPUID_STDEXT2_AVX512BITALG 0x00001000
+#define CPUID_STDEXT2_AVX512VPOPCNTDQ 0x00004000
+#define CPUID_STDEXT2_RDPID 0x00400000
+#define CPUID_STDEXT2_CLDEMOTE 0x02000000
+#define CPUID_STDEXT2_MOVDIRI 0x08000000
+#define CPUID_STDEXT2_MOVDIRI64B 0x10000000
+#define CPUID_STDEXT2_ENQCMD 0x20000000
+#define CPUID_STDEXT2_SGXLC 0x40000000
+
+/*
+ * CPUID instruction 7 Structured Extended Features, leaf 0 edx info
+ */
+#define CPUID_STDEXT3_AVX5124VNNIW 0x00000004
+#define CPUID_STDEXT3_AVX5124FMAPS 0x00000008
+#define CPUID_STDEXT3_AVX512VP2INTERSECT 0x00000100
+#define CPUID_STDEXT3_MD_CLEAR 0x00000400
+#define CPUID_STDEXT3_TSXFA 0x00002000
+#define CPUID_STDEXT3_PCONFIG 0x00040000
+#define CPUID_STDEXT3_IBPB 0x04000000
+#define CPUID_STDEXT3_STIBP 0x08000000
+#define CPUID_STDEXT3_L1D_FLUSH 0x10000000
+#define CPUID_STDEXT3_ARCH_CAP 0x20000000
+#define CPUID_STDEXT3_CORE_CAP 0x40000000
+#define CPUID_STDEXT3_SSBD 0x80000000
+
+/* MSR IA32_ARCH_CAP(ABILITIES) bits */
+#define IA32_ARCH_CAP_RDCL_NO 0x00000001
+#define IA32_ARCH_CAP_IBRS_ALL 0x00000002
+#define IA32_ARCH_CAP_RSBA 0x00000004
+#define IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY 0x00000008
+#define IA32_ARCH_CAP_SSB_NO 0x00000010
+#define IA32_ARCH_CAP_MDS_NO 0x00000020
+
+/*
+ * CPUID manufacturers identifiers
+ */
+#define AMD_VENDOR_ID "AuthenticAMD"
+#define CENTAUR_VENDOR_ID "CentaurHauls"
+#define CYRIX_VENDOR_ID "CyrixInstead"
+#define INTEL_VENDOR_ID "GenuineIntel"
+#define NEXGEN_VENDOR_ID "NexGenDriven"
+#define NSC_VENDOR_ID "Geode by NSC"
+#define RISE_VENDOR_ID "RiseRiseRise"
+#define SIS_VENDOR_ID "SiS SiS SiS "
+#define TRANSMETA_VENDOR_ID "GenuineTMx86"
+#define UMC_VENDOR_ID "UMC UMC UMC "
+
+/*
+ * Model-specific registers for the i386 family
+ */
+#define MSR_P5_MC_ADDR 0x000
+#define MSR_P5_MC_TYPE 0x001
+#define MSR_TSC 0x010
+#define MSR_P5_CESR 0x011
+#define MSR_P5_CTR0 0x012
+#define MSR_P5_CTR1 0x013
+#define MSR_IA32_PLATFORM_ID 0x017
+#define MSR_APICBASE 0x01b
+#define MSR_EBL_CR_POWERON 0x02a
+#define MSR_TEST_CTL 0x033
+#define MSR_IA32_FEATURE_CONTROL 0x03a
+#define MSR_IA32_SPEC_CTRL 0x048
+#define MSR_IA32_PRED_CMD 0x049
+#define MSR_BIOS_UPDT_TRIG 0x079
+#define MSR_BBL_CR_D0 0x088
+#define MSR_BBL_CR_D1 0x089
+#define MSR_BBL_CR_D2 0x08a
+#define MSR_BIOS_SIGN 0x08b
+#define MSR_PERFCTR0 0x0c1
+#define MSR_PERFCTR1 0x0c2
+#define MSR_PLATFORM_INFO 0x0ce
+#define MSR_MPERF 0x0e7
+#define MSR_APERF 0x0e8
+#define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */
+#define MSR_MTRRcap 0x0fe
+#define MSR_IA32_ARCH_CAP 0x10a
+#define MSR_IA32_FLUSH_CMD 0x10b
+#define MSR_TSX_FORCE_ABORT 0x10f
+#define MSR_BBL_CR_ADDR 0x116
+#define MSR_BBL_CR_DECC 0x118
+#define MSR_BBL_CR_CTL 0x119
+#define MSR_BBL_CR_TRIG 0x11a
+#define MSR_BBL_CR_BUSY 0x11b
+#define MSR_BBL_CR_CTL3 0x11e
+#define MSR_SYSENTER_CS_MSR 0x174
+#define MSR_SYSENTER_ESP_MSR 0x175
+#define MSR_SYSENTER_EIP_MSR 0x176
+#define MSR_MCG_CAP 0x179
+#define MSR_MCG_STATUS 0x17a
+#define MSR_MCG_CTL 0x17b
+#define MSR_EVNTSEL0 0x186
+#define MSR_EVNTSEL1 0x187
+#define MSR_THERM_CONTROL 0x19a
+#define MSR_THERM_INTERRUPT 0x19b
+#define MSR_THERM_STATUS 0x19c
+#define MSR_IA32_MISC_ENABLE 0x1a0
+#define MSR_IA32_TEMPERATURE_TARGET 0x1a2
+#define MSR_TURBO_RATIO_LIMIT 0x1ad
+#define MSR_TURBO_RATIO_LIMIT1 0x1ae
+#define MSR_DEBUGCTLMSR 0x1d9
+#define MSR_LASTBRANCHFROMIP 0x1db
+#define MSR_LASTBRANCHTOIP 0x1dc
+#define MSR_LASTINTFROMIP 0x1dd
+#define MSR_LASTINTTOIP 0x1de
+#define MSR_ROB_CR_BKUPTMPDR6 0x1e0
+#define MSR_MTRRVarBase 0x200
+#define MSR_MTRR64kBase 0x250
+#define MSR_MTRR16kBase 0x258
+#define MSR_MTRR4kBase 0x268
+#define MSR_PAT 0x277
+#define MSR_MC0_CTL2 0x280
+#define MSR_MTRRdefType 0x2ff
+#define MSR_MC0_CTL 0x400
+#define MSR_MC0_STATUS 0x401
+#define MSR_MC0_ADDR 0x402
+#define MSR_MC0_MISC 0x403
+#define MSR_MC1_CTL 0x404
+#define MSR_MC1_STATUS 0x405
+#define MSR_MC1_ADDR 0x406
+#define MSR_MC1_MISC 0x407
+#define MSR_MC2_CTL 0x408
+#define MSR_MC2_STATUS 0x409
+#define MSR_MC2_ADDR 0x40a
+#define MSR_MC2_MISC 0x40b
+#define MSR_MC3_CTL 0x40c
+#define MSR_MC3_STATUS 0x40d
+#define MSR_MC3_ADDR 0x40e
+#define MSR_MC3_MISC 0x40f
+#define MSR_MC4_CTL 0x410
+#define MSR_MC4_STATUS 0x411
+#define MSR_MC4_ADDR 0x412
+#define MSR_MC4_MISC 0x413
+#define MSR_RAPL_POWER_UNIT 0x606
+#define MSR_PKG_ENERGY_STATUS 0x611
+#define MSR_DRAM_ENERGY_STATUS 0x619
+#define MSR_PP0_ENERGY_STATUS 0x639
+#define MSR_PP1_ENERGY_STATUS 0x641
+#define MSR_PPERF 0x64e
+#define MSR_TSC_DEADLINE 0x6e0 /* Writes are not serializing */
+#define MSR_IA32_PM_ENABLE 0x770
+#define MSR_IA32_HWP_CAPABILITIES 0x771
+#define MSR_IA32_HWP_REQUEST_PKG 0x772
+#define MSR_IA32_HWP_INTERRUPT 0x773
+#define MSR_IA32_HWP_REQUEST 0x774
+#define MSR_IA32_HWP_STATUS 0x777
+
+/*
+ * VMX MSRs
+ */
+#define MSR_VMX_BASIC 0x480
+#define MSR_VMX_PINBASED_CTLS 0x481
+#define MSR_VMX_PROCBASED_CTLS 0x482
+#define MSR_VMX_EXIT_CTLS 0x483
+#define MSR_VMX_ENTRY_CTLS 0x484
+#define MSR_VMX_CR0_FIXED0 0x486
+#define MSR_VMX_CR0_FIXED1 0x487
+#define MSR_VMX_CR4_FIXED0 0x488
+#define MSR_VMX_CR4_FIXED1 0x489
+#define MSR_VMX_PROCBASED_CTLS2 0x48b
+#define MSR_VMX_EPT_VPID_CAP 0x48c
+#define MSR_VMX_TRUE_PINBASED_CTLS 0x48d
+#define MSR_VMX_TRUE_PROCBASED_CTLS 0x48e
+#define MSR_VMX_TRUE_EXIT_CTLS 0x48f
+#define MSR_VMX_TRUE_ENTRY_CTLS 0x490
+
+/*
+ * X2APIC MSRs.
+ * Writes are not serializing.
+ */
+#define MSR_APIC_000 0x800
+#define MSR_APIC_ID 0x802
+#define MSR_APIC_VERSION 0x803
+#define MSR_APIC_TPR 0x808
+#define MSR_APIC_EOI 0x80b
+#define MSR_APIC_LDR 0x80d
+#define MSR_APIC_SVR 0x80f
+#define MSR_APIC_ISR0 0x810
+#define MSR_APIC_ISR1 0x811
+#define MSR_APIC_ISR2 0x812
+#define MSR_APIC_ISR3 0x813
+#define MSR_APIC_ISR4 0x814
+#define MSR_APIC_ISR5 0x815
+#define MSR_APIC_ISR6 0x816
+#define MSR_APIC_ISR7 0x817
+#define MSR_APIC_TMR0 0x818
+#define MSR_APIC_IRR0 0x820
+#define MSR_APIC_ESR 0x828
+#define MSR_APIC_LVT_CMCI 0x82F
+#define MSR_APIC_ICR 0x830
+#define MSR_APIC_LVT_TIMER 0x832
+#define MSR_APIC_LVT_THERMAL 0x833
+#define MSR_APIC_LVT_PCINT 0x834
+#define MSR_APIC_LVT_LINT0 0x835
+#define MSR_APIC_LVT_LINT1 0x836
+#define MSR_APIC_LVT_ERROR 0x837
+#define MSR_APIC_ICR_TIMER 0x838
+#define MSR_APIC_CCR_TIMER 0x839
+#define MSR_APIC_DCR_TIMER 0x83e
+#define MSR_APIC_SELF_IPI 0x83f
+
+#define MSR_IA32_XSS 0xda0
+
+/*
+ * Intel Processor Trace (PT) MSRs.
+ */
+#define MSR_IA32_RTIT_OUTPUT_BASE 0x560 /* Trace Output Base Register (R/W) */
+#define MSR_IA32_RTIT_OUTPUT_MASK_PTRS 0x561 /* Trace Output Mask Pointers Register (R/W) */
+#define MSR_IA32_RTIT_CTL 0x570 /* Trace Control Register (R/W) */
+#define RTIT_CTL_TRACEEN (1 << 0)
+#define RTIT_CTL_CYCEN (1 << 1)
+#define RTIT_CTL_OS (1 << 2)
+#define RTIT_CTL_USER (1 << 3)
+#define RTIT_CTL_PWREVTEN (1 << 4)
+#define RTIT_CTL_FUPONPTW (1 << 5)
+#define RTIT_CTL_FABRICEN (1 << 6)
+#define RTIT_CTL_CR3FILTER (1 << 7)
+#define RTIT_CTL_TOPA (1 << 8)
+#define RTIT_CTL_MTCEN (1 << 9)
+#define RTIT_CTL_TSCEN (1 << 10)
+#define RTIT_CTL_DISRETC (1 << 11)
+#define RTIT_CTL_PTWEN (1 << 12)
+#define RTIT_CTL_BRANCHEN (1 << 13)
+#define RTIT_CTL_MTC_FREQ_S 14
+#define RTIT_CTL_MTC_FREQ(n) ((n) << RTIT_CTL_MTC_FREQ_S)
+#define RTIT_CTL_MTC_FREQ_M (0xf << RTIT_CTL_MTC_FREQ_S)
+#define RTIT_CTL_CYC_THRESH_S 19
+#define RTIT_CTL_CYC_THRESH_M (0xf << RTIT_CTL_CYC_THRESH_S)
+#define RTIT_CTL_PSB_FREQ_S 24
+#define RTIT_CTL_PSB_FREQ_M (0xf << RTIT_CTL_PSB_FREQ_S)
+#define RTIT_CTL_ADDR_CFG_S(n) (32 + (n) * 4)
+#define RTIT_CTL_ADDR0_CFG_S 32
+#define RTIT_CTL_ADDR0_CFG_M (0xfULL << RTIT_CTL_ADDR0_CFG_S)
+#define RTIT_CTL_ADDR1_CFG_S 36
+#define RTIT_CTL_ADDR1_CFG_M (0xfULL << RTIT_CTL_ADDR1_CFG_S)
+#define RTIT_CTL_ADDR2_CFG_S 40
+#define RTIT_CTL_ADDR2_CFG_M (0xfULL << RTIT_CTL_ADDR2_CFG_S)
+#define RTIT_CTL_ADDR3_CFG_S 44
+#define RTIT_CTL_ADDR3_CFG_M (0xfULL << RTIT_CTL_ADDR3_CFG_S)
+#define MSR_IA32_RTIT_STATUS 0x571 /* Tracing Status Register (R/W) */
+#define RTIT_STATUS_FILTEREN (1 << 0)
+#define RTIT_STATUS_CONTEXTEN (1 << 1)
+#define RTIT_STATUS_TRIGGEREN (1 << 2)
+#define RTIT_STATUS_ERROR (1 << 4)
+#define RTIT_STATUS_STOPPED (1 << 5)
+#define RTIT_STATUS_PACKETBYTECNT_S 32
+#define RTIT_STATUS_PACKETBYTECNT_M (0x1ffffULL << RTIT_STATUS_PACKETBYTECNT_S)
+#define MSR_IA32_RTIT_CR3_MATCH 0x572 /* Trace Filter CR3 Match Register (R/W) */
+#define MSR_IA32_RTIT_ADDR_A(n) (0x580 + (n) * 2)
+#define MSR_IA32_RTIT_ADDR_B(n) (0x581 + (n) * 2)
+#define MSR_IA32_RTIT_ADDR0_A 0x580 /* Region 0 Start Address (R/W) */
+#define MSR_IA32_RTIT_ADDR0_B 0x581 /* Region 0 End Address (R/W) */
+#define MSR_IA32_RTIT_ADDR1_A 0x582 /* Region 1 Start Address (R/W) */
+#define MSR_IA32_RTIT_ADDR1_B 0x583 /* Region 1 End Address (R/W) */
+#define MSR_IA32_RTIT_ADDR2_A 0x584 /* Region 2 Start Address (R/W) */
+#define MSR_IA32_RTIT_ADDR2_B 0x585 /* Region 2 End Address (R/W) */
+#define MSR_IA32_RTIT_ADDR3_A 0x586 /* Region 3 Start Address (R/W) */
+#define MSR_IA32_RTIT_ADDR3_B 0x587 /* Region 3 End Address (R/W) */
+
+/* Intel Processor Trace Table of Physical Addresses (ToPA). */
+#define TOPA_SIZE_S 6
+#define TOPA_SIZE_M (0xf << TOPA_SIZE_S)
+#define TOPA_SIZE_4K (0 << TOPA_SIZE_S)
+#define TOPA_SIZE_8K (1 << TOPA_SIZE_S)
+#define TOPA_SIZE_16K (2 << TOPA_SIZE_S)
+#define TOPA_SIZE_32K (3 << TOPA_SIZE_S)
+#define TOPA_SIZE_64K (4 << TOPA_SIZE_S)
+#define TOPA_SIZE_128K (5 << TOPA_SIZE_S)
+#define TOPA_SIZE_256K (6 << TOPA_SIZE_S)
+#define TOPA_SIZE_512K (7 << TOPA_SIZE_S)
+#define TOPA_SIZE_1M (8 << TOPA_SIZE_S)
+#define TOPA_SIZE_2M (9 << TOPA_SIZE_S)
+#define TOPA_SIZE_4M (10 << TOPA_SIZE_S)
+#define TOPA_SIZE_8M (11 << TOPA_SIZE_S)
+#define TOPA_SIZE_16M (12 << TOPA_SIZE_S)
+#define TOPA_SIZE_32M (13 << TOPA_SIZE_S)
+#define TOPA_SIZE_64M (14 << TOPA_SIZE_S)
+#define TOPA_SIZE_128M (15 << TOPA_SIZE_S)
+#define TOPA_STOP (1 << 4)
+#define TOPA_INT (1 << 2)
+#define TOPA_END (1 << 0)
+
+/*
+ * Constants related to MSR's.
+ */
+#define APICBASE_RESERVED 0x000002ff
+#define APICBASE_BSP 0x00000100
+#define APICBASE_X2APIC 0x00000400
+#define APICBASE_ENABLED 0x00000800
+#define APICBASE_ADDRESS 0xfffff000
+
+/* MSR_IA32_FEATURE_CONTROL related */
+#define IA32_FEATURE_CONTROL_LOCK 0x01 /* lock bit */
+#define IA32_FEATURE_CONTROL_SMX_EN 0x02 /* enable VMX inside SMX */
+#define IA32_FEATURE_CONTROL_VMX_EN 0x04 /* enable VMX outside SMX */
+
+/* MSR IA32_MISC_ENABLE */
+#define IA32_MISC_EN_FASTSTR 0x0000000000000001ULL
+#define IA32_MISC_EN_ATCCE 0x0000000000000008ULL
+#define IA32_MISC_EN_PERFMON 0x0000000000000080ULL
+#define IA32_MISC_EN_PEBSU 0x0000000000001000ULL
+#define IA32_MISC_EN_ESSTE 0x0000000000010000ULL
+#define IA32_MISC_EN_MONE 0x0000000000040000ULL
+#define IA32_MISC_EN_LIMCPUID 0x0000000000400000ULL
+#define IA32_MISC_EN_xTPRD 0x0000000000800000ULL
+#define IA32_MISC_EN_XDD 0x0000000400000000ULL
+
+/*
+ * IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in the Intel'
+ * document 336996-001 Speculative Execution Side Channel Mitigations.
+ *
+ * AMD uses the same MSRs and bit definitions, as described in 111006-B
+ * "Indirect Branch Control Extension" and 124441 "Speculative Store Bypass
+ * Disable."
+ */
+/* MSR IA32_SPEC_CTRL */
+#define IA32_SPEC_CTRL_IBRS 0x00000001
+#define IA32_SPEC_CTRL_STIBP 0x00000002
+#define IA32_SPEC_CTRL_SSBD 0x00000004
+
+/* MSR IA32_PRED_CMD */
+#define IA32_PRED_CMD_IBPB_BARRIER 0x0000000000000001ULL
+
+/* MSR IA32_FLUSH_CMD */
+#define IA32_FLUSH_CMD_L1D 0x00000001
+
+/* MSR IA32_HWP_CAPABILITIES */
+#define IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(x) (((x) >> 0) & 0xff)
+#define IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(x) (((x) >> 8) & 0xff)
+#define IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(x) (((x) >> 16) & 0xff)
+#define IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(x) (((x) >> 24) & 0xff)
+
+/* MSR IA32_HWP_REQUEST */
+#define IA32_HWP_REQUEST_MINIMUM_VALID (1ULL << 63)
+#define IA32_HWP_REQUEST_MAXIMUM_VALID (1ULL << 62)
+#define IA32_HWP_REQUEST_DESIRED_VALID (1ULL << 61)
+#define IA32_HWP_REQUEST_EPP_VALID (1ULL << 60)
+#define IA32_HWP_REQUEST_ACTIVITY_WINDOW_VALID (1ULL << 59)
+#define IA32_HWP_REQUEST_PACKAGE_CONTROL (1ULL << 42)
+#define IA32_HWP_ACTIVITY_WINDOW (0x3ffULL << 32)
+#define IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE (0xffULL << 24)
+#define IA32_HWP_DESIRED_PERFORMANCE (0xffULL << 16)
+#define IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE (0xffULL << 8)
+#define IA32_HWP_MINIMUM_PERFORMANCE (0xffULL << 0)
+
+/*
+ * PAT modes.
+ */
+#define PAT_UNCACHEABLE 0x00
+#define PAT_WRITE_COMBINING 0x01
+#define PAT_WRITE_THROUGH 0x04
+#define PAT_WRITE_PROTECTED 0x05
+#define PAT_WRITE_BACK 0x06
+#define PAT_UNCACHED 0x07
+#define PAT_VALUE(i, m) ((long long)(m) << (8 * (i)))
+#define PAT_MASK(i) PAT_VALUE(i, 0xff)
+
+/*
+ * Constants related to MTRRs
+ */
+#define MTRR_UNCACHEABLE 0x00
+#define MTRR_WRITE_COMBINING 0x01
+#define MTRR_WRITE_THROUGH 0x04
+#define MTRR_WRITE_PROTECTED 0x05
+#define MTRR_WRITE_BACK 0x06
+#define MTRR_N64K 8 /* numbers of fixed-size entries */
+#define MTRR_N16K 16
+#define MTRR_N4K 64
+#define MTRR_CAP_WC 0x0000000000000400
+#define MTRR_CAP_FIXED 0x0000000000000100
+#define MTRR_CAP_VCNT 0x00000000000000ff
+#define MTRR_DEF_ENABLE 0x0000000000000800
+#define MTRR_DEF_FIXED_ENABLE 0x0000000000000400
+#define MTRR_DEF_TYPE 0x00000000000000ff
+#define MTRR_PHYSBASE_PHYSBASE 0x000ffffffffff000
+#define MTRR_PHYSBASE_TYPE 0x00000000000000ff
+#define MTRR_PHYSMASK_PHYSMASK 0x000ffffffffff000
+#define MTRR_PHYSMASK_VALID 0x0000000000000800
+
+/*
+ * Cyrix configuration registers, accessible as IO ports.
+ */
+#define CCR0 0xc0 /* Configuration control register 0 */
+#define CCR0_NC0 0x01 /* First 64K of each 1M memory region is
+ non-cacheable */
+#define CCR0_NC1 0x02 /* 640K-1M region is non-cacheable */
+#define CCR0_A20M 0x04 /* Enables A20M# input pin */
+#define CCR0_KEN 0x08 /* Enables KEN# input pin */
+#define CCR0_FLUSH 0x10 /* Enables FLUSH# input pin */
+#define CCR0_BARB 0x20 /* Flushes internal cache when entering hold
+ state */
+#define CCR0_CO 0x40 /* Cache org: 1=direct mapped, 0=2x set
+ assoc */
+#define CCR0_SUSPEND 0x80 /* Enables SUSP# and SUSPA# pins */
+
+#define CCR1 0xc1 /* Configuration control register 1 */
+#define CCR1_RPL 0x01 /* Enables RPLSET and RPLVAL# pins */
+#define CCR1_SMI 0x02 /* Enables SMM pins */
+#define CCR1_SMAC 0x04 /* System management memory access */
+#define CCR1_MMAC 0x08 /* Main memory access */
+#define CCR1_NO_LOCK 0x10 /* Negate LOCK# */
+#define CCR1_SM3 0x80 /* SMM address space address region 3 */
+
+#define CCR2 0xc2
+#define CCR2_WB 0x02 /* Enables WB cache interface pins */
+#define CCR2_SADS 0x02 /* Slow ADS */
+#define CCR2_LOCK_NW 0x04 /* LOCK NW Bit */
+#define CCR2_SUSP_HLT 0x08 /* Suspend on HALT */
+#define CCR2_WT1 0x10 /* WT region 1 */
+#define CCR2_WPR1 0x10 /* Write-protect region 1 */
+#define CCR2_BARB 0x20 /* Flushes write-back cache when entering
+ hold state. */
+#define CCR2_BWRT 0x40 /* Enables burst write cycles */
+#define CCR2_USE_SUSP 0x80 /* Enables suspend pins */
+
+#define CCR3 0xc3
+#define CCR3_SMILOCK 0x01 /* SMM register lock */
+#define CCR3_NMI 0x02 /* Enables NMI during SMM */
+#define CCR3_LINBRST 0x04 /* Linear address burst cycles */
+#define CCR3_SMMMODE 0x08 /* SMM Mode */
+#define CCR3_MAPEN0 0x10 /* Enables Map0 */
+#define CCR3_MAPEN1 0x20 /* Enables Map1 */
+#define CCR3_MAPEN2 0x40 /* Enables Map2 */
+#define CCR3_MAPEN3 0x80 /* Enables Map3 */
+
+#define CCR4 0xe8
+#define CCR4_IOMASK 0x07
+#define CCR4_MEM 0x08 /* Enables momory bypassing */
+#define CCR4_DTE 0x10 /* Enables directory table entry cache */
+#define CCR4_FASTFPE 0x20 /* Fast FPU exception */
+#define CCR4_CPUID 0x80 /* Enables CPUID instruction */
+
+#define CCR5 0xe9
+#define CCR5_WT_ALLOC 0x01 /* Write-through allocate */
+#define CCR5_SLOP 0x02 /* LOOP instruction slowed down */
+#define CCR5_LBR1 0x10 /* Local bus region 1 */
+#define CCR5_ARREN 0x20 /* Enables ARR region */
+
+#define CCR6 0xea
+
+#define CCR7 0xeb
+
+/* Performance Control Register (5x86 only). */
+#define PCR0 0x20
+#define PCR0_RSTK 0x01 /* Enables return stack */
+#define PCR0_BTB 0x02 /* Enables branch target buffer */
+#define PCR0_LOOP 0x04 /* Enables loop */
+#define PCR0_AIS 0x08 /* Enables all instrcutions stalled to
+ serialize pipe. */
+#define PCR0_MLR 0x10 /* Enables reordering of misaligned loads */
+#define PCR0_BTBRT 0x40 /* Enables BTB test register. */
+#define PCR0_LSSER 0x80 /* Disable reorder */
+
+/* Device Identification Registers */
+#define DIR0 0xfe
+#define DIR1 0xff
+
+/*
+ * Machine Check register constants.
+ */
+#define MCG_CAP_COUNT 0x000000ff
+#define MCG_CAP_CTL_P 0x00000100
+#define MCG_CAP_EXT_P 0x00000200
+#define MCG_CAP_CMCI_P 0x00000400
+#define MCG_CAP_TES_P 0x00000800
+#define MCG_CAP_EXT_CNT 0x00ff0000
+#define MCG_CAP_SER_P 0x01000000
+#define MCG_STATUS_RIPV 0x00000001
+#define MCG_STATUS_EIPV 0x00000002
+#define MCG_STATUS_MCIP 0x00000004
+#define MCG_CTL_ENABLE 0xffffffffffffffff
+#define MCG_CTL_DISABLE 0x0000000000000000
+#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4)
+#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
+#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
+#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
+#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
+#define MC_STATUS_MCA_ERROR 0x000000000000ffff
+#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000
+#define MC_STATUS_OTHER_INFO 0x01ffffff00000000
+#define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_CMCI_P */
+#define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */
+#define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_TES_P */
+#define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_TES_P */
+#define MC_STATUS_PCC 0x0200000000000000
+#define MC_STATUS_ADDRV 0x0400000000000000
+#define MC_STATUS_MISCV 0x0800000000000000
+#define MC_STATUS_EN 0x1000000000000000
+#define MC_STATUS_UC 0x2000000000000000
+#define MC_STATUS_OVER 0x4000000000000000
+#define MC_STATUS_VAL 0x8000000000000000
+#define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */
+#define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */
+#define MC_CTL2_THRESHOLD 0x0000000000007fff
+#define MC_CTL2_CMCI_EN 0x0000000040000000
+#define MC_AMDNB_BANK 4
+#define MC_MISC_AMD_VAL 0x8000000000000000 /* Counter presence valid */
+#define MC_MISC_AMD_CNTP 0x4000000000000000 /* Counter present */
+#define MC_MISC_AMD_LOCK 0x2000000000000000 /* Register locked */
+#define MC_MISC_AMD_INTP 0x1000000000000000 /* Int. type can generate interrupts */
+#define MC_MISC_AMD_LVT_MASK 0x00f0000000000000 /* Extended LVT offset */
+#define MC_MISC_AMD_LVT_SHIFT 52
+#define MC_MISC_AMD_CNTEN 0x0008000000000000 /* Counter enabled */
+#define MC_MISC_AMD_INT_MASK 0x0006000000000000 /* Interrupt type */
+#define MC_MISC_AMD_INT_LVT 0x0002000000000000 /* Interrupt via Extended LVT */
+#define MC_MISC_AMD_INT_SMI 0x0004000000000000 /* SMI */
+#define MC_MISC_AMD_OVERFLOW 0x0001000000000000 /* Counter overflow */
+#define MC_MISC_AMD_CNT_MASK 0x00000fff00000000 /* Counter value */
+#define MC_MISC_AMD_CNT_SHIFT 32
+#define MC_MISC_AMD_CNT_MAX 0xfff
+#define MC_MISC_AMD_PTR_MASK 0x00000000ff000000 /* Pointer to additional registers */
+#define MC_MISC_AMD_PTR_SHIFT 24
+
+/* AMD Scalable MCA */
+#define MSR_SMCA_MC0_CTL 0xc0002000
+#define MSR_SMCA_MC0_STATUS 0xc0002001
+#define MSR_SMCA_MC0_ADDR 0xc0002002
+#define MSR_SMCA_MC0_MISC0 0xc0002003
+#define MSR_SMCA_MC_CTL(x) (MSR_SMCA_MC0_CTL + 0x10 * (x))
+#define MSR_SMCA_MC_STATUS(x) (MSR_SMCA_MC0_STATUS + 0x10 * (x))
+#define MSR_SMCA_MC_ADDR(x) (MSR_SMCA_MC0_ADDR + 0x10 * (x))
+#define MSR_SMCA_MC_MISC(x) (MSR_SMCA_MC0_MISC0 + 0x10 * (x))
+
+/*
+ * The following four 3-byte registers control the non-cacheable regions.
+ * These registers must be written as three separate bytes.
+ *
+ * NCRx+0: A31-A24 of starting address
+ * NCRx+1: A23-A16 of starting address
+ * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
+ *
+ * The non-cacheable region's starting address must be aligned to the
+ * size indicated by the NCR_SIZE_xx field.
+ */
+#define NCR1 0xc4
+#define NCR2 0xc7
+#define NCR3 0xca
+#define NCR4 0xcd
+
+#define NCR_SIZE_0K 0
+#define NCR_SIZE_4K 1
+#define NCR_SIZE_8K 2
+#define NCR_SIZE_16K 3
+#define NCR_SIZE_32K 4
+#define NCR_SIZE_64K 5
+#define NCR_SIZE_128K 6
+#define NCR_SIZE_256K 7
+#define NCR_SIZE_512K 8
+#define NCR_SIZE_1M 9
+#define NCR_SIZE_2M 10
+#define NCR_SIZE_4M 11
+#define NCR_SIZE_8M 12
+#define NCR_SIZE_16M 13
+#define NCR_SIZE_32M 14
+#define NCR_SIZE_4G 15
+
+/*
+ * The address region registers are used to specify the location and
+ * size for the eight address regions.
+ *
+ * ARRx + 0: A31-A24 of start address
+ * ARRx + 1: A23-A16 of start address
+ * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
+ */
+#define ARR0 0xc4
+#define ARR1 0xc7
+#define ARR2 0xca
+#define ARR3 0xcd
+#define ARR4 0xd0
+#define ARR5 0xd3
+#define ARR6 0xd6
+#define ARR7 0xd9
+
+#define ARR_SIZE_0K 0
+#define ARR_SIZE_4K 1
+#define ARR_SIZE_8K 2
+#define ARR_SIZE_16K 3
+#define ARR_SIZE_32K 4
+#define ARR_SIZE_64K 5
+#define ARR_SIZE_128K 6
+#define ARR_SIZE_256K 7
+#define ARR_SIZE_512K 8
+#define ARR_SIZE_1M 9
+#define ARR_SIZE_2M 10
+#define ARR_SIZE_4M 11
+#define ARR_SIZE_8M 12
+#define ARR_SIZE_16M 13
+#define ARR_SIZE_32M 14
+#define ARR_SIZE_4G 15
+
+/*
+ * The region control registers specify the attributes associated with
+ * the ARRx addres regions.
+ */
+#define RCR0 0xdc
+#define RCR1 0xdd
+#define RCR2 0xde
+#define RCR3 0xdf
+#define RCR4 0xe0
+#define RCR5 0xe1
+#define RCR6 0xe2
+#define RCR7 0xe3
+
+#define RCR_RCD 0x01 /* Disables caching for ARRx (x = 0-6). */
+#define RCR_RCE 0x01 /* Enables caching for ARR7. */
+#define RCR_WWO 0x02 /* Weak write ordering. */
+#define RCR_WL 0x04 /* Weak locking. */
+#define RCR_WG 0x08 /* Write gathering. */
+#define RCR_WT 0x10 /* Write-through. */
+#define RCR_NLB 0x20 /* LBA# pin is not asserted. */
+
+/* AMD Write Allocate Top-Of-Memory and Control Register */
+#define AMD_WT_ALLOC_TME 0x40000 /* top-of-memory enable */
+#define AMD_WT_ALLOC_PRE 0x20000 /* programmable range enable */
+#define AMD_WT_ALLOC_FRE 0x10000 /* fixed (A0000-FFFFF) range enable */
+
+/* AMD64 MSR's */
+#define MSR_EFER 0xc0000080 /* extended features */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target/cs/ss */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target rip */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target rip */
+#define MSR_SF_MASK 0xc0000084 /* syscall flags mask */
+#define MSR_FSBASE 0xc0000100 /* base address of the %fs "segment" */
+#define MSR_GSBASE 0xc0000101 /* base address of the %gs "segment" */
+#define MSR_KGSBASE 0xc0000102 /* base address of the kernel %gs */
+#define MSR_TSC_AUX 0xc0000103
+#define MSR_PERFEVSEL0 0xc0010000
+#define MSR_PERFEVSEL1 0xc0010001
+#define MSR_PERFEVSEL2 0xc0010002
+#define MSR_PERFEVSEL3 0xc0010003
+#define MSR_K7_PERFCTR0 0xc0010004
+#define MSR_K7_PERFCTR1 0xc0010005
+#define MSR_K7_PERFCTR2 0xc0010006
+#define MSR_K7_PERFCTR3 0xc0010007
+#define MSR_SYSCFG 0xc0010010
+#define MSR_HWCR 0xc0010015
+#define MSR_IORRBASE0 0xc0010016
+#define MSR_IORRMASK0 0xc0010017
+#define MSR_IORRBASE1 0xc0010018
+#define MSR_IORRMASK1 0xc0010019
+#define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */
+#define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */
+#define MSR_NB_CFG1 0xc001001f /* NB configuration 1 */
+#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
+#define MSR_MC0_CTL_MASK 0xc0010044
+#define MSR_P_STATE_LIMIT 0xc0010061 /* P-state Current Limit Register */
+#define MSR_P_STATE_CONTROL 0xc0010062 /* P-state Control Register */
+#define MSR_P_STATE_STATUS 0xc0010063 /* P-state Status Register */
+#define MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */
+#define MSR_SMM_ADDR 0xc0010112 /* SMM TSEG base address */
+#define MSR_SMM_MASK 0xc0010113 /* SMM TSEG address mask */
+#define MSR_VM_CR 0xc0010114 /* SVM: feature control */
+#define MSR_VM_HSAVE_PA 0xc0010117 /* SVM: host save area address */
+#define MSR_AMD_CPUID07 0xc0011002 /* CPUID 07 %ebx override */
+#define MSR_EXTFEATURES 0xc0011005 /* Extended CPUID Features override */
+#define MSR_LS_CFG 0xc0011020
+#define MSR_IC_CFG 0xc0011021 /* Instruction Cache Configuration */
+
+/* MSR_VM_CR related */
+#define VM_CR_SVMDIS 0x10 /* SVM: disabled by BIOS */
+
+/* VIA ACE crypto featureset: for via_feature_rng */
+#define VIA_HAS_RNG 1 /* cpu has RNG */
+
+/* VIA ACE crypto featureset: for via_feature_xcrypt */
+#define VIA_HAS_AES 1 /* cpu has AES */
+#define VIA_HAS_SHA 2 /* cpu has SHA1 & SHA256 */
+#define VIA_HAS_MM 4 /* cpu has RSA instructions */
+#define VIA_HAS_AESCTR 8 /* cpu has AES-CTR instructions */
+
+/* Centaur Extended Feature flags */
+#define VIA_CPUID_HAS_RNG 0x000004
+#define VIA_CPUID_DO_RNG 0x000008
+#define VIA_CPUID_HAS_ACE 0x000040
+#define VIA_CPUID_DO_ACE 0x000080
+#define VIA_CPUID_HAS_ACE2 0x000100
+#define VIA_CPUID_DO_ACE2 0x000200
+#define VIA_CPUID_HAS_PHE 0x000400
+#define VIA_CPUID_DO_PHE 0x000800
+#define VIA_CPUID_HAS_PMM 0x001000
+#define VIA_CPUID_DO_PMM 0x002000
+
+/* VIA ACE xcrypt-* instruction context control options */
+#define VIA_CRYPT_CWLO_ROUND_M 0x0000000f
+#define VIA_CRYPT_CWLO_ALG_M 0x00000070
+#define VIA_CRYPT_CWLO_ALG_AES 0x00000000
+#define VIA_CRYPT_CWLO_KEYGEN_M 0x00000080
+#define VIA_CRYPT_CWLO_KEYGEN_HW 0x00000000
+#define VIA_CRYPT_CWLO_KEYGEN_SW 0x00000080
+#define VIA_CRYPT_CWLO_NORMAL 0x00000000
+#define VIA_CRYPT_CWLO_INTERMEDIATE 0x00000100
+#define VIA_CRYPT_CWLO_ENCRYPT 0x00000000
+#define VIA_CRYPT_CWLO_DECRYPT 0x00000200
+#define VIA_CRYPT_CWLO_KEY128 0x0000000a /* 128bit, 10 rds */
+#define VIA_CRYPT_CWLO_KEY192 0x0000040c /* 192bit, 12 rds */
+#define VIA_CRYPT_CWLO_KEY256 0x0000080e /* 256bit, 15 rds */
+
+#endif /* !_MACHINE_SPECIALREG_H_ */
diff --git a/usr/src/lib/libvmm/Makefile.com b/usr/src/lib/libvmm/Makefile.com
index bef555aed3..ab0e7bd2b8 100644
--- a/usr/src/lib/libvmm/Makefile.com
+++ b/usr/src/lib/libvmm/Makefile.com
@@ -27,8 +27,8 @@ LIBS = $(DYNLIB)
# The FreeBSD compat and contrib headers need to be first in the search
# path, hence we can't just append them to CPPFLAGS. So we assign CPPFLAGS
# directly and pull in CPPFLAGS.master at the appropriate place.
-CPPFLAGS = -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
- -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64 \
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
$(CPPFLAGS.master) -I$(SRC)/uts/i86pc
LDLIBS += -lc -lvmmapi
diff --git a/usr/src/lib/libvmmapi/Makefile.com b/usr/src/lib/libvmmapi/Makefile.com
index 1653e8619c..4c1c74d5a0 100644
--- a/usr/src/lib/libvmmapi/Makefile.com
+++ b/usr/src/lib/libvmmapi/Makefile.com
@@ -29,7 +29,7 @@ SRCDIR = ../common
LIBS = $(DYNLIB)
-CPPFLAGS = -I$(COMPAT)/freebsd -I$(CONTRIB)/freebsd \
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
$(CPPFLAGS.master) -I$(SRC)/uts/i86pc
# not linted
@@ -41,7 +41,7 @@ LDLIBS += -lc
all: $(LIBS)
-pics/%.o: $(CONTRIB)/freebsd/lib/libutil/%.c
+pics/%.o: $(CONTRIB)/bhyve/lib/libutil/%.c
$(COMPILE.c) -o $@ $<
$(POST_PROCESS_O)
diff --git a/usr/src/lib/libvmmapi/amd64/Makefile b/usr/src/lib/libvmmapi/amd64/Makefile
index b5cac1ffce..d777b9d456 100644
--- a/usr/src/lib/libvmmapi/amd64/Makefile
+++ b/usr/src/lib/libvmmapi/amd64/Makefile
@@ -16,6 +16,6 @@
include ../Makefile.com
include ../../Makefile.lib.64
-CPPFLAGS += -I$(COMPAT)/freebsd/amd64 -I$(CONTRIB)/freebsd/amd64
+CPPFLAGS += -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64
install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/pkg/manifests/system-file-system-zfs-tests.mf b/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
index 4e2b5f1add..d39248a2e4 100644
--- a/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
+++ b/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
@@ -22,6 +22,7 @@
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+# Copyright 2020 Joyent, Inc.
#
set name=pkg.fmri value=pkg:/system/file-system/zfs/tests@$(PKGVERS)
@@ -53,6 +54,7 @@ $(i386_ONLY)file path=usr/bin/$(ARCH32)/zlook mode=0555
$(i386_ONLY)file path=usr/bin/$(ARCH32)/ztest mode=0555
file path=usr/bin/$(ARCH64)/zlook mode=0555
file path=usr/bin/$(ARCH64)/ztest mode=0555
+file path=usr/bin/raidz_test mode=0555
file path=usr/bin/zloop mode=0555
file path=usr/include/sys/fs/zut.h
file path=usr/lib/devfsadm/linkmod/SUNW_zut_link.so group=sys
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 3b4bcc373e..0f05230a9b 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -143,6 +143,7 @@ dir path=opt/zfs-tests/tests/functional/poolversion
dir path=opt/zfs-tests/tests/functional/privilege
dir path=opt/zfs-tests/tests/functional/projectquota
dir path=opt/zfs-tests/tests/functional/quota
+dir path=opt/zfs-tests/tests/functional/raidz
dir path=opt/zfs-tests/tests/functional/redundancy
dir path=opt/zfs-tests/tests/functional/refquota
dir path=opt/zfs-tests/tests/functional/refreserv
@@ -2800,6 +2801,10 @@ file path=opt/zfs-tests/tests/functional/quota/quota_004_pos mode=0555
file path=opt/zfs-tests/tests/functional/quota/quota_005_pos mode=0555
file path=opt/zfs-tests/tests/functional/quota/quota_006_neg mode=0555
file path=opt/zfs-tests/tests/functional/quota/setup mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/cleanup mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/raidz_001_neg mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/raidz_002_pos mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/setup mode=0555
file path=opt/zfs-tests/tests/functional/redundancy/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/redundancy/redundancy.cfg mode=0444
file path=opt/zfs-tests/tests/functional/redundancy/redundancy.kshlib \
diff --git a/usr/src/req.flg b/usr/src/req.flg
index aaf37ddd97..f1e17bbd52 100644
--- a/usr/src/req.flg
+++ b/usr/src/req.flg
@@ -34,5 +34,3 @@ echo_file usr/src/Makefile.master.64
echo_file usr/src/Makefile.msg.targ
echo_file usr/src/Makefile.psm
echo_file usr/src/Makefile.psm.targ
-
-find_files "s.*" usr/contrib/freebsd
diff --git a/usr/src/test/zfs-tests/include/commands.cfg b/usr/src/test/zfs-tests/include/commands.cfg
index 050d6caba7..f9b0bdf7ac 100644
--- a/usr/src/test/zfs-tests/include/commands.cfg
+++ b/usr/src/test/zfs-tests/include/commands.cfg
@@ -11,7 +11,7 @@
#
# Copyright (c) 2016, 2018 by Delphix. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
#
@@ -89,6 +89,7 @@ export USR_BIN_FILES='awk
ps
pwd
python
+ raidz_test
readlink
rm
rmdir
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index f7dc35928e..656e274985 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -582,6 +582,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index 4dbfde3fb0..54848ba403 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -582,6 +582,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index 21473782c2..6ae3857a0a 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -582,6 +582,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run
index c8d6dc4783..47059b0b52 100644
--- a/usr/src/test/zfs-tests/runfiles/smartos.run
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run
@@ -510,6 +510,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/refquota]
tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos',
'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg']
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/Makefile b/usr/src/test/zfs-tests/tests/functional/raidz/Makefile
new file mode 100644
index 0000000000..5d0bf4506a
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/zfs-tests
+TARGETDIR = $(ROOTOPTPKG)/tests/functional/raidz
+
+include $(SRC)/test/zfs-tests/Makefile.com
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh
new file mode 100755
index 0000000000..c92c54c270
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+# default_cleanup
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh
new file mode 100755
index 0000000000..4c105b9411
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh
@@ -0,0 +1,38 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Call the raidz_test tool with -T options to test the infrastructure.
+# This option should make raidz_test to return non 0.
+#
+
+log_mustnot raidz_test -T
+
+log_pass "raidz_test detects errors as espected."
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh
new file mode 100755
index 0000000000..e238a881b0
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh
@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Call the raidz_test tool with -S to test all supported raidz
+# implementations. This options will test several raidz block geometries
+# and several zio parameters that affect raidz block layout. Data
+# reconstruction performs all combinations of failed disks. Wall time
+# is set to 5min, but actual runtime might be longer.
+#
+
+log_must raidz_test -S -t 300
+
+log_pass "raidz_test parameter sweep test succeeded."
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh
new file mode 100755
index 0000000000..4e155d24d5
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh
@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+log_pass
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 720701371d..ff108e47e5 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1434,6 +1434,8 @@ ZFS_COMMON_OBJS += \
vdev_missing.o \
vdev_queue.o \
vdev_raidz.o \
+ vdev_raidz_math.o \
+ vdev_raidz_math_scalar.o \
vdev_removal.o \
vdev_root.o \
vdev_trim.o \
diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c
index 66a7a49d73..e980c82eee 100644
--- a/usr/src/uts/common/fs/zfs/abd.c
+++ b/usr/src/uts/common/fs/zfs/abd.c
@@ -12,6 +12,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 by Delphix. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -764,7 +765,8 @@ abd_iter_map(struct abd_iter *aiter)
} else {
size_t index = abd_iter_scatter_chunk_index(aiter);
offset = abd_iter_scatter_chunk_offset(aiter);
- aiter->iter_mapsize = zfs_abd_chunk_size - offset;
+ aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
+ aiter->iter_abd->abd_size - aiter->iter_pos);
paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index];
}
aiter->iter_mapaddr = (char *)paddr + offset;
@@ -993,3 +995,180 @@ abd_cmp(abd_t *dabd, abd_t *sabd, size_t size)
{
return (abd_iterate_func2(dabd, sabd, 0, 0, size, abd_cmp_cb, NULL));
}
+
+/*
+ * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @dabd data ABD. Can be NULL (in this case @dsize = 0)
+ * @func_raidz_gen should be implemented so that its behaviour
+ * is the same when taking linear and when taking scatter
+ */
+void
+abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t))
+{
+ int i;
+ ssize_t len, dlen;
+ struct abd_iter caiters[3];
+ struct abd_iter daiter = {0};
+ void *caddrs[3];
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++)
+ abd_iter_init(&caiters[i], cabds[i]);
+
+ if (dabd)
+ abd_iter_init(&daiter, dabd);
+
+ ASSERT3S(dsize, >=, 0);
+
+#ifdef _KERNEL
+ kpreempt_disable();
+#endif
+ while (csize > 0) {
+ len = csize;
+
+ if (dabd && dsize > 0)
+ abd_iter_map(&daiter);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&caiters[i]);
+ caddrs[i] = caiters[i].iter_mapaddr;
+ }
+
+ switch (parity) {
+ case 3:
+ len = MIN(caiters[2].iter_mapsize, len);
+ /* falls through */
+ case 2:
+ len = MIN(caiters[1].iter_mapsize, len);
+ /* falls through */
+ case 1:
+ len = MIN(caiters[0].iter_mapsize, len);
+ }
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+
+ if (dabd && dsize > 0) {
+ /* this needs precise iter.length */
+ len = MIN(daiter.iter_mapsize, len);
+ len = MIN(dsize, len);
+ dlen = len;
+ } else
+ dlen = 0;
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function likely will not do well if each
+ * segment except the last one is not multiple of 512 (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&caiters[i]);
+ abd_iter_advance(&caiters[i], len);
+ }
+
+ if (dabd && dsize > 0) {
+ abd_iter_unmap(&daiter);
+ abd_iter_advance(&daiter, dlen);
+ dsize -= dlen;
+ }
+
+ csize -= len;
+
+ ASSERT3S(dsize, >=, 0);
+ ASSERT3S(csize, >=, 0);
+ }
+#ifdef _KERNEL
+ kpreempt_enable();
+#endif
+}
+
+/*
+ * Iterate over code ABDs and data reconstruction target ABDs and call
+ * @func_raidz_rec. Function maps at most 6 pages atomically.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @tabds rec target ABDs, at most 3
+ * @tsize size of data target columns
+ * @func_raidz_rec expects syndrome data in target columns. Function
+ * reconstructs data and overwrites target columns.
+ */
+void
+abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul)
+{
+ int i;
+ ssize_t len;
+ struct abd_iter citers[3];
+ struct abd_iter xiters[3];
+ void *caddrs[3], *xaddrs[3];
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_init(&citers[i], cabds[i]);
+ abd_iter_init(&xiters[i], tabds[i]);
+ }
+
+#ifdef _KERNEL
+ kpreempt_disable();
+#endif
+ while (tsize > 0) {
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&citers[i]);
+ abd_iter_map(&xiters[i]);
+ caddrs[i] = citers[i].iter_mapaddr;
+ xaddrs[i] = xiters[i].iter_mapaddr;
+ }
+
+ len = tsize;
+ switch (parity) {
+ case 3:
+ len = MIN(xiters[2].iter_mapsize, len);
+ len = MIN(citers[2].iter_mapsize, len);
+ /* falls through */
+ case 2:
+ len = MIN(xiters[1].iter_mapsize, len);
+ len = MIN(citers[1].iter_mapsize, len);
+ /* falls through */
+ case 1:
+ len = MIN(xiters[0].iter_mapsize, len);
+ len = MIN(citers[0].iter_mapsize, len);
+ }
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function likely will not do well if each
+ * segment except the last one is not multiple of 512 (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_rec(xaddrs, len, caddrs, mul);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&xiters[i]);
+ abd_iter_unmap(&citers[i]);
+ abd_iter_advance(&xiters[i], len);
+ abd_iter_advance(&citers[i], len);
+ }
+
+ tsize -= len;
+ ASSERT3S(tsize, >=, 0);
+ }
+#ifdef _KERNEL
+ kpreempt_enable();
+#endif
+}
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 9dac4e2ddc..c9ceeb6873 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -44,6 +44,7 @@
#include <sys/vdev_impl.h>
#include <sys/vdev_initialize.h>
#include <sys/vdev_trim.h>
+#include <sys/vdev_raidz.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
@@ -2253,6 +2254,7 @@ spa_init(int mode)
zil_init();
vdev_cache_stat_init();
vdev_mirror_stat_init();
+ vdev_raidz_math_init();
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
@@ -2271,6 +2273,7 @@ spa_fini(void)
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
+ vdev_raidz_math_fini();
zil_fini();
dmu_fini();
zio_fini();
diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h
index 621635933e..23699c0420 100644
--- a/usr/src/uts/common/fs/zfs/sys/abd.h
+++ b/usr/src/uts/common/fs/zfs/sys/abd.h
@@ -103,6 +103,15 @@ int abd_cmp(abd_t *, abd_t *, size_t);
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
+void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t));
+void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul);
+
/*
* Wrappers for calls with offsets of 0
*/
diff --git a/usr/src/uts/common/fs/zfs/sys/simd.h b/usr/src/uts/common/fs/zfs/sys/simd.h
new file mode 100644
index 0000000000..4494c7d02a
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/simd.h
@@ -0,0 +1,40 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _SIMD_H
+#define _SIMD_H
+
+#if defined(__amd64__) || defined(__i386__)
+
+/* Temporararily disabled until subsequent work to turn this on. */
+#define kfpu_allowed() 0
+#define kfpu_initialize(tsk) do {} while (0)
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#define kfpu_init() (0)
+#define kfpu_fini() do {} while (0)
+
+#else
+
+/* Non-x86 CPUs currently always disallow kernel FPU support */
+#define kfpu_allowed() 0
+#define kfpu_initialize(tsk) do {} while (0)
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#define kfpu_init() (0)
+#define kfpu_fini() do {} while (0)
+#endif
+
+#endif /* _SIMD_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
new file mode 100644
index 0000000000..bf5c840139
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _SYS_VDEV_RAIDZ_H
+#define _SYS_VDEV_RAIDZ_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zio;
+struct raidz_map;
+#if !defined(_KERNEL)
+struct kernel_param {};
+#endif
+
+/*
+ * vdev_raidz interface
+ */
+struct raidz_map * vdev_raidz_map_alloc(struct zio *, uint64_t,
+ uint64_t, uint64_t);
+void vdev_raidz_map_free(struct raidz_map *);
+void vdev_raidz_generate_parity(struct raidz_map *);
+int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+
+/*
+ * vdev_raidz_math interface
+ */
+void vdev_raidz_math_init(void);
+void vdev_raidz_math_fini(void);
+const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
+int vdev_raidz_math_generate(struct raidz_map *);
+int vdev_raidz_math_reconstruct(struct raidz_map *, const int *,
+ const int *, const int);
+int vdev_raidz_impl_set(const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_RAIDZ_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h
new file mode 100644
index 0000000000..d8defc04ea
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h
@@ -0,0 +1,351 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_H
+#define _VDEV_RAIDZ_H
+
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <sys/kstat.h>
+#include <sys/abd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CODE_P (0U)
+#define CODE_Q (1U)
+#define CODE_R (2U)
+
+#define PARITY_P (1U)
+#define PARITY_PQ (2U)
+#define PARITY_PQR (3U)
+
+#define TARGET_X (0U)
+#define TARGET_Y (1U)
+#define TARGET_Z (2U)
+
+/*
+ * Parity generation methods indexes
+ */
+enum raidz_math_gen_op {
+ RAIDZ_GEN_P = 0,
+ RAIDZ_GEN_PQ,
+ RAIDZ_GEN_PQR,
+ RAIDZ_GEN_NUM = 3
+};
+/*
+ * Data reconstruction methods indexes
+ */
+enum raidz_rec_op {
+ RAIDZ_REC_P = 0,
+ RAIDZ_REC_Q,
+ RAIDZ_REC_R,
+ RAIDZ_REC_PQ,
+ RAIDZ_REC_PR,
+ RAIDZ_REC_QR,
+ RAIDZ_REC_PQR,
+ RAIDZ_REC_NUM = 7
+};
+
+extern const char *raidz_gen_name[RAIDZ_GEN_NUM];
+extern const char *raidz_rec_name[RAIDZ_REC_NUM];
+
+/*
+ * Methods used to define raidz implementation
+ *
+ * @raidz_gen_f Parity generation function
+ * @par1 pointer to raidz_map
+ * @raidz_rec_f Data reconstruction function
+ * @par1 pointer to raidz_map
+ * @par2 array of reconstruction targets
+ * @will_work_f Function returns TRUE if impl. is supported on the system
+ * @init_impl_f Function is called once on init
+ * @fini_impl_f Function is called once on fini
+ */
+typedef void (*raidz_gen_f)(void *);
+typedef int (*raidz_rec_f)(void *, const int *);
+typedef boolean_t (*will_work_f)(void);
+typedef void (*init_impl_f)(void);
+typedef void (*fini_impl_f)(void);
+
+#define RAIDZ_IMPL_NAME_MAX (20)
+
+typedef struct raidz_impl_ops {
+ init_impl_f init;
+ fini_impl_f fini;
+ raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */
+ raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */
+ will_work_f is_supported; /* Support check function */
+ char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
+} raidz_impl_ops_t;
+
+typedef struct raidz_col {
+ size_t rc_devidx; /* child device index for I/O */
+ size_t rc_offset; /* device offset */
+ size_t rc_size; /* I/O size */
+ abd_t *rc_abd; /* I/O data */
+ void *rc_gdata; /* used to store the "good" version */
+ int rc_error; /* I/O error for this device */
+ unsigned int rc_tried; /* Did we attempt this I/O column? */
+ unsigned int rc_skipped; /* Did we skip this I/O column? */
+} raidz_col_t;
+
+typedef struct raidz_map {
+ size_t rm_cols; /* Regular column count */
+ size_t rm_scols; /* Count including skipped columns */
+ size_t rm_bigcols; /* Number of oversized columns */
+ size_t rm_asize; /* Actual total I/O size */
+ size_t rm_missingdata; /* Count of missing data devices */
+ size_t rm_missingparity; /* Count of missing parity devices */
+ size_t rm_firstdatacol; /* First data column/parity count */
+ size_t rm_nskip; /* Skipped sectors for padding */
+ size_t rm_skipstart; /* Column index of padding start */
+ void *rm_abd_copy; /* rm_asize-buffer of copied data */
+ size_t rm_reports; /* # of referencing checksum reports */
+ unsigned int rm_freed; /* map no longer has referencing ZIO */
+ unsigned int rm_ecksuminjected; /* checksum error was injected */
+ const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
+ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
+} raidz_map_t;
+
+#define RAIDZ_ORIGINAL_IMPL (INT_MAX)
+
+extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
+
+/*
+ * Commonly used raidz_map helpers
+ *
+ * raidz_parity Returns parity of the RAIDZ block
+ * raidz_ncols Returns number of columns the block spans
+ * raidz_nbigcols Returns number of big columns columns
+ * raidz_col_p Returns pointer to a column
+ * raidz_col_size Returns size of a column
+ * raidz_big_size Returns size of big columns
+ * raidz_short_size Returns size of short columns
+ */
+#define raidz_parity(rm) ((rm)->rm_firstdatacol)
+#define raidz_ncols(rm) ((rm)->rm_cols)
+#define raidz_nbigcols(rm) ((rm)->rm_bigcols)
+#define raidz_col_p(rm, c) ((rm)->rm_col + (c))
+#define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size)
+#define raidz_big_size(rm) (raidz_col_size(rm, CODE_P))
+#define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1))
+
+/*
+ * Macro defines an RAIDZ parity generation method
+ *
+ * @code parity the function produce
+ * @impl name of the implementation
+ */
+#define _RAIDZ_GEN_WRAP(code, impl) \
+static void \
+impl ## _gen_ ## code(void *rmp) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *) rmp; \
+ raidz_generate_## code ## _impl(rm); \
+}
+
+/*
+ * Macro defines an RAIDZ data reconstruction method
+ *
+ * @code parity the function produce
+ * @impl name of the implementation
+ */
+#define _RAIDZ_REC_WRAP(code, impl) \
+static int \
+impl ## _rec_ ## code(void *rmp, const int *tgtidx) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *) rmp; \
+ return (raidz_reconstruct_## code ## _impl(rm, tgtidx)); \
+}
+
+/*
+ * Define all gen methods for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_GEN_METHODS(impl) \
+ _RAIDZ_GEN_WRAP(p, impl); \
+ _RAIDZ_GEN_WRAP(pq, impl); \
+ _RAIDZ_GEN_WRAP(pqr, impl)
+
+/*
+ * Define all rec functions for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_REC_METHODS(impl) \
+ _RAIDZ_REC_WRAP(p, impl); \
+ _RAIDZ_REC_WRAP(q, impl); \
+ _RAIDZ_REC_WRAP(r, impl); \
+ _RAIDZ_REC_WRAP(pq, impl); \
+ _RAIDZ_REC_WRAP(pr, impl); \
+ _RAIDZ_REC_WRAP(qr, impl); \
+ _RAIDZ_REC_WRAP(pqr, impl)
+
+#define RAIDZ_GEN_METHODS(impl) \
+{ \
+ [RAIDZ_GEN_P] = & impl ## _gen_p, \
+ [RAIDZ_GEN_PQ] = & impl ## _gen_pq, \
+ [RAIDZ_GEN_PQR] = & impl ## _gen_pqr \
+}
+
+#define RAIDZ_REC_METHODS(impl) \
+{ \
+ [RAIDZ_REC_P] = & impl ## _rec_p, \
+ [RAIDZ_REC_Q] = & impl ## _rec_q, \
+ [RAIDZ_REC_R] = & impl ## _rec_r, \
+ [RAIDZ_REC_PQ] = & impl ## _rec_pq, \
+ [RAIDZ_REC_PR] = & impl ## _rec_pr, \
+ [RAIDZ_REC_QR] = & impl ## _rec_qr, \
+ [RAIDZ_REC_PQR] = & impl ## _rec_pqr \
+}
+
+
+typedef struct raidz_impl_kstat {
+ uint64_t gen[RAIDZ_GEN_NUM]; /* gen method speed kiB/s */
+ uint64_t rec[RAIDZ_REC_NUM]; /* rec method speed kiB/s */
+} raidz_impl_kstat_t;
+
+/*
+ * Enumerate various multiplication constants
+ * used in reconstruction methods
+ */
+typedef enum raidz_mul_info {
+ /* Reconstruct Q */
+ MUL_Q_X = 0,
+ /* Reconstruct R */
+ MUL_R_X = 0,
+ /* Reconstruct PQ */
+ MUL_PQ_X = 0,
+ MUL_PQ_Y = 1,
+ /* Reconstruct PR */
+ MUL_PR_X = 0,
+ MUL_PR_Y = 1,
+ /* Reconstruct QR */
+ MUL_QR_XQ = 0,
+ MUL_QR_X = 1,
+ MUL_QR_YQ = 2,
+ MUL_QR_Y = 3,
+ /* Reconstruct PQR */
+ MUL_PQR_XP = 0,
+ MUL_PQR_XQ = 1,
+ MUL_PQR_XR = 2,
+ MUL_PQR_YU = 3,
+ MUL_PQR_YP = 4,
+ MUL_PQR_YQ = 5,
+
+ MUL_CNT = 6
+} raidz_mul_info_t;
+
+/*
+ * Powers of 2 in the Galois field.
+ */
+extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256)));
+/* Logs of 2 in the Galois field defined above. */
+extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256)));
+
+/*
+ * Multiply a given number by 2 raised to the given power.
+ */
+static inline uint8_t
+vdev_raidz_exp2(const uint8_t a, const unsigned exp)
+{
+ if (a == 0)
+ return (0);
+
+ return (vdev_raidz_pow2[(exp + (unsigned) vdev_raidz_log2[a]) % 255]);
+}
+
+/*
+ * Galois Field operations.
+ *
+ * gf_exp2 - computes 2 raised to the given power
+ * gf_exp2 - computes 4 raised to the given power
+ * gf_mul - multiplication
+ * gf_div - division
+ * gf_inv - multiplicative inverse
+ */
+typedef unsigned gf_t;
+typedef unsigned gf_log_t;
+
+static inline gf_t
+gf_mul(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ if (a == 0 || b == 0)
+ return (0);
+
+ logsum = (gf_log_t) vdev_raidz_log2[a] + (gf_log_t) vdev_raidz_log2[b];
+
+ return ((gf_t) vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_div(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(b, >, 0);
+ if (a == 0)
+ return (0);
+
+ logsum = (gf_log_t) 255 + (gf_log_t) vdev_raidz_log2[a] -
+ (gf_log_t) vdev_raidz_log2[b];
+
+ return ((gf_t) vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_inv(const gf_t a)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(a, >, 0);
+
+ logsum = (gf_log_t) 255 - (gf_log_t) vdev_raidz_log2[a];
+
+ return ((gf_t) vdev_raidz_pow2[logsum]);
+}
+
+static inline gf_t
+gf_exp2(gf_log_t exp)
+{
+ return (vdev_raidz_pow2[exp % 255]);
+}
+
+static inline gf_t
+gf_exp4(gf_log_t exp)
+{
+ ASSERT3U(exp, <=, 255);
+ return ((gf_t) vdev_raidz_pow2[(2 * exp) % 255]);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDEV_RAIDZ_H */
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 10772d5265..e4db03ce89 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Gvozden Nešković. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -35,6 +36,8 @@
#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
#ifdef ZFS_DEBUG
#include <sys/vdev.h> /* For vdev_xlate() in vdev_raidz_io_verify() */
@@ -98,7 +101,7 @@
* R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1
* = ((...((D_0) * 4 + D_1) * 4 + ...) * 4 + D_n-2) * 4 + D_n-1
*
- * We chose 1, 2, and 4 as our generators because 1 corresponds to the trival
+ * We chose 1, 2, and 4 as our generators because 1 corresponds to the trivial
* XOR operation, and 2 and 4 can be computed quickly and generate linearly-
* independent coefficients. (There are no additional coefficients that have
* this property which is why the uncorrected Plank method breaks down.)
@@ -107,34 +110,6 @@
* or in concert to recover missing data columns.
*/
-typedef struct raidz_col {
- uint64_t rc_devidx; /* child device index for I/O */
- uint64_t rc_offset; /* device offset */
- uint64_t rc_size; /* I/O size */
- abd_t *rc_abd; /* I/O data */
- void *rc_gdata; /* used to store the "good" version */
- int rc_error; /* I/O error for this device */
- uint8_t rc_tried; /* Did we attempt this I/O column? */
- uint8_t rc_skipped; /* Did we skip this I/O column? */
-} raidz_col_t;
-
-typedef struct raidz_map {
- uint64_t rm_cols; /* Regular column count */
- uint64_t rm_scols; /* Count including skipped columns */
- uint64_t rm_bigcols; /* Number of oversized columns */
- uint64_t rm_asize; /* Actual total I/O size */
- uint64_t rm_missingdata; /* Count of missing data devices */
- uint64_t rm_missingparity; /* Count of missing parity devices */
- uint64_t rm_firstdatacol; /* First data column/parity count */
- uint64_t rm_nskip; /* Skipped sectors for padding */
- uint64_t rm_skipstart; /* Column index of padding start */
- abd_t *rm_abd_copy; /* rm_asize-buffer of copied data */
- uintptr_t rm_reports; /* # of referencing checksum reports */
- uint8_t rm_freed; /* map no longer has referencing ZIO */
- uint8_t rm_ecksuminjected; /* checksum error was injected */
- raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
-} raidz_map_t;
-
#define VDEV_RAIDZ_P 0
#define VDEV_RAIDZ_Q 1
#define VDEV_RAIDZ_R 2
@@ -153,7 +128,7 @@ typedef struct raidz_map {
(mask) = (x) & 0x8080808080808080ULL; \
(mask) = ((mask) << 1) - ((mask) >> 7); \
(x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
- ((mask) & 0x1d1d1d1d1d1d1d1d); \
+ ((mask) & 0x1d1d1d1d1d1d1d1dULL); \
}
#define VDEV_RAIDZ_64MUL_4(x, mask) \
@@ -164,104 +139,7 @@ typedef struct raidz_map {
#define VDEV_LABEL_OFFSET(x) (x + VDEV_LABEL_START_SIZE)
-/*
- * Force reconstruction to use the general purpose method.
- */
-int vdev_raidz_default_to_general;
-
-/* Powers of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_pow2[256] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
- 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
- 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
- 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
- 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
- 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
- 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
- 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
- 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
- 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
- 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
- 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
- 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
- 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
- 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
- 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
- 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
- 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
- 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
- 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
- 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
- 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
- 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
- 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
- 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
- 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
- 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
- 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
- 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
- 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
- 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
-};
-/* Logs of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_log2[256] = {
- 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
- 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
- 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
- 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
- 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
- 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
- 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
- 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
- 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
- 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
- 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
- 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
- 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
- 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
- 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
- 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
- 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
- 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
- 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
- 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
- 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
- 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
- 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
- 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
- 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
- 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
- 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
- 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
- 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
- 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
- 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
- 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
-};
-
-static void vdev_raidz_generate_parity(raidz_map_t *rm);
-
-/*
- * Multiply a given number by 2 raised to the given power.
- */
-static uint8_t
-vdev_raidz_exp2(uint_t a, int exp)
-{
- if (a == 0)
- return (0);
-
- ASSERT(exp >= 0);
- ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
-
- exp += vdev_raidz_log2[a];
- if (exp > 255)
- exp -= 255;
-
- return (vdev_raidz_pow2[exp]);
-}
-
-static void
+void
vdev_raidz_map_free(raidz_map_t *rm)
{
int c;
@@ -271,7 +149,6 @@ vdev_raidz_map_free(raidz_map_t *rm)
if (rm->rm_col[c].rc_gdata != NULL)
abd_free(rm->rm_col[c].rc_gdata);
-
}
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
@@ -311,7 +188,7 @@ static void
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
- size_t c = zcr->zcr_cbinfo;
+ const size_t c = zcr->zcr_cbinfo;
size_t x, offset;
const abd_t *good = NULL;
@@ -459,19 +336,19 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
* Divides the IO evenly across all child vdevs; usually, dcols is
* the number of children in the target vdev.
*/
-static raidz_map_t *
-vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
- uint64_t unit_shift, uint64_t dcols, uint64_t nparity)
+raidz_map_t *
+vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
+ uint64_t nparity)
{
raidz_map_t *rm;
/* The starting RAIDZ (parent) vdev sector of the block. */
- uint64_t b = offset >> unit_shift;
+ uint64_t b = zio->io_offset >> ashift;
/* The zio's size in units of the vdev's minimum sector size. */
- uint64_t s = size >> unit_shift;
+ uint64_t s = zio->io_size >> ashift;
/* The first column for this stripe. */
uint64_t f = b % dcols;
/* The starting byte offset on each child vdev. */
- uint64_t o = (b / dcols) << unit_shift;
+ uint64_t o = (b / dcols) << ashift;
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
uint64_t off = 0;
@@ -530,7 +407,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
coff = o;
if (col >= dcols) {
col -= dcols;
- coff += 1ULL << unit_shift;
+ coff += 1ULL << ashift;
}
rm->rm_col[c].rc_devidx = col;
rm->rm_col[c].rc_offset = coff;
@@ -543,29 +420,29 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
if (c >= acols)
rm->rm_col[c].rc_size = 0;
else if (c < bc)
- rm->rm_col[c].rc_size = (q + 1) << unit_shift;
+ rm->rm_col[c].rc_size = (q + 1) << ashift;
else
- rm->rm_col[c].rc_size = q << unit_shift;
+ rm->rm_col[c].rc_size = q << ashift;
asize += rm->rm_col[c].rc_size;
}
- ASSERT3U(asize, ==, tot << unit_shift);
- rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift);
+ ASSERT3U(asize, ==, tot << ashift);
+ rm->rm_asize = roundup(asize, (nparity + 1) << ashift);
rm->rm_nskip = roundup(tot, nparity + 1) - tot;
- ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift);
+ ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << ashift);
ASSERT3U(rm->rm_nskip, <=, nparity);
for (c = 0; c < rm->rm_firstdatacol; c++)
rm->rm_col[c].rc_abd =
abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
- rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0,
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0,
rm->rm_col[c].rc_size);
off = rm->rm_col[c].rc_size;
for (c = c + 1; c < acols; c++) {
- rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off,
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off,
rm->rm_col[c].rc_size);
off += rm->rm_col[c].rc_size;
}
@@ -573,7 +450,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
/*
* If all data stored spans all columns, there's a danger that parity
* will always be on the same device and, since parity isn't read
- * during normal operation, that that device's I/O bandwidth won't be
+ * during normal operation, that device's I/O bandwidth won't be
* used effectively. We therefore switch the parity every 1MB.
*
* ... at least that was, ostensibly, the theory. As a practical
@@ -593,7 +470,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
ASSERT(rm->rm_cols >= 2);
ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
- if (rm->rm_firstdatacol == 1 && (offset & (1ULL << 20))) {
+ if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
devidx = rm->rm_col[0].rc_devidx;
o = rm->rm_col[0].rc_offset;
rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
@@ -605,6 +482,9 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
rm->rm_skipstart = 1;
}
+ /* init RAIDZ parity ops */
+ rm->rm_ops = vdev_raidz_math_get_ops();
+
return (rm);
}
@@ -681,7 +561,6 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm)
p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
if (c == rm->rm_firstdatacol) {
- ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size);
abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
} else {
struct pqr_struct pqr = { p, NULL, NULL };
@@ -793,9 +672,13 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
* Generate RAID parity in the first virtual columns according to the number of
* parity columns available.
*/
-static void
+void
vdev_raidz_generate_parity(raidz_map_t *rm)
{
+ /* Generate using the new math implementation */
+ if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
+ return;
+
switch (rm->rm_firstdatacol) {
case 1:
vdev_raidz_generate_parity_p(rm);
@@ -873,8 +756,8 @@ vdev_raidz_reconst_q_post_func(void *buf, size_t size, void *private)
int cnt = size / sizeof (dst[0]);
for (int i = 0; i < cnt; i++, dst++, rq->q++) {
- *dst ^= *rq->q;
+ *dst ^= *rq->q;
int j;
uint8_t *b;
for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
@@ -1159,9 +1042,12 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
* ~~ ~~
* __ __
* | 1 1 1 1 1 1 1 1 |
+ * | 128 64 32 16 8 4 2 1 |
* | 19 205 116 29 64 16 4 1 |
* | 1 0 0 0 0 0 0 0 |
- * (V|I)' = | 0 0 0 1 0 0 0 0 |
+ * | 0 1 0 0 0 0 0 0 |
+ * (V|I)' = | 0 0 1 0 0 0 0 0 |
+ * | 0 0 0 1 0 0 0 0 |
* | 0 0 0 0 1 0 0 0 |
* | 0 0 0 0 0 1 0 0 |
* | 0 0 0 0 0 0 1 0 |
@@ -1385,8 +1271,8 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
int i, j, x, cc, c;
uint8_t *src;
uint64_t ccount;
- uint8_t *dst[VDEV_RAIDZ_MAXPARITY];
- uint64_t dcount[VDEV_RAIDZ_MAXPARITY];
+ uint8_t *dst[VDEV_RAIDZ_MAXPARITY] = { NULL };
+ uint64_t dcount[VDEV_RAIDZ_MAXPARITY] = { 0 };
uint8_t log = 0;
uint8_t val;
int ll;
@@ -1595,12 +1481,12 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
return (code);
}
-static int
-vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
+int
+vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
{
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
int ntgts;
- int i, c;
+ int i, c, ret;
int code;
int nbadparity, nbaddata;
int parity_valid[VDEV_RAIDZ_MAXPARITY];
@@ -1638,34 +1524,37 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
dt = &tgts[nbadparity];
+ /* Reconstruct using the new math implementation */
+ ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
+ if (ret != RAIDZ_ORIGINAL_IMPL)
+ return (ret);
+
/*
* See if we can use any of our optimized reconstruction routines.
*/
- if (!vdev_raidz_default_to_general) {
- switch (nbaddata) {
- case 1:
- if (parity_valid[VDEV_RAIDZ_P])
- return (vdev_raidz_reconstruct_p(rm, dt, 1));
+ switch (nbaddata) {
+ case 1:
+ if (parity_valid[VDEV_RAIDZ_P])
+ return (vdev_raidz_reconstruct_p(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 1);
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_q(rm, dt, 1));
+ if (parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_q(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 2);
- break;
+ ASSERT(rm->rm_firstdatacol > 2);
+ break;
- case 2:
- ASSERT(rm->rm_firstdatacol > 1);
+ case 2:
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_P] &&
- parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_pq(rm, dt, 2));
+ if (parity_valid[VDEV_RAIDZ_P] &&
+ parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_pq(rm, dt, 2));
- ASSERT(rm->rm_firstdatacol > 2);
+ ASSERT(rm->rm_firstdatacol > 2);
- break;
- }
+ break;
}
code = vdev_raidz_reconstruct_general(rm, tgts, ntgts);
@@ -1821,11 +1710,16 @@ vdev_raidz_dumpio(vdev_t *vd, caddr_t data, size_t size,
* treat the on-disk format as if the only blocks are the complete 128
* KB size.
*/
- abd_t *abd = abd_get_from_buf(data - (offset - origoffset),
+
+ /* First, fake a zio for vdev_raidz_map_alloc. */
+ zio_t *zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+ zio->io_offset = origoffset;
+ zio->io_size = SPA_OLD_MAXBLOCKSIZE;
+ zio->io_abd = abd_get_from_buf(data - (offset - origoffset),
SPA_OLD_MAXBLOCKSIZE);
- rm = vdev_raidz_map_alloc(abd,
- SPA_OLD_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift,
- vd->vdev_children, vd->vdev_nparity);
+
+ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
+ vd->vdev_nparity);
coloffset = origoffset;
@@ -1874,7 +1768,9 @@ vdev_raidz_dumpio(vdev_t *vd, caddr_t data, size_t size,
}
vdev_raidz_map_free(rm);
- abd_put(abd);
+ abd_put(zio->io_abd);
+ kmem_free(zio, sizeof (zio_t));
+
#endif /* KERNEL */
return (err);
@@ -1965,8 +1861,7 @@ vdev_raidz_io_start(zio_t *zio)
raidz_col_t *rc;
int c, i;
- rm = vdev_raidz_map_alloc(zio->io_abd, zio->io_size, zio->io_offset,
- tvd->vdev_ashift, vd->vdev_children,
+ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
vd->vdev_nparity);
zio->io_vsd = rm;
@@ -2141,11 +2036,6 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
return (ret);
}
-/*
- * Keep statistics on all the ways that we used parity to correct data.
- */
-static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY];
-
static int
vdev_raidz_worst_error(raidz_map_t *rm)
{
@@ -2251,7 +2141,6 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
*/
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
for (i = 0; i < n; i++) {
c = tgts[i];
@@ -2466,8 +2355,6 @@ vdev_raidz_io_done(zio_t *zio)
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
-
/*
* If we read more parity disks than were used
* for reconstruction, confirm that the other
@@ -2620,7 +2507,7 @@ vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
/*
* Determine if any portion of the provided block resides on a child vdev
* with a dirty DTL and therefore needs to be resilvered. The function
- * assumes that at least one DTL is dirty which imples that full stripe
+ * assumes that at least one DTL is dirty which implies that full stripe
* width blocks must be resilvered.
*/
static boolean_t
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math.c b/usr/src/uts/common/fs/zfs/vdev_raidz_math.c
new file mode 100644
index 0000000000..2a1dac33c5
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math.c
@@ -0,0 +1,571 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/types.h>
+#include <sys/zio.h>
+#include <sys/debug.h>
+#include <sys/zfs_debug.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <sys/simd.h>
+
+#ifndef isspace
+#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || \
+ (c) == '\r' || (c) == '\f' || (c) == '\013')
+#endif
+
+extern boolean_t raidz_will_scalar_work(void);
+
+/* Opaque implementation with NULL methods to represent original methods */
+static const raidz_impl_ops_t vdev_raidz_original_impl = {
+ .name = "original",
+ .is_supported = raidz_will_scalar_work,
+};
+
+/* RAIDZ parity op that contain the fastest methods */
+static raidz_impl_ops_t vdev_raidz_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled in implementations */
+const raidz_impl_ops_t *raidz_all_maths[] = {
+ &vdev_raidz_original_impl,
+ &vdev_raidz_scalar_impl,
+};
+
+/* Indicate that benchmark has been completed */
+static boolean_t raidz_math_initialized = B_FALSE;
+
+/* Select raidz implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX - 1)
+#define IMPL_ORIGINAL (0)
+#define IMPL_SCALAR (1)
+
+#define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+/* Hold all supported implementations */
+static size_t raidz_supp_impl_cnt = 0;
+static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
+
+#if defined(_KERNEL)
+/*
+ * kstats values for supported implementations
+ * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
+ *
+ * PORTING NOTE:
+ * On illumos this is not a kstat. OpenZFS uses their home-grown kstat code
+ * which implements a free-form kstat using additional functionality that does
+ * not exist in illumos. Because there are no software consumers of this
+ * information, we omit a kstat API. If an administrator needs to see this
+ * data for some reason, they can use mdb.
+ *
+ * The format of the kstat data on OpenZFS would be a "header" that looks like
+ * this (a column for each entry in the "raidz_gen_name" and "raidz_rec_name"
+ * arrays, starting with the parity function "implementation" name):
+ * impl gen_p gen_pq gen_pqr rec_p rec_q rec_r rec_pq rec_pr rec_qr rec_pqr
+ * This is followed by a row for each parity function implementation, showing
+ * the "speed" values calculated for that implementation for each of the
+ * parity generation and reconstruction functions in the "raidz_all_maths"
+ * array.
+ */
+static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
+
+#endif
+
+/*
+ * Returns the RAIDZ operations for raidz_map() parity calculations. When
+ * a SIMD implementation is not allowed in the current context, then fallback
+ * to the fastest generic implementation.
+ */
+const raidz_impl_ops_t *
+vdev_raidz_math_get_ops(void)
+{
+ /*
+ * illumos porting note:
+ * The following check from OpenZFS is disabled since we don't have
+ * this compiled in yet and we need to be able to change the
+ * implementation for the user-level test suite.
+ *
+ * if (!kfpu_allowed())
+ * return (&vdev_raidz_scalar_impl);
+ */
+
+ raidz_impl_ops_t *ops = NULL;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(raidz_math_initialized);
+ ops = &vdev_raidz_fastest_impl;
+ break;
+ case IMPL_CYCLE:
+ /* Cycle through all supported implementations */
+ ASSERT(raidz_math_initialized);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
+ ops = raidz_supp_impl[idx];
+ break;
+ case IMPL_ORIGINAL:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
+ break;
+ case IMPL_SCALAR:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
+ break;
+ default:
+ ASSERT3U(impl, <, raidz_supp_impl_cnt);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ if (impl < ARRAY_SIZE(raidz_all_maths))
+ ops = raidz_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Select parity generation method for raidz_map
+ */
+int
+vdev_raidz_math_generate(raidz_map_t *rm)
+{
+ raidz_gen_f gen_parity = NULL;
+
+ switch (raidz_parity(rm)) {
+ case 1:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
+ break;
+ case 2:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
+ break;
+ case 3:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
+ break;
+ default:
+ gen_parity = NULL;
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
+ (uint_t)raidz_parity(rm));
+ break;
+ }
+
+ /* if method is NULL execute the original implementation */
+ if (gen_parity == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+
+ gen_parity(rm);
+
+ return (0);
+}
+
+static raidz_rec_f
+reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1 && parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ }
+ } else if (nbaddata == 2 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ } else if (parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_R]);
+ }
+ } else if (nbaddata == 2) {
+ if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PR]);
+ } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_QR]);
+ }
+ } else if (nbaddata == 3 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q] &&
+ parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+/*
+ * Select data reconstruction method for raidz_map
+ * @parity_valid - Parity validity flag
+ * @dt - Failed data index array
+ * @nbaddata - Number of failed data columns
+ */
+int
+vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
+ const int *dt, const int nbaddata)
+{
+ raidz_rec_f rec_fn = NULL;
+
+ switch (raidz_parity(rm)) {
+ case PARITY_P:
+ rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQ:
+ rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQR:
+ rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
+ break;
+ default:
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
+ (uint_t)raidz_parity(rm));
+ break;
+ }
+
+ if (rec_fn == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+ else
+ return (rec_fn(rm, dt));
+}
+
+const char *raidz_gen_name[] = {
+ "gen_p", "gen_pq", "gen_pqr"
+};
+const char *raidz_rec_name[] = {
+ "rec_p", "rec_q", "rec_r",
+ "rec_pq", "rec_pr", "rec_qr", "rec_pqr"
+};
+
+#if defined(_KERNEL)
+
+#define BENCH_D_COLS (8ULL)
+#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
+#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
+#define BENCH_NS MSEC2NSEC(25) /* 25ms */
+
+typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
+
+static void
+benchmark_gen_impl(raidz_map_t *rm, const int fn)
+{
+ (void) fn;
+ vdev_raidz_generate_parity(rm);
+}
+
+static void
+benchmark_rec_impl(raidz_map_t *rm, const int fn)
+{
+ static const int rec_tgt[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
+}
+
+/*
+ * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
+ * is performed by setting the rm_ops pointer and calling the top level
+ * generate/reconstruct methods of bench_rm.
+ */
+static void
+benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
+{
+ uint64_t run_cnt, speed, best_speed = 0;
+ hrtime_t t_start, t_diff;
+ raidz_impl_ops_t *curr_impl;
+ raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
+ int impl, i;
+
+ for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
+ /* set an implementation to benchmark */
+ curr_impl = raidz_supp_impl[impl];
+ bench_rm->rm_ops = curr_impl;
+
+ run_cnt = 0;
+ t_start = gethrtime();
+
+ do {
+ for (i = 0; i < 25; i++, run_cnt++)
+ bench_fn(bench_rm, fn);
+
+ t_diff = gethrtime() - t_start;
+ } while (t_diff < BENCH_NS);
+
+ speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
+ speed /= (t_diff * BENCH_COLS);
+
+ if (bench_fn == benchmark_gen_impl)
+ raidz_impl_kstats[impl].gen[fn] = speed;
+ else
+ raidz_impl_kstats[impl].rec[fn] = speed;
+
+ /* Update fastest implementation method */
+ if (speed > best_speed) {
+ best_speed = speed;
+
+ if (bench_fn == benchmark_gen_impl) {
+ fstat->gen[fn] = impl;
+ vdev_raidz_fastest_impl.gen[fn] =
+ curr_impl->gen[fn];
+ } else {
+ fstat->rec[fn] = impl;
+ vdev_raidz_fastest_impl.rec[fn] =
+ curr_impl->rec[fn];
+ }
+ }
+ }
+}
+#endif
+
+/*
+ * Initialize and benchmark all supported implementations.
+ */
+static void
+benchmark_raidz(void)
+{
+ raidz_impl_ops_t *curr_impl;
+ int i, c;
+
+ /* Move supported impl into raidz_supp_impl */
+ for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
+
+ if (curr_impl->init)
+ curr_impl->init();
+
+ if (curr_impl->is_supported())
+ raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
+ }
+ membar_producer(); /* complete raidz_supp_impl[] init */
+ raidz_supp_impl_cnt = c; /* number of supported impl */
+
+#if defined(_KERNEL)
+ zio_t *bench_zio = NULL;
+ raidz_map_t *bench_rm = NULL;
+ uint64_t bench_parity;
+
+ /* Fake a zio and run the benchmark on a warmed up buffer */
+ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+ bench_zio->io_offset = 0;
+ bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
+ bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
+ memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
+
+ /* Benchmark parity generation methods */
+ for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+ bench_parity = fn + 1;
+ /* New raidz_map is needed for each generate_p/q/r */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_D_COLS + bench_parity, bench_parity);
+
+ benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
+
+ vdev_raidz_map_free(bench_rm);
+ }
+
+ /* Benchmark data reconstruction methods */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_COLS, PARITY_PQR);
+
+ for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
+ benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
+
+ vdev_raidz_map_free(bench_rm);
+
+ /* cleanup the bench zio */
+ abd_free(bench_zio->io_abd);
+ kmem_free(bench_zio, sizeof (zio_t));
+#else
+ /*
+ * Skip the benchmark in user space to avoid impacting libzpool
+ * consumers (zdb, zhack, zinject, ztest). The last implementation
+ * is assumed to be the fastest and used by default.
+ */
+ memcpy(&vdev_raidz_fastest_impl,
+ raidz_supp_impl[raidz_supp_impl_cnt - 1],
+ sizeof (vdev_raidz_fastest_impl));
+ strcpy(vdev_raidz_fastest_impl.name, "fastest");
+#endif /* _KERNEL */
+}
+
+void
+vdev_raidz_math_init(void)
+{
+ /* Determine the fastest available implementation. */
+ benchmark_raidz();
+
+ /* Finish initialization */
+ atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
+ raidz_math_initialized = B_TRUE;
+}
+
+void
+vdev_raidz_math_fini(void)
+{
+ raidz_impl_ops_t const *curr_impl;
+
+ for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = raidz_all_maths[i];
+ if (curr_impl->fini)
+ curr_impl->fini();
+ }
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} math_impl_opts[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST },
+ { "original", IMPL_ORIGINAL },
+ { "scalar", IMPL_SCALAR }
+};
+
+/*
+ * Function sets desired raidz implementation.
+ *
+ * If we are called before init(), user preference will be saved in
+ * user_sel_impl, and applied in later init() call. This occurs when module
+ * parameter is specified on module load. Otherwise, directly update
+ * zfs_vdev_raidz_impl.
+ *
+ * @val Name of raidz implementation to use
+ * @param Unused.
+ */
+int
+vdev_raidz_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[RAIDZ_IMPL_NAME_MAX];
+ uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
+ if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
+ while (i > 0 && !!isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
+ if (strcmp(req_name, math_impl_opts[i].name) == 0) {
+ impl = math_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && raidz_math_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ if (raidz_math_initialized)
+ atomic_swap_32(&zfs_vdev_raidz_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
+
+#if defined(_KERNEL) && defined(__linux__)
+
+static int
+zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (vdev_raidz_impl_set(val));
+}
+
+static int
+zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ ASSERT(raidz_math_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
+ fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
+ zfs_vdev_raidz_impl_get, NULL, 0644);
+MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
+#endif
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h b/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h
new file mode 100644
index 0000000000..89c2082c4a
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h
@@ -0,0 +1,1477 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_MATH_IMPL_H
+#define _VDEV_RAIDZ_MATH_IMPL_H
+
+#include <sys/types.h>
+
+#define raidz_inline inline __attribute__((always_inline))
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/*
+ * Functions calculate multiplication constants for data reconstruction.
+ * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
+ * used parity columns for reconstruction.
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ * @coeff output array of coefficients. Array must be provided by
+ * user and must hold minimum MUL_CNT values.
+ */
+static noinline void
+raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
+}
+
+static noinline void
+raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
+}
+
+static noinline void
+raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ gf_t a, b, e;
+
+ a = gf_exp2(x + 255 - y);
+ b = gf_exp2(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PQ_X] = gf_div(a, e);
+ coeff[MUL_PQ_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t a, b, e;
+
+ a = gf_exp4(x + 255 - y);
+ b = gf_exp4(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PR_X] = gf_div(a, e);
+ coeff[MUL_PR_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t nx, ny, nxxy, nxyy, d;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nxxy = gf_mul(gf_mul(nx, nx), ny);
+ nxyy = gf_mul(gf_mul(nx, ny), ny);
+ d = nxxy ^ nxyy;
+
+ coeff[MUL_QR_XQ] = ny;
+ coeff[MUL_QR_X] = gf_div(ny, d);
+ coeff[MUL_QR_YQ] = nx;
+ coeff[MUL_QR_Y] = gf_div(nx, d);
+}
+
+static noinline void
+raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ const unsigned z = tgtidx[TARGET_Z];
+
+ gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nz = gf_exp2(ncols - z - 1);
+
+ nxx = gf_exp4(ncols - x - 1);
+ nyy = gf_exp4(ncols - y - 1);
+ nzz = gf_exp4(ncols - z - 1);
+
+ nyyz = gf_mul(gf_mul(ny, nz), ny);
+ nyzz = gf_mul(nzz, ny);
+
+ xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
+ gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;
+
+ yd = gf_inv(ny ^ nz);
+
+ coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
+ coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
+ coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
+ coeff[MUL_PQR_YU] = nx;
+ coeff[MUL_PQR_YP] = gf_mul(nz, yd);
+ coeff[MUL_PQR_YQ] = yd;
+}
+
+/*
+ * Method for zeroing a buffer (can be implemented using SIMD).
+ * This method is used by multiple for gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @dsize Destination buffer size
+ * @private Unused
+ */
+static int
+raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ size_t i;
+
+ ZERO_DEFINE();
+
+ (void) private; /* unused */
+
+ ZERO(ZERO_D);
+
+ for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
+ STORE(dst + i, ZERO_D);
+ STORE(dst + i + ZERO_STRIDE, ZERO_D);
+ }
+
+ return (0);
+}
+
+#define raidz_zero(dabd, size) \
+{ \
+ abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL); \
+}
+
+/*
+ * Method for copying two buffers (can be implemented using SIMD).
+ * This method is used by multiple for gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @dsize Destination buffer size
+ * @ssize Source buffer size
+ * @private Unused
+ */
+static int
+raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ COPY_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
+ LOAD(src + i, COPY_D);
+ STORE(dst + i, COPY_D);
+
+ LOAD(src + i + COPY_STRIDE, COPY_D);
+ STORE(dst + i + COPY_STRIDE, COPY_D);
+ }
+
+ return (0);
+}
+
+
+#define raidz_copy(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
+}
+
+/*
+ * Method for adding (XORing) two buffers.
+ * Source and destination are XORed together and result is stored in
+ * destination buffer. This method is used by multiple for gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @dsize Destination buffer size
+ * @ssize Source buffer size
+ * @private Unused
+ */
+static int
+raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ ADD_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
+ LOAD(dst + i, ADD_D);
+ XOR_ACC(src + i, ADD_D);
+ STORE(dst + i, ADD_D);
+
+ LOAD(dst + i + ADD_STRIDE, ADD_D);
+ XOR_ACC(src + i + ADD_STRIDE, ADD_D);
+ STORE(dst + i + ADD_STRIDE, ADD_D);
+ }
+
+ return (0);
+}
+
+#define raidz_add(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
+}
+
+/*
+ * Method for multiplying a buffer with a constant in GF(2^8).
+ * Symbols from buffer are multiplied by a constant and result is stored
+ * back in the same buffer.
+ *
+ * @dc In/Out data buffer.
+ * @size Size of the buffer
+ * @private pointer to the multiplication constant (unsigned)
+ */
+static int
+raidz_mul_abd_cb(void *dc, size_t size, void *private)
+{
+ const unsigned mul = *((unsigned *)private);
+ v_t *d = (v_t *)dc;
+ size_t i;
+
+ MUL_DEFINE();
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
+ LOAD(d + i, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i, MUL_D);
+
+ LOAD(d + i + MUL_STRIDE, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i + MUL_STRIDE, MUL_D);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Syndrome generation/update macros
+ *
+ * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
+ */
+#define P_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ STORE((t), T); \
+}
+
+#define R_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define R_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ STORE((t), T); \
+}
+
+
+/*
+ * PARITY CALCULATION
+ *
+ * Macros *_SYNDROME are used for parity/syndrome calculation.
+ * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
+ * length of data column, and *_SYNDROME() macros are only for updating
+ * the parity/syndrome if data column is shorter.
+ *
+ * P parity is calculated using raidz_add_abd().
+ */
+
+/*
+ * Generate P parity (RAIDZ1)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_p_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t psize = rm->rm_col[CODE_P].rc_size;
+ abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* start with first data column */
+ raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
+
+ for (c = 2; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ size = rm->rm_col[c].rc_size;
+
+ /* add data column */
+ raidz_add(pabd, dabd, size);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
+ q += GEN_PQ_STRIDE) {
+ LOAD(d, GEN_PQ_D);
+ P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
+ Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
+ }
+ for (; q < qend; q += GEN_PQ_STRIDE) {
+ Q_SYNDROME(GEN_PQ_C, q);
+ }
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pq_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
+
+ for (c = 3; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
+ raidz_gen_pq_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ v_t *r = (v_t *)c[CODE_R];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
+ q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ LOAD(d, GEN_PQR_D);
+ P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
+ Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
+ R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
+ }
+ for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ Q_SYNDROME(GEN_PQR_C, q);
+ R_SYNDROME(GEN_PQR_C, r);
+ }
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ2)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pqr_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+
+ for (c = 4; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
+ raidz_gen_pqr_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * DATA RECONSTRUCTION
+ *
+ * Data reconstruction process consists of two phases:
+ * - Syndrome calculation
+ * - Data reconstruction
+ *
+ * Syndrome is calculated by generating parity using available data columns
+ * and zeros in places of erasure. Existing parity is added to corresponding
+ * syndrome value to obtain the [P|Q|R]syn values from equation:
+ * P = Psyn + Dx + Dy + Dz
+ * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
+ * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
+ *
+ * For data reconstruction phase, the corresponding equations are solved
+ * for missing data (Dx, Dy, Dz). This generally involves multiplying known
+ * symbols by an coefficient and adding them together. The multiplication
+ * constant coefficients are calculated ahead of the operation in
+ * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions.
+ *
+ * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
+ * and "short" columns.
+ * For this reason, reconstruction is performed in minimum of
+ * two steps. First, from offset 0 to short_size, then from short_size to
+ * short_size. Calculation functions REC_[*]_BLOCK() are implemented to work
+ * over both ranges. The split also enables removal of conditional expressions
+ * from loop bodies, improving throughput of SIMD implementations.
+ * For the best performance, all functions marked with raidz_inline attribute
+ * must be inlined by compiler.
+ *
+ * parity data
+ * columns columns
+ * <----------> <------------------>
+ * x y <----+ missing columns (x, y)
+ * | |
+ * +---+---+---+---+-v-+---+-v-+---+ ^ 0
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * | P | Q | R | D | D | D | D | D | |
+ * | | | | 0 | 1 | 2 | 3 | 4 | |
+ * | | | | | | | | | v
+ * | | | | | +---+---+---+ ^ short_size
+ * | | | | | | |
+ * +---+---+---+---+---+ v big_size
+ * <------------------> <---------->
+ * big columns short columns
+ *
+ */
+
+
+
+
+/*
+ * Reconstruct single data column using P parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method not applicable
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* copy P into target */
+ raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+
+ /* generate p_syndrome */
+ for (c = firstdc; c < ncols; c++) {
+ if (c == x)
+ continue;
+
+ dabd = rm->rm_col[c].rc_abd;
+ size = MIN(rm->rm_col[c].rc_size, xsize);
+
+ raidz_add(xabd, dabd, size);
+ }
+
+ raidz_math_end();
+
+ return (1 << CODE_P);
+}
+
+
+/*
+ * Generate Q syndrome (Qsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @xsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (xsize / sizeof (v_t));
+
+ SYN_Q_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_Q_D);
+ Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ Q_SYNDROME(SYN_Q_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using Q parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_q_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_q_abd);
+ }
+
+ /* add Q to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_Q);
+}
+
+
+/*
+ * Generate R syndrome (Rsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+
+ SYN_R_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_R_D);
+ R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ R_SYNDROME(SYN_R_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using R parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_r_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_r_abd);
+ }
+
+ /* add R to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_R);
+}
+
+
+/*
+ * Generate P and Q syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PQ_D);
+ P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
+ Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQ_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PQ parity and PQ syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_PQ_DEFINE();
+
+ for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
+ p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
+ LOAD(x, REC_PQ_X);
+ LOAD(y, REC_PQ_Y);
+
+ XOR_ACC(p, REC_PQ_X);
+ XOR_ACC(q, REC_PQ_Y);
+
+ /* Save Pxy */
+ COPY(REC_PQ_X, REC_PQ_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PQ_X], REC_PQ_X);
+ MUL(mul[MUL_PQ_Y], REC_PQ_Y);
+ XOR(REC_PQ_Y, REC_PQ_X);
+ STORE(x, REC_PQ_X);
+
+ /* Calc Y */
+ XOR(REC_PQ_T, REC_PQ_X);
+ STORE(y, REC_PQ_X);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using PQ parity
+ *
+ * @syn_method raidz_syn_pq_abd()
+ * @rec_method raidz_rec_pq_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pq_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of targets is shorter then others
+ * In this case, shorter target needs to be replaced with
+ * new buffer so that syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pq_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
+
+ /* Copy shorter targets back to the original abd buffer */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q));
+}
+
+
+/*
+ * Generate P and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PR_D);
+ P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
+ R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ R_SYNDROME(SYN_PR_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PR parity and PR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_PR_DEFINE();
+
+ for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
+ p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
+ LOAD(x, REC_PR_X);
+ LOAD(y, REC_PR_Y);
+ XOR_ACC(p, REC_PR_X);
+ XOR_ACC(q, REC_PR_Y);
+
+ /* Save Pxy */
+ COPY(REC_PR_X, REC_PR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PR_X], REC_PR_X);
+ MUL(mul[MUL_PR_Y], REC_PR_Y);
+ XOR(REC_PR_Y, REC_PR_X);
+ STORE(x, REC_PR_X);
+
+ /* Calc Y */
+ XOR(REC_PR_T, REC_PR_X);
+ STORE(y, REC_PR_X);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using PR parity
+ *
+ * @syn_method raidz_syn_pr_abd()
+ * @rec_method raidz_rec_pr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[0];
+ const size_t y = tgtidx[1];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of targets are shorter then others.
+ * They need to be replaced with a new buffer so that syndrome can
+ * be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q));
+}
+
+
+/*
+ * Generate Q and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+
+ SYN_QR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PQ_D);
+ Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
+ R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
+ }
+ for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_QR_X, x);
+ R_SYNDROME(SYN_QR_X, y);
+ }
+}
+
+
+/*
+ * Reconstruct data using QR parity and QR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_QR_DEFINE();
+
+ for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
+ p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
+ LOAD(x, REC_QR_X);
+ LOAD(y, REC_QR_Y);
+
+ XOR_ACC(p, REC_QR_X);
+ XOR_ACC(q, REC_QR_Y);
+
+ /* Save Pxy */
+ COPY(REC_QR_X, REC_QR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
+ MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */
+ STORE(x, REC_QR_X);
+
+ /* Calc Y */
+ MUL(mul[MUL_QR_YQ], REC_QR_T); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_T); /* X = R ^ X */
+ MUL(mul[MUL_QR_Y], REC_QR_T); /* X = X * xm */
+ STORE(y, REC_QR_T);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using QR parity
+ *
+ * @syn_method raidz_syn_qr_abd()
+ * @rec_method raidz_rec_qr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_qr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of targets is shorter then others
+ * In this case, shorter target needs to be replaced with
+ * new buffer so that syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_qr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+
+ return ((1 << CODE_Q) | (1 << CODE_R));
+}
+
+
+/*
+ * Generate P, Q, and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ v_t *z = (v_t *)c[TARGET_Z];
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+
+ SYN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
+ z += SYN_STRIDE) {
+ LOAD(d, SYN_PQR_D);
+ P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
+ Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
+ R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
+ }
+ for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQR_X, y);
+ R_SYNDROME(SYN_PQR_X, z);
+ }
+}
+
+
+/*
+ * Reconstruct data using PRQ parity and PQR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
+ const unsigned * const mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ v_t *z = (v_t *)t[TARGET_Z];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+ const v_t *r = (v_t *)c[CODE_R];
+
+ REC_PQR_DEFINE();
+
+ for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
+ z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
+ r += REC_PQR_STRIDE) {
+ LOAD(x, REC_PQR_X);
+ LOAD(y, REC_PQR_Y);
+ LOAD(z, REC_PQR_Z);
+
+ XOR_ACC(p, REC_PQR_X);
+ XOR_ACC(q, REC_PQR_Y);
+ XOR_ACC(r, REC_PQR_Z);
+
+ /* Save Pxyz and Qxyz */
+ COPY(REC_PQR_X, REC_PQR_XS);
+ COPY(REC_PQR_Y, REC_PQR_YS);
+
+ /* Calc X */
+ MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
+ MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
+ XOR(REC_PQR_Y, REC_PQR_X);
+ MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
+ XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
+ STORE(x, REC_PQR_X);
+
+ /* Calc Y */
+ XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
+ MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
+ COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
+ MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
+ MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
+ STORE(y, REC_PQR_YS);
+
+ /* Calc Z */
+ XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
+ STORE(z, REC_PQR_YS);
+ }
+}
+
+
+/*
+ * Reconstruct three data columns using PQR parity
+ *
+ * @syn_method raidz_syn_pqr_abd()
+ * @rec_method raidz_rec_pqr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t z = tgtidx[TARGET_Z];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ const size_t zsize = rm->rm_col[z].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *zabd = rm->rm_col[z].rc_abd;
+ abd_t *tabds[] = { xabd, yabd, zabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of targets is shorter then others
+ * In this case, shorter target needs to be replaced with
+ * new buffer so that syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+ if (zsize < xsize) {
+ zabd = abd_alloc(xsize, B_FALSE);
+ tabds[2] = zabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ raidz_zero(zabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y || c == z) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
+ raidz_syn_pqr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+ if (zsize < xsize)
+ raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+ if (zsize < xsize)
+ abd_free(zabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
+}
+
+#endif /* _VDEV_RAIDZ_MATH_IMPL_H */
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c b/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c
new file mode 100644
index 0000000000..cd742e146c
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c
@@ -0,0 +1,337 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/vdev_raidz_impl.h>
+
+/*
+ * Provide native CPU scalar routines.
+ * Support 32bit and 64bit CPUs.
+ */
+#if ((~(0x0ULL)) >> 24) == 0xffULL
+#define ELEM_SIZE 4
+typedef uint32_t iv_t;
+#elif ((~(0x0ULL)) >> 56) == 0xffULL
+#define ELEM_SIZE 8
+typedef uint64_t iv_t;
+#endif
+
+/*
+ * Vector type used in scalar implementation
+ *
+ * The union is expected to be of native CPU register size. Since addition
+ * uses XOR operation, it can be performed an all byte elements at once.
+ * Multiplication requires per byte access.
+ */
+typedef union {
+ iv_t e;
+ uint8_t b[ELEM_SIZE];
+} v_t;
+
+/*
+ * Precomputed lookup tables for multiplication by a constant
+ *
+ * Reconstruction path requires multiplication by a constant factors. Instead of
+ * performing two step lookup (log & exp tables), a direct lookup can be used
+ * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
+ *
+ * r = vdev_raidz_mul_lt[c_log][a];
+ *
+ * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
+ * they are faster to obtain while solving the syndrome equations.
+ *
+ * PERFORMANCE NOTE:
+ * Even though the complete lookup table uses 64kiB, only relatively small
+ * portion of it is used at the same time. Following shows number of accessed
+ * bytes for different cases:
+ * - 1 failed disk: 256B (1 mul. coefficient)
+ * - 2 failed disks: 512B (2 mul. coefficients)
+ * - 3 failed disks: 1536B (6 mul. coefficients)
+ *
+ * Size of actually accessed lookup table regions is only larger for
+ * reconstruction of 3 failed disks, when compared to traditional log/exp
+ * method. But since the result is obtained in one lookup step performance is
+ * doubled.
+ */
+static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
+
+static void
+raidz_init_scalar(void)
+{
+ int c, i;
+ for (c = 0; c < 256; c++)
+ for (i = 0; i < 256; i++)
+ vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
+
+}
+
+#define PREFETCHNTA(ptr, offset) {}
+#define PREFETCH(ptr, offset) {}
+
+#define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
+#define XOR(src, acc) acc.e ^= src.e
+#define ZERO(acc) acc.e = 0
+#define COPY(src, dst) dst = src
+#define LOAD(src, val) val = ((v_t *)src)[0]
+#define STORE(dst, val) ((v_t *)dst)[0] = val
+
+/*
+ * Constants used for optimized multiplication by 2.
+ */
+static const struct {
+ iv_t mod;
+ iv_t mask;
+ iv_t msb;
+} scalar_mul2_consts = {
+#if ELEM_SIZE == 8
+ .mod = 0x1d1d1d1d1d1d1d1dULL,
+ .mask = 0xfefefefefefefefeULL,
+ .msb = 0x8080808080808080ULL,
+#else
+ .mod = 0x1d1d1d1dULL,
+ .mask = 0xfefefefeULL,
+ .msb = 0x80808080ULL,
+#endif
+};
+
+#define MUL2_SETUP() {}
+
+#define MUL2(a) \
+{ \
+ iv_t _mask; \
+ \
+ _mask = (a).e & scalar_mul2_consts.msb; \
+ _mask = (_mask << 1) - (_mask >> 7); \
+ (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
+ (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
+}
+
+#define MUL4(a) \
+{ \
+ MUL2(a); \
+ MUL2(a); \
+}
+
+#define MUL(c, a) \
+{ \
+ const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
+ switch (ELEM_SIZE) { \
+ case 8: \
+ a.b[7] = mul_lt[a.b[7]]; \
+ a.b[6] = mul_lt[a.b[6]]; \
+ a.b[5] = mul_lt[a.b[5]]; \
+ a.b[4] = mul_lt[a.b[4]]; \
+ /* falls through */ \
+ case 4: \
+ a.b[3] = mul_lt[a.b[3]]; \
+ a.b[2] = mul_lt[a.b[2]]; \
+ a.b[1] = mul_lt[a.b[1]]; \
+ a.b[0] = mul_lt[a.b[0]]; \
+ break; \
+ } \
+}
+
+#define raidz_math_begin() {}
+#define raidz_math_end() {}
+
+#define SYN_STRIDE 1
+
+#define ZERO_DEFINE() v_t d0
+#define ZERO_STRIDE 1
+#define ZERO_D d0
+
+#define COPY_DEFINE() v_t d0
+#define COPY_STRIDE 1
+#define COPY_D d0
+
+#define ADD_DEFINE() v_t d0
+#define ADD_STRIDE 1
+#define ADD_D d0
+
+#define MUL_DEFINE() v_t d0
+#define MUL_STRIDE 1
+#define MUL_D d0
+
+#define GEN_P_STRIDE 1
+#define GEN_P_DEFINE() v_t p0
+#define GEN_P_P p0
+
+#define GEN_PQ_STRIDE 1
+#define GEN_PQ_DEFINE() v_t d0, c0
+#define GEN_PQ_D d0
+#define GEN_PQ_C c0
+
+#define GEN_PQR_STRIDE 1
+#define GEN_PQR_DEFINE() v_t d0, c0
+#define GEN_PQR_D d0
+#define GEN_PQR_C c0
+
+#define SYN_Q_DEFINE() v_t d0, x0
+#define SYN_Q_D d0
+#define SYN_Q_X x0
+
+
+#define SYN_R_DEFINE() v_t d0, x0
+#define SYN_R_D d0
+#define SYN_R_X x0
+
+
+#define SYN_PQ_DEFINE() v_t d0, x0
+#define SYN_PQ_D d0
+#define SYN_PQ_X x0
+
+
+#define REC_PQ_STRIDE 1
+#define REC_PQ_DEFINE() v_t x0, y0, t0
+#define REC_PQ_X x0
+#define REC_PQ_Y y0
+#define REC_PQ_T t0
+
+
+#define SYN_PR_DEFINE() v_t d0, x0
+#define SYN_PR_D d0
+#define SYN_PR_X x0
+
+#define REC_PR_STRIDE 1
+#define REC_PR_DEFINE() v_t x0, y0, t0
+#define REC_PR_X x0
+#define REC_PR_Y y0
+#define REC_PR_T t0
+
+
+#define SYN_QR_DEFINE() v_t d0, x0
+#define SYN_QR_D d0
+#define SYN_QR_X x0
+
+
+#define REC_QR_STRIDE 1
+#define REC_QR_DEFINE() v_t x0, y0, t0
+#define REC_QR_X x0
+#define REC_QR_Y y0
+#define REC_QR_T t0
+
+
+#define SYN_PQR_DEFINE() v_t d0, x0
+#define SYN_PQR_D d0
+#define SYN_PQR_X x0
+
+#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
+#define REC_PQR_X x0
+#define REC_PQR_Y y0
+#define REC_PQR_Z z0
+#define REC_PQR_XS xs0
+#define REC_PQR_YS ys0
+
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(scalar);
+DEFINE_REC_METHODS(scalar);
+
+boolean_t
+raidz_will_scalar_work(void)
+{
+ return (B_TRUE); /* always */
+}
+
+const raidz_impl_ops_t vdev_raidz_scalar_impl = {
+ .init = raidz_init_scalar,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(scalar),
+ .rec = RAIDZ_REC_METHODS(scalar),
+ .is_supported = &raidz_will_scalar_work,
+ .name = "scalar"
+};
+
+/* Powers of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+
+/* Logs of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
+ 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
+ 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
+ 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
+ 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
+ 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
+ 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
+ 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+ 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
+ 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
+ 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
+ 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
+ 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
+ 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
+ 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
+ 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
+};
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index 47c4a190cf..d06cb875cf 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -26,6 +26,7 @@
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -2229,18 +2230,34 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
* Our count is maintained in the vfs structure, but the
* number is off by 1 to indicate a hold on the vfs
* structure itself.
- *
- * The '.zfs' directory maintains a reference of its
- * own, and any active references underneath are
- * reflected in the vnode count.
*/
- if (zfsvfs->z_ctldir == NULL) {
- if (vfsp->vfs_count > 1)
- return (SET_ERROR(EBUSY));
- } else {
- if (vfsp->vfs_count > 2 ||
- zfsvfs->z_ctldir->v_count > 1)
- return (SET_ERROR(EBUSY));
+ boolean_t draining;
+ uint_t thresh = 1;
+
+ /*
+ * The '.zfs' directory maintains a reference of its own, and
+ * any active references underneath are reflected in the vnode
+ * count. Allow one additional reference for it.
+ */
+ if (zfsvfs->z_ctldir != NULL)
+ thresh++;
+
+ /*
+ * If it's running, the asynchronous unlinked drain task needs
+ * to be stopped before the number of active vnodes can be
+ * reliably checked.
+ */
+ draining = zfsvfs->z_draining;
+ if (draining)
+ zfs_unlinked_drain_stop_wait(zfsvfs);
+
+ if (vfsp->vfs_count > thresh || (zfsvfs->z_ctldir != NULL &&
+ zfsvfs->z_ctldir->v_count > 1)) {
+ if (draining) {
+ /* If it was draining, restart the task */
+ zfs_unlinked_drain(zfsvfs);
+ }
+ return (SET_ERROR(EBUSY));
}
}
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h
index a8df3df932..ce0cc20879 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_osdep.h
@@ -113,7 +113,7 @@ boolean_t ixgbe_removed(struct ixgbe_hw *);
#ifdef _BIG_ENDIAN
#define IXGBE_CPU_TO_LE16 BSWAP_16
#define IXGBE_CPU_TO_LE32 BSWAP_32
-#define IXGBE_LE32_TO_CPUS BSWAP_32
+#define IXGBE_LE32_TO_CPUS(x) *(x) = BSWAP_32(*(x))
#define IXGBE_CPU_TO_BE16(x) (x)
#define IXGBE_CPU_TO_BE32(x) (x)
#define IXGBE_BE32_TO_CPU(x) (x)
diff --git a/usr/src/uts/common/sys/sysevent.h b/usr/src/uts/common/sys/sysevent.h
index 392b426977..c2be00ad27 100644
--- a/usr/src/uts/common/sys/sysevent.h
+++ b/usr/src/uts/common/sys/sysevent.h
@@ -73,7 +73,7 @@ extern "C" {
#define SE_KERN_PUB "kern:"
#define SUNW_KERN_PUB SUNW_VENDOR ":" SE_KERN_PUB
#define SUNW_USR_PUB SUNW_VENDOR ":" SE_USR_PUB
-#define ILLUMOS_KERN_PUB ILLUMOS_VENDOR":"SE_KERN_PUB
+#define ILLUMOS_KERN_PUB ILLUMOS_VENDOR ":" SE_KERN_PUB
/*
* Event header and attribute value limits
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 634575427e..ec338b1cdd 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -12,6 +12,7 @@
/*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
#include <sys/types.h>
@@ -65,6 +66,7 @@
static kmutex_t vmmdev_mtx;
static dev_info_t *vmmdev_dip;
static hma_reg_t *vmmdev_hma_reg;
+static uint_t vmmdev_hma_ref;
static sdev_plugin_hdl_t vmmdev_sdev_hdl;
static kmutex_t vmm_mtx;
@@ -1322,6 +1324,56 @@ vmm_lookup(const char *name)
return (sc);
}
+/*
+ * Acquire an HMA registration if not already held.
+ */
+static boolean_t
+vmm_hma_acquire(void)
+{
+ ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
+
+ mutex_enter(&vmmdev_mtx);
+
+ if (vmmdev_hma_reg == NULL) {
+ VERIFY3U(vmmdev_hma_ref, ==, 0);
+ vmmdev_hma_reg = hma_register(vmmdev_hvm_name);
+ if (vmmdev_hma_reg == NULL) {
+ cmn_err(CE_WARN, "%s HMA registration failed.",
+ vmmdev_hvm_name);
+ mutex_exit(&vmmdev_mtx);
+ return (B_FALSE);
+ }
+ }
+
+ vmmdev_hma_ref++;
+
+ mutex_exit(&vmmdev_mtx);
+
+ return (B_TRUE);
+}
+
+/*
+ * Release the HMA registration if held and there are no remaining VMs.
+ */
+static void
+vmm_hma_release(void)
+{
+ ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
+
+ mutex_enter(&vmmdev_mtx);
+
+ VERIFY3U(vmmdev_hma_ref, !=, 0);
+
+ vmmdev_hma_ref--;
+
+ if (vmmdev_hma_ref == 0) {
+ VERIFY(vmmdev_hma_reg != NULL);
+ hma_unregister(vmmdev_hma_reg);
+ vmmdev_hma_reg = NULL;
+ }
+ mutex_exit(&vmmdev_mtx);
+}
+
static int
vmmdev_do_vm_create(char *name, cred_t *cr)
{
@@ -1333,11 +1385,15 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
return (EINVAL);
}
+ if (!vmm_hma_acquire())
+ return (ENXIO);
+
mutex_enter(&vmm_mtx);
- /* Look for duplicates names */
+ /* Look for duplicate names */
if (vmm_lookup(name) != NULL) {
mutex_exit(&vmm_mtx);
+ vmm_hma_release();
return (EEXIST);
}
@@ -1347,6 +1403,7 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
sc = list_next(&vmm_list, sc)) {
if (sc->vmm_zone == curzone) {
mutex_exit(&vmm_mtx);
+ vmm_hma_release();
return (EINVAL);
}
}
@@ -1397,6 +1454,7 @@ fail:
ddi_soft_state_free(vmm_statep, minor);
}
mutex_exit(&vmm_mtx);
+ vmm_hma_release();
return (error);
}
@@ -1685,13 +1743,16 @@ done:
}
static int
-vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd)
+vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd,
+ boolean_t *hma_release)
{
dev_info_t *pdip = ddi_get_parent(vmmdev_dip);
minor_t minor;
ASSERT(MUTEX_HELD(&vmm_mtx));
+ *hma_release = B_FALSE;
+
if (clean_zsd) {
vmm_zsd_rem_vm(sc);
}
@@ -1714,6 +1775,7 @@ vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd)
vm_destroy(sc->vmm_vm);
ddi_soft_state_free(vmm_statep, minor);
id_free(vmm_minors, minor);
+ *hma_release = B_TRUE;
}
(void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
@@ -1723,12 +1785,16 @@ vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd)
int
vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd)
{
+ boolean_t hma_release = B_FALSE;
int err;
mutex_enter(&vmm_mtx);
- err = vmm_do_vm_destroy_locked(sc, clean_zsd);
+ err = vmm_do_vm_destroy_locked(sc, clean_zsd, &hma_release);
mutex_exit(&vmm_mtx);
+ if (hma_release)
+ vmm_hma_release();
+
return (err);
}
@@ -1736,6 +1802,7 @@ vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd)
static int
vmmdev_do_vm_destroy(const char *name, cred_t *cr)
{
+ boolean_t hma_release = B_FALSE;
vmm_softc_t *sc;
int err;
@@ -1756,13 +1823,16 @@ vmmdev_do_vm_destroy(const char *name, cred_t *cr)
mutex_exit(&vmm_mtx);
return (EPERM);
}
- err = vmm_do_vm_destroy_locked(sc, B_TRUE);
+ err = vmm_do_vm_destroy_locked(sc, B_TRUE, &hma_release);
+
mutex_exit(&vmm_mtx);
+ if (hma_release)
+ vmm_hma_release();
+
return (err);
}
-
static int
vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
@@ -1799,6 +1869,7 @@ vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
minor_t minor;
vmm_softc_t *sc;
+ boolean_t hma_release = B_FALSE;
minor = getminor(dev);
if (minor == VMM_CTL_MINOR)
@@ -1814,14 +1885,22 @@ vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
VERIFY(sc->vmm_is_open);
sc->vmm_is_open = B_FALSE;
+ /*
+ * If this VM was destroyed while the vmm device was open, then
+ * clean it up now that it is closed.
+ */
if (sc->vmm_flags & VMM_DESTROY) {
list_remove(&vmm_destroy_list, sc);
vm_destroy(sc->vmm_vm);
ddi_soft_state_free(vmm_statep, minor);
id_free(vmm_minors, minor);
+ hma_release = B_TRUE;
}
mutex_exit(&vmm_mtx);
+ if (hma_release)
+ vmm_hma_release();
+
return (0);
}
@@ -2076,12 +2155,18 @@ vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
vmm_sol_glue_init();
vmm_arena_init();
+ /*
+ * Perform temporary HMA registration to determine if the system
+ * is capable.
+ */
if ((reg = hma_register(vmmdev_hvm_name)) == NULL) {
goto fail;
} else if (vmm_mod_load() != 0) {
goto fail;
}
vmm_loaded = B_TRUE;
+ hma_unregister(reg);
+ reg = NULL;
/* Create control node. Other nodes will be created on demand. */
if (ddi_create_minor_node(dip, "ctl", S_IFCHR,
@@ -2096,7 +2181,6 @@ vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
ddi_report_dev(dip);
- vmmdev_hma_reg = reg;
vmmdev_sdev_hdl = sph;
vmmdev_dip = dip;
mutex_exit(&vmmdev_mtx);
@@ -2153,8 +2237,7 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
vmmdev_dip = NULL;
VERIFY0(vmm_mod_unload());
- hma_unregister(vmmdev_hma_reg);
- vmmdev_hma_reg = NULL;
+ VERIFY3U(vmmdev_hma_reg, ==, NULL);
vmm_arena_fini();
vmm_sol_glue_cleanup();
diff --git a/usr/src/uts/i86pc/ppt/Makefile b/usr/src/uts/i86pc/ppt/Makefile
index 7c41368efd..7c10482e98 100644
--- a/usr/src/uts/i86pc/ppt/Makefile
+++ b/usr/src/uts/i86pc/ppt/Makefile
@@ -44,8 +44,8 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
#
ALL_BUILDS = $(ALL_BUILDSONLY64)
DEF_BUILDS = $(DEF_BUILDSONLY64)
-PRE_INC_PATH = -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64 \
- -I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64
+PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \
+ -I$(CONTRIB)/bhyve -I$(CONTRIB)/bhyve/amd64
INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io
AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile
index c55abf6090..e7f07c4c4e 100644
--- a/usr/src/uts/i86pc/vmm/Makefile
+++ b/usr/src/uts/i86pc/vmm/Makefile
@@ -56,8 +56,8 @@ $(OBJS_DIR)/vmx.o := SMOFF += deref_check
ALL_BUILDS = $(ALL_BUILDSONLY64)
DEF_BUILDS = $(DEF_BUILDSONLY64)
-PRE_INC_PATH = -I$(COMPAT)/freebsd -I$(COMPAT)/freebsd/amd64 \
- -I$(CONTRIB)/freebsd -I$(CONTRIB)/freebsd/amd64
+PRE_INC_PATH = -I$(COMPAT)/bhyve -I$(COMPAT)/bhyve/amd64 \
+ -I$(CONTRIB)/bhyve -I$(CONTRIB)/bhyve/amd64
INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(UTSBASE)/i86pc/io/vmm/io
AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
diff --git a/usr/src/uts/req.flg b/usr/src/uts/req.flg
index 15085a486d..76b8de8999 100644
--- a/usr/src/uts/req.flg
+++ b/usr/src/uts/req.flg
@@ -38,7 +38,6 @@ echo_file usr/src/Makefile
# For full builds (open and closed), we want both etc/certs and
# etc/keys. For an open source build, there's no etc/keys directory.
-find_files "s.*" usr/contrib/freebsd
find_files "s.*" usr/src/cmd/cmd-crypto/etc
find_files "s.*" usr/src/common/acl
find_files "s.*" usr/src/common/atomic
@@ -58,5 +57,6 @@ find_files "s.*" usr/src/common/smbios
find_files "s.*" usr/src/common/tsol
find_files "s.*" usr/src/common/util
find_files "s.*" usr/src/common/zfs
-find_files "s.*" usr/src/compat/freebsd
+find_files "s.*" usr/src/compat/bhyve
+find_files "s.*" usr/src/contrib/bhyve
find_files "s.*" usr/src/psm/promif
diff --git a/usr/src/uts/sun/io/zs_async.c b/usr/src/uts/sun/io/zs_async.c
index 6470434f6c..afdf563c4f 100644
--- a/usr/src/uts/sun/io/zs_async.c
+++ b/usr/src/uts/sun/io/zs_async.c
@@ -225,7 +225,7 @@ ushort_t zshiwat[NSPEED] = {
*/
#define ZSA_GETBLOCK(zs, allocbcount) \
{ \
- register int n = zsa_rstandby; \
+ int n = zsa_rstandby; \
while (--n >= 0 && allocbcount > 0) { \
if (!za->za_rstandby[n]) { \
if ((za->za_rstandby[n] = allocb(ZSA_RCV_SIZE, \
@@ -243,7 +243,7 @@ ushort_t zshiwat[NSPEED] = {
if (za->za_ttycommon.t_cflag & CRTSXOFF) { \
mutex_enter(zs->zs_excl_hi); \
if (!(zs->zs_wreg[5] & ZSWR5_RTS)) { \
- register int usedcnt = 0; \
+ int usedcnt = 0; \
for (n = 0; n < zsa_rstandby; n++) \
if (!za->za_rstandby[n]) \
usedcnt++; \
@@ -260,7 +260,7 @@ ushort_t zshiwat[NSPEED] = {
*/
#define ZSA_ALLOCB(mp) \
{ \
- register int n = zsa_rstandby; \
+ int n = zsa_rstandby; \
while (--n >= 0) { \
if ((mp = za->za_rstandby[n]) != NULL) { \
za->za_rstandby[n] = NULL; \
@@ -274,7 +274,7 @@ ushort_t zshiwat[NSPEED] = {
cmn_err(CE_WARN, "zs%d: message lost\n", \
UNIT(za->za_dev)); \
} else if (zs->zs_wreg[5] & ZSWR5_RTS) { \
- register int usedcnt = 0; \
+ int usedcnt = 0; \
for (n = 0; n < zsa_rstandby; n++) \
if (!za->za_rstandby[n]) \
usedcnt++; \
@@ -301,7 +301,7 @@ ushort_t zshiwat[NSPEED] = {
*/
#define ZSA_PUTQ(mp) \
{ \
- register int wptr, rptr; \
+ int wptr, rptr; \
wptr = za->za_rdone_wptr; \
rptr = za->za_rdone_rptr; \
za->za_rdone[wptr] = mp; \
@@ -321,7 +321,7 @@ ushort_t zshiwat[NSPEED] = {
*/
#define ZSA_KICK_RCV \
{ \
- register mblk_t *mp = za->za_rcvblk; \
+ mblk_t *mp = za->za_rcvblk; \
if (mp) { \
if (zs->zs_rd_cur) { /* M_DATA */ \
mp->b_wptr = zs->zs_rd_cur; \
@@ -364,7 +364,7 @@ ushort_t zshiwat[NSPEED] = {
*/
#define ZSA_FLUSHQ \
{ \
- register mblk_t *tmp; \
+ mblk_t *tmp; \
for (;;) { \
ZSA_GETQ(tmp); \
if (!(tmp)) \
@@ -412,7 +412,7 @@ int zsa_h_log_n[40];
#define zsa_h_log_clear \
{ \
- register char *p; \
+ char *p; \
for (p = &zsa_h_log[zs->zs_unit][ZSA_H_LOG_MAX]; \
p >= &zsa_h_log[zs->zs_unit][0]; /* null */) \
*p-- = '\0'; \
@@ -498,8 +498,7 @@ static int zsa_resume(struct zscom *zs);
static void
zsa_null(struct zscom *zs)
{
- /* LINTED */
- register short c;
+ short c;
SCC_WRITE0(ZSWR0_RESET_TXINT);
SCC_WRITE0(ZSWR0_RESET_STATUS);
@@ -542,8 +541,8 @@ static void zsa_reioctl(void *);
static void zsa_ioctl(struct asyncline *za, queue_t *q, mblk_t *mp);
static void zsa_program(struct asyncline *za, int setibaud);
static void zsa_start(struct zscom *zs);
-static void zsa_kick_rcv(void *);
-static void zsa_callback(void *);
+static void zsa_kick_rcv(void *);
+static void zsa_callback(void *);
static void zsa_set_za_rcv_flags_mask(struct asyncline *za);
int zsgetspeed(dev_t dev);
@@ -554,9 +553,9 @@ int
zsc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
void **result)
{
- register dev_t dev = (dev_t)arg;
- register int unit, error;
- register struct zscom *zs;
+ dev_t dev = (dev_t)arg;
+ int unit, error;
+ struct zscom *zs;
if ((unit = UNIT(dev)) >= nzs)
return (DDI_FAILURE);
@@ -716,12 +715,12 @@ zsa_init(struct zscom *zs)
static int
zsa_open(queue_t *rq, dev_t *dev, int flag, int sflag, cred_t *cr)
{
- register struct zscom *zs;
- register struct asyncline *za;
- register int speed, unit;
+ struct zscom *zs;
+ struct asyncline *za;
+ int speed, unit;
struct termios *termiosp;
int len;
- register int allocbcount = zsa_rstandby;
+ int allocbcount = zsa_rstandby;
boolean_t set_zsoptinit = B_FALSE;
unit = UNIT(*dev);
@@ -1205,10 +1204,10 @@ out:
static void
zsa_wput(queue_t *q, mblk_t *mp)
{
- register struct asyncline *za;
- register struct zscom *zs;
- register struct copyresp *resp;
- register mblk_t *bp = NULL;
+ struct asyncline *za;
+ struct zscom *zs;
+ struct copyresp *resp;
+ mblk_t *bp = NULL;
int error;
struct iocblk *iocp;
@@ -1321,7 +1320,7 @@ zsa_wput(queue_t *q, mblk_t *mp)
freemsg(mp->b_cont);
mp->b_cont = NULL;
}
- if (za->za_pps == NULL) {
+ if (za->za_pps == 0) {
mp->b_datap->db_type = M_IOCNAK;
iocp->ioc_error = ENXIO;
ZSA_QREPLY(q, mp);
@@ -1640,9 +1639,9 @@ zsa_rsrv(queue_t *q)
static void
zsa_txint(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register uchar_t *wr_cur;
- register uchar_t s0;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ uchar_t *wr_cur;
+ uchar_t s0;
s0 = SCC_READ0();
@@ -1701,8 +1700,8 @@ zsa_txint(struct zscom *zs)
static void
zsa_xsint(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register uchar_t s0, x0;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ uchar_t s0, x0;
s0 = SCC_READ0();
ZSA_R0_LOG(s0);
@@ -1833,12 +1832,12 @@ zsa_xsint(struct zscom *zs)
static void
zsa_rxint(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register uchar_t c;
- register uchar_t *rd_cur = zs->zs_rd_cur;
- register uchar_t *rd_lim = zs->zs_rd_lim;
- register mblk_t *bp;
- register uint_t fm = za->za_rcv_flags_mask;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ uchar_t c;
+ uchar_t *rd_cur = zs->zs_rd_cur;
+ uchar_t *rd_lim = zs->zs_rd_lim;
+ mblk_t *bp;
+ uint_t fm = za->za_rcv_flags_mask;
#ifdef ZSA_DEBUG
@@ -1956,12 +1955,12 @@ zsa_rxint(struct zscom *zs)
static void
zsa_srint(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register short s1;
- register uchar_t c;
- register uchar_t c1;
- register mblk_t *bp = za->za_rcvblk;
- register uchar_t *rd_cur = zs->zs_rd_cur;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ short s1;
+ uchar_t c;
+ uchar_t c1;
+ mblk_t *bp = za->za_rcvblk;
+ uchar_t *rd_cur = zs->zs_rd_cur;
SCC_READ(1, s1);
if (s1 & (ZSRR1_FE | ZSRR1_PE | ZSRR1_DO)) {
@@ -2080,17 +2079,17 @@ zsa_srint(struct zscom *zs)
static int
zsa_softint(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register uchar_t r0;
- register uchar_t za_kick_active;
- register int m_error;
- register int allocbcount = 0;
- register int do_ttycommon_qfull = 0;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ uchar_t r0;
+ uchar_t za_kick_active;
+ int m_error;
+ int allocbcount = 0;
+ int do_ttycommon_qfull = 0;
boolean_t hangup = B_FALSE, unhangup = B_FALSE;
boolean_t m_break = B_FALSE, wakeup = B_FALSE;
- register queue_t *q;
- register mblk_t *bp;
- register mblk_t *head = NULL, *tail = NULL;
+ queue_t *q;
+ mblk_t *bp;
+ mblk_t *head = NULL, *tail = NULL;
mutex_enter(zs->zs_excl);
if (zs->zs_suspended || (zs->zs_flags & ZS_CLOSED)) {
@@ -2351,10 +2350,10 @@ zsa_softint(struct zscom *zs)
static void
zsa_start(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register int cc;
- register queue_t *q;
- register mblk_t *bp;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ int cc;
+ queue_t *q;
+ mblk_t *bp;
uchar_t *rptr, *wptr;
/*
@@ -2501,8 +2500,8 @@ zsa_start_transmit:
* so we need to explicitly mask before transmitting
*/
if ((za->za_ttycommon.t_cflag & CSIZE) == CS5) {
- register unsigned char *p = rptr;
- register int cnt = cc;
+ unsigned char *p = rptr;
+ int cnt = cc;
while (cnt--)
*p++ &= (unsigned char) 0x1f;
@@ -2741,11 +2740,11 @@ zsa_reioctl(void *arg)
static void
zsa_ioctl(struct asyncline *za, queue_t *wq, mblk_t *mp)
{
- register struct zscom *zs = za->za_common;
- register struct iocblk *iocp;
- register unsigned datasize;
+ struct zscom *zs = za->za_common;
+ struct iocblk *iocp;
+ unsigned datasize;
int error;
- register mblk_t *tmp;
+ mblk_t *tmp;
if (za->za_ttycommon.t_iocpending != NULL) {
/*
@@ -2934,7 +2933,7 @@ zsa_ioctl(struct asyncline *za, queue_t *wq, mblk_t *mp)
static int
dmtozs(int bits)
{
- register int b = 0;
+ int b = 0;
if (bits & TIOCM_CAR)
b |= ZSRR0_CD;
@@ -2950,7 +2949,7 @@ dmtozs(int bits)
static int
zstodm(int bits)
{
- register int b;
+ int b;
b = 0;
if (bits & ZSRR0_CD)
@@ -2972,9 +2971,9 @@ zstodm(int bits)
static void
zsa_program(struct asyncline *za, int setibaud)
{
- register struct zscom *zs = za->za_common;
- register struct zs_prog *zspp;
- register int wr3, wr4, wr5, wr15, speed, baudrate, flags = 0;
+ struct zscom *zs = za->za_common;
+ struct zs_prog *zspp;
+ int wr3, wr4, wr5, wr15, speed, baudrate, flags = 0;
if ((baudrate = SPEED(za->za_ttycommon.t_cflag)) == 0) {
/*
@@ -3129,9 +3128,9 @@ zsa_program(struct asyncline *za, int setibaud)
int
zsgetspeed(dev_t dev)
{
- register struct zscom *zs;
- register int uspeed, zspeed;
- register uchar_t rr;
+ struct zscom *zs;
+ int uspeed, zspeed;
+ uchar_t rr;
zs = &zscom[UNIT(dev)];
SCC_READ(12, zspeed);
@@ -3170,7 +3169,7 @@ zsa_callback(void *arg)
static void
zsa_set_za_rcv_flags_mask(struct asyncline *za)
{
- register uint_t mask;
+ uint_t mask;
za->za_rcv_flags_mask &= ~0xFF;
switch (za->za_ttycommon.t_cflag & CSIZE) {
@@ -3303,7 +3302,7 @@ zsa_suspend(struct zscom *zs)
static int
zsa_resume(struct zscom *zs)
{
- register struct asyncline *za;
+ struct asyncline *za;
struct zs_prog *zspp;
za = (struct asyncline *)&zs->zs_priv_str;
@@ -3343,8 +3342,8 @@ zsa_resume(struct zscom *zs)
static void
zsa_print_info(struct zscom *zs)
{
- register struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
- register queue_t *q = za->za_ttycommon.t_writeq;
+ struct asyncline *za = (struct asyncline *)&zs->zs_priv_str;
+ queue_t *q = za->za_ttycommon.t_writeq;
printf(" next q=%s\n", (RD(q))->q_next->q_qinfo->qi_minfo->mi_idname);
printf("unit=%d\n", zs->zs_unit);
diff --git a/usr/src/uts/sun/io/zs_common.c b/usr/src/uts/sun/io/zs_common.c
index 7c39106941..43e5a2b4ec 100644
--- a/usr/src/uts/sun/io/zs_common.c
+++ b/usr/src/uts/sun/io/zs_common.c
@@ -595,14 +595,14 @@ zsattach(dev_info_t *dev, ddi_attach_cmd_t cmd)
(void) strcpy(name, "a");
if (ddi_create_minor_node(dev, name, S_IFCHR,
ddi_get_instance(dev) * 2,
- obp_type, NULL) == DDI_FAILURE) {
+ obp_type, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
(void) strcpy(name, "b");
if (ddi_create_minor_node(dev, name, S_IFCHR,
(ddi_get_instance(dev) * 2) + 1,
- obp_type, NULL) == DDI_FAILURE) {
+ obp_type, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
@@ -614,28 +614,28 @@ zsattach(dev_info_t *dev, ddi_attach_cmd_t cmd)
(void) sprintf(name, "%c", (ddi_get_instance(dev) + 'a'));
if (ddi_create_minor_node(dev, name, S_IFCHR,
ddi_get_instance(dev) * 2,
- serial_line, NULL) == DDI_FAILURE) {
+ serial_line, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
(void) sprintf(name, "%c", (ddi_get_instance(dev) + 'b'));
if (ddi_create_minor_node(dev, name, S_IFCHR,
(ddi_get_instance(dev) * 2) + 1,
- serial_line, NULL) == DDI_FAILURE) {
+ serial_line, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
(void) sprintf(name, "%c,cu", (ddi_get_instance(dev) + 'a'));
if (ddi_create_minor_node(dev, name, S_IFCHR,
(ddi_get_instance(dev) * 2) | OUTLINE,
- dial_out, NULL) == DDI_FAILURE) {
+ dial_out, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
(void) sprintf(name, "%c,cu", (ddi_get_instance(dev) + 'b'));
if (ddi_create_minor_node(dev, name, S_IFCHR,
((ddi_get_instance(dev) * 2) + 1) | OUTLINE,
- dial_out, NULL) == DDI_FAILURE) {
+ dial_out, 0) == DDI_FAILURE) {
ddi_remove_minor_node(dev, NULL);
return (DDI_FAILURE);
}
@@ -871,7 +871,7 @@ zsintr(caddr_t intarg)
for (zs = &zscom[0]; zs <= zslast; zs++) {
if (zs->zs_flags & ZS_NEEDSOFT) {
zs->zs_flags &= ~ZS_NEEDSOFT;
- (*zs->zs_ops->zsop_softint)(zs);
+ (void) (*zs->zs_ops->zsop_softint)(zs);
if (zs->intrstats) {
KIOIP->intrs[KSTAT_INTR_SOFT]++;
}
diff --git a/usr/src/uts/sun4u/zs/Makefile b/usr/src/uts/sun4u/zs/Makefile
index 198362da01..0af5aa7379 100644
--- a/usr/src/uts/sun4u/zs/Makefile
+++ b/usr/src/uts/sun4u/zs/Makefile
@@ -40,7 +40,6 @@ UTSBASE = ../..
#
MODULE = zs
OBJECTS = $(ZS_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(ZS_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
#
@@ -60,23 +59,8 @@ CLEANFILES += $(MODSTUBS_O)
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += $(CNOWARN_UNINIT)
@@ -93,12 +77,6 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#