summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dan McDonald <danmcd@joyent.com> 2022-03-31 01:21:59 -0400
committer Dan McDonald <danmcd@joyent.com> 2022-03-31 01:21:59 -0400
commit 57c509a3a6c67a2c86ae463acfdc0a5fcc3a1723 (patch)
tree c4ff6085f3151a646787991689b03dbc49ce6ee4
parent 32bb949b910b504c06c2385bfe84e5a566d1f834 (diff)
parent 5103e761e384621c5728a6d1f4b0bfdc1be233a4 (diff)
download illumos-joyent-57c509a3a6c67a2c86ae463acfdc0a5fcc3a1723.tar.gz
[illumos-gate merge]
commit 5103e761e384621c5728a6d1f4b0bfdc1be233a4
    14569 bhyve should consolidate on hrtime
    14486 bhyve needs instruction emul tests
commit db9aa506ce275f82ee72f31fc2e6e3c53d1212b7
    13912 viona should track held pages
commit 899b7fc7762875c5244567fbc6bb4ccace75d6f7
    12315 errors in section 4i of the manual
commit a677d6730e287aadab3cc68d0c46fb01b25d72eb
    14583 developer/opensolaris/osnet requirements cleanup
-rw-r--r-- exception_lists/packaging.deps 14
-rw-r--r-- usr/src/compat/bhyve/sys/callout.h 12
-rw-r--r-- usr/src/compat/bhyve/sys/time.h 136
-rw-r--r-- usr/src/compat/bhyve/sys/types.h 5
-rw-r--r-- usr/src/man/man4i/dkio.4i 6
-rw-r--r-- usr/src/man/man4i/fbio.4i 5
-rw-r--r-- usr/src/man/man4i/fdio.4i 4
-rw-r--r-- usr/src/man/man4i/ipnat.4i 6
-rw-r--r-- usr/src/man/man4i/mhd.4i 4
-rw-r--r-- usr/src/man/man4i/mixer.4i 6
-rw-r--r-- usr/src/man/man4i/mtio.4i 8
-rw-r--r-- usr/src/man/man4i/sesio.4i 7
-rw-r--r-- usr/src/man/man4i/streamio.4i 24
-rw-r--r-- usr/src/man/man4i/termio.4i 6
-rw-r--r-- usr/src/man/man4i/uscsi.4i 4
-rw-r--r-- usr/src/man/man4i/visual_io.4i 15
-rw-r--r-- usr/src/man/man4i/vt.4i 5
-rw-r--r-- usr/src/pkg/manifests/developer-opensolaris-osnet.p5m 27
-rw-r--r-- usr/src/pkg/manifests/system-bhyve-tests.p5m 6
-rw-r--r-- usr/src/test/bhyve-tests/runfiles/default.run 10
-rw-r--r-- usr/src/test/bhyve-tests/tests/Makefile 2
-rw-r--r-- usr/src/test/bhyve-tests/tests/Makefile.in_guest 48
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/Mapfile.payload 49
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/in_guest.c 532
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/in_guest.h 51
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/payload_common.h 44
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/payload_start.s 27
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/Makefile 77
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_utils.h 28
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_utils.s 55
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c 69
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c 60
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c 94
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c 110
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c 36
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/test_defs.h 38
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c 145
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c 146
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c 169
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c 178
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c 151
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_impl.h 7
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_ring.c 269
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_rx.c 22
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_tx.c 15
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vatpit.c 80
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vhpet.c 103
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vlapic.c 100
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h 7
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vpmtmr.c 25
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vrtc.c 122
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm.mapfile 9
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c 60
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c 45
-rw-r--r-- usr/src/uts/i86pc/sys/vmm_drv.h 19
55 files changed, 2680 insertions, 622 deletions
diff --git a/exception_lists/packaging.deps b/exception_lists/packaging.deps
index 4a4f2cebbe..b15092247f 100644
--- a/exception_lists/packaging.deps
+++ b/exception_lists/packaging.deps
@@ -4,23 +4,13 @@
#
# These are ignored during package lint runs.
#
-pkg:/data/docbook
-pkg:/developer/gnu-binutils
-pkg:/developer/java/jdk
pkg:/developer/java/openjdk8
-pkg:/developer/lexer/flex
pkg:/developer/macro/cpp
-pkg:/developer/macro/gnu-m4
-pkg:/developer/parser/bison
-pkg:/developer/versioning/mercurial
pkg:/gnome/zenity
pkg:/library/expat
pkg:/library/glib2
pkg:/library/libxml2
-pkg:/library/libxslt
pkg:/library/nspr
-pkg:/library/nspr/header-nspr
-pkg:/library/perl-5/xml-parser
pkg:/library/security/openssl
pkg:/library/security/openssl-10
pkg:/library/security/openssl-11
@@ -29,8 +19,6 @@ pkg:/library/security/openssl-3
pkg:/library/security/trousers
pkg:/library/zlib
pkg:/package/pkg
-pkg:/print/cups
-pkg:/print/filter/ghostscript
pkg:/release/name
pkg:/runtime/python-27
pkg:/runtime/python-35
@@ -52,10 +40,8 @@ pkg:/system/library/gcc-runtime
pkg:/system/library/libdbus
pkg:/system/library/libdbus-glib
pkg:/system/library/mozilla-nss
-pkg:/system/library/mozilla-nss/header-nss
pkg:/system/management/snmp/net-snmp
pkg:/system/test/fio
pkg:/system/xvm/xvmstore
-pkg:/text/gnu-gettext
pkg:/text/less
pkg:/web/wget
diff --git a/usr/src/compat/bhyve/sys/callout.h b/usr/src/compat/bhyve/sys/callout.h
index 11823e6321..4156c2d4c6 100644
--- a/usr/src/compat/bhyve/sys/callout.h
+++ b/usr/src/compat/bhyve/sys/callout.h
@@ -45,8 +45,6 @@ struct callout {
#define callout_pending(c) ((c)->c_target > (c)->c_fired)
void vmm_glue_callout_init(struct callout *c, int mpsafe);
-int vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt,
- sbintime_t pr, void (*func)(void *), void *arg, int flags);
int vmm_glue_callout_stop(struct callout *c);
int vmm_glue_callout_drain(struct callout *c);
@@ -71,12 +69,10 @@ callout_drain(struct callout *c)
return (vmm_glue_callout_drain(c));
}
-static __inline int
-callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
- void (*func)(void *), void *arg, int flags)
-{
- return (vmm_glue_callout_reset_sbt(c, sbt, pr, func, arg, flags));
-}
+void callout_reset_hrtime(struct callout *c, hrtime_t target,
+ void (*func)(void *), void *arg, int flags);
+uint64_t hrt_freq_count(hrtime_t interval, uint32_t freq);
+hrtime_t hrt_freq_interval(uint32_t freq, uint64_t count);
#endif /* _COMPAT_FREEBSD_SYS_CALLOUT_H_ */
diff --git a/usr/src/compat/bhyve/sys/time.h b/usr/src/compat/bhyve/sys/time.h
deleted file mode 100644
index 48bdcc304e..0000000000
--- a/usr/src/compat/bhyve/sys/time.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2013 Pluribus Networks Inc.
- * Copyright 2020 Oxide Computer Company
- */
-
-#ifndef _COMPAT_FREEBSD_SYS_TIME_H_
-#define _COMPAT_FREEBSD_SYS_TIME_H_
-
-#include_next <sys/time.h>
-
-#define tc_precexp 0
-
-struct bintime {
- ulong_t sec; /* seconds */
- uint64_t frac; /* 64 bit fraction of a second */
-};
-
-#define BT2FREQ(bt) \
- (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
- ((bt)->frac >> 1))
-
-#define FREQ2BT(freq, bt) \
-{ \
- (bt)->sec = 0; \
- (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
-}
-
-static __inline void
-binuptime(struct bintime *bt)
-{
- hrtime_t now = gethrtime();
-
- bt->sec = now / 1000000000;
- /* 18446744073 = int(2^64 / 1000000000) = 1ns in 64-bit fractions */
- bt->frac = (now % 1000000000) * (uint64_t)18446744073LL;
-}
-
-#define bintime_cmp(a, b, cmp) \
- (((a)->sec == (b)->sec) ? \
- ((a)->frac cmp (b)->frac) : \
- ((a)->sec cmp (b)->sec))
-
-/*
- * The bintime_cmp() macro is problematic for a couple reasons:
- * 1. Bearing a lowercase name suggests it is a function rather than a macro.
- * 2. Placing the comparison operator as the last argument runs afoul of our
- * cstyle rules, unlike cases such as VERIFY3*().
- *
- * To remedy these issues in illumos bhyve, we provide a slightly modified
- * version which addresses both problems.
- */
-#define BINTIME_CMP(a, cmp, b) bintime_cmp((a), (b), cmp)
-
-#define SBT_1S ((sbintime_t)1 << 32)
-#define SBT_1M (SBT_1S * 60)
-#define SBT_1MS (SBT_1S / 1000)
-#define SBT_1US (SBT_1S / 1000000)
-#define SBT_1NS (SBT_1S / 1000000000)
-#define SBT_MAX 0x7fffffffffffffffLL
-
-
-static __inline void
-bintime_add(struct bintime *bt, const struct bintime *bt2)
-{
- uint64_t u;
-
- u = bt->frac;
- bt->frac += bt2->frac;
- if (u > bt->frac)
- bt->sec++;
- bt->sec += bt2->sec;
-}
-
-static __inline void
-bintime_sub(struct bintime *bt, const struct bintime *bt2)
-{
- uint64_t u;
-
- u = bt->frac;
- bt->frac -= bt2->frac;
- if (u < bt->frac)
- bt->sec--;
- bt->sec -= bt2->sec;
-}
-
-static __inline void
-bintime_mul(struct bintime *bt, u_int x)
-{
- uint64_t p1, p2;
-
- p1 = (bt->frac & 0xffffffffull) * x;
- p2 = (bt->frac >> 32) * x + (p1 >> 32);
- bt->sec *= x;
- bt->sec += (p2 >> 32);
- bt->frac = (p2 << 32) | (p1 & 0xffffffffull);
-}
-
-static __inline sbintime_t
-bttosbt(const struct bintime bt)
-{
- return (((sbintime_t)bt.sec << 32) + (bt.frac >> 32));
-}
-
-static __inline struct bintime
-sbttobt(sbintime_t _sbt)
-{
- struct bintime _bt;
-
- _bt.sec = _sbt >> 32;
- _bt.frac = _sbt << 32;
- return (_bt);
-}
-
-static __inline sbintime_t
-sbinuptime(void)
-{
- hrtime_t hrt = gethrtime();
- uint64_t sec = hrt / NANOSEC;
- uint64_t nsec = hrt % NANOSEC;
-
- return (((sbintime_t)sec << 32) +
- (nsec * (((uint64_t)1 << 63) / 500000000) >> 32));
-}
-
-#endif /* _COMPAT_FREEBSD_SYS_TIME_H_ */
diff --git a/usr/src/compat/bhyve/sys/types.h b/usr/src/compat/bhyve/sys/types.h
index 63731da42e..baa4cad157 100644
--- a/usr/src/compat/bhyve/sys/types.h
+++ b/usr/src/compat/bhyve/sys/types.h
@@ -29,11 +29,6 @@ typedef __uint64_t u_int64_t;
typedef __register_t register_t;
#endif
-#ifndef __SBINTIME_T_DEFINED
-#define __SBINTIME_T_DEFINED
-typedef __int64_t sbintime_t;
-#endif
-
#ifndef __VM_MEMATTR_T_DEFINED
#define __VM_MEMATTR_T_DEFINED
typedef char vm_memattr_t;
diff --git a/usr/src/man/man4i/dkio.4i b/usr/src/man/man4i/dkio.4i
index 05f1e48bee..89d8204e03 100644
--- a/usr/src/man/man4i/dkio.4i
+++ b/usr/src/man/man4i/dkio.4i
@@ -19,7 +19,7 @@
.\" Copyright 2016 Nexenta Systems, Inc.
.\" Copyright (c) 2017, Joyent, Inc.
.\"
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt DKIO 4I
.Os
.Sh NAME
@@ -47,7 +47,7 @@ controller, partitions, or geometry information on all architectures:
The argument is a pointer to a
.Vt dk_cinfo
structure (described below).
-This structure tells the controller-type and attributes regarding bad-block
+This structure contains the controller-type and attributes regarding bad-block
processing done on the controller.
.Bd -literal -offset 2n
/*
@@ -809,7 +809,7 @@ On disks larger than 1TB, this ioctl must be used instead of
* Used by applications to get partition or slice information
*/
struct extpart_info {
- diskkaddr_t p_start;
+ diskaddr_t p_start;
diskaddr_t p_length;
};
.Ed
diff --git a/usr/src/man/man4i/fbio.4i b/usr/src/man/man4i/fbio.4i
index e05c3ee787..ee02fd1cad 100644
--- a/usr/src/man/man4i/fbio.4i
+++ b/usr/src/man/man4i/fbio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the fields
.\" enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 22, 2017
+.Dd March 13, 2022
.Dt FBIO 4I
.Os
.Sh NAME
@@ -137,8 +137,7 @@ are used to transfer
variable-length, device-specific information into and out of framebuffers.
.Sh SEE ALSO
.Xr ioctl 2 ,
-.Xr mmap 2 ,
-.Xr cgsix 4D
+.Xr mmap 2
.Sh BUGS
The
.Dv FBIOSATTR
diff --git a/usr/src/man/man4i/fdio.4i b/usr/src/man/man4i/fdio.4i
index a3f01b1021..99ffb147b6 100644
--- a/usr/src/man/man4i/fdio.4i
+++ b/usr/src/man/man4i/fdio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 22, 2017
+.Dd March 13, 2022
.Dt FDIO 4I
.Os
.Sh NAME
@@ -147,7 +147,7 @@ struct fd_drive {
int fdd_ejectable; /* does the drive support eject? */
int fdd_maxsearch; /* size of per-unit search table */
int fdd_writeprecomp; /* cyl to start write precompensation */
- int fdd_writereduce; /* cyl to start recucing write current */
+ int fdd_writereduce; /* cyl to start reducing write current */
int fdd_stepwidth; /* width of step pulse in 1 us units */
int fdd_steprate; /* step rate in 100 us units */
int fdd_headsettle; /* delay, in 100 us units */
diff --git a/usr/src/man/man4i/ipnat.4i b/usr/src/man/man4i/ipnat.4i
index 1df2b96e2d..d986ec0ea3 100644
--- a/usr/src/man/man4i/ipnat.4i
+++ b/usr/src/man/man4i/ipnat.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt IPNAT 4I
.Os
.Sh NAME
@@ -23,7 +23,7 @@
.Sh DESCRIPTION
The
.Sy ipnat
-device provides interfaction with the NAT features of the Solaris IPFilter.
+device provides interaction with the NAT features of the Solaris IPFilter.
.Sh APPLICATION PROGRAMMING INTERFACE
The NAT features programming model is a component of the Solaris IP Filter and
is accessed via the NAT device file
@@ -421,7 +421,7 @@ before setting the
flag and providing a pointer in the
.Fa nat_fr
field that cannot be found in the current rule set.
-.It Er EACESS
+.It Er EACCES
The calling process issued a
.Dv SIOCSTPUT
before issuing a
diff --git a/usr/src/man/man4i/mhd.4i b/usr/src/man/man4i/mhd.4i
index 735e69982e..b19e3fc79c 100644
--- a/usr/src/man/man4i/mhd.4i
+++ b/usr/src/man/man4i/mhd.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt MHD 4I
.Os
.Sh NAME
@@ -61,7 +61,7 @@ caller must first obtain the open file descriptor.
Non-shared multihost disks ioctls consist of
.Dv MHIOCTKOWN ,
.Dv MHIOCRELEASE ,
-.Dv HIOCSTATUS ,
+.Dv MHIOCSTATUS ,
and
.Dv MHIOCQRESERVE .
These ioctl requests control the access rights of non-shared multihost disks.
diff --git a/usr/src/man/man4i/mixer.4i b/usr/src/man/man4i/mixer.4i
index 6d4487796b..43ffa03ef0 100644
--- a/usr/src/man/man4i/mixer.4i
+++ b/usr/src/man/man4i/mixer.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd February 1, 2019
+.Dd March 13, 2022
.Dt MIXER 4I
.Os
.Sh NAME
@@ -438,10 +438,10 @@ should open this file to access the mixer settings.
The pseudo
.Pa /dev/mixer
device supports ioctls that can change the
-oarious settings for the audio hardware in the system.
+various settings for the audio hardware in the system.
.Pp
Those ioctls should only be used by dedicated mixer applications or desktop
-olumme controls, and not by typical ordinary audio applications such as media
+volume controls, and not by typical ordinary audio applications such as media
players.
Ordinary applications that wish to adjust their own volume settings
should use the
diff --git a/usr/src/man/man4i/mtio.4i b/usr/src/man/man4i/mtio.4i
index 93db7028c7..5f0b0ddbb0 100644
--- a/usr/src/man/man4i/mtio.4i
+++ b/usr/src/man/man4i/mtio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 28, 2021
+.Dd March 13, 2022
.Dt MTIO 4I
.Os
.Sh NAME
@@ -201,7 +201,7 @@ errors until the
ioctl is issued.
An
.Dv MTFSF
-ioctl can then he issued.
+ioctl can then be issued.
.Pp
Two successful successive reads that both return zero byte counts indicate
.Sy EOM
@@ -284,7 +284,7 @@ issued, two records are written; one for 65,534 bytes followed by another
record for 6 bytes.
Newer variable-length tape drivers may relax the above
limitation and allow applications to write record sizes larger than 65,534.
-effer to the specific tape driver man page for details.
+Refer to the specific tape driver man page for details.
.Pp
When logical
.Sy EOT
@@ -840,7 +840,7 @@ Note \(em When tape alert cleaning is managed by the st driver, the tape
target driver may continue to return a
.Dq drive needs cleaning
status unless an
-.Dv MTIOCGE
+.Dv MTIOCGET
.Xr ioctl 2
call is made while the cleaning media is in the drive.
.Pp
diff --git a/usr/src/man/man4i/sesio.4i b/usr/src/man/man4i/sesio.4i
index 94d12dd64d..c92e4c34a9 100644
--- a/usr/src/man/man4i/sesio.4i
+++ b/usr/src/man/man4i/sesio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt SESIO 4I
.Os
.Sh NAME
@@ -90,6 +90,7 @@ uint8_t reserved[3]; /* Reserved; Set to 0 */
.Ed
.Sh ARCHITECTURE
SPARC
+X86
.Sh SEE ALSO
-.Xr ses 4D ,
-.Xr ioctl 9E
+.Xr ioctl 2 ,
+.Xr ses 4D
diff --git a/usr/src/man/man4i/streamio.4i b/usr/src/man/man4i/streamio.4i
index 4e99ba3dfe..8dbaeba2fc 100644
--- a/usr/src/man/man4i/streamio.4i
+++ b/usr/src/man/man4i/streamio.4i
@@ -15,7 +15,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 29, 2017
+.Dd March 13, 2022
.Dt STREAMIO 4I
.Os
.Sh NAME
@@ -357,14 +357,14 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
value is invalid or
.Fa arg
is zero and process is not registered to receive the
.Dv SIGPOLL
signal.
-.It Sy EAGAIN
+.It Er EAGAIN
Allocation of a data structure to store the signal request failed.
.El
.It Dv I_GETSIG
@@ -381,11 +381,11 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
Process not registered to receive the
.Dv SIGPOLL
signal.
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
.El
@@ -399,10 +399,10 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
does not contain a valid module name.
.El
@@ -807,7 +807,7 @@ calls
will block until the active
.Dv I_STR
completes via a positive or negative
-acknowlegment, a timeout, or an error condition at the stream head.
+acknowledgement, a timeout, or an error condition at the stream head.
By setting the
.Fa ic_timout
field to 0, the user is requesting STREAMS to provide
@@ -897,7 +897,7 @@ On failure,
.Va errno
may be set to the following value:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
is not the above legal value.
.El
@@ -1168,7 +1168,7 @@ On failure,
.Va errno
is set to the following value:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
Invalid
.Va arg
value.
@@ -1259,7 +1259,7 @@ On failure,
.Va errno
is set to the following value:
.Bl -tag -width EFAULT
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
.El
@@ -1495,7 +1495,7 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EAGAIN
-.It Sy ENXIO
+.It Er ENXIO
Hangup received on
.Fa fildes .
.It Er ETIME
diff --git a/usr/src/man/man4i/termio.4i b/usr/src/man/man4i/termio.4i
index c30fdd5262..c7dcba468a 100644
--- a/usr/src/man/man4i/termio.4i
+++ b/usr/src/man/man4i/termio.4i
@@ -15,7 +15,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 13, 2021
+.Dd March 13, 2022
.Dt TERMIO 4I
.Os
.Sh NAME
@@ -233,7 +233,7 @@ Reprinting also occurs automatically if
characters that would normally be erased from the screen are fouled by program
output.
The characters are reprinted as if they were being echoed;
-consequencely, if
+consequently, if
.Dv ECHO
is not set, they are not printed.
.Pp
@@ -407,7 +407,7 @@ example, file transfer programs), where a program would like to process at
least
.Sy MIN
characters at a time.
-In case A, the inteercharacter timer is
+In case A, the intercharacter timer is
activated by a user as a safety measure; in case B, the timer is turned off.
.Pp
Cases C and D exist to handle single character, timed transfers.
diff --git a/usr/src/man/man4i/uscsi.4i b/usr/src/man/man4i/uscsi.4i
index bd8b384c9b..b3e53cd04e 100644
--- a/usr/src/man/man4i/uscsi.4i
+++ b/usr/src/man/man4i/uscsi.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt USCSI 4I
.Os
.Sh NAME
@@ -251,7 +251,7 @@ must point to memory writable by the application.
.It Dv USCSI_RENEGOT
Tells USCSI to renegotiate wide mode and synchronous transfer speed before the
transmitted SCSI command is executed.
-This flag in effects tells the target driver to pass the
+This flag in effect tells the target driver to pass the
.Dv FLAG_RENEGOTIATE_WIDE_SYNC
flag in the SCSI packet
before passing the command to an adapter driver for transport.
diff --git a/usr/src/man/man4i/visual_io.4i b/usr/src/man/man4i/visual_io.4i
index 91975c625c..76a3c4c4d5 100644
--- a/usr/src/man/man4i/visual_io.4i
+++ b/usr/src/man/man4i/visual_io.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 31, 2018
+.Dd March 13, 2022
.Dt VISUAL_IO 4I
.Os
.Sh NAME
@@ -51,7 +51,7 @@ standalone mode (for example, when using a stand-alone debugger, entering
the PROM monitor, or when the system panicking).
These are also known as
.Dq Polled I/O
-entry points, which operate under an an explicit set of restrictions, described below.
+entry points, which operate under an explicit set of restrictions, described below.
.Sh IOCTLS
.Bl -tag -width VIS_GETIDENTIFIER -compact
.It Dv VIS_GETIDENTIFIER
@@ -62,11 +62,6 @@ used in the illumos VISUAL environment.
This is a mandatory ioctl and must return a unique string.
We suggest that the name be formed as
.Ao companysymbol Ac Ns Ao devicetype Ac .
-For example, the
-.Xr cgsix 4D
-driver
-returns
-.Sy SUNWcg6 .
.Pp
.Dv VIS_GETIDENTIFIER
takes a
@@ -105,7 +100,7 @@ struct vis_cursorcmap {
#define VIS_CURSOR_SETCURSOR 0x01 /* set cursor */
/* set cursor position */
#define VIS_CURSOR_SETPOSITION 0x02
- /* set cursur hot spot */
+ /* set cursor hot spot */
#define VIS_CURSOR_SETHOTSPOT 0x04
/* set cursor colormap */
#define VIS_CURSOR_SETCOLORMAP 0x08
@@ -609,7 +604,7 @@ ioctl was set to
and
.Fa t_col
are defined to be pixel offsets from the starting position of the
-onssole device.
+console device.
.Pp
.Fa direction
specifies which way to do the copy.
@@ -684,7 +679,7 @@ addition to performing the prescribed function of their ioctl counterparts, the
standalone vectors operate in a special context and must adhere to a strict set
of rules.
The polled I/O vectors are called directly whenever the system is
-quisced (running in a limited context) and must send output to the display.
+quiesced (running in a limited context) and must send output to the display.
Standalone mode describes the state in which the system is running in
single-threaded mode and only one processor is active.
illumos operating
diff --git a/usr/src/man/man4i/vt.4i b/usr/src/man/man4i/vt.4i
index 0684aa44ff..747e30cb1f 100644
--- a/usr/src/man/man4i/vt.4i
+++ b/usr/src/man/man4i/vt.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd December 28, 2020
+.Dd March 13, 2022
.Dt VT 4I
.Os
.Sh NAME
@@ -319,7 +319,8 @@ online 17:49:11 svc:/system/console-login:vt6
.Sy console-login:default
is for the system console, others for virtual consoles.
.Pp
-You can modify properties/disable/enable and remove/add virtual consoles using
+You can modify properties of, disable/enable, and remove/add virtual consoles
+using
.Xr smf 7 :
.Bd -literal -offset indent
# svccfg -s console-login add vt8
diff --git a/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m b/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
index 9106668756..b0a2f98b02 100644
--- a/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
+++ b/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
@@ -23,38 +23,15 @@
# Copyright (c) 2018, Joyent, Inc.
set name=pkg.fmri value=pkg:/developer/opensolaris/osnet@$(PKGVERS)
-set name=pkg.summary \
- value="Dependencies required to build the OS/Net Consolidation."
+set name=pkg.summary value="Dependencies required to build the illumos-gate."
set name=pkg.description \
- value="Dependencies required to build the OS/Net Consolidation."
+ value="Dependencies required to build the illumos-gate."
set name=info.classification \
value="org.opensolaris.category.2008:Development/Distribution Tools"
set name=org.opensolaris.noincorp value=true
set name=variant.arch value=$(ARCH)
-depend type=require fmri=data/docbook@0.5.11-0.133
depend type=require fmri=developer/astdev@0.5.11-0.133
depend type=require fmri=developer/build/make@0.5.11-0.133
depend type=require fmri=developer/build/onbld@0.5.11-0.133
-$(i386_ONLY)depend type=require fmri=developer/gnu-binutils@2.19-0.133
-depend type=require fmri=developer/java/jdk@0.5.11-0.133
-depend type=require fmri=developer/lexer/flex@2.5.35-0.133
-depend type=require fmri=developer/macro/gnu-m4@1.4
depend type=require fmri=developer/object-file@0.5.11-0.133
-depend type=require fmri=developer/parser/bison@2.3-0.133
-depend type=require fmri=developer/versioning/mercurial@1.3.1-0.133
-depend type=require fmri=library/glib2@0.5.11-0.133
-depend type=require fmri=library/libxml2@2.7.6-0.133
-depend type=require fmri=library/libxslt@0.5.11-0.133
-depend type=require fmri=library/nspr/header-nspr@0.5.11-0.133
-depend type=require fmri=library/perl-5/xml-parser@0.5.11-0.133
-depend type=require fmri=library/security/trousers@0.3.2-0.133
-depend type=require fmri=print/cups@1.4.2-0.133
-depend type=require fmri=print/filter/ghostscript@8.64-0.133
-depend type=require fmri=runtime/perl$(PERL_PKGVERS)@5.10.0-0.133
depend type=require fmri=system/header@0.5.11-0.133
-depend type=require fmri=system/library/dbus@0.5.11-0.133
-depend type=require fmri=system/library/libdbus-glib@0.5.11-0.133
-depend type=require fmri=system/library/libdbus@0.5.11-0.133
-depend type=require fmri=system/library/mozilla-nss/header-nss@0.5.11-0.133
-depend type=require fmri=system/management/snmp/net-snmp@5.4.1-0.133
-depend type=require fmri=text/gnu-gettext@0.16.1-0.133
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m
index d0d31a0190..4222b1d09b 100644
--- a/usr/src/pkg/manifests/system-bhyve-tests.p5m
+++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m
@@ -30,6 +30,12 @@ file path=opt/bhyve-tests/bin/bhyvetest mode=0555
dir path=opt/bhyve-tests/runfiles
file path=opt/bhyve-tests/runfiles/default.run mode=0444
dir path=opt/bhyve-tests/tests
+dir path=opt/bhyve-tests/tests/kdev
+file path=opt/bhyve-tests/tests/kdev/vatpit_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vhpet_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vlapic_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vlapic_freq_periodic mode=0555
+file path=opt/bhyve-tests/tests/kdev/vpmtmr_freq mode=0555
dir path=opt/bhyve-tests/tests/mevent
file path=opt/bhyve-tests/tests/mevent/lists_delete mode=0555
file path=opt/bhyve-tests/tests/mevent/read_disable mode=0555
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run
index 0aae1bcb46..c37bee591d 100644
--- a/usr/src/test/bhyve-tests/runfiles/default.run
+++ b/usr/src/test/bhyve-tests/runfiles/default.run
@@ -28,6 +28,16 @@ tests = [
'mem_seg_map'
]
+[/opt/bhyve-tests/tests/kdev]
+user = root
+tests = [
+ 'vatpit_freq',
+ 'vhpet_freq',
+ 'vlapic_freq',
+ 'vlapic_freq_periodic',
+ 'vpmtmr_freq'
+ ]
+
# Tests of userspace mevent system, built from cmd/bhyve
[/opt/bhyve-tests/tests/mevent]
tests = ['lists_delete', 'read_disable', 'read_pause', 'read_requeue',
diff --git a/usr/src/test/bhyve-tests/tests/Makefile b/usr/src/test/bhyve-tests/tests/Makefile
index bf18b300ca..8d528c3f80 100644
--- a/usr/src/test/bhyve-tests/tests/Makefile
+++ b/usr/src/test/bhyve-tests/tests/Makefile
@@ -15,6 +15,6 @@
.PARALLEL: $(SUBDIRS)
-SUBDIRS = vmm
+SUBDIRS = kdev vmm
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/bhyve-tests/tests/Makefile.in_guest b/usr/src/test/bhyve-tests/tests/Makefile.in_guest
new file mode 100644
index 0000000000..7ce2b0d531
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/Makefile.in_guest
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2022 Oxide Computer Company
+
+PAYLOAD_CLEANFILES = payload_start.o \
+ $(PAYLOADS:%=payload_%) \
+ $(PAYLOADS:%=payload_%.o) \
+ $(PAYLOADS:%=pobj_%.o) \
+ $(PAYLOADS:%=pobj_%.s)
+
+$(PAYLOADS:%=payload_%.o) := AS_CPPFLAGS += -I../common
+
+payload_%: payload_start.o payload_%.o
+ $(LD) -dn -e _start -M ../common/Mapfile.payload -o $@ $^
+
+pobj_%.s: payload_%
+ @echo " .data" > $@
+ @echo " .globl payload_data" >> $@
+ @echo "payload_data:" >> $@
+ $(ELFEXTRACT) $^ >> $@
+ @echo " .size payload_data, [.-payload_data]" >> $@
+ @echo " .align 4" >> $@
+ @echo " .globl payload_size" >> $@
+ @echo " .size payload_size, 4" >> $@
+ @echo "payload_size:" >> $@
+ @echo " .data" >> $@
+ @echo " .long [.-payload_data]" >> $@
+
+pobj_%.o: pobj_%.s
+ $(COMPILE.s) -o $@ $^
+ $(POST_PROCESS)
+
+%.o: ../common/%.s
+ $(COMPILE.s) -o $@ $^
+ $(POST_PROCESS)
+
+%.o: ../common/%.c
+ $(COMPILE.c) -o $@ $^
+ $(POST_PROCESS)
diff --git a/usr/src/test/bhyve-tests/tests/common/Mapfile.payload b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload
new file mode 100644
index 0000000000..ef69288c56
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+$mapfile_version 2
+
+# The .eh_frame data was ending up in front of the .text segment, causing issues
+# when the guest attempted to start its payload
+NULL_SEGMENT discard {
+ ASSIGN_SECTION eh_discard {
+ IS_NAME = .eh_frame;
+ };
+};
+
+# A single read/write/execute segment holds the entire payload.  Its address
+# (0x800000) is the same value as MEM_LOC_PAYLOAD in payload_common.h, which is
+# where the host-side load_payload() copies the payload into guest memory.
+LOAD_SEGMENT payload {
+ FLAGS = READ WRITE EXECUTE;
+ VADDR = 0x800000;
+ PADDR = 0x800000;
+ ALIGN = 0x1000;
+
+ # Make sure that payload_start.s`_start is the first thing in .text segment,
+ # since when we "boot", that is where we want to begin running.
+ ASSIGN_SECTION is_start_text {
+ IS_NAME = .text;
+ FILE_BASENAME = payload_start.o;
+ };
+ ASSIGN_SECTION is_text {
+ IS_NAME = .text;
+ };
+ # Any other allocable section is placed after all of the .text input
+ ASSIGN_SECTION is_alloc {
+ FLAGS = ALLOC;
+ };
+ IS_ORDER = is_start_text is_text is_alloc;
+};
diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c
new file mode 100644
index 0000000000..31bebc0665
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c
@@ -0,0 +1,532 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/segments.h>
+#include <sys/psw.h>
+#include <sys/controlregs.h>
+#include <sys/sysmacros.h>
+#include <sys/varargs.h>
+#include <sys/debug.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+
+
+/*
+ * Bits used in the x86 page table entries built by populate_identity_table():
+ * valid (bit 0), writable (bit 1), write-through (bit 3), cache-disable
+ * (bit 4) and page-size (bit 7).
+ */
+#define PT_VALID 0x01
+#define PT_WRITABLE 0x02
+#define PT_WRITETHRU 0x08
+#define PT_NOCACHE 0x10
+#define PT_PAGESIZE 0x80
+
+/*
+ * Fields of the segment access value passed to vm_set_desc() by
+ * test_setup_vcpu(): the descriptor type in bits 0-4, DPL in bits 5-6, and
+ * single-bit flags above that.
+ */
+#define SEG_ACCESS_TYPE_MASK 0x1f
+#define SEG_ACCESS_DPL_MASK 0x60
+#define SEG_ACCESS_P (1 << 7)
+#define SEG_ACCESS_AVL (1 << 12)
+#define SEG_ACCESS_L (1 << 13)
+#define SEG_ACCESS_D (1 << 14)
+#define SEG_ACCESS_G (1 << 15)
+#define SEG_ACCESS_UNUSABLE (1 << 16)
+
+
+/*
+ * Keep the test name and VM context around so the consumer is not required to
+ * pass either of them to us for subsequent test-related operations after the
+ * initialization has been performed.
+ *
+ * The test code is not designed to be reentrant at this point.
+ */
+static struct vmctx *test_vmctx = NULL;
+static const char *test_name = NULL;
+
+/*
+ * Fill in the guest page tables so that everything up through 0xffffffff is
+ * identity-mapped with 2MiB pages.
+ *
+ * The pages starting at MEM_LOC_PAGE_TABLE_2M hold the 2MiB entries (512 per
+ * page, so 1GiB of address space per page).  The page at MEM_LOC_PAGE_TABLE_1G
+ * points at the four pages of 2MiB entries, and the first entry of the page at
+ * MEM_LOC_PAGE_TABLE_512G points at the 1G table in turn.
+ */
+static void
+populate_identity_table(struct vmctx *ctx)
+{
+	uint64_t gpa = 0;
+	uint64_t pte_loc = MEM_LOC_PAGE_TABLE_2M;
+
+	/* One pass per page of 2MiB entries until the low 4GiB is covered */
+	while (gpa < 0x100000000) {
+		uint64_t *table = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);
+
+		for (uint_t idx = 0; idx < 512; idx++) {
+			uint64_t entry =
+			    gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
+
+			/* Make traditional MMIO space uncachable */
+			if (gpa >= 0xc0000000) {
+				entry |= PT_WRITETHRU | PT_NOCACHE;
+			}
+			table[idx] = entry;
+			gpa += 0x200000;
+		}
+		pte_loc += PAGE_SIZE;
+	}
+	/* The 2MiB-entry pages must end exactly where the 1G table begins */
+	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);
+
+	/* Point the 1G-level table at each page of 2MiB entries */
+	uint64_t *pdpt = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
+	for (uint_t idx = 0; idx < 4; idx++) {
+		pdpt[idx] = (MEM_LOC_PAGE_TABLE_2M + idx * PAGE_SIZE) |
+		    PT_VALID | PT_WRITABLE;
+	}
+
+	/* The top-level table needs only its first entry */
+	uint64_t *pml4 = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
+	pml4[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
+}
+
+/*
+ * Placeholder for populating the guest descriptor tables.  It currently
+ * writes nothing: the segment, GDTR, IDTR and TR state which the tests rely on
+ * is loaded directly into the vCPU by test_setup_vcpu() via vm_set_desc().
+ */
+static void
+populate_desc_tables(struct vmctx *ctx)
+{
+
+}
+
+/*
+ * Release the VM instance associated with the running test, if there is one.
+ *
+ * When the test failed and the KEEP_ON_FAIL environment variable is set to
+ * something other than an empty string or "0", the instance is left in place
+ * rather than destroyed.  Either way, the cached context is forgotten.
+ */
+static void
+test_cleanup(bool is_failure)
+{
+	if (test_vmctx == NULL) {
+		return;
+	}
+
+	const char *keep_env = getenv("KEEP_ON_FAIL");
+	const bool keep_on_fail = (keep_env != NULL &&
+	    *keep_env != '\0' && strcmp(keep_env, "0") != 0);
+
+	if (!(is_failure && keep_on_fail)) {
+		vm_destroy(test_vmctx);
+	}
+	test_vmctx = NULL;
+}
+
+/*
+ * Common tail for all failure paths: report "FAIL <test name>" on stdout,
+ * clean up the VM (subject to KEEP_ON_FAIL), and exit with EXIT_FAILURE.
+ * Does not return.
+ */
+static void
+fail_finish(void)
+{
+	assert(test_name != NULL);
+
+	(void) fprintf(stdout, "FAIL %s\n", test_name);
+	test_cleanup(true);
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Fail the test, printing "msg: <strerror(err)>" to stderr first.
+ * Does not return.
+ */
+void
+test_fail_errno(int err, const char *msg)
+{
+	(void) fprintf(stderr, "%s: %s\n", msg, strerror(err));
+
+	fail_finish();
+}
+
+/*
+ * Fail the test after emitting a printf-style message to stderr.  The message
+ * is printed as given; no newline is appended.  Does not return.
+ */
+void
+test_fail_msg(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	(void) vfprintf(stderr, fmt, ap);
+	/* Pair the va_start() before heading into the exit path */
+	va_end(ap);
+
+	fail_finish();
+}
+
+/*
+ * Fail the test because of a vm exit which the test did not expect.
+ *
+ * A description of the exit (its kind, %rip, and the details available for
+ * that kind) is written to stderr before the common failure handling is
+ * invoked.  Does not return.
+ */
+void
+test_fail_vmexit(const struct vm_exit *vexit)
+{
+	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";
+
+	switch (vexit->exitcode) {
+	case VM_EXITCODE_INOUT:
+		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
+		(void) fprintf(stderr,
+		    "\teax: %08x\n"
+		    "\tport: %04x\n"
+		    "\tbytes: %u\n"
+		    "\tflags: %x\n",
+		    vexit->u.inout.eax,
+		    vexit->u.inout.port,
+		    vexit->u.inout.bytes,
+		    vexit->u.inout.flags);
+		break;
+	case VM_EXITCODE_MMIO:
+		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
+		/* gpa and data are 64-bit, so they need the 'l' modifier */
+		(void) fprintf(stderr,
+		    "\tbytes: %u\n"
+		    "\ttype: %s\n"
+		    "\tgpa: %lx\n"
+		    "\tdata: %016lx\n",
+		    vexit->u.mmio.bytes,
+		    vexit->u.mmio.read == 0 ? "write" : "read",
+		    vexit->u.mmio.gpa,
+		    vexit->u.mmio.data);
+		break;
+	case VM_EXITCODE_VMX:
+		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
+		(void) fprintf(stderr,
+		    "\tstatus: %x\n"
+		    "\treason: %x\n"
+		    "\tqualification: %lx\n"
+		    "\tinst_type: %x\n"
+		    "\tinst_error: %x\n",
+		    vexit->u.vmx.status,
+		    vexit->u.vmx.exit_reason,
+		    vexit->u.vmx.exit_qualification,
+		    vexit->u.vmx.inst_type,
+		    vexit->u.vmx.inst_error);
+		break;
+	case VM_EXITCODE_SVM:
+		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
+		break;
+	case VM_EXITCODE_INST_EMUL: {
+		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
+		    vexit->rip);
+		/* With no count of valid bytes, dump 15 of them */
+		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
+		    vexit->u.inst_emul.num_valid : 15;
+		(void) fprintf(stderr, "\tinstruction bytes: [");
+		for (uint_t i = 0; i < len; i++) {
+			(void) fprintf(stderr, "%s%02x",
+			    i == 0 ? "" : ", ",
+			    vexit->u.inst_emul.inst[i]);
+		}
+		(void) fprintf(stderr, "]\n");
+		break;
+	}
+	case VM_EXITCODE_SUSPENDED:
+		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
+		/* Each line is newline-terminated, like the other cases */
+		switch (vexit->u.suspended.how) {
+		case VM_SUSPEND_RESET:
+			(void) fprintf(stderr, "\thow: reset\n");
+			break;
+		case VM_SUSPEND_POWEROFF:
+			(void) fprintf(stderr, "\thow: poweroff\n");
+			break;
+		case VM_SUSPEND_HALT:
+			(void) fprintf(stderr, "\thow: halt\n");
+			break;
+		case VM_SUSPEND_TRIPLEFAULT:
+			(void) fprintf(stderr, "\thow: triple-fault\n");
+			break;
+		default:
+			(void) fprintf(stderr, "\thow: unknown - %d\n",
+			    vexit->u.suspended.how);
+			break;
+		}
+		break;
+	default:
+		(void) fprintf(stderr, "Unexpected code %d exit:\n"
+		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
+		break;
+	}
+	fail_finish();
+}
+
+/*
+ * Conclude a successful test: report "PASS <test name>" on stdout, destroy
+ * the VM instance, and exit with EXIT_SUCCESS.  Does not return.
+ */
+void
+test_pass(void)
+{
+	assert(test_name != NULL);
+
+	(void) fprintf(stdout, "PASS %s\n", test_name);
+	test_cleanup(false);
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Copy the payload which was linked into this program (the payload_data and
+ * payload_size symbols from the generated pobj_*.s wrapper) into guest memory
+ * at MEM_LOC_PAYLOAD.
+ *
+ * A payload too large to fit between MEM_LOC_PAYLOAD and the end of guest
+ * memory fails the test outright.  Returns 0 on success, or EFAULT if the
+ * destination guest memory could not be mapped.
+ */
+static int
+load_payload(struct vmctx *ctx)
+{
+	extern uint8_t payload_data;
+	extern uint32_t payload_size;
+
+	const uint32_t len = payload_size;
+	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);
+
+	if (len > cap) {
+		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
+	}
+
+	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
+	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
+	if (outp == NULL) {
+		/* Report the failure rather than copying through NULL */
+		return (EFAULT);
+	}
+	bcopy(&payload_data, outp, len);
+
+	return (0);
+}
+
+/*
+ * Create and prepare a VM instance for the named test.
+ *
+ * The instance is named "bhyve-test-<tname>-<pid>", given MEM_TOTAL_SZ bytes
+ * of memory, and has its page tables and payload loaded.  The test name and VM
+ * context are recorded for later use by test_pass() and the test_fail_*()
+ * routines.  Any failure along the way terminates the program; on success the
+ * VM context is returned.
+ *
+ * This may be called only once per process.
+ */
+struct vmctx *
+test_initialize(const char *tname)
+{
+	char vm_name[VM_MAX_NAMELEN];
+	int err;
+	struct vmctx *ctx;
+
+	assert(test_vmctx == NULL);
+	assert(test_name == NULL);
+
+	test_name = strdup(tname);
+	if (test_name == NULL) {
+		/*
+		 * The normal failure path asserts that test_name is set, so
+		 * it cannot be used here.  No VM exists yet, so there is
+		 * nothing to clean up.
+		 */
+		(void) fprintf(stderr, "Could not record test name: %s\n",
+		    strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
+	    test_name, getpid());
+
+	err = vm_create(vm_name, 0);
+	if (err != 0) {
+		test_fail_errno(err, "Could not create VM");
+	}
+
+	ctx = vm_open(vm_name);
+	if (ctx == NULL) {
+		test_fail_errno(errno, "Could not open VM");
+	}
+	/* From here on, failure handling will tear down the instance */
+	test_vmctx = ctx;
+
+	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
+	if (err != 0) {
+		test_fail_errno(err, "Could not set up VM memory");
+	}
+
+	populate_identity_table(ctx);
+	populate_desc_tables(ctx);
+
+	err = load_payload(ctx);
+	if (err != 0) {
+		test_fail_errno(err, "Could not load payload");
+	}
+
+	return (ctx);
+}
+
+/*
+ * Prepare a vCPU to begin executing 64-bit code at `rip` with its stack
+ * pointer at `rsp`.
+ *
+ * The vCPU is activated, its descriptor state is loaded, its control
+ * registers are set so that it starts out in long mode using the page tables
+ * at MEM_LOC_PAGE_TABLE_512G, and its run state is set to VRS_RUN.
+ *
+ * Returns 0 on success, otherwise the error from the first step which failed.
+ */
+int
+test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp)
+{
+	int err;
+
+	/* EBUSY from activation is tolerated; any other error is not */
+	err = vm_activate_cpu(ctx, vcpu);
+	if (err != 0 && err != EBUSY) {
+		return (err);
+	}
+
+	/* Descriptor state, applied in the order listed */
+	const struct {
+		int		reg;
+		uint64_t	base;
+		uint32_t	limit;
+		uint32_t	access;
+	} descs[] = {
+		/*
+		 * Granularity bit important here for VMX validity:
+		 * "If any bit in the limit field in the range 31:20 is 1, G
+		 * must be 1"
+		 */
+		{ VM_REG_GUEST_CS, 0, UINT32_MAX,
+		    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G },
+		{ VM_REG_GUEST_SS, 0, UINT32_MAX,
+		    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
+		    SEG_ACCESS_D | SEG_ACCESS_G },
+		{ VM_REG_GUEST_DS, 0, UINT32_MAX,
+		    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G },
+		/*
+		 * While SVM will happily run with an otherwise unusable TR,
+		 * VMX includes it among its entry checks.
+		 */
+		{ VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
+		    SDT_SYSTSSBSY | SEG_ACCESS_P },
+		{ VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0 },
+		{ VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0 },
+		/* Mark unused segments as explicitly unusable (for VMX) */
+		{ VM_REG_GUEST_ES, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_FS, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_GS, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_LDTR, 0, 0, SEG_ACCESS_UNUSABLE },
+	};
+	for (uint_t i = 0; i < ARRAY_SIZE(descs); i++) {
+		err = vm_set_desc(ctx, vcpu, descs[i].reg, descs[i].base,
+		    descs[i].limit, descs[i].access);
+		if (err != 0) {
+			return (err);
+		}
+	}
+
+	/* Place CPU directly in long mode */
+	const int regnums[] = {
+		VM_REG_GUEST_CR0,
+		VM_REG_GUEST_CR3,
+		VM_REG_GUEST_CR4,
+		VM_REG_GUEST_EFER,
+		VM_REG_GUEST_RFLAGS,
+		VM_REG_GUEST_RIP,
+		VM_REG_GUEST_RSP,
+		VM_REG_GUEST_CS,
+		VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS,
+		VM_REG_GUEST_TR,
+	};
+	/* Values correspond one-to-one with the entries of regnums[] */
+	uint64_t regvals[] = {
+		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
+		    CR0_MP | CR0_PE,
+		MEM_LOC_PAGE_TABLE_512G,
+		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
+		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
+		/* start with interrupts disabled */
+		PS_MB1,
+		rip,
+		rsp,
+		(GDT_KCODE << 3),
+		(GDT_KDATA << 3),
+		(GDT_KDATA << 3),
+		(GDT_KTSS << 3),
+	};
+	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));
+
+	err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums,
+	    regvals);
+	if (err != 0) {
+		return (err);
+	}
+
+	return (vm_set_run_state(ctx, vcpu, VRS_RUN, 0));
+}
+
+/*
+ * Classify a vm exit on behalf of test_run_vcpu().
+ *
+ * - BOGUS and REQIDLE exits need no handling: the entry structure is cleared
+ *   and VEK_REENTR is returned so that the caller simply re-enters the guest.
+ * - An OUT to IOP_TEST_RESULT is the guest reporting its result: a value of 0
+ *   yields VEK_TEST_PASS, anything else VEK_TEST_FAIL.
+ * - Everything else is VEK_UNHANDLED, and left for the test to interpret.
+ */
+static enum vm_exit_kind
+which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
+{
+	const struct vm_inout *inout = &vexit->u.inout;
+
+	switch (vexit->exitcode) {
+	case VM_EXITCODE_BOGUS:
+	case VM_EXITCODE_REQIDLE:
+		/*
+		 * Clear the entire entry structure, not merely a pointer's
+		 * worth of it, so nothing stale is carried into re-entry.
+		 */
+		bzero(ventry, sizeof (*ventry));
+		return (VEK_REENTR);
+	case VM_EXITCODE_INOUT:
+		if (inout->port == IOP_TEST_RESULT &&
+		    (inout->flags & INOUT_IN) == 0) {
+			if (inout->eax == 0) {
+				return (VEK_TEST_PASS);
+			} else {
+				return (VEK_TEST_FAIL);
+			}
+		}
+		break;
+	default:
+		break;
+	}
+	return (VEK_UNHANDLED);
+}
+
+/*
+ * Enter the guest on the given vCPU and classify the resulting exit.
+ *
+ * A failure of vm_run() itself fails the test and does not return.  Otherwise
+ * the exit data is left in `vexit` and its classification is returned.
+ */
+enum vm_exit_kind
+test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry,
+    struct vm_exit *vexit)
+{
+	const int run_err = vm_run(ctx, vcpu, ventry, vexit);
+
+	if (run_err != 0) {
+		test_fail_errno(run_err, "Failure during vcpu entry");
+	}
+	return (which_exit_kind(ventry, vexit));
+}
+
+/*
+ * Prepare `ventry` so that the next guest entry completes the IN/OUT
+ * operation described by `vexit`.  The operation details are copied from the
+ * exit; for an IN, `data` is supplied as the value read by the guest.
+ *
+ * The exit must be a VM_EXITCODE_INOUT one.
+ */
+void
+ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
+    uint32_t data)
+{
+	struct vm_inout *reply = &ventry->u.inout;
+
+	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);
+
+	bcopy(&vexit->u.inout, reply, sizeof (struct vm_inout));
+	if ((reply->flags & INOUT_IN) != 0) {
+		reply->eax = data;
+	}
+	ventry->cmd = VEC_FULFILL_INOUT;
+}
+
+/*
+ * Prepare `ventry` so that the next guest entry completes the MMIO access
+ * described by `vexit`.  The access details are copied from the exit; for a
+ * read, `data` is supplied as the value read by the guest.
+ *
+ * The exit must be a VM_EXITCODE_MMIO one.
+ */
+void
+ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
+    uint64_t data)
+{
+	struct vm_mmio *reply = &ventry->u.mmio;
+
+	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);
+
+	bcopy(&vexit->u.mmio, reply, sizeof (struct vm_mmio));
+	if (reply->read != 0) {
+		reply->data = data;
+	}
+	ventry->cmd = VEC_FULFILL_MMIO;
+}
+
+/*
+ * Check whether a vm exit is an IN/OUT access with the given direction
+ * (`is_read` selects IN), port, and width in bytes.
+ *
+ * Returns true on a match.  For a matching OUT, the value written by the
+ * guest is stored through `valp` when it is non-NULL.
+ */
+bool
+vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
+    uint_t len, uint32_t *valp)
+{
+	if (vexit->exitcode != VM_EXITCODE_INOUT) {
+		return (false);
+	}
+
+	const struct vm_inout *io = &vexit->u.inout;
+	uint_t want_dir = 0;
+
+	if (is_read) {
+		want_dir = INOUT_IN;
+	}
+
+	const bool matches = (io->port == port && io->bytes == len &&
+	    (io->flags & INOUT_IN) == want_dir);
+	if (!matches) {
+		return (false);
+	}
+
+	if (valp != NULL && !is_read) {
+		*valp = io->eax;
+	}
+	return (true);
+}
+
+/*
+ * Check whether a vm exit is an MMIO access with the given direction, guest
+ * physical address, and width in bytes.
+ *
+ * Returns true on a match.  For a matching write, the value written by the
+ * guest is stored through `valp` when it is non-NULL.
+ */
+bool
+vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
+    uint_t len, uint64_t *valp)
+{
+	if (vexit->exitcode != VM_EXITCODE_MMIO) {
+		return (false);
+	}
+
+	const struct vm_mmio *mmio = &vexit->u.mmio;
+	const bool exit_is_read = (mmio->read != 0);
+
+	if (mmio->gpa != addr || mmio->bytes != len ||
+	    exit_is_read != is_read) {
+		return (false);
+	}
+
+	if (valp != NULL && !is_read) {
+		*valp = mmio->data;
+	}
+	return (true);
+}
diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.h b/usr/src/test/bhyve-tests/tests/common/in_guest.h
new file mode 100644
index 0000000000..8d6e04a6da
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/in_guest.h
@@ -0,0 +1,51 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _IN_GUEST_H_
+#define _IN_GUEST_H_
+
+#include "payload_common.h"
+
+struct vmctx *test_initialize(const char *);
+void test_fail_errno(int err, const char *msg);
+void test_fail_msg(const char *fmt, ...);
+void test_fail_vmexit(const struct vm_exit *vexit);
+void test_pass(void);
+
+int test_setup_vcpu(struct vmctx *, int, uint64_t, uint64_t);
+
+/*
+ * Classification of a vm exit, as returned by test_run_vcpu(), telling the
+ * test what (if anything) it needs to do before entering the guest again.
+ */
+enum vm_exit_kind {
+ /* Otherwise empty vmexit which should result in immediate re-entry */
+ VEK_REENTR,
+ /* Write to IOP_TEST_RESULT port with success value (0) */
+ VEK_TEST_PASS,
+ /* Write to IOP_TEST_RESULT port with failure value (non-zero) */
+ VEK_TEST_FAIL,
+ /* Test specific logic must handle exit data */
+ VEK_UNHANDLED,
+};
+
+enum vm_exit_kind test_run_vcpu(struct vmctx *, int, struct vm_entry *,
+ struct vm_exit *);
+
+void ventry_fulfill_inout(const struct vm_exit *, struct vm_entry *, uint32_t);
+void ventry_fulfill_mmio(const struct vm_exit *, struct vm_entry *, uint64_t);
+
+bool vexit_match_inout(const struct vm_exit *, bool, uint16_t, uint_t,
+ uint32_t *);
+bool vexit_match_mmio(const struct vm_exit *, bool, uint64_t, uint_t,
+ uint64_t *);
+
+#endif /* _IN_GUEST_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/common/payload_common.h b/usr/src/test/bhyve-tests/tests/common/payload_common.h
new file mode 100644
index 0000000000..895364f18e
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/payload_common.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _PAYLOAD_COMMON_H_
+#define _PAYLOAD_COMMON_H_
+
+/* Amount of guest memory set up for the test VM by test_initialize() */
+#define MEM_TOTAL_SZ (64 * 1024 * 1024)
+
+/*
+ * Guest-physical layout.  The 2MiB-entry tables occupy the four pages from
+ * 0x200000 up to the 1G table at 0x204000.  The GDT, TSS and IDT addresses
+ * are the bases loaded into GDTR, TR and IDTR by test_setup_vcpu(), and
+ * MEM_LOC_PAYLOAD matches the address in Mapfile.payload.
+ * NOTE(review): MEM_LOC_STACK is presumably the initial stack pointer which
+ * tests pass to test_setup_vcpu(); its users are not visible from here.
+ */
+/* 2MiB-page entries for identity-mapped table at 2MiB */
+#define MEM_LOC_PAGE_TABLE_2M 0x200000
+#define MEM_LOC_PAGE_TABLE_1G 0x204000
+#define MEM_LOC_PAGE_TABLE_512G 0x205000
+#define MEM_LOC_GDT 0x206000
+#define MEM_LOC_TSS 0x206200
+#define MEM_LOC_IDT 0x207000
+#define MEM_LOC_STACK 0x400000
+#define MEM_LOC_PAYLOAD 0x800000
+
+/*
+ * IO port set aside for emitting test result: the host treats a write of 0 as
+ * a pass and a write of any other value as a failure.
+ */
+/* IO port set aside for emitting test result */
+#define IOP_TEST_RESULT 0xef00U
+
+/* IO port set aside for emitting test value */
+#define IOP_TEST_VALUE 0xef10U
+
+/* IO port set aside for inputting test param(s) */
+#define IOP_TEST_PARAM IOP_TEST_PARAM0
+#define IOP_TEST_PARAM0 0xef20U
+#define IOP_TEST_PARAM1 0xef21U
+#define IOP_TEST_PARAM2 0xef22U
+#define IOP_TEST_PARAM3 0xef23U
+
+#endif /* _PAYLOAD_COMMON_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/common/payload_start.s b/usr/src/test/bhyve-tests/tests/common/payload_start.s
new file mode 100644
index 0000000000..8a57e259de
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/payload_start.s
@@ -0,0 +1,27 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ .text
+ .globl _start
+_start:
+ jmp start
+ */
+
+/*
+ * Entry point of every payload.  Mapfile.payload places this object's .text
+ * first in the payload segment, and all this does is jump to start(), which
+ * each payload provides.
+ */
+ENTRY_NP(_start)
+ jmp start
+SET_SIZE(_start)
diff --git a/usr/src/test/bhyve-tests/tests/kdev/Makefile b/usr/src/test/bhyve-tests/tests/kdev/Makefile
new file mode 100644
index 0000000000..52f3c2576c
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/Makefile
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2022 Oxide Computer Company
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/cmd/Makefile.cmd.64
+include $(SRC)/test/Makefile.com
+
+PROG = vpmtmr_freq \
+ vhpet_freq \
+ vlapic_freq \
+ vlapic_freq_periodic \
+ vatpit_freq
+
+PAYLOADS = $(PROG)
+include ../Makefile.in_guest
+
+COMMON_OBJS = in_guest.o
+
+CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) payload_utils.o
+CLOBBERFILES = $(PROG)
+
+ROOTOPTPKG = $(ROOT)/opt/bhyve-tests
+TESTDIR = $(ROOTOPTPKG)/tests/kdev
+
+CMDS = $(PROG:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+CSTD= $(CSTD_GNU99)
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
+ $(CPPFLAGS.master) \
+ -I$(SRC)/uts/i86pc/io/vmm \
+ -I$(SRC)/uts/i86pc \
+ -I../common
+
+ASFLAGS += -P -D__STDC__ -D_ASM
+
+
+CFLAGS = -m64
+$(PROG) := LDLIBS += -lvmmapi
+
+all: $(PROG)
+
+install: all $(CMDS)
+
+clean:
+ -$(RM) $(CLEANFILES)
+clobber: clean
+ -$(RM) $(CLOBBERFILES)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %
+ $(INS.file)
+
+# Each test program is built from its host-side C source, the object which
+# embeds its guest payload (pobj_<name>.o), and the common harness objects.
+%: %.c pobj_%.o $(COMMON_OBJS)
+ $(LINK.c) -o $@ $^ $(LDLIBS)
+ $(POST_PROCESS)
+
+# Generic rule: link a program from a same-named object
+%: %.o
+ $(LINK.c) -o $@ $^ $(LDLIBS)
+ $(POST_PROCESS)
+
+# Every payload additionally depends on the port I/O helpers.  The payload_%
+# rule in ../Makefile.in_guest links all of its prerequisites ($^), so this
+# also adds payload_utils.o to each payload's link.
+$(PAYLOADS:%=payload_%): payload_utils.o
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h
new file mode 100644
index 0000000000..8bd51023df
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _PAYLOAD_UTILS_H_
+#define _PAYLOAD_UTILS_H_
+
+#include <sys/types.h>
+
+/*
+ * x86 port I/O helpers for guest payloads, implemented in payload_utils.s.
+ * The first argument is always the I/O port.  The out* routines take the
+ * value to write as their second argument; the in* routines return the value
+ * which was read.
+ */
+void outb(uint16_t, uint8_t);
+void outw(uint16_t, uint16_t);
+void outl(uint16_t, uint32_t);
+uint8_t inb(uint16_t);
+uint16_t inw(uint16_t);
+uint32_t inl(uint16_t);
+
+#endif /* _PAYLOAD_UTILS_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s
new file mode 100644
index 0000000000..8c8e745c17
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s
@@ -0,0 +1,55 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ * void outb(uint16_t port, uint8_t val)
+ * Write the byte in %sil to the I/O port in %di.
+ */
+ENTRY(outb)
+ movw %di, %dx
+ movb %sil, %al
+ outb (%dx)
+ ret
+SET_SIZE(outb)
+
+/*
+ * void outw(uint16_t port, uint16_t val)
+ * Write the 16-bit value in %si to the I/O port in %di.
+ */
+ENTRY(outw)
+	movw	%di, %dx
+	movw	%si, %ax
+	outw	(%dx)
+	ret
+SET_SIZE(outw)
+
+/*
+ * void outl(uint16_t port, uint32_t val)
+ * Write the 32-bit value in %esi to the I/O port in %di.
+ */
+ENTRY(outl)
+ movw %di, %dx
+ movl %esi, %eax
+ outl (%dx)
+ ret
+SET_SIZE(outl)
+
+/*
+ * uint8_t inb(uint16_t port)
+ * Read a byte from the I/O port in %di; the result is returned in %al.
+ */
+ENTRY(inb)
+ movw %di, %dx
+ inb (%dx)
+ ret
+SET_SIZE(inb)
+
+/*
+ * uint16_t inw(uint16_t port)
+ * Read 16 bits from the I/O port in %di; the result is returned in %ax.
+ */
+ENTRY(inw)
+ movw %di, %dx
+ inw (%dx)
+ ret
+SET_SIZE(inw)
+
+/*
+ * uint32_t inl(uint16_t port)
+ * Read 32 bits from the I/O port in %di; the result is returned in %eax.
+ */
+ENTRY(inl)
+ movw %di, %dx
+ inl (%dx)
+ ret
+SET_SIZE(inl)
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
new file mode 100644
index 0000000000..d899dc449d
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+
+/*
+ * Restart PIT timer 0, counting from its maximum value.
+ */
+void
+timer0_reset(void)
+{
+	/*
+	 * Select interrupt-on-terminal-count mode for timer 0, with the
+	 * count to be loaded as two successive byte writes.
+	 */
+	outb(IOP_ATPIT_CMD, 0x30);
+
+	/* Both bytes are 0xff, for a starting count of 0xffff */
+	for (int nbyte = 0; nbyte < 2; nbyte++) {
+		outb(IOP_ATPIT_C0, 0xff);
+	}
+}
+
+/*
+ * Latch and return the current 16-bit count of PIT timer 0.
+ */
+uint16_t
+timer0_read(void)
+{
+	/* Latch timer0 so both bytes come from the same instant */
+	outb(IOP_ATPIT_CMD, 0x00);
+
+	/* The low byte is read first, then the high byte */
+	const uint8_t lo = inb(IOP_ATPIT_C0);
+	const uint8_t hi = inb(IOP_ATPIT_C0);
+
+	return ((uint16_t)(((uint16_t)hi << 8) | lo));
+}
+
+/*
+ * Payload body: repeatedly restart timer 0, report its initial count through
+ * IOP_TEST_VALUE, spin until it has counted down by at least
+ * ATPIT_TARGET_TICKS, and then report the final count.  Never returns.
+ */
+void
+start(void)
+{
+	/* loop for as long as the host wants */
+	for (;;) {
+		timer0_reset();
+
+		const uint16_t first = timer0_read();
+		outw(IOP_TEST_VALUE, first);
+
+		/* The timer counts down, so wait to reach this value */
+		const int threshold = first - ATPIT_TARGET_TICKS;
+		uint16_t last;
+
+		for (;;) {
+			last = timer0_read();
+			if (last <= threshold) {
+				break;
+			}
+		}
+		outw(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
new file mode 100644
index 0000000000..7f74e72cbb
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
@@ -0,0 +1,60 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define HPET_OFF_CONFIG 0x10
+#define HPET_OFF_MAIN_COUNT_LOW 0xf0
+
+#define HPET_CONFIG_ENABLE 1
+
+
+/*
+ * Perform a 32-bit write to the HPET register at offset `reg`.
+ */
+static void
+write_hpet(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_HPET_BASE + reg) = value;
+}
+
+/*
+ * Perform a 32-bit read of the HPET register at HPET_OFF_MAIN_COUNT_LOW.
+ */
+static uint32_t
+read_hpet_main_low(void)
+{
+	const unsigned long addr = MMIO_HPET_BASE + HPET_OFF_MAIN_COUNT_LOW;
+
+	return (*(volatile uint32_t *)addr);
+}
+
+
+/*
+ * Payload body: enable the HPET, then repeatedly report the main counter
+ * through IOP_TEST_VALUE, spin until it has advanced by at least
+ * HPET_TARGET_TICKS, and report it again.  Never returns.
+ */
+void
+start(void)
+{
+	write_hpet(HPET_OFF_CONFIG, HPET_CONFIG_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t first = read_hpet_main_low();
+		outl(IOP_TEST_VALUE, first);
+
+		/* The counter counts up, so wait to reach this value */
+		const uint32_t target = first + HPET_TARGET_TICKS;
+		uint32_t last;
+
+		for (;;) {
+			last = read_hpet_main_low();
+			if (last >= target) {
+				break;
+			}
+		}
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
new file mode 100644
index 0000000000..cc4d72a9f7
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define LAPIC_OFF_SVR 0xf0
+#define LAPIC_OFF_TIMER_ICR 0x380
+#define LAPIC_OFF_TIMER_CCR 0x390
+#define LAPIC_OFF_TIMER_DCR 0x3e0
+
+
+#define LAPIC_SVR_ENABLE 0x100
+
+/*
+ * Perform a 32-bit write to the local APIC register at offset `reg`.
+ */
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg) = value;
+}
+
+/*
+ * Perform a 32-bit read of the local APIC register at offset `reg`.
+ */
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	return (*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg));
+}
+
+/*
+ * Translate a timer divisor (a power of two from 1 through 128) into the
+ * value to program into the LAPIC timer divide configuration register.
+ *
+ * For any other divisor, 1 is written to IOP_TEST_VALUE and 0xff is returned.
+ * NOTE(review): the intent is to fail the test immediately, yet the write is
+ * to IOP_TEST_VALUE rather than IOP_TEST_RESULT; confirm that the host side
+ * treats it as a failure.
+ */
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	if (inp == 1) {
+		return (0xb);
+	} else if (inp == 2) {
+		return (0x0);
+	} else if (inp == 4) {
+		return (0x1);
+	} else if (inp == 8) {
+		return (0x2);
+	} else if (inp == 16) {
+		return (0x3);
+	} else if (inp == 32) {
+		return (0x8);
+	} else if (inp == 64) {
+		return (0x9);
+	} else if (inp == 128) {
+		return (0xa);
+	}
+
+	/* divisor is out of range */
+	outl(IOP_TEST_VALUE, 1);
+	return (0xff);
+}
+
+
+/*
+ * Payload body: enable the LAPIC, then repeatedly read a divisor from
+ * IOP_TEST_PARAM, start the timer counting down from 0xffffffff with that
+ * divisor, report the starting count through IOP_TEST_VALUE, spin until at
+ * least LAPIC_TARGET_TICKS have elapsed, and report the final count.
+ * Never returns.
+ */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t divisor = inl(IOP_TEST_PARAM);
+
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, 0xffffffff);
+
+		const uint32_t first = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, first);
+
+		/* The count runs downward, so wait to reach this value */
+		const uint32_t target = first - LAPIC_TARGET_TICKS;
+		uint32_t last;
+
+		for (;;) {
+			last = read_vlapic(LAPIC_OFF_TIMER_CCR);
+			if (last <= target) {
+				break;
+			}
+		}
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
new file mode 100644
index 0000000000..969f708ada
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
@@ -0,0 +1,110 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define LAPIC_OFF_SVR 0xf0
+#define LAPIC_OFF_LVT_TIMER 0x320
+#define LAPIC_OFF_TIMER_ICR 0x380
+#define LAPIC_OFF_TIMER_CCR 0x390
+#define LAPIC_OFF_TIMER_DCR 0x3e0
+
+#define LAPIC_LVT_MASKED (1 << 16)
+#define LAPIC_LVT_PERIODIC (1 << 17)
+
+
+#define LAPIC_SVR_ENABLE 0x100
+
+/*
+ * Store a 32-bit value into the local APIC register at offset `reg`.
+ */
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg) = value;
+}
+
+/*
+ * Load a 32-bit value from the local APIC register at offset `reg`.
+ */
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	return (*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg));
+}
+
+/*
+ * Translate a timer divisor (a power of two from 1 through 128) into the
+ * value to program into the LAPIC timer divide configuration register.
+ *
+ * For any other divisor, 1 is written to IOP_TEST_VALUE and 0xff is returned.
+ * NOTE(review): the intent is to fail the test immediately, yet the write is
+ * to IOP_TEST_VALUE rather than IOP_TEST_RESULT; confirm that the host side
+ * treats it as a failure.
+ */
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	if (inp == 1) {
+		return (0xb);
+	}
+	if (inp == 2) {
+		return (0x0);
+	}
+	if (inp == 4) {
+		return (0x1);
+	}
+	if (inp == 8) {
+		return (0x2);
+	}
+	if (inp == 16) {
+		return (0x3);
+	}
+	if (inp == 32) {
+		return (0x8);
+	}
+	if (inp == 64) {
+		return (0x9);
+	}
+	if (inp == 128) {
+		return (0xa);
+	}
+
+	/* divisor is out of range */
+	outl(IOP_TEST_VALUE, 1);
+	return (0xff);
+}
+
+
+/*
+ * Payload body: enable the LAPIC and put its timer in periodic mode with the
+ * interrupt masked.  Then, repeatedly: read a divisor (IOP_TEST_PARAM0) and a
+ * rollover count (IOP_TEST_PARAM1) from the host, start the timer with a
+ * period of LAPIC_TARGET_TICKS, report the starting count through
+ * IOP_TEST_VALUE, spin until the requested number of rollovers has been
+ * observed, and report the final count.  Never returns.
+ */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/* Periodic timer operation, with its interrupt left masked */
+	write_vlapic(LAPIC_OFF_LVT_TIMER,
+	    LAPIC_LVT_MASKED | LAPIC_LVT_PERIODIC);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint16_t divisor = inw(IOP_TEST_PARAM0);
+		const uint16_t loop_count = inw(IOP_TEST_PARAM1);
+
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, LAPIC_TARGET_TICKS);
+
+		const uint32_t first = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, first);
+
+		uint32_t rollovers = 0;
+		uint32_t prev = first;
+		uint32_t cur;
+
+		do {
+			cur = read_vlapic(LAPIC_OFF_TIMER_CCR);
+
+			/*
+			 * The count runs downward, so seeing it increase
+			 * means the timer period rolled over.
+			 */
+			rollovers += (cur > prev) ? 1 : 0;
+			prev = cur;
+		} while (rollovers < loop_count);
+		outl(IOP_TEST_VALUE, cur);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
new file mode 100644
index 0000000000..d96bb2b8b1
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+/*
+ * Payload body: repeatedly read the PM timer, report the reading through
+ * IOP_TEST_VALUE, spin until the timer has advanced by at least
+ * PMTMR_TARGET_TICKS, and report the final reading.  Never returns.
+ */
+void
+start(void)
+{
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t first = inl(IOP_PMTMR);
+		outl(IOP_TEST_VALUE, first);
+
+		/* The timer counts up, so wait to reach this value */
+		const uint32_t target = first + PMTMR_TARGET_TICKS;
+		uint32_t last;
+
+		for (;;) {
+			last = inl(IOP_PMTMR);
+			if (last >= target) {
+				break;
+			}
+		}
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/test_defs.h b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
new file mode 100644
index 0000000000..acc9553274
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
@@ -0,0 +1,38 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _TEST_DEFS_H_
+#define _TEST_DEFS_H_
+
+#define IOP_PMTMR 0x408 /* I/O port at which the PM timer is placed and read */
+#define IOP_ATPIT_C0 0x40 /* presumably the PIT counter 0 data port -- confirm */
+#define IOP_ATPIT_CMD 0x43 /* presumably the PIT command port -- confirm */
+
+#define MMIO_HPET_BASE 0xfed00000UL /* NOTE(review): assumed guest-physical base of the HPET */
+#define MMIO_LAPIC_BASE 0xfee00000UL /* NOTE(review): assumed guest-physical base of the LAPIC */
+
+#define PMTMR_FREQ 3579545 /* ticks per second used to convert PM timer ticks to ns */
+#define PMTMR_TARGET_TICKS (PMTMR_FREQ / 10) /* one tenth of a second of ticks */
+
+#define HPET_FREQ (1 << 24) /* ticks per second used to convert HPET ticks to ns */
+#define HPET_TARGET_TICKS (HPET_FREQ / 10) /* one tenth of a second of ticks */
+
+#define LAPIC_FREQ (128 * 1024 * 1024) /* ticks per second before the divisor is applied */
+#define LAPIC_TARGET_TICKS (LAPIC_FREQ / 50) /* one fiftieth of a second of undivided ticks */
+
+#define ATPIT_FREQ 1193182 /* ticks per second used to convert PIT ticks to ns */
+#define ATPIT_TARGET_TICKS (ATPIT_FREQ / 50) /* one fiftieth of a second of ticks */
+
+#endif /* _TEST_DEFS_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
new file mode 100644
index 0000000000..1f4051ef6a
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
@@ -0,0 +1,145 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+ hrtime_t when;
+ uint16_t value;
+} reading_t;
+
+/*
+ * Evaluate a pair of PIT counter readings reported by the guest, together
+ * with the host timestamps recorded when each one arrived.
+ *
+ * Returns true when the elapsed tick count is no more than `tick_margin`
+ * above ATPIT_TARGET_TICKS and the elapsed host time is within `ppm_margin`
+ * parts-per-million of the time ATPIT_FREQ predicts for that many ticks.
+ * Returns false when either margin is exceeded.  An elapsed tick count below
+ * the target is reported through test_fail_msg().
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint16_t tick_delta;
+
+	/* 16-bit modular difference; `before` is expected to be the larger */
+	tick_delta = before.value - after.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < ATPIT_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, ATPIT_TARGET_TICKS);
+	} else if ((tick_delta - ATPIT_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    ATPIT_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* do the multiplication in hrtime_t, whatever NANOSEC's type is */
+	hrtime_t time_target = ((hrtime_t)tick_delta * NANOSEC) / ATPIT_FREQ;
+
+	/* absolute difference between measured and predicted duration */
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * The conversion specifiers must match the argument types exactly:
+	 * ppm_margin is a uint_t, while the 64-bit values are cast to the
+	 * `long long` family so the format is right on every data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * PIT frequency test driver: run the guest payload on vcpu0, collect the two
+ * counter readings it writes to IOP_TEST_VALUE, and pass the test once
+ * check_reading() accepts a pair.  A rejected pair is retried, and the test
+ * is failed after the third rejection.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct vmctx *ctx = test_initialize(basename(argv[0]));
+
+	const int err =
+	    test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	/*
+	 * Since the PIT is slower to read back (requiring 3 emulated reads),
+	 * operate with a more loose ticks margin.
+	 */
+	const uint_t margin_ticks = MAX(1, ATPIT_TARGET_TICKS / 2500);
+	const uint_t margin_ppm = 400;
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t samples[2];
+	uint_t nsamples = 0;
+	uint_t attempts = 0;
+
+	for (;;) {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		}
+		if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		/* the only exit expected here is a 2-byte reading report */
+		uint32_t v;
+		if (!vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 2, &v)) {
+			test_fail_vmexit(&vexit);
+			continue;
+		}
+
+		/* timestamp the reading as soon as it arrives */
+		samples[nsamples].when = gethrtime();
+		samples[nsamples].value = v;
+
+		ventry_fulfill_inout(&vexit, &ventry, 0);
+
+		nsamples++;
+		if (nsamples != 2) {
+			continue;
+		}
+
+		if (check_reading(samples[0], samples[1],
+		    margin_ticks, margin_ppm)) {
+			test_pass();
+			continue;
+		}
+
+		attempts++;
+		if (attempts < 3) {
+			nsamples = 0;
+			(void) printf("retry %u\n", attempts);
+			continue;
+		}
+		test_fail_msg("bad result after %u retries\n", attempts);
+	}
+
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
new file mode 100644
index 0000000000..238596c739
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
@@ -0,0 +1,146 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+ hrtime_t when;
+ uint32_t value;
+} reading_t;
+
+/*
+ * Evaluate a pair of HPET counter readings reported by the guest, together
+ * with the host timestamps recorded when each one arrived.
+ *
+ * Returns true when the elapsed tick count is no more than `tick_margin`
+ * above HPET_TARGET_TICKS and the elapsed host time is within `ppm_margin`
+ * parts-per-million of the time HPET_FREQ predicts for that many ticks.
+ * Returns false when either margin is exceeded.  An elapsed tick count below
+ * the target is reported through test_fail_msg().
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * Unsigned subtraction is performed modulo 2^32, so this yields the
+	 * exact number of elapsed ticks even when the 32-bit reading rolled
+	 * over between the two samples.  (Computing the rollover case as
+	 * (UINT32_MAX - before) + after would come up one tick short.)
+	 */
+	const uint32_t tick_delta = after.value - before.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < HPET_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, HPET_TARGET_TICKS);
+	} else if ((tick_delta - HPET_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    HPET_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* do the multiplication in hrtime_t, whatever NANOSEC's type is */
+	hrtime_t time_target = ((hrtime_t)tick_delta * NANOSEC) / HPET_FREQ;
+
+	/* absolute difference between measured and predicted duration */
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * The conversion specifiers must match the argument types exactly:
+	 * ppm_margin is a uint_t, while the 64-bit values are cast to the
+	 * `long long` family so the format is right on every data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * HPET frequency test driver: run the guest payload on vcpu0, collect the two
+ * counter readings it writes to IOP_TEST_VALUE, and pass the test once
+ * check_reading() accepts a pair.  A rejected pair is retried, and the test
+ * is failed after the third rejection.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct vmctx *ctx = test_initialize(basename(argv[0]));
+
+	const int err =
+	    test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	const uint_t margin_ticks = MAX(1, HPET_TARGET_TICKS / 10000);
+	const uint_t margin_ppm = 400;
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t samples[2];
+	uint_t nsamples = 0;
+	uint_t attempts = 0;
+
+	for (;;) {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		}
+		if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		/* the only exit expected here is a 4-byte reading report */
+		uint32_t v;
+		if (!vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			test_fail_vmexit(&vexit);
+			continue;
+		}
+
+		/* timestamp the reading as soon as it arrives */
+		samples[nsamples].when = gethrtime();
+		samples[nsamples].value = v;
+
+		ventry_fulfill_inout(&vexit, &ventry, 0);
+
+		nsamples++;
+		if (nsamples != 2) {
+			continue;
+		}
+
+		if (check_reading(samples[0], samples[1],
+		    margin_ticks, margin_ppm)) {
+			test_pass();
+			continue;
+		}
+
+		attempts++;
+		if (attempts < 3) {
+			nsamples = 0;
+			(void) printf("retry %u\n", attempts);
+			continue;
+		}
+		test_fail_msg("bad result after %u retries\n", attempts);
+	}
+
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
new file mode 100644
index 0000000000..cf462b1acc
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
@@ -0,0 +1,169 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+ hrtime_t when;
+ uint32_t value;
+} reading_t;
+
+/*
+ * Evaluate a pair of LAPIC timer current-count readings reported by the
+ * guest, together with the host timestamps recorded when each one arrived.
+ *
+ * `divisor` is the timer clock divisor in effect for the measurement.  The
+ * supplied `tick_margin` is scaled down by it before use.
+ *
+ * Returns true when the elapsed tick count is no more than the scaled margin
+ * above LAPIC_TARGET_TICKS and the elapsed host time is within `ppm_margin`
+ * parts-per-million of the time LAPIC_FREQ and `divisor` predict for that
+ * many ticks.  Returns false when either margin is exceeded.  An elapsed tick
+ * count below the target is reported through test_fail_msg().
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint32_t tick_delta;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/* timer is counting down, so act appropriately */
+	if (after.value > before.value) {
+		/*
+		 * handle rollover
+		 * NOTE(review): this treats the count as wrapping at
+		 * UINT32_MAX; confirm against how the payload programs the
+		 * timer.
+		 */
+		tick_delta = (UINT32_MAX - after.value) + before.value;
+	} else {
+		tick_delta = before.value - after.value;
+	}
+
+	/* is the number of ticks OK? */
+	if (tick_delta < LAPIC_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, LAPIC_TARGET_TICKS);
+	} else if ((tick_delta - LAPIC_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    LAPIC_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* do the multiplication in hrtime_t, whatever NANOSEC's type is */
+	hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	/* absolute difference between measured and predicted duration */
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * The conversion specifiers must match the argument types exactly:
+	 * ppm_margin is a uint_t, while the 64-bit values are cast to the
+	 * `long long` family so the format is right on every data model.
+	 */
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+
+/*
+ * Run vcpu0 until the guest has reported a pair of timer readings, measured
+ * with the given `divisor`, which check_reading() accepts.  The divisor is
+ * handed to the guest whenever it reads IOP_TEST_PARAM.  A rejected pair is
+ * retried, and the test is failed after the third rejection.
+ */
+static void
+test_for_divisor(struct vmctx *ctx, uint_t divisor, struct vm_entry *ventry,
+    struct vm_exit *vexit)
+{
+	const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+	const uint_t margin_ppm = 400;
+	reading_t samples[2];
+	uint_t nsamples = 0;
+	uint_t attempts = 0;
+
+	for (;;) {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, ventry, vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		}
+		if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(vexit);
+		}
+
+		/* the guest is asking for the divisor */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM, 4, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, divisor);
+			continue;
+		}
+
+		/* anything other than a reported reading is unexpected */
+		uint32_t v;
+		if (!vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			test_fail_vmexit(vexit);
+			continue;
+		}
+
+		/* timestamp the reading as soon as it arrives */
+		samples[nsamples].when = gethrtime();
+		samples[nsamples].value = v;
+		ventry_fulfill_inout(vexit, ventry, 0);
+
+		nsamples++;
+		if (nsamples != 2) {
+			continue;
+		}
+
+		if (check_reading(samples[0], samples[1], divisor,
+		    margin_ticks, margin_ppm)) {
+			(void) printf("good result\n");
+			return;
+		}
+
+		attempts++;
+		if (attempts < 3) {
+			nsamples = 0;
+			(void) printf("retry %u\n", attempts);
+			continue;
+		}
+		test_fail_msg("bad result after %u retries\n", attempts);
+	}
+}
+
+/*
+ * LAPIC timer frequency test driver: set up vcpu0 and then check the timer
+ * at divisors 2, 4 and 16, in that order, before passing the test.
+ */
+int
+main(int argc, char *argv[])
+{
+	static const uint_t divisors[] = { 2, 4, 16 };
+	struct vmctx *ctx = test_initialize(basename(argv[0]));
+
+	const int err =
+	    test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	/* entry/exit state is shared across all of the divisor runs */
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+
+	for (uint_t i = 0; i < sizeof (divisors) / sizeof (divisors[0]); i++) {
+		test_for_divisor(ctx, divisors[i], &ventry, &vexit);
+	}
+	test_pass();
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
new file mode 100644
index 0000000000..d5c6d8184c
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
@@ -0,0 +1,178 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+ hrtime_t when;
+ uint32_t value;
+} reading_t;
+
+/*
+ * Evaluate a pair of LAPIC timer current-count readings taken around `loops`
+ * periods of a periodic timer, together with the host timestamps recorded
+ * when each reading arrived.
+ *
+ * `divisor` is the timer clock divisor in effect for the measurement.  The
+ * supplied `tick_margin` is scaled down by it before use.
+ *
+ * Returns false when the elapsed tick count exceeds the target by more than
+ * the scaled margin, or when the elapsed host time differs from the time
+ * LAPIC_FREQ and `divisor` predict for that many ticks by more than
+ * `ppm_margin` parts-per-million.  Returns true otherwise.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor, uint_t loops,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/*
+	 * The 'before' measurement includes the ticks which occurred between
+	 * programming the timer and taking the first reading.  The 'after'
+	 * measurement includes the number of loops (each consisting of the
+	 * target tick count) plus however many ticks had transpired since the
+	 * most recent roll-over.
+	 */
+	const uint32_t tick_delta =
+	    loops * LAPIC_TARGET_TICKS + before.value - after.value;
+	const uint32_t tick_target = loops * LAPIC_TARGET_TICKS;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < tick_target) {
+		/*
+		 * NOTE(review): a shortfall beyond the margin is reported but,
+		 * unlike the overshoot case below, does not reject the
+		 * reading.  Confirm whether that leniency is intended.
+		 */
+		if ((tick_target - tick_delta) > tick_margin) {
+			(void) printf("%u ticks outside margin %u\n",
+			    tick_delta, tick_target - tick_margin);
+		}
+	} else if ((tick_delta - tick_target) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    tick_target + tick_margin);
+		return (false);
+	}
+
+	/* do the multiplication in hrtime_t, whatever NANOSEC's type is */
+	hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	/* absolute difference between measured and predicted duration */
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * The conversion specifiers must match the argument types exactly:
+	 * ppm_margin is a uint_t, while the 64-bit values are cast to the
+	 * `long long` family so the format is right on every data model.
+	 */
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+
+/*
+ * Run vcpu0 until the guest has reported a pair of timer readings, taken
+ * around `loops` timer periods at the given `divisor`, which check_reading()
+ * accepts.  The divisor and loop count are handed to the guest whenever it
+ * reads IOP_TEST_PARAM0 and IOP_TEST_PARAM1 respectively.  A rejected pair is
+ * retried, and the test is failed after the third rejection.
+ */
+static void
+run_test(struct vmctx *ctx, uint_t divisor, uint_t loops,
+    struct vm_entry *ventry, struct vm_exit *vexit)
+{
+	const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+	const uint_t margin_ppm = 400;
+	reading_t samples[2];
+	uint_t nsamples = 0;
+	uint_t attempts = 0;
+
+	for (;;) {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, ventry, vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		}
+		if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(vexit);
+		}
+
+		/* input the divisor (a 2-byte read of its own port) */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM0, 2, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, divisor);
+			continue;
+		}
+		/* input the loop count (a 2-byte read of its own port) */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM1, 2, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, loops);
+			continue;
+		}
+
+		/* anything other than a reported reading is unexpected */
+		uint32_t v;
+		if (!vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			test_fail_vmexit(vexit);
+			continue;
+		}
+
+		/* timestamp the reading as soon as it arrives */
+		samples[nsamples].when = gethrtime();
+		samples[nsamples].value = v;
+		ventry_fulfill_inout(vexit, ventry, 0);
+
+		nsamples++;
+		if (nsamples != 2) {
+			continue;
+		}
+
+		if (check_reading(samples[0], samples[1], divisor,
+		    loops, margin_ticks, margin_ppm)) {
+			(void) printf("good result\n");
+			return;
+		}
+
+		attempts++;
+		if (attempts < 3) {
+			nsamples = 0;
+			(void) printf("retry %u\n", attempts);
+			continue;
+		}
+		test_fail_msg("bad result after %u retries\n", attempts);
+	}
+}
+
+/*
+ * Periodic LAPIC timer frequency test driver: set up vcpu0, measure with
+ * divisor 4 over 3 loops and then divisor 2 over 4 loops, and pass the test.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct vmctx *ctx = test_initialize(basename(argv[0]));
+
+	const int err =
+	    test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	/* entry/exit state is shared across both runs */
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+
+	run_test(ctx, 4, 3, &ventry, &vexit);
+	run_test(ctx, 2, 4, &ventry, &vexit);
+
+	test_pass();
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
new file mode 100644
index 0000000000..60541bf898
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
@@ -0,0 +1,151 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+ hrtime_t when;
+ uint32_t value;
+} reading_t;
+
+/*
+ * Evaluate a pair of PM timer readings reported by the guest, together with
+ * the host timestamps recorded when each one arrived.
+ *
+ * Returns true when the elapsed tick count is no more than `tick_margin`
+ * above PMTMR_TARGET_TICKS and the elapsed host time is within `ppm_margin`
+ * parts-per-million of the time PMTMR_FREQ predicts for that many ticks.
+ * Returns false when either margin is exceeded.  An elapsed tick count below
+ * the target is reported through test_fail_msg().
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * Unsigned subtraction is performed modulo 2^32, so this yields the
+	 * exact number of elapsed ticks even when the 32-bit reading rolled
+	 * over between the two samples.  (Computing the rollover case as
+	 * (UINT32_MAX - before) + after would come up one tick short.)
+	 */
+	const uint32_t tick_delta = after.value - before.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < PMTMR_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, PMTMR_TARGET_TICKS);
+	} else if ((tick_delta - PMTMR_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    PMTMR_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* do the multiplication in hrtime_t, whatever NANOSEC's type is */
+	hrtime_t time_target = ((hrtime_t)tick_delta * NANOSEC) / PMTMR_FREQ;
+
+	/* absolute difference between measured and predicted duration */
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * The conversion specifiers must match the argument types exactly:
+	 * ppm_margin is a uint_t, while the 64-bit values are cast to the
+	 * `long long` family so the format is right on every data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * PM timer frequency test driver: place the PM timer at IOP_PMTMR, run the
+ * guest payload on vcpu0, collect the two counter readings it writes to
+ * IOP_TEST_VALUE, and pass the test once check_reading() accepts a pair.  A
+ * rejected pair is retried, and the test is failed after the third rejection.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	/* the payload reads the timer from this port */
+	err = vm_pmtmr_set_location(ctx, IOP_PMTMR);
+	if (err != 0) {
+		test_fail_errno(err, "Could not place pmtmr");
+	}
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	const uint_t margin_ticks = MAX(1, PMTMR_TARGET_TICKS / 10000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			readings[nread].when = gethrtime();
+			/*
+			 * Use the value handed back by vexit_match_inout(),
+			 * as the other frequency tests do, rather than
+			 * reaching into the exit structure directly.
+			 */
+			readings[nread].value = v;
+
+			ventry_fulfill_inout(&vexit, &ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1],
+			    margin_ticks, margin_ppm)) {
+				test_pass();
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					/* discard both readings and go again */
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(&vexit);
+		}
+
+	} while (true);
+
+	return (0);
+}
diff --git a/usr/src/uts/i86pc/io/viona/viona_impl.h b/usr/src/uts/i86pc/io/viona/viona_impl.h
index 4872720f79..760474e78b 100644
--- a/usr/src/uts/i86pc/io/viona/viona_impl.h
+++ b/usr/src/uts/i86pc/io/viona/viona_impl.h
@@ -109,6 +109,7 @@ typedef struct viona_vring {
/* Reference to guest pages holding virtqueue */
void **vr_map_pages;
+ vmm_page_t *vr_map_hold;
/* Per-ring error condition statistics */
struct viona_ring_stats {
@@ -293,15 +294,19 @@ void viona_ring_free(viona_vring_t *);
int viona_ring_reset(viona_vring_t *, boolean_t);
int viona_ring_init(viona_link_t *, uint16_t, uint16_t, uint64_t);
boolean_t viona_ring_lease_renew(viona_vring_t *);
-int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *);
+
+int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *,
+ vmm_page_t **);
void vq_pushchain(viona_vring_t *, uint32_t, uint16_t);
void vq_pushchain_many(viona_vring_t *, uint_t, used_elem_t *);
+
void viona_intr_ring(viona_vring_t *ring, boolean_t);
void viona_ring_set_no_notify(viona_vring_t *, boolean_t);
void viona_ring_disable_notify(viona_vring_t *);
void viona_ring_enable_notify(viona_vring_t *);
uint16_t viona_ring_num_avail(viona_vring_t *);
+
void viona_rx_init(void);
void viona_rx_fini(void);
int viona_rx_set(viona_link_t *);
diff --git a/usr/src/uts/i86pc/io/viona/viona_ring.c b/usr/src/uts/i86pc/io/viona/viona_ring.c
index 79094d3dc0..2d847dda09 100644
--- a/usr/src/uts/i86pc/io/viona/viona_ring.c
+++ b/usr/src/uts/i86pc/io/viona/viona_ring.c
@@ -81,17 +81,109 @@
P2ROUNDUP(LEGACY_USED_SZ(qsz), LEGACY_VQ_ALIGN))
#define LEGACY_VQ_PAGES(qsz) (LEGACY_VQ_SIZE(qsz) / PAGESIZE)
+struct vq_held_region { /* iovec array being filled plus the chain of pages held to back it */
+ struct iovec *vhr_iov; /* caller-supplied array receiving base/len of each mapped piece */
+ vmm_page_t *vhr_head; /* first held page in the chain, NULL until a page is held */
+ vmm_page_t *vhr_tail; /* most recently held page; new holds are chained after it */
+ /* Length of iovec array supplied in `vhr_iov` */
+ uint_t vhr_niov;
+ /*
+ * Index into vhr_iov, indicating the next "free" entry (following the
+ * last entry which has valid contents).
+ */
+ uint_t vhr_idx;
+};
+typedef struct vq_held_region vq_held_region_t;
+
static boolean_t viona_ring_map(viona_vring_t *);
static void viona_ring_unmap(viona_vring_t *);
static kthread_t *viona_create_worker(viona_vring_t *);
-static void *
-viona_hold_page(viona_vring_t *ring, uint64_t gpa)
+static vmm_page_t *
+vq_page_hold(viona_vring_t *ring, uint64_t gpa, bool writable)
{
ASSERT3P(ring->vr_lease, !=, NULL);
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
- return (vmm_drv_gpa2kva(ring->vr_lease, gpa, PAGESIZE));
+ int prot = PROT_READ;
+ if (writable) {
+ prot |= PROT_WRITE;
+ }
+
+ return (vmm_drv_page_hold(ring->vr_lease, gpa, prot));
+}
+
+/*
+ * Establish a hold on the page(s) which back the region of guest memory covered
+ * by [gpa, gpa + len). The host-kernel-virtual pointers to those pages are
+ * stored in the iovec array supplied in `region`, along with the chain of
+ * vmm_page_t entries representing the held pages. Since guest memory
+ * carries no guarantees of being physically contiguous (on the host), it is
+ * assumed that an iovec entry will be required for each PAGESIZE section
+ * covered by the specified `gpa` and `len` range. For each iovec entry
+ * successfully populated by holding a page, `vhr_idx` will be incremented so it
+ * references the next available iovec entry (or `vhr_niov`, if the iovec array
+ * is full). The responsibility for releasing the `vmm_page_t` chain (stored in
+ * `vhr_head` and `vhr_tail`) resides with the caller, regardless of the result.
+ */
+static int
+vq_region_hold(viona_vring_t *ring, uint64_t gpa, uint32_t len,
+ bool writable, vq_held_region_t *region)
+{
+ const uint32_t front_offset = gpa & PAGEOFFSET; /* byte offset of gpa within its page */
+ const uint32_t front_len = MIN(len, PAGESIZE - front_offset); /* bytes that land in the first page */
+ uint_t pages = 1; /* the page containing gpa is always needed */
+ vmm_page_t *vmp;
+ caddr_t buf;
+
+ ASSERT3U(region->vhr_idx, <, region->vhr_niov);
+
+ if (front_len < len) {
+ pages += P2ROUNDUP((uint64_t)(len - front_len),
+ PAGESIZE) / PAGESIZE; /* plus one page per PAGESIZE (rounded up) of the remainder */
+ }
+ if (pages > (region->vhr_niov - region->vhr_idx)) {
+ return (E2BIG); /* not enough free iovec entries; nothing has been held yet */
+ }
+
+ vmp = vq_page_hold(ring, gpa & PAGEMASK, writable); /* hold the page containing gpa */
+ if (vmp == NULL) {
+ return (EFAULT);
+ }
+ buf = (caddr_t)vmm_drv_page_readable(vmp); /* NOTE(review): readable accessor is used even when `writable` is set -- confirm */
+
+ region->vhr_iov[region->vhr_idx].iov_base = buf + front_offset;
+ region->vhr_iov[region->vhr_idx].iov_len = front_len;
+ region->vhr_idx++;
+ gpa += front_len; /* gpa is page-aligned from here on if any length remains */
+ len -= front_len;
+ if (region->vhr_head == NULL) {
+ region->vhr_head = vmp; /* first page ever held for this region starts the chain */
+ region->vhr_tail = vmp;
+ } else {
+ vmm_drv_page_chain(region->vhr_tail, vmp); /* append to a chain begun by an earlier call */
+ region->vhr_tail = vmp;
+ }
+
+ for (uint_t i = 1; i < pages; i++) { /* remaining pages, each starting at a page boundary */
+ ASSERT3U(gpa & PAGEOFFSET, ==, 0);
+
+ vmp = vq_page_hold(ring, gpa, writable);
+ if (vmp == NULL) {
+ return (EFAULT); /* pages already chained are left for the caller to release */
+ }
+ buf = (caddr_t)vmm_drv_page_readable(vmp);
+
+ const uint32_t chunk_len = MIN(len, PAGESIZE); /* a full page, or whatever is left */
+ region->vhr_iov[region->vhr_idx].iov_base = buf;
+ region->vhr_iov[region->vhr_idx].iov_len = chunk_len;
+ region->vhr_idx++;
+ gpa += chunk_len;
+ len -= chunk_len;
+ vmm_drv_page_chain(region->vhr_tail, vmp);
+ region->vhr_tail = vmp;
+ }
+
+ return (0);
+}
static boolean_t
@@ -310,14 +402,28 @@ viona_ring_map(viona_vring_t *ring)
const uint_t npages = LEGACY_VQ_PAGES(qsz);
ring->vr_map_pages = kmem_zalloc(npages * sizeof (void *), KM_SLEEP);
+ vmm_page_t *prev = NULL;
+
for (uint_t i = 0; i < npages; i++, pa += PAGESIZE) {
- void *page = viona_hold_page(ring, pa);
+ vmm_page_t *vmp;
- if (page == NULL) {
+ vmp = vq_page_hold(ring, pa, true);
+ if (vmp == NULL) {
viona_ring_unmap(ring);
return (B_FALSE);
}
- ring->vr_map_pages[i] = page;
+
+ /*
+ * Keep the first page as the head of the chain, appending all
+ * subsequent pages to the tail.
+ */
+ if (prev == NULL) {
+ ring->vr_map_hold = vmp;
+ } else {
+ vmm_drv_page_chain(prev, vmp);
+ }
+ prev = vmp;
+ ring->vr_map_pages[i] = vmm_drv_page_writable(vmp);
}
return (B_TRUE);
@@ -330,17 +436,14 @@ viona_ring_unmap(viona_vring_t *ring)
void **map = ring->vr_map_pages;
if (map != NULL) {
- /*
- * The bhyve page-hold mechanism does not currently require a
- * corresponding page-release action, given the simplicity of
- * the underlying virtual memory constructs.
- *
- * If/when those systems become more sophisticated, more than a
- * simple free of the page pointers will be required here.
- */
const uint_t npages = LEGACY_VQ_PAGES(ring->vr_size);
kmem_free(map, npages * sizeof (void *));
ring->vr_map_pages = NULL;
+
+ vmm_drv_page_release_chain(ring->vr_map_hold);
+ ring->vr_map_hold = NULL;
+ } else {
+ ASSERT3P(ring->vr_map_hold, ==, NULL);
}
}
@@ -520,14 +623,9 @@ vq_read_avail(viona_vring_t *ring, uint16_t idx)
*/
static int
vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
- uint64_t gpa = desc->vd_addr;
- uint32_t len = desc->vd_len;
- uint16_t lidx = *idxp;
- caddr_t buf;
-
- ASSERT3U(lidx, <, niov);
+ int err;
if (desc->vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
@@ -536,55 +634,22 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
return (EINVAL);
}
- const uint32_t front_offset = desc->vd_addr & PAGEOFFSET;
- const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
- uint_t pages = 1;
- if (front_len < len) {
- pages += P2ROUNDUP((uint64_t)(len - front_len),
- PAGESIZE) / PAGESIZE;
- }
-
- if (pages > (niov - lidx)) {
+ err = vq_region_hold(ring, desc->vd_addr, desc->vd_len,
+ (desc->vd_flags & VRING_DESC_F_WRITE) != 0, region);
+ switch (err) {
+ case E2BIG:
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
- }
-
- buf = viona_hold_page(ring, gpa & PAGEMASK);
- if (buf == NULL) {
+ break;
+ case EFAULT:
VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ break;
+ default:
+ break;
}
- iov[lidx].iov_base = buf + front_offset;
- iov[lidx].iov_len = front_len;
- gpa += front_len;
- len -= front_len;
- lidx++;
-
- for (uint_t i = 1; i < pages; i++) {
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
-
- buf = viona_hold_page(ring, gpa);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
- VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
- }
- const uint32_t region_len = MIN(len, PAGESIZE);
- iov[lidx].iov_base = buf;
- iov[lidx].iov_len = region_len;
- gpa += region_len;
- len -= region_len;
- lidx++;
- }
-
- ASSERT3U(len, ==, 0);
- ASSERT3U(gpa, ==, desc->vd_addr + desc->vd_len);
-
- *idxp = lidx;
- return (0);
+ return (err);
}
/*
@@ -593,7 +658,7 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
*/
static int
vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
const uint16_t indir_count = desc->vd_len / sizeof (struct virtio_desc);
@@ -607,8 +672,10 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
}
uint16_t indir_next = 0;
- caddr_t buf = NULL;
+ const uint8_t *buf = NULL;
uint64_t buf_gpa = UINT64_MAX;
+ vmm_page_t *vmp = NULL;
+ int err = 0;
for (;;) {
uint64_t indir_gpa =
@@ -621,13 +688,18 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
* resides in, if has not already been done.
*/
if (indir_page != buf_gpa) {
- buf = viona_hold_page(ring, indir_page);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ vmp = vq_page_hold(ring, indir_page, false);
+ if (vmp == NULL) {
+ VIONA_PROBE_BAD_RING_ADDR(ring, indir_page);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ err = EFAULT;
+ break;
}
buf_gpa = indir_page;
+ buf = vmm_drv_page_readable(vmp);
}
/*
@@ -641,27 +713,30 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
if (vp.vd_flags & VRING_DESC_F_INDIRECT) {
VIONA_PROBE1(indir_bad_nest, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, indir_bad_nest);
- return (EINVAL);
+ err = EINVAL;
+ break;
} else if (vp.vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
uint32_t, vp.vd_len);
VIONA_RING_STAT_INCR(ring, desc_bad_len);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
- int err = vq_map_desc_bufs(ring, &vp, iov, niov, idxp);
+ err = vq_map_desc_bufs(ring, &vp, region);
if (err != 0) {
- return (err);
+ break;
}
/* Successfully reach the end of the indir chain */
if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0) {
- return (0);
+ break;
}
- if (*idxp >= niov) {
+ if (region->vhr_idx >= region->vhr_niov) {
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
+ err = E2BIG;
+ break;
}
indir_next = vp.vd_next;
@@ -669,23 +744,31 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
VIONA_PROBE3(indir_bad_next, viona_vring_t *, ring,
uint16_t, indir_next, uint16_t, indir_count);
VIONA_RING_STAT_INCR(ring, indir_bad_next);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
}
- /* NOTREACHED */
- return (-1);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ return (err);
}
int
vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
- uint16_t *cookie)
+ uint16_t *cookie, vmm_page_t **chain)
{
- uint16_t i, ndesc, idx, head, next;
+ uint16_t ndesc, idx, head, next;
struct virtio_desc vdir;
+ vq_held_region_t region = {
+ .vhr_niov = niov,
+ .vhr_iov = iov,
+ };
ASSERT(iov != NULL);
ASSERT(niov > 0 && niov < INT_MAX);
+ ASSERT(*chain == NULL);
mutex_enter(&ring->vr_a_mutex);
idx = ring->vr_cur_aidx;
@@ -711,7 +794,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
head = vq_read_avail(ring, idx & ring->vr_mask);
next = head;
- for (i = 0; i < niov; next = vdir.vd_next) {
+ for (region.vhr_idx = 0; region.vhr_idx < niov; next = vdir.vd_next) {
if (next >= ring->vr_size) {
VIONA_PROBE2(bad_idx, viona_vring_t *, ring,
uint16_t, next);
@@ -721,7 +804,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
vq_read_desc(ring, next, &vdir);
if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) {
- if (vq_map_desc_bufs(ring, &vdir, iov, niov, &i) != 0) {
+ if (vq_map_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
} else {
@@ -738,21 +821,29 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
break;
}
- if (vq_map_indir_desc_bufs(ring, &vdir, iov, niov, &i)
- != 0) {
+ if (vq_map_indir_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
}
if ((vdir.vd_flags & VRING_DESC_F_NEXT) == 0) {
- *cookie = head;
ring->vr_cur_aidx++;
mutex_exit(&ring->vr_a_mutex);
- return (i);
+
+ *cookie = head;
+ *chain = region.vhr_head;
+ return (region.vhr_idx);
}
}
mutex_exit(&ring->vr_a_mutex);
+ if (region.vhr_head != NULL) {
+ /*
+ * If any pages were held prior to encountering an error, we
+ * must release them now.
+ */
+ vmm_drv_page_release_chain(region.vhr_head);
+ }
return (-1);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_rx.c b/usr/src/uts/i86pc/io/viona/viona_rx.c
index dc3feb10fe..2fbf6be972 100644
--- a/usr/src/uts/i86pc/io/viona/viona_rx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_rx.c
@@ -208,10 +208,11 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
caddr_t buf = NULL;
boolean_t end = B_FALSE;
const uint32_t features = ring->vr_link->l_features;
+ vmm_page_t *pages = NULL;
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
return (ENOSPC);
@@ -279,6 +280,7 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
}
/* Release this chain */
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, copied, cookie);
return (0);
@@ -287,6 +289,7 @@ bad_frame:
mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
return (EINVAL);
}
@@ -296,6 +299,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
struct iovec iov[VTNET_MAXSEGS];
used_elem_t uelem[VTNET_MAXSEGS];
+ vmm_page_t *pages = NULL, *hdr_pages = NULL;
int n, i = 0, buf_idx = 0, err = 0;
uint16_t cookie;
caddr_t buf;
@@ -307,7 +311,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
@@ -376,7 +380,12 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
err = EOVERFLOW;
break;
}
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ pages = NULL;
+ }
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
+ &pages);
if (n <= 0) {
/*
* Without more immediate space to perform the
@@ -453,6 +462,13 @@ done:
uint16_t, cookie, mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
}
+
+ if (hdr_pages != NULL) {
+ vmm_drv_page_release_chain(hdr_pages);
+ }
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ }
vq_pushchain_many(ring, buf_idx + 1, uelem);
return (err);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_tx.c b/usr/src/uts/i86pc/io/viona/viona_tx.c
index f8018692a5..424deee498 100644
--- a/usr/src/uts/i86pc/io/viona/viona_tx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_tx.c
@@ -70,6 +70,7 @@ struct viona_desb {
uint32_t d_len;
uint16_t d_cookie;
uchar_t *d_headers;
+ vmm_page_t *d_pages;
};
static void viona_tx(viona_link_t *, viona_vring_t *);
@@ -287,6 +288,14 @@ viona_desb_release(viona_desb_t *dp)
cookie = dp->d_cookie;
dp->d_len = 0;
dp->d_cookie = 0;
+ vmm_drv_page_release_chain(dp->d_pages);
+ dp->d_pages = NULL;
+
+ /*
+ * Ensure all other changes to the desb are visible prior to zeroing its
+ * refcount, signifying its readiness for reuse.
+ */
+ membar_exit();
dp->d_ref = 0;
viona_tx_done(ring, len, cookie);
@@ -484,12 +493,13 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
viona_desb_t *dp = NULL;
mac_client_handle_t link_mch = link->l_mch;
const struct virtio_net_hdr *hdr;
+ vmm_page_t *pages = NULL;
mp_head = mp_tail = NULL;
ASSERT(iov != NULL);
- n = vq_popchain(ring, iov, max_segs, &cookie);
+ n = vq_popchain(ring, iov, max_segs, &cookie, &pages);
if (n == 0) {
VIONA_PROBE1(tx_absent, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, tx_absent);
@@ -670,6 +680,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
if (dp != NULL) {
dp->d_len = len;
+ dp->d_pages = pages;
mutex_enter(&ring->vr_lock);
ring->vr_xfer_outstanding++;
mutex_exit(&ring->vr_lock);
@@ -679,6 +690,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
* be marked as 'used' now, rather than deferring that action
* until after successful packet transmission.
*/
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
@@ -731,5 +743,6 @@ drop_hook:
VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, len,
uint16_t, cookie);
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 501884e0df..3f137e1b4d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -90,8 +90,9 @@ struct channel {
bool ol_sel; /* read MSB from output latch */
bool fr_sel; /* read MSB from free-running timer */
- struct bintime load_bt; /* time when counter was loaded */
- struct bintime callout_bt; /* target time */
+ hrtime_t time_loaded; /* time when counter was loaded */
+ hrtime_t time_target; /* target time */
+ uint64_t total_target;
struct callout callout;
struct vatpit_callout_arg callout_arg;
@@ -101,8 +102,6 @@ struct vatpit {
struct vm *vm;
struct mtx mtx;
- struct bintime freq_bt;
-
struct channel channel[3];
};
@@ -111,16 +110,9 @@ static void pit_timer_start_cntr0(struct vatpit *vatpit);
static uint64_t
vatpit_delta_ticks(struct vatpit *vatpit, struct channel *c)
{
- struct bintime delta;
- uint64_t result;
-
- binuptime(&delta);
- bintime_sub(&delta, &c->load_bt);
+ const hrtime_t delta = gethrtime() - c->time_loaded;
- result = delta.sec * PIT_8254_FREQ;
- result += delta.frac / vatpit->freq_bt.frac;
-
- return (result);
+ return (hrt_freq_count(delta, PIT_8254_FREQ));
}
static int
@@ -183,32 +175,32 @@ done:
+/*
+ * Arm the channel 0 callout for its next expiry. Nothing is scheduled while
+ * the channel's initial count is zero. Targets are tracked as a running tick
+ * total ('total_target') relative to 'time_loaded'.
+ */
static void
pit_timer_start_cntr0(struct vatpit *vatpit)
{
- struct channel *c;
- struct bintime now, delta;
- sbintime_t precision;
+ struct channel *c = &vatpit->channel[0];
- c = &vatpit->channel[0];
- if (c->initial != 0) {
- delta.sec = 0;
- delta.frac = vatpit->freq_bt.frac * c->initial;
- bintime_add(&c->callout_bt, &delta);
- precision = bttosbt(delta) >> tc_precexp;
+ if (c->initial == 0) {
+ return;
+ }
- /*
- * Reset 'callout_bt' if the time that the callout
- * was supposed to fire is more than 'c->initial'
- * ticks in the past.
- */
- binuptime(&now);
- if (BINTIME_CMP(&c->callout_bt, <, &now)) {
- c->callout_bt = now;
- bintime_add(&c->callout_bt, &delta);
- }
+ c->total_target += c->initial;
+ c->time_target = c->time_loaded +
+ hrt_freq_interval(PIT_8254_FREQ, c->total_target);
- callout_reset_sbt(&c->callout, bttosbt(c->callout_bt),
- precision, vatpit_callout_handler, &c->callout_arg,
- C_ABSOLUTE);
+ /*
+ * If we are more than 'c->initial' ticks behind, reset the timer base
+ * to fire at the next 'c->initial' interval boundary.
+ *
+ * The target is in the past on this path, so the elapsed interval is
+ * 'now - time_target'; the reverse subtraction would always yield a
+ * negative interval here.
+ */
+ hrtime_t now = gethrtime();
+ if (c->time_target < now) {
+ const uint64_t ticks_behind =
+ hrt_freq_count(now - c->time_target, PIT_8254_FREQ);
+
+ c->total_target += roundup(ticks_behind, c->initial);
+ c->time_target = c->time_loaded +
+ hrt_freq_interval(PIT_8254_FREQ, c->total_target);
}
+
+ callout_reset_hrtime(&c->callout, c->time_target,
+ vatpit_callout_handler, &c->callout_arg, C_ABSOLUTE);
}
static uint16_t
@@ -223,15 +215,14 @@ pit_update_counter(struct vatpit *vatpit, struct channel *c, bool latch)
if (c->initial == 0) {
/*
- * This is possibly an o/s bug - reading the value of
- * the timer without having set up the initial value.
+ * This is possibly an OS bug - reading the value of the timer
+ * without having set up the initial value.
*
- * The original user-space version of this code set
- * the timer to 100hz in this condition; do the same
- * here.
+ * The original user-space version of this code set the timer to
+ * 100hz in this condition; do the same here.
*/
c->initial = TIMER_DIV(PIT_8254_FREQ, 100);
- binuptime(&c->load_bt);
+ c->time_loaded = gethrtime();
c->reg_status &= ~TIMER_STS_NULLCNT;
}
@@ -419,10 +410,11 @@ vatpit_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *eax)
c->reg_status &= ~TIMER_STS_NULLCNT;
c->fr_sel = false;
c->initial = c->reg_cr[0] | (uint16_t)c->reg_cr[1] << 8;
- binuptime(&c->load_bt);
+ c->time_loaded = gethrtime();
/* Start an interval timer for channel 0 */
if (port == TIMER_CNTR0) {
- c->callout_bt = c->load_bt;
+ c->time_target = c->time_loaded;
+ c->total_target = 0;
pit_timer_start_cntr0(vatpit);
}
if (c->initial == 0)
@@ -465,8 +457,6 @@ vatpit_init(struct vm *vm)
mtx_init(&vatpit->mtx, "vatpit lock", NULL, MTX_SPIN);
- FREQ2BT(PIT_8254_FREQ, &vatpit->freq_bt);
-
for (i = 0; i < 3; i++) {
callout_init(&vatpit->channel[i].callout, 1);
arg = &vatpit->channel[i].callout_arg;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
index 14418ff5fa..deb1417b71 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vhpet.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
@@ -76,32 +76,33 @@ struct vhpet_callout_arg {
int timer_num;
};
+struct vhpet_timer {
+ uint64_t cap_config; /* Configuration */
+ uint64_t msireg; /* FSB interrupt routing */
+ uint32_t compval; /* Comparator */
+ uint32_t comprate;
+ struct callout callout;
+ hrtime_t callout_expire; /* time when counter==compval */
+ struct vhpet_callout_arg arg;
+};
+
struct vhpet {
struct vm *vm;
struct mtx mtx;
- sbintime_t freq_sbt;
uint64_t config; /* Configuration */
uint64_t isr; /* Interrupt Status */
- uint32_t countbase; /* HPET counter base value */
- sbintime_t countbase_sbt; /* uptime corresponding to base value */
-
- struct {
- uint64_t cap_config; /* Configuration */
- uint64_t msireg; /* FSB interrupt routing */
- uint32_t compval; /* Comparator */
- uint32_t comprate;
- struct callout callout;
- sbintime_t callout_sbt; /* time when counter==compval */
- struct vhpet_callout_arg arg;
- } timer[VHPET_NUM_TIMERS];
+ uint32_t base_count; /* HPET counter base value */
+ hrtime_t base_time; /* uptime corresponding to base value */
+
+ struct vhpet_timer timer[VHPET_NUM_TIMERS];
};
#define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx))
#define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx))
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
- sbintime_t now);
+ hrtime_t now);
static uint64_t
vhpet_capabilities(void)
@@ -151,27 +152,22 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
}
static uint32_t
-vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
+vhpet_counter(struct vhpet *vhpet, hrtime_t *nowptr)
{
- uint32_t val;
- sbintime_t now, delta;
+ const hrtime_t now = gethrtime();
+ uint32_t val = vhpet->base_count;
- val = vhpet->countbase;
if (vhpet_counter_enabled(vhpet)) {
- now = sbinuptime();
- delta = now - vhpet->countbase_sbt;
- KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
- "%lx to %lx", vhpet->countbase_sbt, now));
- val += delta / vhpet->freq_sbt;
- if (nowptr != NULL)
- *nowptr = now;
+ const hrtime_t delta = now - vhpet->base_time;
+
+ ASSERT3S(delta, >=, 0);
+ val += hrt_freq_count(delta, HPET_FREQ);
} else {
- /*
- * The sbinuptime corresponding to the 'countbase' is
- * meaningless when the counter is disabled. Make sure
- * that the caller doesn't want to use it.
- */
- KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
+ /* Value of the counter is meaningless when it is disabled */
+ }
+
+ if (nowptr != NULL) {
+ *nowptr = now;
}
return (val);
}
@@ -284,7 +280,7 @@ vhpet_handler(void *a)
{
int n;
uint32_t counter;
- sbintime_t now;
+ hrtime_t now;
struct vhpet *vhpet;
struct callout *callout;
struct vhpet_callout_arg *arg;
@@ -317,7 +313,7 @@ done:
}
static void
-vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
+vhpet_stop_timer(struct vhpet *vhpet, int n, hrtime_t now)
{
VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
@@ -330,7 +326,7 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
* in the guest. This is especially bad in one-shot mode because
* the next interrupt has to wait for the counter to wrap around.
*/
- if (vhpet->timer[n].callout_sbt < now) {
+ if (vhpet->timer[n].callout_expire < now) {
VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
"stopping timer", n);
vhpet_timer_interrupt(vhpet, n);
@@ -338,11 +334,11 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
}
static void
-vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
+vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, hrtime_t now)
{
- sbintime_t delta, precision;
+ struct vhpet_timer *timer = &vhpet->timer[n];
- if (vhpet->timer[n].comprate != 0)
+ if (timer->comprate != 0)
vhpet_adjust_compval(vhpet, n, counter);
else {
/*
@@ -353,11 +349,11 @@ vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
*/
}
- delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
- precision = delta >> tc_precexp;
- vhpet->timer[n].callout_sbt = now + delta;
- callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
- precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
+ const hrtime_t delta = hrt_freq_interval(HPET_FREQ,
+ timer->compval - counter);
+ timer->callout_expire = now + delta;
+ callout_reset_hrtime(&timer->callout, timer->callout_expire,
+ vhpet_handler, &timer->arg, C_ABSOLUTE);
}
static void
@@ -365,23 +361,23 @@ vhpet_start_counting(struct vhpet *vhpet)
{
int i;
- vhpet->countbase_sbt = sbinuptime();
+ vhpet->base_time = gethrtime();
for (i = 0; i < VHPET_NUM_TIMERS; i++) {
/*
* Restart the timers based on the value of the main counter
* when it stopped counting.
*/
- vhpet_start_timer(vhpet, i, vhpet->countbase,
- vhpet->countbase_sbt);
+ vhpet_start_timer(vhpet, i, vhpet->base_count,
+ vhpet->base_time);
}
}
static void
-vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
+vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, hrtime_t now)
{
int i;
- vhpet->countbase = counter;
+ vhpet->base_count = counter;
for (i = 0; i < VHPET_NUM_TIMERS; i++)
vhpet_stop_timer(vhpet, i, now);
}
@@ -478,7 +474,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
struct vhpet *vhpet;
uint64_t data, mask, oldval, val64;
uint32_t isr_clear_mask, old_compval, old_comprate, counter;
- sbintime_t now, *nowptr;
+ hrtime_t now;
int i, offset;
vhpet = vm_hpet(vm);
@@ -517,11 +513,10 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
/*
* Get the most recent value of the counter before updating
* the 'config' register. If the HPET is going to be disabled
- * then we need to update 'countbase' with the value right
+ * then we need to update 'base_count' with the value right
* before it is disabled.
*/
- nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
- counter = vhpet_counter(vhpet, nowptr);
+ counter = vhpet_counter(vhpet, &now);
oldval = vhpet->config;
update_register(&vhpet->config, data, mask);
@@ -558,7 +553,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
/* Zero-extend the counter to 64-bits before updating it */
val64 = vhpet_counter(vhpet, NULL);
update_register(&val64, data, mask);
- vhpet->countbase = val64;
+ vhpet->base_count = val64;
if (vhpet_counter_enabled(vhpet))
vhpet_start_counting(vhpet);
goto done;
@@ -710,15 +705,11 @@ vhpet_init(struct vm *vm)
struct vhpet *vhpet;
uint64_t allowed_irqs;
struct vhpet_callout_arg *arg;
- struct bintime bt;
vhpet = malloc(sizeof (struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
vhpet->vm = vm;
mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
- FREQ2BT(HPET_FREQ, &bt);
- vhpet->freq_sbt = bttosbt(bt);
-
pincount = vioapic_pincount(vm);
if (pincount >= 32)
allowed_irqs = 0xff000000; /* irqs 24-31 */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index 55f491b664..e88438da0d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$");
/*
* APIC timer frequency:
* - arbitrary but chosen to be in the ballpark of contemporary hardware.
- * - power-of-two to avoid loss of precision when converted to a bintime.
+ * - power-of-two to avoid loss of precision when calculating times
*/
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
@@ -215,7 +215,6 @@ vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
- struct bintime bt_now, bt_rem;
struct LAPIC *lapic;
uint32_t ccr;
@@ -228,12 +227,11 @@ vlapic_get_ccr(struct vlapic *vlapic)
* If the timer is scheduled to expire in the future then
* compute the value of 'ccr' based on the remaining time.
*/
- binuptime(&bt_now);
- if (BINTIME_CMP(&vlapic->timer_fire_bt, >, &bt_now)) {
- bt_rem = vlapic->timer_fire_bt;
- bintime_sub(&bt_rem, &bt_now);
- ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
- ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+
+ const hrtime_t now = gethrtime();
+ if (vlapic->timer_fire_when > now) {
+ ccr += hrt_freq_count(vlapic->timer_fire_when - now,
+ vlapic->timer_cur_freq);
}
}
KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
@@ -263,9 +261,9 @@ vlapic_dcr_write_handler(struct vlapic *vlapic)
* XXX changes to the frequency divider will not take effect until
* the timer is reloaded.
*/
- FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
- vlapic->timer_period_bt = vlapic->timer_freq_bt;
- bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+ vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
+ vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
+ lapic->icr_timer);
VLAPIC_TIMER_UNLOCK(vlapic);
}
@@ -729,20 +727,16 @@ vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
}
static void
-vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t)
+vlapic_callout_reset(struct vlapic *vlapic)
{
- callout_reset_sbt(&vlapic->callout, t, 0,
- vlapic_callout_handler, vlapic, 0);
+ callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
+ vlapic_callout_handler, vlapic, C_ABSOLUTE);
}
static void
vlapic_callout_handler(void *arg)
{
- struct vlapic *vlapic;
- struct bintime bt, btnow;
- sbintime_t rem_sbt;
-
- vlapic = arg;
+ struct vlapic *vlapic = arg;
VLAPIC_TIMER_LOCK(vlapic);
if (callout_pending(&vlapic->callout)) /* callout was reset */
@@ -756,42 +750,25 @@ vlapic_callout_handler(void *arg)
vlapic_fire_timer(vlapic);
if (vlapic_periodic_timer(vlapic)) {
- binuptime(&btnow);
-
- KASSERT(BINTIME_CMP(&btnow, >=, &vlapic->timer_fire_bt),
- ("vlapic callout at %lx.%lx, expected at %lx.%lx",
- btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
- vlapic->timer_fire_bt.frac));
-
/*
* Compute the delta between when the timer was supposed to
- * fire and the present time.
+ * fire and the present time. We can depend on the fact that
+ * cyclics (which underlie these callouts) will never be called
+ * early.
*/
- bt = btnow;
- bintime_sub(&bt, &vlapic->timer_fire_bt);
-
- rem_sbt = bttosbt(vlapic->timer_period_bt);
- if (BINTIME_CMP(&bt, <, &vlapic->timer_period_bt)) {
+ const hrtime_t now = gethrtime();
+ const hrtime_t delta = now - vlapic->timer_fire_when;
+ if (delta >= vlapic->timer_period) {
/*
- * Adjust the time until the next countdown downward
- * to account for the lost time.
+ * If we are so behind that we have missed an entire
+ * timer period, reset the time base rather than
+ * attempting to catch up.
*/
- rem_sbt -= bttosbt(bt);
+ vlapic->timer_fire_when = now + vlapic->timer_period;
} else {
- /*
- * If the delta is greater than the timer period then
- * just reset our time base instead of trying to catch
- * up.
- */
- vlapic->timer_fire_bt = btnow;
- VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
- "usecs, period is %lu usecs - resetting time base",
- bttosbt(bt) / SBT_1US,
- bttosbt(vlapic->timer_period_bt) / SBT_1US);
+ vlapic->timer_fire_when += vlapic->timer_period;
}
-
- bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
- vlapic_callout_reset(vlapic, rem_sbt);
+ vlapic_callout_reset(vlapic);
}
done:
VLAPIC_TIMER_UNLOCK(vlapic);
@@ -800,27 +777,18 @@ done:
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
- struct LAPIC *lapic;
- sbintime_t sbt;
- uint32_t icr_timer;
+ struct LAPIC *lapic = vlapic->apic_page;
VLAPIC_TIMER_LOCK(vlapic);
-
- lapic = vlapic->apic_page;
- icr_timer = lapic->icr_timer;
-
- vlapic->timer_period_bt = vlapic->timer_freq_bt;
- bintime_mul(&vlapic->timer_period_bt, icr_timer);
-
- if (icr_timer != 0) {
- binuptime(&vlapic->timer_fire_bt);
- bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
-
- sbt = bttosbt(vlapic->timer_period_bt);
- vlapic_callout_reset(vlapic, sbt);
- } else
+ vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
+ lapic->icr_timer);
+ if (vlapic->timer_period != 0) {
+ vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
+ vlapic_callout_reset(vlapic);
+ } else {
+ vlapic->timer_fire_when = 0;
callout_stop(&vlapic->callout);
-
+ }
VLAPIC_TIMER_UNLOCK(vlapic);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 8d739bcfcc..7f07665874 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -170,9 +170,10 @@ struct vlapic {
uint32_t esr_pending;
struct callout callout; /* vlapic timer */
- struct bintime timer_fire_bt; /* callout expiry time */
- struct bintime timer_freq_bt; /* timer frequency */
- struct bintime timer_period_bt; /* timer period */
+ hrtime_t timer_fire_when;
+ hrtime_t timer_period;
+ uint32_t timer_cur_freq;
+
struct mtx timer_mtx;
uint64_t msr_apicbase;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 2644ee61d6..9a7d7d4253 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -64,9 +64,7 @@ struct vpmtmr {
struct vm *vm;
void *io_cookie;
uint16_t io_port;
- sbintime_t freq_sbt;
- sbintime_t baseuptime;
- uint32_t baseval;
+ hrtime_t base_time;
};
static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer");
@@ -75,15 +73,10 @@ struct vpmtmr *
vpmtmr_init(struct vm *vm)
{
struct vpmtmr *vpmtmr;
- struct bintime bt;
vpmtmr = malloc(sizeof (struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO);
vpmtmr->vm = vm;
- vpmtmr->baseuptime = sbinuptime();
- vpmtmr->baseval = 0;
-
- FREQ2BT(PMTMR_FREQ, &bt);
- vpmtmr->freq_sbt = bttosbt(bt);
+ vpmtmr->base_time = gethrtime();
return (vpmtmr);
}
@@ -149,20 +142,18 @@ int
vpmtmr_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val)
{
struct vpmtmr *vpmtmr = arg;
- sbintime_t now, delta;
if (!in || bytes != 4)
return (-1);
/*
- * No locking needed because 'baseuptime' and 'baseval' are
- * written only during initialization.
+ * No locking needed because 'base_time' is written only during
+ * initialization.
*/
- now = sbinuptime();
- delta = now - vpmtmr->baseuptime;
- KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: "
- "%lx to %lx", vpmtmr->baseuptime, now));
- *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt;
+ const hrtime_t delta = gethrtime() - vpmtmr->base_time;
+ ASSERT3S(delta, >=, 0);
+
+ *val = hrt_freq_count(delta, PMTMR_FREQ);
return (0);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index a67e82d156..2b3a5b5432 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -80,7 +80,7 @@ struct vrtc {
struct mtx mtx;
struct callout callout;
uint_t addr; /* RTC register to read or write */
- sbintime_t base_uptime;
+ hrtime_t base_uptime;
time_t base_rtctime;
struct rtcdev rtcdev;
};
@@ -147,23 +147,24 @@ update_enabled(struct vrtc *vrtc)
}
static time_t
-vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime)
+vrtc_curtime(struct vrtc *vrtc, hrtime_t *basetime)
{
- sbintime_t now, delta;
- time_t t, secs;
+ time_t t = vrtc->base_rtctime;
+ hrtime_t base = vrtc->base_uptime;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
- t = vrtc->base_rtctime;
- *basetime = vrtc->base_uptime;
if (update_enabled(vrtc)) {
- now = sbinuptime();
- delta = now - vrtc->base_uptime;
- KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
- "%lx to %lx", vrtc->base_uptime, now));
- secs = delta / SBT_1S;
- t += secs;
- *basetime += secs * SBT_1S;
+ const hrtime_t delta = gethrtime() - vrtc->base_uptime;
+ const time_t sec = delta / NANOSEC;
+
+ ASSERT3S(delta, >=, 0);
+
+ t += sec;
+ base += sec * NANOSEC;
+ }
+ if (basetime != NULL) {
+ *basetime = base;
}
return (t);
}
@@ -389,7 +390,7 @@ fail:
}
static int
-vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
+vrtc_time_update(struct vrtc *vrtc, time_t newtime, hrtime_t newbase)
{
struct rtcdev *rtc;
time_t oldtime;
@@ -463,28 +464,26 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
return (0);
}
-static sbintime_t
+static hrtime_t
vrtc_freq(struct vrtc *vrtc)
{
- int ratesel;
-
- static sbintime_t pf[16] = {
+ const hrtime_t rate_freq[16] = {
0,
- SBT_1S / 256,
- SBT_1S / 128,
- SBT_1S / 8192,
- SBT_1S / 4096,
- SBT_1S / 2048,
- SBT_1S / 1024,
- SBT_1S / 512,
- SBT_1S / 256,
- SBT_1S / 128,
- SBT_1S / 64,
- SBT_1S / 32,
- SBT_1S / 16,
- SBT_1S / 8,
- SBT_1S / 4,
- SBT_1S / 2,
+ NANOSEC / 256,
+ NANOSEC / 128,
+ NANOSEC / 8192,
+ NANOSEC / 4096,
+ NANOSEC / 2048,
+ NANOSEC / 1024,
+ NANOSEC / 512,
+ NANOSEC / 256,
+ NANOSEC / 128,
+ NANOSEC / 64,
+ NANOSEC / 32,
+ NANOSEC / 16,
+ NANOSEC / 8,
+ NANOSEC / 4,
+ NANOSEC / 2,
};
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -497,32 +496,32 @@ vrtc_freq(struct vrtc *vrtc)
* the update interrupt.
*/
if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
- ratesel = vrtc->rtcdev.reg_a & 0xf;
- return (pf[ratesel]);
+ uint_t sel = vrtc->rtcdev.reg_a & 0xf;
+ return (rate_freq[sel]);
} else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
- return (SBT_1S);
+ return (NANOSEC);
} else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
- return (SBT_1S);
+ return (NANOSEC);
} else {
return (0);
}
}
static void
-vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
+vrtc_callout_reset(struct vrtc *vrtc, hrtime_t freqhrt)
{
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
- if (freqsbt == 0) {
+ if (freqhrt == 0) {
if (callout_active(&vrtc->callout)) {
VM_CTR0(vrtc->vm, "RTC callout stopped");
callout_stop(&vrtc->callout);
}
return;
}
- VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
- callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
+ VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", NANOSEC / freqhrt);
+ callout_reset_hrtime(&vrtc->callout, freqhrt, vrtc_callout_handler,
vrtc, 0);
}
@@ -530,7 +529,6 @@ static void
vrtc_callout_handler(void *arg)
{
struct vrtc *vrtc = arg;
- sbintime_t freqsbt, basetime;
time_t rtctime;
int error;
@@ -552,28 +550,30 @@ vrtc_callout_handler(void *arg)
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
+ hrtime_t basetime;
+
rtctime = vrtc_curtime(vrtc, &basetime);
error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("%s: vrtc_time_update error %d",
__func__, error));
}
- freqsbt = vrtc_freq(vrtc);
- KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
- vrtc_callout_reset(vrtc, freqsbt);
+ hrtime_t freqhrt = vrtc_freq(vrtc);
+ KASSERT(freqhrt != 0, ("%s: vrtc frequency cannot be zero", __func__));
+ vrtc_callout_reset(vrtc, freqhrt);
done:
VRTC_UNLOCK(vrtc);
}
+/*
+ * Sanity check: the RTC callout must be active exactly when the supplied
+ * callout interval ('freqhrt') is non-zero.
+ */
static __inline void
-vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
+vrtc_callout_check(struct vrtc *vrtc, hrtime_t freqhrt)
{
int active;
active = callout_active(&vrtc->callout) ? 1 : 0;
- KASSERT((freq == 0 && !active) || (freq != 0 && active),
- ("vrtc callout %s with frequency %lx",
- active ? "active" : "inactive", freq));
+ /*
+ * One of the failures being reported is 'freqhrt == 0' with an active
+ * callout, so guard the division used to build the message.
+ */
+ KASSERT((freqhrt == 0 && !active) || (freqhrt != 0 && active),
+ ("vrtc callout %s with frequency %llx",
+ active ? "active" : "inactive",
+ freqhrt == 0 ? 0 : NANOSEC / freqhrt));
}
static void
@@ -618,7 +618,7 @@ static int
vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
- sbintime_t oldfreq, newfreq, basetime;
+ hrtime_t oldfreq, newfreq;
time_t curtime, rtctime;
int error;
uint8_t oldval, changed;
@@ -637,9 +637,11 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
}
if (changed & RTCSB_HALT) {
+ hrtime_t basetime;
+
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
- basetime = sbinuptime();
+ basetime = gethrtime();
if (rtctime == VRTC_BROKEN_TIME) {
if (rtc_flag_broken_time)
return (-1);
@@ -693,7 +695,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
static void
vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
{
- sbintime_t oldfreq, newfreq;
+ hrtime_t oldfreq, newfreq;
uint8_t oldval, changed;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -712,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
* maintain the illusion that the RTC date/time was frozen
* while the dividers were disabled.
*/
- vrtc->base_uptime = sbinuptime();
+ vrtc->base_uptime = gethrtime();
VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else {
@@ -744,7 +746,7 @@ vrtc_set_time(struct vm *vm, time_t secs)
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- error = vrtc_time_update(vrtc, secs, sbinuptime());
+ error = vrtc_time_update(vrtc, secs, gethrtime());
VRTC_UNLOCK(vrtc);
if (error) {
@@ -761,12 +763,11 @@ time_t
vrtc_get_time(struct vm *vm)
{
struct vrtc *vrtc;
- sbintime_t basetime;
time_t t;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- t = vrtc_curtime(vrtc, &basetime);
+ t = vrtc_curtime(vrtc, NULL);
VRTC_UNLOCK(vrtc);
return (t);
@@ -803,7 +804,6 @@ int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
struct vrtc *vrtc;
- sbintime_t basetime;
time_t curtime;
uint8_t *ptr;
@@ -820,7 +820,7 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
* Update RTC date/time fields if necessary.
*/
if (offset < 10 || offset == RTC_CENTURY) {
- curtime = vrtc_curtime(vrtc, &basetime);
+ curtime = vrtc_curtime(vrtc, NULL);
secs_to_rtc(curtime, vrtc, 0);
}
@@ -858,7 +858,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
{
struct vrtc *vrtc = arg;
struct rtcdev *rtc = &vrtc->rtcdev;
- sbintime_t basetime;
+ hrtime_t basetime;
time_t curtime;
int error, offset;
@@ -936,7 +936,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
*/
if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
curtime = rtc_to_secs(vrtc);
- error = vrtc_time_update(vrtc, curtime, sbinuptime());
+ error = vrtc_time_update(vrtc, curtime, gethrtime());
KASSERT(!error, ("vrtc_time_update error %d", error));
if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
error = -1;
@@ -990,7 +990,7 @@ vrtc_init(struct vm *vm)
VRTC_LOCK(vrtc);
vrtc->base_rtctime = VRTC_BROKEN_TIME;
- vrtc_time_update(vrtc, curtime, sbinuptime());
+ vrtc_time_update(vrtc, curtime, gethrtime());
secs_to_rtc(curtime, vrtc, 0);
VRTC_UNLOCK(vrtc);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.mapfile b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
index 0af4f090b4..fb1c9366de 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.mapfile
+++ b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
@@ -12,6 +12,7 @@
#
# Copyright 2019 Joyent, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -44,7 +45,13 @@ SYMBOL_VERSION ILLUMOSprivate {
vmm_drv_lease_sign;
vmm_drv_lease_break;
vmm_drv_lease_expired;
- vmm_drv_gpa2kva;
+ vmm_drv_page_hold;
+ vmm_drv_page_release;
+ vmm_drv_page_release_chain;
+ vmm_drv_page_readable;
+ vmm_drv_page_writable;
+ vmm_drv_page_chain;
+ vmm_drv_page_next;
vmm_drv_ioport_hook;
vmm_drv_ioport_unhook;
vmm_drv_msi;
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 24dd851831..d66778c55a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -1969,33 +1969,49 @@ vmm_drv_lease_expired(vmm_lease_t *lease)
return (lease->vml_expired);
}
-void *
-vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
+vmm_page_t *
+vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot)
{
- vm_page_t *vmp;
- void *res = NULL;
-
ASSERT(lease != NULL);
- ASSERT3U(sz, ==, PAGESIZE);
ASSERT0(gpa & PAGEOFFSET);
- vmp = vmc_hold(lease->vml_vmclient, gpa, PROT_READ | PROT_WRITE);
- /*
- * Break the rules for now and just extract the pointer. This is
- * nominally safe, since holding a driver lease on the VM read-locks it.
- *
- * A pointer which would otherwise be at risk of being a use-after-free
- * vector is made safe since actions such as vmspace_unmap() require
- * acquisition of the VM write-lock, (causing all driver leases to be
- * broken) allowing the consumers to cease their access prior to
- * modification of the vmspace.
- */
- if (vmp != NULL) {
- res = vmp_get_writable(vmp);
- vmp_release(vmp);
- }
+ return ((vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot));
+}
- return (res);
+void
+vmm_drv_page_release(vmm_page_t *vmmp)
+{
+ vmp_release((vm_page_t *)vmmp);
+}
+
+void
+vmm_drv_page_release_chain(vmm_page_t *vmmp)
+{
+ vmp_release_chain((vm_page_t *)vmmp);
+}
+
+const void *
+vmm_drv_page_readable(const vmm_page_t *vmmp)
+{
+ return (vmp_get_readable((const vm_page_t *)vmmp));
+}
+
+void *
+vmm_drv_page_writable(const vmm_page_t *vmmp)
+{
+ return (vmp_get_writable((const vm_page_t *)vmmp));
+}
+
+void
+vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain)
+{
+ vmp_chain((vm_page_t *)vmmp, (vm_page_t *)to_chain);
+}
+
+vmm_page_t *
+vmm_drv_page_next(const vmm_page_t *vmmp)
+{
+ return ((vmm_page_t *)vmp_next((vm_page_t *)vmmp));
}
int
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index cdcebc71d4..04bdb6a3d6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -349,19 +349,10 @@ vmm_glue_callout_init(struct callout *c, int mpsafe)
mutex_exit(&cpu_lock);
}
-static __inline hrtime_t
-sbttohrtime(sbintime_t sbt)
-{
- return (((sbt >> 32) * NANOSEC) +
- (((uint64_t)NANOSEC * (uint32_t)sbt) >> 32));
-}
-
-int
-vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
- void (*func)(void *), void *arg, int flags)
+void
+callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *),
+ void *arg, int flags)
{
- hrtime_t target = sbttohrtime(sbt);
-
ASSERT(c->c_cyc_id != CYCLIC_NONE);
if ((flags & C_ABSOLUTE) == 0) {
@@ -372,8 +363,6 @@ vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
c->c_arg = arg;
c->c_target = target;
cyclic_reprogram(c->c_cyc_id, target);
-
- return (0);
}
int
@@ -409,6 +398,34 @@ vmm_glue_callout_localize(struct callout *c)
mutex_exit(&cpu_lock);
}
+/*
+ * Given an interval (in ns) and a frequency (in hz), calculate the number of
+ * "ticks" at that frequency which cover the interval.
+ */
+uint64_t
+hrt_freq_count(hrtime_t interval, uint32_t freq)
+{
+ ASSERT3S(interval, >=, 0);
+ const uint64_t sec = interval / NANOSEC;
+ const uint64_t nsec = interval % NANOSEC;
+
+ return ((sec * freq) + ((nsec * freq) / NANOSEC));
+}
+
+/*
+ * Given a frequency (in hz) and number of "ticks", calculate the interval
+ * (in ns) which would be covered by those ticks.
+ */
+hrtime_t
+hrt_freq_interval(uint32_t freq, uint64_t count)
+{
+ const uint64_t sec = count / freq;
+ const uint64_t frac = count % freq;
+
+ return ((NANOSEC * sec) + ((frac * NANOSEC) / freq));
+}
+
+
uint_t cpu_high; /* Highest arg to CPUID */
uint_t cpu_exthigh; /* Highest arg to extended CPUID */
uint_t cpu_id; /* Stepping ID */
diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h
index 1f2b3d9254..0b7f622e53 100644
--- a/usr/src/uts/i86pc/sys/vmm_drv.h
+++ b/usr/src/uts/i86pc/sys/vmm_drv.h
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMM_DRV_H_
@@ -30,6 +30,14 @@ struct vmm_lease;
typedef struct vmm_lease vmm_lease_t;
/*
+ * This is effectively a synonym for the bhyve-internal 'struct vm_page' type.
+ * Use of `vmm_page_t *` instead allows us to keep those implementation details
+ * hidden from vmm_drv consumers.
+ */
+struct vmm_page;
+typedef struct vmm_page vmm_page_t;
+
+/*
* Because of tangled headers, this definitions mirrors its ioport_handler_t
* counterpart in vmm_kernel.h.
*/
@@ -44,7 +52,14 @@ extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *),
extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
-extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t);
+extern vmm_page_t *vmm_drv_page_hold(vmm_lease_t *, uintptr_t, int);
+extern void vmm_drv_page_release(vmm_page_t *);
+extern void vmm_drv_page_release_chain(vmm_page_t *);
+extern const void *vmm_drv_page_readable(const vmm_page_t *);
+extern void *vmm_drv_page_writable(const vmm_page_t *);
+extern void vmm_drv_page_chain(vmm_page_t *, vmm_page_t *);
+extern vmm_page_t *vmm_drv_page_next(const vmm_page_t *);
+
extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
extern int vmm_drv_ioport_hook(vmm_hold_t *, uint16_t, vmm_drv_iop_cb_t, void *,