summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- exception_lists/packaging.deps 14
-rw-r--r-- usr/src/compat/bhyve/sys/callout.h 12
-rw-r--r-- usr/src/compat/bhyve/sys/time.h 136
-rw-r--r-- usr/src/compat/bhyve/sys/types.h 5
-rw-r--r-- usr/src/man/man4i/dkio.4i 6
-rw-r--r-- usr/src/man/man4i/fbio.4i 5
-rw-r--r-- usr/src/man/man4i/fdio.4i 4
-rw-r--r-- usr/src/man/man4i/ipnat.4i 6
-rw-r--r-- usr/src/man/man4i/mhd.4i 4
-rw-r--r-- usr/src/man/man4i/mixer.4i 6
-rw-r--r-- usr/src/man/man4i/mtio.4i 8
-rw-r--r-- usr/src/man/man4i/sesio.4i 7
-rw-r--r-- usr/src/man/man4i/streamio.4i 24
-rw-r--r-- usr/src/man/man4i/termio.4i 6
-rw-r--r-- usr/src/man/man4i/uscsi.4i 4
-rw-r--r-- usr/src/man/man4i/visual_io.4i 15
-rw-r--r-- usr/src/man/man4i/vt.4i 5
-rw-r--r-- usr/src/pkg/manifests/developer-opensolaris-osnet.p5m 27
-rw-r--r-- usr/src/pkg/manifests/system-bhyve-tests.p5m 6
-rw-r--r-- usr/src/test/bhyve-tests/runfiles/default.run 10
-rw-r--r-- usr/src/test/bhyve-tests/tests/Makefile 2
-rw-r--r-- usr/src/test/bhyve-tests/tests/Makefile.in_guest 48
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/Mapfile.payload 49
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/in_guest.c 532
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/in_guest.h 51
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/payload_common.h 44
-rw-r--r-- usr/src/test/bhyve-tests/tests/common/payload_start.s 27
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/Makefile 77
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_utils.h 28
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_utils.s 55
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c 69
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c 60
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c 94
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c 110
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c 36
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/test_defs.h 38
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c 145
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c 146
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c 169
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c 178
-rw-r--r-- usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c 151
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_impl.h 7
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_ring.c 269
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_rx.c 22
-rw-r--r-- usr/src/uts/i86pc/io/viona/viona_tx.c 15
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vatpit.c 80
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vhpet.c 103
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vlapic.c 100
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h 7
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vpmtmr.c 25
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vrtc.c 122
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm.mapfile 9
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c 60
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c 45
-rw-r--r-- usr/src/uts/i86pc/sys/vmm_drv.h 19
55 files changed, 2680 insertions, 622 deletions
diff --git a/exception_lists/packaging.deps b/exception_lists/packaging.deps
index 4a4f2cebbe..b15092247f 100644
--- a/exception_lists/packaging.deps
+++ b/exception_lists/packaging.deps
@@ -4,23 +4,13 @@
#
# These are ignored during package lint runs.
#
-pkg:/data/docbook
-pkg:/developer/gnu-binutils
-pkg:/developer/java/jdk
pkg:/developer/java/openjdk8
-pkg:/developer/lexer/flex
pkg:/developer/macro/cpp
-pkg:/developer/macro/gnu-m4
-pkg:/developer/parser/bison
-pkg:/developer/versioning/mercurial
pkg:/gnome/zenity
pkg:/library/expat
pkg:/library/glib2
pkg:/library/libxml2
-pkg:/library/libxslt
pkg:/library/nspr
-pkg:/library/nspr/header-nspr
-pkg:/library/perl-5/xml-parser
pkg:/library/security/openssl
pkg:/library/security/openssl-10
pkg:/library/security/openssl-11
@@ -29,8 +19,6 @@ pkg:/library/security/openssl-3
pkg:/library/security/trousers
pkg:/library/zlib
pkg:/package/pkg
-pkg:/print/cups
-pkg:/print/filter/ghostscript
pkg:/release/name
pkg:/runtime/python-27
pkg:/runtime/python-35
@@ -52,10 +40,8 @@ pkg:/system/library/gcc-runtime
pkg:/system/library/libdbus
pkg:/system/library/libdbus-glib
pkg:/system/library/mozilla-nss
-pkg:/system/library/mozilla-nss/header-nss
pkg:/system/management/snmp/net-snmp
pkg:/system/test/fio
pkg:/system/xvm/xvmstore
-pkg:/text/gnu-gettext
pkg:/text/less
pkg:/web/wget
diff --git a/usr/src/compat/bhyve/sys/callout.h b/usr/src/compat/bhyve/sys/callout.h
index 11823e6321..4156c2d4c6 100644
--- a/usr/src/compat/bhyve/sys/callout.h
+++ b/usr/src/compat/bhyve/sys/callout.h
@@ -45,8 +45,6 @@ struct callout {
#define callout_pending(c) ((c)->c_target > (c)->c_fired)
void vmm_glue_callout_init(struct callout *c, int mpsafe);
-int vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt,
- sbintime_t pr, void (*func)(void *), void *arg, int flags);
int vmm_glue_callout_stop(struct callout *c);
int vmm_glue_callout_drain(struct callout *c);
@@ -71,12 +69,10 @@ callout_drain(struct callout *c)
return (vmm_glue_callout_drain(c));
}
-static __inline int
-callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
- void (*func)(void *), void *arg, int flags)
-{
- return (vmm_glue_callout_reset_sbt(c, sbt, pr, func, arg, flags));
-}
+void callout_reset_hrtime(struct callout *c, hrtime_t target,
+ void (*func)(void *), void *arg, int flags);
+uint64_t hrt_freq_count(hrtime_t interval, uint32_t freq);
+hrtime_t hrt_freq_interval(uint32_t freq, uint64_t count);
#endif /* _COMPAT_FREEBSD_SYS_CALLOUT_H_ */
diff --git a/usr/src/compat/bhyve/sys/time.h b/usr/src/compat/bhyve/sys/time.h
deleted file mode 100644
index 48bdcc304e..0000000000
--- a/usr/src/compat/bhyve/sys/time.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2013 Pluribus Networks Inc.
- * Copyright 2020 Oxide Computer Company
- */
-
-#ifndef _COMPAT_FREEBSD_SYS_TIME_H_
-#define _COMPAT_FREEBSD_SYS_TIME_H_
-
-#include_next <sys/time.h>
-
-#define tc_precexp 0
-
-struct bintime {
- ulong_t sec; /* seconds */
- uint64_t frac; /* 64 bit fraction of a second */
-};
-
-#define BT2FREQ(bt) \
- (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
- ((bt)->frac >> 1))
-
-#define FREQ2BT(freq, bt) \
-{ \
- (bt)->sec = 0; \
- (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
-}
-
-static __inline void
-binuptime(struct bintime *bt)
-{
- hrtime_t now = gethrtime();
-
- bt->sec = now / 1000000000;
- /* 18446744073 = int(2^64 / 1000000000) = 1ns in 64-bit fractions */
- bt->frac = (now % 1000000000) * (uint64_t)18446744073LL;
-}
-
-#define bintime_cmp(a, b, cmp) \
- (((a)->sec == (b)->sec) ? \
- ((a)->frac cmp (b)->frac) : \
- ((a)->sec cmp (b)->sec))
-
-/*
- * The bintime_cmp() macro is problematic for a couple reasons:
- * 1. Bearing a lowercase name suggests it is a function rather than a macro.
- * 2. Placing the comparison operator as the last argument runs afoul of our
- * cstyle rules, unlike cases such as VERIFY3*().
- *
- * To remedy these issues in illumos bhyve, we provide a slightly modified
- * version which addresses both problems.
- */
-#define BINTIME_CMP(a, cmp, b) bintime_cmp((a), (b), cmp)
-
-#define SBT_1S ((sbintime_t)1 << 32)
-#define SBT_1M (SBT_1S * 60)
-#define SBT_1MS (SBT_1S / 1000)
-#define SBT_1US (SBT_1S / 1000000)
-#define SBT_1NS (SBT_1S / 1000000000)
-#define SBT_MAX 0x7fffffffffffffffLL
-
-
-static __inline void
-bintime_add(struct bintime *bt, const struct bintime *bt2)
-{
- uint64_t u;
-
- u = bt->frac;
- bt->frac += bt2->frac;
- if (u > bt->frac)
- bt->sec++;
- bt->sec += bt2->sec;
-}
-
-static __inline void
-bintime_sub(struct bintime *bt, const struct bintime *bt2)
-{
- uint64_t u;
-
- u = bt->frac;
- bt->frac -= bt2->frac;
- if (u < bt->frac)
- bt->sec--;
- bt->sec -= bt2->sec;
-}
-
-static __inline void
-bintime_mul(struct bintime *bt, u_int x)
-{
- uint64_t p1, p2;
-
- p1 = (bt->frac & 0xffffffffull) * x;
- p2 = (bt->frac >> 32) * x + (p1 >> 32);
- bt->sec *= x;
- bt->sec += (p2 >> 32);
- bt->frac = (p2 << 32) | (p1 & 0xffffffffull);
-}
-
-static __inline sbintime_t
-bttosbt(const struct bintime bt)
-{
- return (((sbintime_t)bt.sec << 32) + (bt.frac >> 32));
-}
-
-static __inline struct bintime
-sbttobt(sbintime_t _sbt)
-{
- struct bintime _bt;
-
- _bt.sec = _sbt >> 32;
- _bt.frac = _sbt << 32;
- return (_bt);
-}
-
-static __inline sbintime_t
-sbinuptime(void)
-{
- hrtime_t hrt = gethrtime();
- uint64_t sec = hrt / NANOSEC;
- uint64_t nsec = hrt % NANOSEC;
-
- return (((sbintime_t)sec << 32) +
- (nsec * (((uint64_t)1 << 63) / 500000000) >> 32));
-}
-
-#endif /* _COMPAT_FREEBSD_SYS_TIME_H_ */
diff --git a/usr/src/compat/bhyve/sys/types.h b/usr/src/compat/bhyve/sys/types.h
index 63731da42e..baa4cad157 100644
--- a/usr/src/compat/bhyve/sys/types.h
+++ b/usr/src/compat/bhyve/sys/types.h
@@ -29,11 +29,6 @@ typedef __uint64_t u_int64_t;
typedef __register_t register_t;
#endif
-#ifndef __SBINTIME_T_DEFINED
-#define __SBINTIME_T_DEFINED
-typedef __int64_t sbintime_t;
-#endif
-
#ifndef __VM_MEMATTR_T_DEFINED
#define __VM_MEMATTR_T_DEFINED
typedef char vm_memattr_t;
diff --git a/usr/src/man/man4i/dkio.4i b/usr/src/man/man4i/dkio.4i
index 05f1e48bee..89d8204e03 100644
--- a/usr/src/man/man4i/dkio.4i
+++ b/usr/src/man/man4i/dkio.4i
@@ -19,7 +19,7 @@
.\" Copyright 2016 Nexenta Systems, Inc.
.\" Copyright (c) 2017, Joyent, Inc.
.\"
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt DKIO 4I
.Os
.Sh NAME
@@ -47,7 +47,7 @@ controller, partitions, or geometry information on all architectures:
The argument is a pointer to a
.Vt dk_cinfo
structure (described below).
-This structure tells the controller-type and attributes regarding bad-block
+This structure contains the controller-type and attributes regarding bad-block
processing done on the controller.
.Bd -literal -offset 2n
/*
@@ -809,7 +809,7 @@ On disks larger than 1TB, this ioctl must be used instead of
* Used by applications to get partition or slice information
*/
struct extpart_info {
- diskkaddr_t p_start;
+ diskaddr_t p_start;
diskaddr_t p_length;
};
.Ed
diff --git a/usr/src/man/man4i/fbio.4i b/usr/src/man/man4i/fbio.4i
index e05c3ee787..ee02fd1cad 100644
--- a/usr/src/man/man4i/fbio.4i
+++ b/usr/src/man/man4i/fbio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the fields
.\" enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 22, 2017
+.Dd March 13, 2022
.Dt FBIO 4I
.Os
.Sh NAME
@@ -137,8 +137,7 @@ are used to transfer
variable-length, device-specific information into and out of framebuffers.
.Sh SEE ALSO
.Xr ioctl 2 ,
-.Xr mmap 2 ,
-.Xr cgsix 4D
+.Xr mmap 2
.Sh BUGS
The
.Dv FBIOSATTR
diff --git a/usr/src/man/man4i/fdio.4i b/usr/src/man/man4i/fdio.4i
index a3f01b1021..99ffb147b6 100644
--- a/usr/src/man/man4i/fdio.4i
+++ b/usr/src/man/man4i/fdio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 22, 2017
+.Dd March 13, 2022
.Dt FDIO 4I
.Os
.Sh NAME
@@ -147,7 +147,7 @@ struct fd_drive {
int fdd_ejectable; /* does the drive support eject? */
int fdd_maxsearch; /* size of per-unit search table */
int fdd_writeprecomp; /* cyl to start write precompensation */
- int fdd_writereduce; /* cyl to start recucing write current */
+ int fdd_writereduce; /* cyl to start reducing write current */
int fdd_stepwidth; /* width of step pulse in 1 us units */
int fdd_steprate; /* step rate in 100 us units */
int fdd_headsettle; /* delay, in 100 us units */
diff --git a/usr/src/man/man4i/ipnat.4i b/usr/src/man/man4i/ipnat.4i
index 1df2b96e2d..d986ec0ea3 100644
--- a/usr/src/man/man4i/ipnat.4i
+++ b/usr/src/man/man4i/ipnat.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt IPNAT 4I
.Os
.Sh NAME
@@ -23,7 +23,7 @@
.Sh DESCRIPTION
The
.Sy ipnat
-device provides interfaction with the NAT features of the Solaris IPFilter.
+device provides interaction with the NAT features of the Solaris IPFilter.
.Sh APPLICATION PROGRAMMING INTERFACE
The NAT features programming model is a component of the Solaris IP Filter and
is accessed via the NAT device file
@@ -421,7 +421,7 @@ before setting the
flag and providing a pointer in the
.Fa nat_fr
field that cannot be found in the current rule set.
-.It Er EACESS
+.It Er EACCES
The calling process issued a
.Dv SIOCSTPUT
before issuing a
diff --git a/usr/src/man/man4i/mhd.4i b/usr/src/man/man4i/mhd.4i
index 735e69982e..b19e3fc79c 100644
--- a/usr/src/man/man4i/mhd.4i
+++ b/usr/src/man/man4i/mhd.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt MHD 4I
.Os
.Sh NAME
@@ -61,7 +61,7 @@ caller must first obtain the open file descriptor.
Non-shared multihost disks ioctls consist of
.Dv MHIOCTKOWN ,
.Dv MHIOCRELEASE ,
-.Dv HIOCSTATUS ,
+.Dv MHIOCSTATUS ,
and
.Dv MHIOCQRESERVE .
These ioctl requests control the access rights of non-shared multihost disks.
diff --git a/usr/src/man/man4i/mixer.4i b/usr/src/man/man4i/mixer.4i
index 6d4487796b..43ffa03ef0 100644
--- a/usr/src/man/man4i/mixer.4i
+++ b/usr/src/man/man4i/mixer.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd February 1, 2019
+.Dd March 13, 2022
.Dt MIXER 4I
.Os
.Sh NAME
@@ -438,10 +438,10 @@ should open this file to access the mixer settings.
The pseudo
.Pa /dev/mixer
device supports ioctls that can change the
-oarious settings for the audio hardware in the system.
+various settings for the audio hardware in the system.
.Pp
Those ioctls should only be used by dedicated mixer applications or desktop
-olumme controls, and not by typical ordinary audio applications such as media
+volume controls, and not by typical ordinary audio applications such as media
players.
Ordinary applications that wish to adjust their own volume settings
should use the
diff --git a/usr/src/man/man4i/mtio.4i b/usr/src/man/man4i/mtio.4i
index 93db7028c7..5f0b0ddbb0 100644
--- a/usr/src/man/man4i/mtio.4i
+++ b/usr/src/man/man4i/mtio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 28, 2021
+.Dd March 13, 2022
.Dt MTIO 4I
.Os
.Sh NAME
@@ -201,7 +201,7 @@ errors until the
ioctl is issued.
An
.Dv MTFSF
-ioctl can then he issued.
+ioctl can then be issued.
.Pp
Two successful successive reads that both return zero byte counts indicate
.Sy EOM
@@ -284,7 +284,7 @@ issued, two records are written; one for 65,534 bytes followed by another
record for 6 bytes.
Newer variable-length tape drivers may relax the above
limitation and allow applications to write record sizes larger than 65,534.
-effer to the specific tape driver man page for details.
+Refer to the specific tape driver man page for details.
.Pp
When logical
.Sy EOT
@@ -840,7 +840,7 @@ Note \(em When tape alert cleaning is managed by the st driver, the tape
target driver may continue to return a
.Dq drive needs cleaning
status unless an
-.Dv MTIOCGE
+.Dv MTIOCGET
.Xr ioctl 2
call is made while the cleaning media is in the drive.
.Pp
diff --git a/usr/src/man/man4i/sesio.4i b/usr/src/man/man4i/sesio.4i
index 94d12dd64d..c92e4c34a9 100644
--- a/usr/src/man/man4i/sesio.4i
+++ b/usr/src/man/man4i/sesio.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt SESIO 4I
.Os
.Sh NAME
@@ -90,6 +90,7 @@ uint8_t reserved[3]; /* Reserved; Set to 0 */
.Ed
.Sh ARCHITECTURE
SPARC
+X86
.Sh SEE ALSO
-.Xr ses 4D ,
-.Xr ioctl 9E
+.Xr ioctl 2 ,
+.Xr ses 4D
diff --git a/usr/src/man/man4i/streamio.4i b/usr/src/man/man4i/streamio.4i
index 4e99ba3dfe..8dbaeba2fc 100644
--- a/usr/src/man/man4i/streamio.4i
+++ b/usr/src/man/man4i/streamio.4i
@@ -15,7 +15,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 29, 2017
+.Dd March 13, 2022
.Dt STREAMIO 4I
.Os
.Sh NAME
@@ -357,14 +357,14 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
value is invalid or
.Fa arg
is zero and process is not registered to receive the
.Dv SIGPOLL
signal.
-.It Sy EAGAIN
+.It Er EAGAIN
Allocation of a data structure to store the signal request failed.
.El
.It Dv I_GETSIG
@@ -381,11 +381,11 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
Process not registered to receive the
.Dv SIGPOLL
signal.
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
.El
@@ -399,10 +399,10 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EINVAL
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
does not contain a valid module name.
.El
@@ -807,7 +807,7 @@ calls
will block until the active
.Dv I_STR
completes via a positive or negative
-acknowlegment, a timeout, or an error condition at the stream head.
+acknowledgement, a timeout, or an error condition at the stream head.
By setting the
.Fa ic_timout
field to 0, the user is requesting STREAMS to provide
@@ -897,7 +897,7 @@ On failure,
.Va errno
may be set to the following value:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
.Fa arg
is not the above legal value.
.El
@@ -1168,7 +1168,7 @@ On failure,
.Va errno
is set to the following value:
.Bl -tag -width EINVAL
-.It Sy EINVAL
+.It Er EINVAL
Invalid
.Va arg
value.
@@ -1259,7 +1259,7 @@ On failure,
.Va errno
is set to the following value:
.Bl -tag -width EFAULT
-.It Sy EFAULT
+.It Er EFAULT
.Fa arg
points outside the allocated address space.
.El
@@ -1495,7 +1495,7 @@ On failure,
.Va errno
is set to one of the following values:
.Bl -tag -width EAGAIN
-.It Sy ENXIO
+.It Er ENXIO
Hangup received on
.Fa fildes .
.It Er ETIME
diff --git a/usr/src/man/man4i/termio.4i b/usr/src/man/man4i/termio.4i
index c30fdd5262..c7dcba468a 100644
--- a/usr/src/man/man4i/termio.4i
+++ b/usr/src/man/man4i/termio.4i
@@ -15,7 +15,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 13, 2021
+.Dd March 13, 2022
.Dt TERMIO 4I
.Os
.Sh NAME
@@ -233,7 +233,7 @@ Reprinting also occurs automatically if
characters that would normally be erased from the screen are fouled by program
output.
The characters are reprinted as if they were being echoed;
-consequencely, if
+consequently, if
.Dv ECHO
is not set, they are not printed.
.Pp
@@ -407,7 +407,7 @@ example, file transfer programs), where a program would like to process at
least
.Sy MIN
characters at a time.
-In case A, the inteercharacter timer is
+In case A, the intercharacter timer is
activated by a user as a safety measure; in case B, the timer is turned off.
.Pp
Cases C and D exist to handle single character, timed transfers.
diff --git a/usr/src/man/man4i/uscsi.4i b/usr/src/man/man4i/uscsi.4i
index bd8b384c9b..b3e53cd04e 100644
--- a/usr/src/man/man4i/uscsi.4i
+++ b/usr/src/man/man4i/uscsi.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd October 23, 2017
+.Dd March 13, 2022
.Dt USCSI 4I
.Os
.Sh NAME
@@ -251,7 +251,7 @@ must point to memory writable by the application.
.It Dv USCSI_RENEGOT
Tells USCSI to renegotiate wide mode and synchronous transfer speed before the
transmitted SCSI command is executed.
-This flag in effects tells the target driver to pass the
+This flag in effect tells the target driver to pass the
.Dv FLAG_RENEGOTIATE_WIDE_SYNC
flag in the SCSI packet
before passing the command to an adapter driver for transport.
diff --git a/usr/src/man/man4i/visual_io.4i b/usr/src/man/man4i/visual_io.4i
index 91975c625c..76a3c4c4d5 100644
--- a/usr/src/man/man4i/visual_io.4i
+++ b/usr/src/man/man4i/visual_io.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd August 31, 2018
+.Dd March 13, 2022
.Dt VISUAL_IO 4I
.Os
.Sh NAME
@@ -51,7 +51,7 @@ standalone mode (for example, when using a stand-alone debugger, entering
the PROM monitor, or when the system is panicking).
These are also known as
.Dq Polled I/O
-entry points, which operate under an an explicit set of restrictions, described below.
+entry points, which operate under an explicit set of restrictions, described below.
.Sh IOCTLS
.Bl -tag -width VIS_GETIDENTIFIER -compact
.It Dv VIS_GETIDENTIFIER
@@ -62,11 +62,6 @@ used in the illumos VISUAL environment.
This is a mandatory ioctl and must return a unique string.
We suggest that the name be formed as
.Ao companysymbol Ac Ns Ao devicetype Ac .
-For example, the
-.Xr cgsix 4D
-driver
-returns
-.Sy SUNWcg6 .
.Pp
.Dv VIS_GETIDENTIFIER
takes a
@@ -105,7 +100,7 @@ struct vis_cursorcmap {
#define VIS_CURSOR_SETCURSOR 0x01 /* set cursor */
/* set cursor position */
#define VIS_CURSOR_SETPOSITION 0x02
- /* set cursur hot spot */
+ /* set cursor hot spot */
#define VIS_CURSOR_SETHOTSPOT 0x04
/* set cursor colormap */
#define VIS_CURSOR_SETCOLORMAP 0x08
@@ -609,7 +604,7 @@ ioctl was set to
and
.Fa t_col
are defined to be pixel offsets from the starting position of the
-onssole device.
+console device.
.Pp
.Fa direction
specifies which way to do the copy.
@@ -684,7 +679,7 @@ addition to performing the prescribed function of their ioctl counterparts, the
standalone vectors operate in a special context and must adhere to a strict set
of rules.
The polled I/O vectors are called directly whenever the system is
-quisced (running in a limited context) and must send output to the display.
+quiesced (running in a limited context) and must send output to the display.
Standalone mode describes the state in which the system is running in
single-threaded mode and only one processor is active.
illumos operating
diff --git a/usr/src/man/man4i/vt.4i b/usr/src/man/man4i/vt.4i
index 0684aa44ff..747e30cb1f 100644
--- a/usr/src/man/man4i/vt.4i
+++ b/usr/src/man/man4i/vt.4i
@@ -14,7 +14,7 @@
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
-.Dd December 28, 2020
+.Dd March 13, 2022
.Dt VT 4I
.Os
.Sh NAME
@@ -319,7 +319,8 @@ online 17:49:11 svc:/system/console-login:vt6
.Sy console-login:default
is for the system console, others for virtual consoles.
.Pp
-You can modify properties/disable/enable and remove/add virtual consoles using
+You can modify properties of, disable/enable, and remove/add virtual consoles
+using
.Xr smf 7 :
.Bd -literal -offset indent
# svccfg -s console-login add vt8
diff --git a/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m b/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
index 9106668756..b0a2f98b02 100644
--- a/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
+++ b/usr/src/pkg/manifests/developer-opensolaris-osnet.p5m
@@ -23,38 +23,15 @@
# Copyright (c) 2018, Joyent, Inc.
set name=pkg.fmri value=pkg:/developer/opensolaris/osnet@$(PKGVERS)
-set name=pkg.summary \
- value="Dependencies required to build the OS/Net Consolidation."
+set name=pkg.summary value="Dependencies required to build the illumos-gate."
set name=pkg.description \
- value="Dependencies required to build the OS/Net Consolidation."
+ value="Dependencies required to build the illumos-gate."
set name=info.classification \
value="org.opensolaris.category.2008:Development/Distribution Tools"
set name=org.opensolaris.noincorp value=true
set name=variant.arch value=$(ARCH)
-depend type=require fmri=data/docbook@0.5.11-0.133
depend type=require fmri=developer/astdev@0.5.11-0.133
depend type=require fmri=developer/build/make@0.5.11-0.133
depend type=require fmri=developer/build/onbld@0.5.11-0.133
-$(i386_ONLY)depend type=require fmri=developer/gnu-binutils@2.19-0.133
-depend type=require fmri=developer/java/jdk@0.5.11-0.133
-depend type=require fmri=developer/lexer/flex@2.5.35-0.133
-depend type=require fmri=developer/macro/gnu-m4@1.4
depend type=require fmri=developer/object-file@0.5.11-0.133
-depend type=require fmri=developer/parser/bison@2.3-0.133
-depend type=require fmri=developer/versioning/mercurial@1.3.1-0.133
-depend type=require fmri=library/glib2@0.5.11-0.133
-depend type=require fmri=library/libxml2@2.7.6-0.133
-depend type=require fmri=library/libxslt@0.5.11-0.133
-depend type=require fmri=library/nspr/header-nspr@0.5.11-0.133
-depend type=require fmri=library/perl-5/xml-parser@0.5.11-0.133
-depend type=require fmri=library/security/trousers@0.3.2-0.133
-depend type=require fmri=print/cups@1.4.2-0.133
-depend type=require fmri=print/filter/ghostscript@8.64-0.133
-depend type=require fmri=runtime/perl$(PERL_PKGVERS)@5.10.0-0.133
depend type=require fmri=system/header@0.5.11-0.133
-depend type=require fmri=system/library/dbus@0.5.11-0.133
-depend type=require fmri=system/library/libdbus-glib@0.5.11-0.133
-depend type=require fmri=system/library/libdbus@0.5.11-0.133
-depend type=require fmri=system/library/mozilla-nss/header-nss@0.5.11-0.133
-depend type=require fmri=system/management/snmp/net-snmp@5.4.1-0.133
-depend type=require fmri=text/gnu-gettext@0.16.1-0.133
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m
index d0d31a0190..4222b1d09b 100644
--- a/usr/src/pkg/manifests/system-bhyve-tests.p5m
+++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m
@@ -30,6 +30,12 @@ file path=opt/bhyve-tests/bin/bhyvetest mode=0555
dir path=opt/bhyve-tests/runfiles
file path=opt/bhyve-tests/runfiles/default.run mode=0444
dir path=opt/bhyve-tests/tests
+dir path=opt/bhyve-tests/tests/kdev
+file path=opt/bhyve-tests/tests/kdev/vatpit_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vhpet_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vlapic_freq mode=0555
+file path=opt/bhyve-tests/tests/kdev/vlapic_freq_periodic mode=0555
+file path=opt/bhyve-tests/tests/kdev/vpmtmr_freq mode=0555
dir path=opt/bhyve-tests/tests/mevent
file path=opt/bhyve-tests/tests/mevent/lists_delete mode=0555
file path=opt/bhyve-tests/tests/mevent/read_disable mode=0555
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run
index 0aae1bcb46..c37bee591d 100644
--- a/usr/src/test/bhyve-tests/runfiles/default.run
+++ b/usr/src/test/bhyve-tests/runfiles/default.run
@@ -28,6 +28,16 @@ tests = [
'mem_seg_map'
]
+[/opt/bhyve-tests/tests/kdev]
+user = root
+tests = [
+ 'vatpit_freq',
+ 'vhpet_freq',
+ 'vlapic_freq',
+ 'vlapic_freq_periodic',
+ 'vpmtmr_freq'
+ ]
+
# Tests of userspace mevent system, built from cmd/bhyve
[/opt/bhyve-tests/tests/mevent]
tests = ['lists_delete', 'read_disable', 'read_pause', 'read_requeue',
diff --git a/usr/src/test/bhyve-tests/tests/Makefile b/usr/src/test/bhyve-tests/tests/Makefile
index bf18b300ca..8d528c3f80 100644
--- a/usr/src/test/bhyve-tests/tests/Makefile
+++ b/usr/src/test/bhyve-tests/tests/Makefile
@@ -15,6 +15,6 @@
.PARALLEL: $(SUBDIRS)
-SUBDIRS = vmm
+SUBDIRS = kdev vmm
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/bhyve-tests/tests/Makefile.in_guest b/usr/src/test/bhyve-tests/tests/Makefile.in_guest
new file mode 100644
index 0000000000..7ce2b0d531
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/Makefile.in_guest
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2022 Oxide Computer Company
+
+PAYLOAD_CLEANFILES = payload_start.o \
+ $(PAYLOADS:%=payload_%) \
+ $(PAYLOADS:%=payload_%.o) \
+ $(PAYLOADS:%=pobj_%.o) \
+ $(PAYLOADS:%=pobj_%.s)
+
+$(PAYLOADS:%=payload_%.o) := AS_CPPFLAGS += -I../common
+
+payload_%: payload_start.o payload_%.o
+ $(LD) -dn -e _start -M ../common/Mapfile.payload -o $@ $^
+
+pobj_%.s: payload_%
+ @echo " .data" > $@
+ @echo " .globl payload_data" >> $@
+ @echo "payload_data:" >> $@
+ $(ELFEXTRACT) $^ >> $@
+ @echo " .size payload_data, [.-payload_data]" >> $@
+ @echo " .align 4" >> $@
+ @echo " .globl payload_size" >> $@
+ @echo " .size payload_size, 4" >> $@
+ @echo "payload_size:" >> $@
+ @echo " .data" >> $@
+ @echo " .long [.-payload_data]" >> $@
+
+pobj_%.o: pobj_%.s
+ $(COMPILE.s) -o $@ $^
+ $(POST_PROCESS)
+
+%.o: ../common/%.s
+ $(COMPILE.s) -o $@ $^
+ $(POST_PROCESS)
+
+%.o: ../common/%.c
+ $(COMPILE.c) -o $@ $^
+ $(POST_PROCESS)
diff --git a/usr/src/test/bhyve-tests/tests/common/Mapfile.payload b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload
new file mode 100644
index 0000000000..ef69288c56
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2022 Oxide Computer Company
+#
+
+$mapfile_version 2
+
+# The .eh_frame data was ending up in front of the .text segment, causing issues
+# when the guest attempted to start its payload.
+NULL_SEGMENT discard {
+ ASSIGN_SECTION eh_discard {
+ IS_NAME = .eh_frame;
+ };
+};
+
+LOAD_SEGMENT payload {
+ FLAGS = READ WRITE EXECUTE;
+ VADDR = 0x800000;
+ PADDR = 0x800000;
+ ALIGN = 0x1000;
+
+ # Make sure that payload_start.s`_start is the first thing in the .text segment,
+ # since when we "boot", that is where we want to begin running.
+ ASSIGN_SECTION is_start_text {
+ IS_NAME = .text;
+ FILE_BASENAME = payload_start.o;
+ };
+ ASSIGN_SECTION is_text {
+ IS_NAME = .text;
+ };
+ ASSIGN_SECTION is_alloc {
+ FLAGS = ALLOC;
+ };
+ IS_ORDER = is_start_text is_text is_alloc;
+};
diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c
new file mode 100644
index 0000000000..31bebc0665
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c
@@ -0,0 +1,532 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/segments.h>
+#include <sys/psw.h>
+#include <sys/controlregs.h>
+#include <sys/sysmacros.h>
+#include <sys/varargs.h>
+#include <sys/debug.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+
+
+#define PT_VALID 0x01
+#define PT_WRITABLE 0x02
+#define PT_WRITETHRU 0x08
+#define PT_NOCACHE 0x10
+#define PT_PAGESIZE 0x80
+
+#define SEG_ACCESS_TYPE_MASK 0x1f
+#define SEG_ACCESS_DPL_MASK 0x60
+#define SEG_ACCESS_P (1 << 7)
+#define SEG_ACCESS_AVL (1 << 12)
+#define SEG_ACCESS_L (1 << 13)
+#define SEG_ACCESS_D (1 << 14)
+#define SEG_ACCESS_G (1 << 15)
+#define SEG_ACCESS_UNUSABLE (1 << 16)
+
+
+/*
+ * Keep the test name and VM context around so the consumer is not required to
+ * pass either of them to us for subsequent test-related operations after the
+ * initialization has been performed.
+ *
+ * The test code is not designed to be reentrant at this point.
+ */
+static struct vmctx *test_vmctx = NULL;
+static const char *test_name = NULL;
+
+/*
+ * Build identity-mapped page tables in guest memory covering guest-physical
+ * addresses 0 through 0xffffffff with 2MiB pages.
+ *
+ * Four consecutive pages of 2MiB entries are written starting at
+ * MEM_LOC_PAGE_TABLE_2M.  They are referenced by the first four entries of
+ * the table at MEM_LOC_PAGE_TABLE_1G, which is in turn referenced by the
+ * first entry of the table at MEM_LOC_PAGE_TABLE_512G.
+ */
+static void
+populate_identity_table(struct vmctx *ctx)
+{
+	uint64_t gpa, pte_loc;
+
+	/* Set up 2MiB PTEs for everything up through 0xffffffff */
+	for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M;
+	    gpa < 0x100000000;
+	    pte_loc += PAGE_SIZE) {
+		uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);
+
+		/*
+		 * Catch a failed mapping before writing through it.
+		 * NOTE(review): assumes vm_map_gpa() reports failure with a
+		 * NULL return - confirm against libvmmapi.
+		 */
+		assert(ptep != NULL);
+
+		for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) {
+			*ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
+			/* Make traditional MMIO space uncachable */
+			if (gpa >= 0xc0000000) {
+				*ptep |= PT_WRITETHRU | PT_NOCACHE;
+			}
+		}
+	}
+	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);
+
+	/* Point the first four 1GiB-level entries at the 2MiB tables */
+	uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
+	assert(pdep != NULL);
+	for (uint_t i = 0; i < 4; i++) {
+		pdep[i] = (MEM_LOC_PAGE_TABLE_2M + i * PAGE_SIZE) |
+		    PT_VALID | PT_WRITABLE;
+	}
+
+	/* A single top-level entry references the 1GiB-level table */
+	pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
+	assert(pdep != NULL);
+	pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
+}
+
+/*
+ * Placeholder for populating guest descriptor tables.  The body is currently
+ * empty, so `ctx` is unused and no guest memory is written here.
+ */
+static void
+populate_desc_tables(struct vmctx *ctx)
+{
+
+}
+
+/*
+ * Release the VM held in test_vmctx, if there is one.
+ *
+ * The instance is destroyed unless the test failed (is_failure) and the
+ * KEEP_ON_FAIL environment variable is set to something other than the empty
+ * string or "0".  In every case the cached context pointer is cleared.
+ */
+static void
+test_cleanup(bool is_failure)
+{
+	if (test_vmctx == NULL) {
+		return;
+	}
+
+	const char *keep_env = getenv("KEEP_ON_FAIL");
+	const bool keep_on_fail = (keep_env != NULL &&
+	    strlen(keep_env) != 0 && strcmp(keep_env, "0") != 0);
+
+	/* Only a failed test with KEEP_ON_FAIL set leaves the VM behind */
+	if (!(is_failure && keep_on_fail)) {
+		vm_destroy(test_vmctx);
+	}
+	test_vmctx = NULL;
+}
+
+/*
+ * Common tail for all failure paths: emit the "FAIL <name>" line on stdout,
+ * clean up the VM (honoring KEEP_ON_FAIL), and exit with EXIT_FAILURE.
+ * Does not return.
+ */
+static void
+fail_finish(void)
+{
+	assert(test_name != NULL);
+
+	(void) printf("FAIL %s\n", test_name);
+	test_cleanup(true);
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Fail the test, printing `msg` followed by the strerror() text for `err` to
+ * stderr.  Does not return.
+ */
+void
+test_fail_errno(int err, const char *msg)
+{
+	(void) fprintf(stderr, "%s: %s\n", msg, strerror(err));
+	fail_finish();
+}
+
+/*
+ * Fail the test with a printf-style message written to stderr.  The caller
+ * is responsible for any trailing newline in `fmt`.  Does not return.
+ */
+void
+test_fail_msg(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	(void) vfprintf(stderr, fmt, ap);
+	/* Every va_start() must be paired with va_end() before leaving */
+	va_end(ap);
+
+	fail_finish();
+}
+
+/*
+ * Fail the test due to an unexpected VM exit, dumping the exit details which
+ * are relevant for its exit code to stderr.  Does not return.
+ */
+void
+test_fail_vmexit(const struct vm_exit *vexit)
+{
+	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";
+
+	switch (vexit->exitcode) {
+	case VM_EXITCODE_INOUT:
+		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
+		(void) fprintf(stderr,
+		    "\teax: %08x\n"
+		    "\tport: %04x\n"
+		    "\tbytes: %u\n"
+		    "\tflags: %x\n",
+		    vexit->u.inout.eax,
+		    vexit->u.inout.port,
+		    vexit->u.inout.bytes,
+		    vexit->u.inout.flags);
+		break;
+	case VM_EXITCODE_MMIO:
+		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
+		/* gpa and data are 64-bit, so they need the 'l' modifier */
+		(void) fprintf(stderr,
+		    "\tbytes: %u\n"
+		    "\ttype: %s\n"
+		    "\tgpa: %lx\n"
+		    "\tdata: %016lx\n",
+		    vexit->u.mmio.bytes,
+		    vexit->u.mmio.read == 0 ? "write" : "read",
+		    vexit->u.mmio.gpa,
+		    vexit->u.mmio.data);
+		break;
+	case VM_EXITCODE_VMX:
+		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
+		(void) fprintf(stderr,
+		    "\tstatus: %x\n"
+		    "\treason: %x\n"
+		    "\tqualification: %lx\n"
+		    "\tinst_type: %x\n"
+		    "\tinst_error: %x\n",
+		    vexit->u.vmx.status,
+		    vexit->u.vmx.exit_reason,
+		    vexit->u.vmx.exit_qualification,
+		    vexit->u.vmx.inst_type,
+		    vexit->u.vmx.inst_error);
+		break;
+	case VM_EXITCODE_SVM:
+		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
+		break;
+	case VM_EXITCODE_INST_EMUL: {
+		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
+		    vexit->rip);
+		/* With no valid-byte count reported, dump all 15 bytes */
+		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
+		    vexit->u.inst_emul.num_valid : 15;
+		(void) fprintf(stderr, "\tinstruction bytes: [");
+		for (uint_t i = 0; i < len; i++) {
+			(void) fprintf(stderr, "%s%02x",
+			    i == 0 ? "" : ", ",
+			    vexit->u.inst_emul.inst[i]);
+		}
+		(void) fprintf(stderr, "]\n");
+		break;
+	}
+	case VM_EXITCODE_SUSPENDED:
+		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
+		switch (vexit->u.suspended.how) {
+		case VM_SUSPEND_RESET:
+			(void) fprintf(stderr, "\thow: reset\n");
+			break;
+		case VM_SUSPEND_POWEROFF:
+			(void) fprintf(stderr, "\thow: poweroff\n");
+			break;
+		case VM_SUSPEND_HALT:
+			(void) fprintf(stderr, "\thow: halt\n");
+			break;
+		case VM_SUSPEND_TRIPLEFAULT:
+			(void) fprintf(stderr, "\thow: triple-fault\n");
+			break;
+		default:
+			(void) fprintf(stderr, "\thow: unknown - %d\n",
+			    vexit->u.suspended.how);
+			break;
+		}
+		break;
+	default:
+		(void) fprintf(stderr, "Unexpected code %d exit:\n"
+		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
+		break;
+	}
+	fail_finish();
+}
+
+/*
+ * Report success: emit the "PASS <name>" line on stdout, destroy the VM, and
+ * exit with EXIT_SUCCESS.  Does not return.
+ */
+void
+test_pass(void)
+{
+	assert(test_name != NULL);
+
+	(void) printf("PASS %s\n", test_name);
+
+	/* Not a failure, so the VM is destroyed regardless of KEEP_ON_FAIL */
+	test_cleanup(false);
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Copy the payload linked into the test program (the payload_data and
+ * payload_size symbols) into guest memory at MEM_LOC_PAYLOAD.
+ *
+ * Fails the test outright if the payload does not fit between MEM_LOC_PAYLOAD
+ * and the end of guest memory.  Returns 0 on success, or EFAULT if the guest
+ * memory could not be mapped.
+ */
+static int
+load_payload(struct vmctx *ctx)
+{
+	extern uint8_t payload_data;
+	extern uint32_t payload_size;
+
+	const uint32_t len = payload_size;
+	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);
+
+	if (len > cap) {
+		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
+	}
+
+	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
+	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
+	if (outp == NULL) {
+		/*
+		 * NOTE(review): assumes vm_map_gpa() reports failure with a
+		 * NULL return - confirm against libvmmapi.
+		 */
+		return (EFAULT);
+	}
+	bcopy(&payload_data, outp, len);
+
+	return (0);
+}
+
+/*
+ * Create and open a VM named "bhyve-test-<tname>-<pid>", give it MEM_TOTAL_SZ
+ * of memory, populate its page tables, and load the payload.
+ *
+ * The test name and VM context are recorded in file-scope state for later use
+ * by the test_fail_*() and test_pass() routines, so this may only be called
+ * once per process.  Any failure ends the test; on return the context is
+ * valid.
+ */
+struct vmctx *
+test_initialize(const char *tname)
+{
+	char vm_name[VM_MAX_NAMELEN];
+	int err;
+	struct vmctx *ctx;
+
+	assert(test_vmctx == NULL);
+	assert(test_name == NULL);
+
+	test_name = strdup(tname);
+	if (test_name == NULL) {
+		/*
+		 * The normal failure path requires test_name to be set, so
+		 * report this one directly.
+		 */
+		(void) fprintf(stderr, "Could not allocate test name: %s\n",
+		    strerror(errno));
+		(void) printf("FAIL %s\n", tname);
+		exit(EXIT_FAILURE);
+	}
+	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
+	    test_name, getpid());
+
+	/*
+	 * NOTE(review): the return values of vm_create() and
+	 * vm_setup_memory() are passed to strerror() below; confirm they are
+	 * errno values rather than -1 with errno set.
+	 */
+	err = vm_create(vm_name, 0);
+	if (err != 0) {
+		test_fail_errno(err, "Could not create VM");
+	}
+
+	ctx = vm_open(vm_name);
+	if (ctx == NULL) {
+		test_fail_errno(errno, "Could not open VM");
+	}
+	/* From here on, failure paths clean up the VM via test_vmctx */
+	test_vmctx = ctx;
+
+	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
+	if (err != 0) {
+		test_fail_errno(err, "Could not set up VM memory");
+	}
+
+	populate_identity_table(ctx);
+	populate_desc_tables(ctx);
+
+	err = load_payload(ctx);
+	if (err != 0) {
+		test_fail_errno(err, "Could not load payload");
+	}
+
+	return (ctx);
+}
+
+/*
+ * Activate a vCPU and load it with the state needed to begin executing
+ * directly in long mode at `rip` with stack pointer `rsp`: segment
+ * descriptors, control registers, EFER, and the page table root at
+ * MEM_LOC_PAGE_TABLE_512G.  Finally the vCPU is placed in the VRS_RUN state.
+ *
+ * Returns 0 on success, or the first non-zero result from a vmmapi call.
+ */
+int
+test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp)
+{
+	int err;
+
+	/* A vCPU which is already active (EBUSY) is fine */
+	err = vm_activate_cpu(ctx, vcpu);
+	if (err != 0 && err != EBUSY) {
+		return (err);
+	}
+
+	const struct {
+		int		reg;
+		uint64_t	base;
+		uint32_t	limit;
+		uint32_t	access;
+	} descs[] = {
+		/*
+		 * Granularity bit important here for VMX validity:
+		 * "If any bit in the limit field in the range 31:20 is 1, G
+		 * must be 1"
+		 */
+		{ VM_REG_GUEST_CS, 0, UINT32_MAX,
+		    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G },
+		{ VM_REG_GUEST_SS, 0, UINT32_MAX,
+		    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
+		    SEG_ACCESS_D | SEG_ACCESS_G },
+		{ VM_REG_GUEST_DS, 0, UINT32_MAX,
+		    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G },
+		/*
+		 * While SVM will happily run with an otherwise unusable TR,
+		 * VMX includes it among its entry checks.
+		 */
+		{ VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
+		    SDT_SYSTSSBSY | SEG_ACCESS_P },
+		{ VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0 },
+		{ VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0 },
+		/* Mark unused segments as explicitly unusable (for VMX) */
+		{ VM_REG_GUEST_ES, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_FS, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_GS, 0, 0, SEG_ACCESS_UNUSABLE },
+		{ VM_REG_GUEST_LDTR, 0, 0, SEG_ACCESS_UNUSABLE },
+	};
+	for (uint_t i = 0; i < ARRAY_SIZE(descs); i++) {
+		err = vm_set_desc(ctx, vcpu, descs[i].reg, descs[i].base,
+		    descs[i].limit, descs[i].access);
+		if (err != 0) {
+			return (err);
+		}
+	}
+
+	/* Place CPU directly in long mode */
+	const int regnums[] = {
+		VM_REG_GUEST_CR0,
+		VM_REG_GUEST_CR3,
+		VM_REG_GUEST_CR4,
+		VM_REG_GUEST_EFER,
+		VM_REG_GUEST_RFLAGS,
+		VM_REG_GUEST_RIP,
+		VM_REG_GUEST_RSP,
+		VM_REG_GUEST_CS,
+		VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS,
+		VM_REG_GUEST_TR,
+	};
+	/* Values below are listed in the same order as regnums[] */
+	uint64_t regvals[] = {
+		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
+		    CR0_MP | CR0_PE,
+		MEM_LOC_PAGE_TABLE_512G,
+		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
+		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
+		/* start with interrupts disabled */
+		PS_MB1,
+		rip,
+		rsp,
+		(GDT_KCODE << 3),
+		(GDT_KDATA << 3),
+		(GDT_KDATA << 3),
+		(GDT_KTSS << 3),
+	};
+	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));
+
+	err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums,
+	    regvals);
+	if (err != 0) {
+		return (err);
+	}
+
+	return (vm_set_run_state(ctx, vcpu, VRS_RUN, 0));
+}
+
+/*
+ * Classify a VM exit for the test harness:
+ *  - BOGUS and REQIDLE exits need no handling; the entry structure is zeroed
+ *    and VEK_REENTR is returned so the caller can re-enter immediately.
+ *  - An OUT to IOP_TEST_RESULT yields VEK_TEST_PASS for a value of 0 and
+ *    VEK_TEST_FAIL for anything else.
+ *  - Everything else is VEK_UNHANDLED and left to test-specific logic.
+ */
+static enum vm_exit_kind
+which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
+{
+	const struct vm_inout *inout = &vexit->u.inout;
+
+	switch (vexit->exitcode) {
+	case VM_EXITCODE_BOGUS:
+	case VM_EXITCODE_REQIDLE:
+		/* Clear the whole structure, not just pointer-size bytes */
+		bzero(ventry, sizeof (*ventry));
+		return (VEK_REENTR);
+	case VM_EXITCODE_INOUT:
+		if (inout->port == IOP_TEST_RESULT &&
+		    (inout->flags & INOUT_IN) == 0) {
+			if (inout->eax == 0) {
+				return (VEK_TEST_PASS);
+			} else {
+				return (VEK_TEST_FAIL);
+			}
+		}
+		break;
+	default:
+		break;
+	}
+	return (VEK_UNHANDLED);
+}
+
+/*
+ * Enter the guest on `vcpu` with the given entry data and classify the
+ * resulting exit.  A failure of vm_run() itself fails the test.
+ */
+enum vm_exit_kind
+test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry,
+    struct vm_exit *vexit)
+{
+	const int err = vm_run(ctx, vcpu, ventry, vexit);
+
+	if (err != 0) {
+		test_fail_errno(err, "Failure during vcpu entry");
+	}
+
+	return (which_exit_kind(ventry, vexit));
+}
+
+/*
+ * Prepare `ventry` to fulfill the IN/OUT access described by `vexit`.  The
+ * access details are copied from the exit, and for an IN the value to be
+ * delivered to the guest is set to `data`.  For an OUT, `data` is unused.
+ */
+void
+ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
+    uint32_t data)
+{
+	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);
+
+	const bool is_in = (vexit->u.inout.flags & INOUT_IN) != 0;
+
+	ventry->cmd = VEC_FULFILL_INOUT;
+	bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout));
+	if (is_in) {
+		ventry->u.inout.eax = data;
+	}
+}
+
+/*
+ * Prepare `ventry` to fulfill the MMIO access described by `vexit`.  The
+ * access details are copied from the exit, and for a read the value to be
+ * delivered to the guest is set to `data`.  For a write, `data` is unused.
+ */
+void
+ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
+    uint64_t data)
+{
+	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);
+
+	const bool is_read = (vexit->u.mmio.read != 0);
+
+	ventry->cmd = VEC_FULFILL_MMIO;
+	bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio));
+	if (is_read) {
+		ventry->u.mmio.data = data;
+	}
+}
+
+/*
+ * Check whether `vexit` is an IN/OUT exit with the expected direction, port,
+ * and access length.
+ *
+ * Returns true on a match.  For a matching OUT (is_read == false), the value
+ * written by the guest is stored through `valp` when it is non-NULL.
+ */
+bool
+vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
+    uint_t len, uint32_t *valp)
+{
+	if (vexit->exitcode != VM_EXITCODE_INOUT) {
+		return (false);
+	}
+
+	const uint_t want_dir = is_read ? INOUT_IN : 0;
+	const bool matches = (vexit->u.inout.port == port &&
+	    vexit->u.inout.bytes == len &&
+	    (vexit->u.inout.flags & INOUT_IN) == want_dir);
+
+	if (!matches) {
+		return (false);
+	}
+
+	/* Only a guest write carries a value to hand back */
+	if (valp != NULL && !is_read) {
+		*valp = vexit->u.inout.eax;
+	}
+	return (true);
+}
+
+/*
+ * Check whether `vexit` is an MMIO exit with the expected direction, guest
+ * physical address, and access length.
+ *
+ * Returns true on a match.  For a matching write (is_read == false), the
+ * value written by the guest is stored through `valp` when it is non-NULL.
+ */
+bool
+vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
+    uint_t len, uint64_t *valp)
+{
+	if (vexit->exitcode != VM_EXITCODE_MMIO) {
+		return (false);
+	}
+
+	const bool matches = (vexit->u.mmio.gpa == addr &&
+	    vexit->u.mmio.bytes == len &&
+	    (vexit->u.mmio.read != 0) == is_read);
+
+	if (!matches) {
+		return (false);
+	}
+
+	/* Only a guest write carries a value to hand back */
+	if (valp != NULL && !is_read) {
+		*valp = vexit->u.mmio.data;
+	}
+	return (true);
+}
diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.h b/usr/src/test/bhyve-tests/tests/common/in_guest.h
new file mode 100644
index 0000000000..8d6e04a6da
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/in_guest.h
@@ -0,0 +1,51 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _IN_GUEST_H_
+#define _IN_GUEST_H_
+
+#include "payload_common.h"
+
+struct vmctx *test_initialize(const char *);
+void test_fail_errno(int err, const char *msg);
+void test_fail_msg(const char *fmt, ...);
+void test_fail_vmexit(const struct vm_exit *vexit);
+void test_pass(void);
+
+int test_setup_vcpu(struct vmctx *, int, uint64_t, uint64_t);
+
+/*
+ * Classification of a VM exit, as returned by test_run_vcpu(), telling the
+ * test what (if anything) it needs to do before re-entering the guest.
+ */
+enum vm_exit_kind {
+ /* Otherwise empty vmexit which should result in immediate re-entry */
+ VEK_REENTR,
+ /* Write to IOP_TEST_RESULT port with success value (0) */
+ VEK_TEST_PASS,
+ /* Write to IOP_TEST_RESULT port with failure value (non-zero) */
+ VEK_TEST_FAIL,
+ /* Test specific logic must handle exit data */
+ VEK_UNHANDLED,
+};
+
+enum vm_exit_kind test_run_vcpu(struct vmctx *, int, struct vm_entry *,
+ struct vm_exit *);
+
+void ventry_fulfill_inout(const struct vm_exit *, struct vm_entry *, uint32_t);
+void ventry_fulfill_mmio(const struct vm_exit *, struct vm_entry *, uint64_t);
+
+bool vexit_match_inout(const struct vm_exit *, bool, uint16_t, uint_t,
+ uint32_t *);
+bool vexit_match_mmio(const struct vm_exit *, bool, uint64_t, uint_t,
+ uint64_t *);
+
+#endif /* _IN_GUEST_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/common/payload_common.h b/usr/src/test/bhyve-tests/tests/common/payload_common.h
new file mode 100644
index 0000000000..895364f18e
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/payload_common.h
@@ -0,0 +1,44 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _PAYLOAD_COMMON_H_
+#define _PAYLOAD_COMMON_H_
+
+#define MEM_TOTAL_SZ (64 * 1024 * 1024)
+
+/* 2MiB-page entries for identity-mapped table at 2MiB */
+#define MEM_LOC_PAGE_TABLE_2M 0x200000
+#define MEM_LOC_PAGE_TABLE_1G 0x204000
+#define MEM_LOC_PAGE_TABLE_512G 0x205000
+#define MEM_LOC_GDT 0x206000
+#define MEM_LOC_TSS 0x206200
+#define MEM_LOC_IDT 0x207000
+#define MEM_LOC_STACK 0x400000
+#define MEM_LOC_PAYLOAD 0x800000
+
+/* IO port set aside for emitting test result */
+#define IOP_TEST_RESULT 0xef00U
+
+/* IO port set aside for emitting test value */
+#define IOP_TEST_VALUE 0xef10U
+
+/* IO port set aside for inputting test param(s) */
+#define IOP_TEST_PARAM IOP_TEST_PARAM0
+#define IOP_TEST_PARAM0 0xef20U
+#define IOP_TEST_PARAM1 0xef21U
+#define IOP_TEST_PARAM2 0xef22U
+#define IOP_TEST_PARAM3 0xef23U
+
+#endif /* _PAYLOAD_COMMON_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/common/payload_start.s b/usr/src/test/bhyve-tests/tests/common/payload_start.s
new file mode 100644
index 0000000000..8a57e259de
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/common/payload_start.s
@@ -0,0 +1,27 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ .text
+ .globl _start
+_start:
+ jmp start
+ */
+
+/*
+ * Payload entry point: immediately jumps to the `start` routine which each
+ * payload provides.
+ */
+ENTRY_NP(_start)
+ jmp start
+SET_SIZE(_start)
diff --git a/usr/src/test/bhyve-tests/tests/kdev/Makefile b/usr/src/test/bhyve-tests/tests/kdev/Makefile
new file mode 100644
index 0000000000..52f3c2576c
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/Makefile
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2022 Oxide Computer Company
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/cmd/Makefile.cmd.64
+include $(SRC)/test/Makefile.com
+
+PROG = vpmtmr_freq \
+ vhpet_freq \
+ vlapic_freq \
+ vlapic_freq_periodic \
+ vatpit_freq
+
+PAYLOADS = $(PROG)
+include ../Makefile.in_guest
+
+COMMON_OBJS = in_guest.o
+
+CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) payload_utils.o
+CLOBBERFILES = $(PROG)
+
+ROOTOPTPKG = $(ROOT)/opt/bhyve-tests
+TESTDIR = $(ROOTOPTPKG)/tests/kdev
+
+CMDS = $(PROG:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+CSTD= $(CSTD_GNU99)
+CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
+ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \
+ $(CPPFLAGS.master) \
+ -I$(SRC)/uts/i86pc/io/vmm \
+ -I$(SRC)/uts/i86pc \
+ -I../common
+
+ASFLAGS += -P -D__STDC__ -D_ASM
+
+
+CFLAGS = -m64
+$(PROG) := LDLIBS += -lvmmapi
+
+all: $(PROG)
+
+install: all $(CMDS)
+
+clean:
+ -$(RM) $(CLEANFILES)
+clobber: clean
+ -$(RM) $(CLOBBERFILES)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %
+ $(INS.file)
+
+%: %.c pobj_%.o $(COMMON_OBJS)
+ $(LINK.c) -o $@ $^ $(LDLIBS)
+ $(POST_PROCESS)
+
+%: %.o
+ $(LINK.c) -o $@ $^ $(LDLIBS)
+ $(POST_PROCESS)
+
+$(PAYLOADS:%=payload_%): payload_utils.o
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h
new file mode 100644
index 0000000000..8bd51023df
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _PAYLOAD_UTILS_H_
+#define _PAYLOAD_UTILS_H_
+
+#include <sys/types.h>
+
+void outb(uint16_t, uint8_t);
+void outw(uint16_t, uint16_t);
+void outl(uint16_t, uint32_t);
+uint8_t inb(uint16_t);
+uint16_t inw(uint16_t);
+uint32_t inl(uint16_t);
+
+#endif /* _PAYLOAD_UTILS_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s
new file mode 100644
index 0000000000..8c8e745c17
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s
@@ -0,0 +1,55 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ * void outb(uint16_t port, uint8_t value)
+ * Write the byte passed in %sil to the I/O port passed in %di.
+ */
+ENTRY(outb)
+ movw %di, %dx
+ movb %sil, %al
+ outb (%dx)
+ ret
+SET_SIZE(outb)
+
+/*
+ * void outw(uint16_t port, uint16_t value)
+ * Write the 16-bit value passed in %si to the I/O port passed in %di.
+ */
+ENTRY(outw)
+	movw	%di, %dx
+	movw	%si, %ax
+	outw	(%dx)
+	ret
+SET_SIZE(outw)
+
+/*
+ * void outl(uint16_t port, uint32_t value)
+ * Write the 32-bit value passed in %esi to the I/O port passed in %di.
+ */
+ENTRY(outl)
+ movw %di, %dx
+ movl %esi, %eax
+ outl (%dx)
+ ret
+SET_SIZE(outl)
+
+/*
+ * uint8_t inb(uint16_t port)
+ * Read a byte from the I/O port passed in %di; the result is left in %al.
+ */
+ENTRY(inb)
+ movw %di, %dx
+ inb (%dx)
+ ret
+SET_SIZE(inb)
+
+/*
+ * uint16_t inw(uint16_t port)
+ * Read 16 bits from the I/O port passed in %di; the result is left in %ax.
+ */
+ENTRY(inw)
+ movw %di, %dx
+ inw (%dx)
+ ret
+SET_SIZE(inw)
+
+/*
+ * uint32_t inl(uint16_t port)
+ * Read 32 bits from the I/O port passed in %di; the result is left in %eax.
+ */
+ENTRY(inl)
+ movw %di, %dx
+ inl (%dx)
+ ret
+SET_SIZE(inl)
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
new file mode 100644
index 0000000000..d899dc449d
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+
+/*
+ * Reprogram timer 0 of the PIT and reload it with its maximum count.
+ */
+void
+timer0_reset(void)
+{
+	/*
+	 * Command byte: configure timer 0 for interrupt-on-terminal-count
+	 * mode, and prepare it to be loaded with the high and low bytes.
+	 */
+	const uint8_t cmd = 0x30;
+	/* Both halves of the max value (0xffff) are the same byte */
+	const uint8_t max_byte = 0xff;
+
+	outb(IOP_ATPIT_CMD, cmd);
+
+	outb(IOP_ATPIT_C0, max_byte);
+	outb(IOP_ATPIT_C0, max_byte);
+}
+
+/*
+ * Latch timer 0 of the PIT and return its current 16-bit count.
+ */
+uint16_t
+timer0_read(void)
+{
+	/* Latch timer0 so both bytes come from the same instant */
+	outb(IOP_ATPIT_CMD, 0x00);
+
+	/* The low byte is read first, followed by the high byte */
+	const uint8_t lo = inb(IOP_ATPIT_C0);
+	const uint8_t hi = inb(IOP_ATPIT_C0);
+
+	return ((uint16_t)(lo | ((uint16_t)hi << 8)));
+}
+
+/*
+ * Payload body: repeatedly reset the PIT, report the initial count via
+ * IOP_TEST_VALUE, spin until at least ATPIT_TARGET_TICKS have elapsed on the
+ * (down-counting) timer, and then report the final count.
+ */
+void
+start(void)
+{
+	/* loop for as long as the host wants */
+	for (;;) {
+		timer0_reset();
+
+		const uint16_t first = timer0_read();
+		outw(IOP_TEST_VALUE, first);
+
+		/* wait for enough ticks to pass */
+		uint16_t last;
+		do {
+			last = timer0_read();
+		} while (last > (first - ATPIT_TARGET_TICKS));
+
+		outw(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
new file mode 100644
index 0000000000..7f74e72cbb
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
@@ -0,0 +1,60 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define HPET_OFF_CONFIG 0x10
+#define HPET_OFF_MAIN_COUNT_LOW 0xf0
+
+#define HPET_CONFIG_ENABLE 1
+
+
+/* Perform a 32-bit write of `value` to the HPET register at offset `reg`. */
+static void
+write_hpet(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_HPET_BASE + reg) = value;
+}
+
+/* Perform a 32-bit read of the HPET register at HPET_OFF_MAIN_COUNT_LOW. */
+static uint32_t
+read_hpet_main_low(void)
+{
+	return (*(volatile uint32_t *)
+	    (MMIO_HPET_BASE + HPET_OFF_MAIN_COUNT_LOW));
+}
+
+
+/*
+ * Payload body: enable the HPET, then repeatedly report the main counter via
+ * IOP_TEST_VALUE, spin until at least HPET_TARGET_TICKS have elapsed, and
+ * report the counter again.
+ */
+void
+start(void)
+{
+	write_hpet(HPET_OFF_CONFIG, HPET_CONFIG_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t first = read_hpet_main_low();
+		uint32_t last;
+
+		outl(IOP_TEST_VALUE, first);
+
+		/*
+		 * Wait for enough ticks to pass.  The counter is never reset
+		 * between iterations, so compare the elapsed (unsigned,
+		 * wrapping) delta rather than absolute values; this stays
+		 * correct when the 32-bit reading rolls over.
+		 */
+		do {
+			last = read_hpet_main_low();
+		} while ((last - first) < HPET_TARGET_TICKS);
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
new file mode 100644
index 0000000000..cc4d72a9f7
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define LAPIC_OFF_SVR 0xf0
+#define LAPIC_OFF_TIMER_ICR 0x380
+#define LAPIC_OFF_TIMER_CCR 0x390
+#define LAPIC_OFF_TIMER_DCR 0x3e0
+
+
+#define LAPIC_SVR_ENABLE 0x100
+
+/* Perform a 32-bit write of `value` to the LAPIC register at offset `reg`. */
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg) = value;
+}
+
+/* Perform a 32-bit read of the LAPIC register at offset `reg`. */
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	return (*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg));
+}
+
+/*
+ * Translate a timer divisor (a power of two from 1 through 128) into the
+ * value written to the LAPIC timer divide configuration register.
+ *
+ * An unsupported divisor is reported to the host as a test failure by writing
+ * a non-zero value to IOP_TEST_RESULT; 0xff is returned in that case.
+ */
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	switch (inp) {
+	case 1:
+		return (0xb);
+	case 2:
+		return (0x0);
+	case 4:
+		return (0x1);
+	case 8:
+		return (0x2);
+	case 16:
+		return (0x3);
+	case 32:
+		return (0x8);
+	case 64:
+		return (0x9);
+	case 128:
+		return (0xa);
+	default:
+		/*
+		 * Fail immediately if the divisor is out of range.  This must
+		 * go to the result port: a write to IOP_TEST_VALUE would be
+		 * taken by the host as a timer reading.
+		 */
+		outl(IOP_TEST_RESULT, 1);
+		return (0xff);
+	}
+}
+
+
+/*
+ * Payload body: enable the LAPIC, then for each iteration read a divisor from
+ * the host (IOP_TEST_PARAM), program the timer with it and an initial count
+ * of 0xffffffff, report the current count via IOP_TEST_VALUE, spin until at
+ * least LAPIC_TARGET_TICKS have elapsed on the (down-counting) timer, and
+ * report the count again.
+ */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t divisor = inl(IOP_TEST_PARAM);
+
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, 0xffffffff);
+
+		const uint32_t first = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, first);
+
+		/* wait for enough ticks to pass */
+		const uint32_t target = first - LAPIC_TARGET_TICKS;
+		uint32_t last;
+		do {
+			last = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		} while (last > target);
+
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
new file mode 100644
index 0000000000..969f708ada
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
@@ -0,0 +1,110 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define LAPIC_OFF_SVR 0xf0
+#define LAPIC_OFF_LVT_TIMER 0x320
+#define LAPIC_OFF_TIMER_ICR 0x380
+#define LAPIC_OFF_TIMER_CCR 0x390
+#define LAPIC_OFF_TIMER_DCR 0x3e0
+
+#define LAPIC_LVT_MASKED (1 << 16)
+#define LAPIC_LVT_PERIODIC (1 << 17)
+
+
+#define LAPIC_SVR_ENABLE 0x100
+
+/* Perform a 32-bit write of `value` to the LAPIC register at offset `reg`. */
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg) = value;
+}
+
+/* Perform a 32-bit read of the LAPIC register at offset `reg`. */
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	return (*(volatile uint32_t *)(MMIO_LAPIC_BASE + reg));
+}
+
+/*
+ * Translate a timer divisor (a power of two from 1 through 128) into the
+ * value written to the LAPIC timer divide configuration register.
+ *
+ * An unsupported divisor is reported to the host as a test failure by writing
+ * a non-zero value to IOP_TEST_RESULT; 0xff is returned in that case.
+ */
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	switch (inp) {
+	case 1:
+		return (0xb);
+	case 2:
+		return (0x0);
+	case 4:
+		return (0x1);
+	case 8:
+		return (0x2);
+	case 16:
+		return (0x3);
+	case 32:
+		return (0x8);
+	case 64:
+		return (0x9);
+	case 128:
+		return (0xa);
+	default:
+		/*
+		 * Fail immediately if the divisor is out of range.  This must
+		 * go to the result port: a write to IOP_TEST_VALUE would be
+		 * taken by the host as a timer reading.
+		 */
+		outl(IOP_TEST_RESULT, 1);
+		return (0xff);
+	}
+}
+
+
+/*
+ * Payload body: enable the LAPIC and set its timer LVT to periodic mode with
+ * the interrupt masked.  Then, for each iteration, read a divisor and a loop
+ * count from the host, program the timer with an initial count of
+ * LAPIC_TARGET_TICKS, report the current count via IOP_TEST_VALUE, spin until
+ * the requested number of rollovers has been observed, and report the count
+ * again.
+ */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/*
+	 * Configure the LAPIC timer for periodic operation, but leave the
+	 * interrupt itself masked.
+	 */
+	write_vlapic(LAPIC_OFF_LVT_TIMER,
+	    LAPIC_LVT_MASKED | LAPIC_LVT_PERIODIC);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint16_t divisor = inw(IOP_TEST_PARAM0);
+		const uint16_t loop_count = inw(IOP_TEST_PARAM1);
+
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, LAPIC_TARGET_TICKS);
+
+		const uint32_t first = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, first);
+
+		uint32_t rollovers = 0;
+		uint32_t prev = first;
+		uint32_t cur;
+		do {
+			cur = read_vlapic(LAPIC_OFF_TIMER_CCR);
+
+			/*
+			 * The count runs downward, so a reading above the
+			 * previous one means the timer period rolled over.
+			 */
+			if (cur > prev) {
+				rollovers++;
+			}
+			prev = cur;
+		} while (rollovers < loop_count);
+
+		outl(IOP_TEST_VALUE, cur);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
new file mode 100644
index 0000000000..d96bb2b8b1
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+/*
+ * Payload body: repeatedly report the PM timer value via IOP_TEST_VALUE, spin
+ * until at least PMTMR_TARGET_TICKS have elapsed, and report the value again.
+ */
+void
+start(void)
+{
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint32_t first = inl(IOP_PMTMR);
+		uint32_t last;
+
+		outl(IOP_TEST_VALUE, first);
+
+		/*
+		 * Wait for enough ticks to pass.  The timer is never reset
+		 * between iterations, so compare the elapsed (unsigned,
+		 * wrapping) delta rather than absolute values; this stays
+		 * correct when the 32-bit reading rolls over.
+		 * NOTE(review): assumes the timer counts through all 32 bits.
+		 */
+		do {
+			last = inl(IOP_PMTMR);
+		} while ((last - first) < PMTMR_TARGET_TICKS);
+		outl(IOP_TEST_VALUE, last);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/test_defs.h b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
new file mode 100644
index 0000000000..acc9553274
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
@@ -0,0 +1,38 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _TEST_DEFS_H_
+#define _TEST_DEFS_H_
+
+#define IOP_PMTMR 0x408
+#define IOP_ATPIT_C0 0x40
+#define IOP_ATPIT_CMD 0x43
+
+#define MMIO_HPET_BASE 0xfed00000UL
+#define MMIO_LAPIC_BASE 0xfee00000UL
+
+#define PMTMR_FREQ 3579545
+#define PMTMR_TARGET_TICKS (PMTMR_FREQ / 10)
+
+#define HPET_FREQ (1 << 24)
+#define HPET_TARGET_TICKS (HPET_FREQ / 10)
+
+#define LAPIC_FREQ (128 * 1024 * 1024)
+#define LAPIC_TARGET_TICKS (LAPIC_FREQ / 50)
+
+#define ATPIT_FREQ 1193182
+#define ATPIT_TARGET_TICKS (ATPIT_FREQ / 50)
+
+#endif /* _TEST_DEFS_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
new file mode 100644
index 0000000000..1f4051ef6a
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
@@ -0,0 +1,145 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+/* A guest-reported counter value and the host time at which it was received. */
+typedef struct reading {
+	hrtime_t when;
+	uint16_t value;
+} reading_t;
+
+/*
+ * Judge a pair of PIT counter readings reported by the guest.
+ *
+ * The number of ticks between the readings must be at least
+ * ATPIT_TARGET_TICKS (otherwise the test is failed outright through
+ * test_fail_msg()) and may exceed it by no more than `tick_margin`.  The
+ * host-measured time between the readings must then agree with the duration
+ * those ticks represent at ATPIT_FREQ to within `ppm_margin` parts per
+ * million.
+ *
+ * Returns true when both checks pass, false when either margin is exceeded.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The counter value decreases between readings; truncating the
+	 * difference to 16 bits also covers a wrap through zero.
+	 */
+	const uint16_t tick_delta = before.value - after.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < ATPIT_TARGET_TICKS) {
+		/* presumably does not return -- TODO confirm in in_guest.c */
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, ATPIT_TARGET_TICKS);
+	} else if ((tick_delta - ATPIT_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    ATPIT_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* Widen explicitly so the nanosecond product is computed in 64 bits. */
+	const hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC) / ATPIT_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	const uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * Conversions are matched to the argument types: the margins are
+	 * uint_t (%u), while the 64-bit quantities are cast to long long
+	 * types so the specifiers are right on any data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * Host side of the PIT frequency test: run the guest payload on vCPU 0,
+ * collect the pairs of counter values it writes to IOP_TEST_VALUE, and check
+ * each pair with check_reading(). A pair which fails the check is retried;
+ * the third failure fails the test.
+ *
+ * NOTE(review): the loop relies on test_pass() and the test_fail_*() helpers
+ * not returning (otherwise `nread` would run past the end of `readings`) --
+ * confirm against in_guest.c.
+ */
+int
+main(int argc, char *argv[])
+{
+ const char *test_suite_name = basename(argv[0]);
+ struct vmctx *ctx = NULL;
+ int err;
+
+ ctx = test_initialize(test_suite_name);
+
+ err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+ if (err != 0) {
+ test_fail_errno(err, "Could not initialize vcpu0");
+ }
+
+ struct vm_entry ventry = { 0 };
+ struct vm_exit vexit = { 0 };
+ /* The two readings of the measurement currently being collected */
+ reading_t readings[2];
+ uint_t nread = 0;
+ /* Number of measurements rejected by check_reading() so far */
+ uint_t nrepeat = 0;
+
+ /*
+ * Since the PIT is slower to read back (requiring 3 emulated reads),
+ * operate with a more loose ticks margin.
+ */
+ const uint_t margin_ticks = MAX(1, ATPIT_TARGET_TICKS / 2500);
+ const uint_t margin_ppm = 400;
+
+ do {
+ const enum vm_exit_kind kind =
+ test_run_vcpu(ctx, 0, &ventry, &vexit);
+ if (kind == VEK_REENTR) {
+ continue;
+ } else if (kind != VEK_UNHANDLED) {
+ test_fail_vmexit(&vexit);
+ }
+
+ /* Only a 2-byte access to IOP_TEST_VALUE is expected here */
+ uint32_t v;
+ if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 2, &v)) {
+ /* Timestamp first, to keep host-side latency minimal */
+ readings[nread].when = gethrtime();
+ readings[nread].value = v;
+
+ ventry_fulfill_inout(&vexit, &ventry, 0);
+
+ nread++;
+ if (nread != 2) {
+ continue;
+ }
+
+ if (check_reading(readings[0], readings[1],
+ margin_ticks, margin_ppm)) {
+ test_pass();
+ } else {
+ nrepeat++;
+ if (nrepeat < 3) {
+ /* start over with a fresh pair of readings */
+ nread = 0;
+ (void) printf("retry %u\n", nrepeat);
+ continue;
+ }
+ test_fail_msg("bad result after %u retries\n",
+ nrepeat);
+ }
+ } else {
+ test_fail_vmexit(&vexit);
+ }
+
+ } while (true);
+
+ return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
new file mode 100644
index 0000000000..238596c739
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
@@ -0,0 +1,146 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+/* A guest-reported counter value and the host time at which it was received. */
+typedef struct reading {
+	hrtime_t when;
+	uint32_t value;
+} reading_t;
+
+/*
+ * Judge a pair of HPET counter readings reported by the guest.
+ *
+ * The number of ticks between the readings must be at least HPET_TARGET_TICKS
+ * (otherwise the test is failed outright through test_fail_msg()) and may
+ * exceed it by no more than `tick_margin`.  The host-measured time between
+ * the readings must then agree with the duration those ticks represent at
+ * HPET_FREQ to within `ppm_margin` parts per million.
+ *
+ * Returns true when both checks pass, false when either margin is exceeded.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The reported value counts upward.  Unsigned 32-bit subtraction
+	 * yields the elapsed ticks whether or not the value rolled over
+	 * between readings; the explicit `(UINT32_MAX - before) + after`
+	 * form undercounts a rollover by one tick.
+	 */
+	const uint32_t tick_delta = after.value - before.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < HPET_TARGET_TICKS) {
+		/* presumably does not return -- TODO confirm in in_guest.c */
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, HPET_TARGET_TICKS);
+	} else if ((tick_delta - HPET_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    HPET_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* Widen explicitly so the nanosecond product is computed in 64 bits. */
+	const hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC) / HPET_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	const uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * Conversions are matched to the argument types: the margins are
+	 * uint_t (%u), while the 64-bit quantities are cast to long long
+	 * types so the specifiers are right on any data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * Host side of the HPET frequency test: run the guest payload on vCPU 0,
+ * collect the pairs of counter values it writes to IOP_TEST_VALUE, and check
+ * each pair with check_reading(). A pair which fails the check is retried;
+ * the third failure fails the test.
+ *
+ * NOTE(review): the loop relies on test_pass() and the test_fail_*() helpers
+ * not returning (otherwise `nread` would run past the end of `readings`) --
+ * confirm against in_guest.c.
+ */
+int
+main(int argc, char *argv[])
+{
+ const char *test_suite_name = basename(argv[0]);
+ struct vmctx *ctx = NULL;
+ int err;
+
+ ctx = test_initialize(test_suite_name);
+
+ err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+ if (err != 0) {
+ test_fail_errno(err, "Could not initialize vcpu0");
+ }
+
+ struct vm_entry ventry = { 0 };
+ struct vm_exit vexit = { 0 };
+ /* The two readings of the measurement currently being collected */
+ reading_t readings[2];
+ uint_t nread = 0;
+ /* Number of measurements rejected by check_reading() so far */
+ uint_t nrepeat = 0;
+
+ /* Allow 1/10000th of the target tick count (but at least one tick) */
+ const uint_t margin_ticks = MAX(1, HPET_TARGET_TICKS / 10000);
+ const uint_t margin_ppm = 400;
+
+ do {
+ const enum vm_exit_kind kind =
+ test_run_vcpu(ctx, 0, &ventry, &vexit);
+ if (kind == VEK_REENTR) {
+ continue;
+ } else if (kind != VEK_UNHANDLED) {
+ test_fail_vmexit(&vexit);
+ }
+
+ /* Only a 4-byte access to IOP_TEST_VALUE is expected here */
+ uint32_t v;
+ if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+ /* Timestamp first, to keep host-side latency minimal */
+ readings[nread].when = gethrtime();
+ readings[nread].value = v;
+
+ ventry_fulfill_inout(&vexit, &ventry, 0);
+
+ nread++;
+ if (nread != 2) {
+ continue;
+ }
+
+ if (check_reading(readings[0], readings[1],
+ margin_ticks, margin_ppm)) {
+ test_pass();
+ } else {
+ nrepeat++;
+ if (nrepeat < 3) {
+ /* start over with a fresh pair of readings */
+ nread = 0;
+ (void) printf("retry %u\n", nrepeat);
+ continue;
+ }
+ test_fail_msg("bad result after %u retries\n",
+ nrepeat);
+ }
+ } else {
+ test_fail_vmexit(&vexit);
+ }
+
+ } while (true);
+
+ return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
new file mode 100644
index 0000000000..cf462b1acc
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
@@ -0,0 +1,169 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+/* A guest-reported counter value and the host time at which it was received. */
+typedef struct reading {
+	hrtime_t when;
+	uint32_t value;
+} reading_t;
+
+/*
+ * Judge a pair of LAPIC timer readings reported by the guest while the timer
+ * clock is divided by `divisor`.
+ *
+ * The number of ticks between the readings must be at least
+ * LAPIC_TARGET_TICKS (otherwise the test is failed outright through
+ * test_fail_msg()) and may exceed it by no more than `tick_margin / divisor`.
+ * The host-measured time between the readings must then agree with the
+ * duration those ticks represent at LAPIC_FREQ / divisor to within
+ * `ppm_margin` parts per million.
+ *
+ * `divisor` must be non-zero.  Returns true when both checks pass, false when
+ * either margin is exceeded.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+	uint32_t tick_delta;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/* timer is counting down, so act appropriately */
+	if (after.value > before.value) {
+		/*
+		 * handle rollover
+		 *
+		 * NOTE(review): if the counter wraps modulo 2^32 this is one
+		 * tick short of the true distance; left unchanged because the
+		 * payload's wrap behaviour is not visible from here.
+		 */
+		tick_delta = (UINT32_MAX - after.value) + before.value;
+	} else {
+		tick_delta = before.value - after.value;
+	}
+
+	/* is the number of ticks OK? */
+	if (tick_delta < LAPIC_TARGET_TICKS) {
+		/* presumably does not return -- TODO confirm in in_guest.c */
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, LAPIC_TARGET_TICKS);
+	} else if ((tick_delta - LAPIC_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    LAPIC_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* Widen explicitly so the nanosecond product is computed in 64 bits. */
+	const hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	const uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * Conversions are matched to the argument types: the margins and
+	 * divisor are uint_t (%u), while the 64-bit quantities are cast to
+	 * long long types so the specifiers are right on any data model.
+	 */
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+
+/*
+ * Run one LAPIC timer measurement with the given clock `divisor`: hand the
+ * divisor to the guest when it reads IOP_TEST_PARAM, then collect the pair of
+ * counter values it writes to IOP_TEST_VALUE and check them with
+ * check_reading(). Returns once a pair passes; a failing pair is retried, and
+ * the third failure fails the test.
+ *
+ * `ventry` and `vexit` carry the vCPU entry/exit state across calls so that
+ * successive measurements continue the same guest execution.
+ *
+ * NOTE(review): relies on the test_fail_*() helpers not returning -- confirm
+ * against in_guest.c.
+ */
+static void
+test_for_divisor(struct vmctx *ctx, uint_t divisor, struct vm_entry *ventry,
+ struct vm_exit *vexit)
+{
+ /* The two readings of the measurement currently being collected */
+ reading_t readings[2];
+ uint_t nread = 0;
+ /* Number of measurements rejected by check_reading() so far */
+ uint_t nrepeat = 0;
+
+ /* check_reading() scales the tick margin down by the divisor */
+ const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+ const uint_t margin_ppm = 400;
+
+ do {
+ const enum vm_exit_kind kind =
+ test_run_vcpu(ctx, 0, ventry, vexit);
+ if (kind == VEK_REENTR) {
+ continue;
+ } else if (kind != VEK_UNHANDLED) {
+ test_fail_vmexit(vexit);
+ }
+
+ /* input the divisor */
+ if (vexit_match_inout(vexit, true, IOP_TEST_PARAM, 4, NULL)) {
+ ventry_fulfill_inout(vexit, ventry, divisor);
+ continue;
+ }
+
+ uint32_t v;
+ if (vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+ /* Timestamp first, to keep host-side latency minimal */
+ readings[nread].when = gethrtime();
+ readings[nread].value = v;
+ ventry_fulfill_inout(vexit, ventry, 0);
+
+ nread++;
+ if (nread != 2) {
+ continue;
+ }
+
+ if (check_reading(readings[0], readings[1], divisor,
+ margin_ticks, margin_ppm)) {
+ (void) printf("good result\n");
+ return;
+ } else {
+ nrepeat++;
+ if (nrepeat < 3) {
+ /* start over with a fresh pair of readings */
+ nread = 0;
+ (void) printf("retry %u\n", nrepeat);
+ continue;
+ }
+ test_fail_msg("bad result after %u retries\n",
+ nrepeat);
+ }
+ } else {
+ test_fail_vmexit(vexit);
+ }
+ } while (true);
+}
+
+/*
+ * Host side of the LAPIC timer frequency test: set up vCPU 0 with the guest
+ * payload, then run one measurement for each of the divisors 2, 4 and 16.
+ * test_for_divisor() only returns on success, so reaching test_pass() means
+ * every divisor produced an acceptable result.
+ */
+int
+main(int argc, char *argv[])
+{
+ const char *test_suite_name = basename(argv[0]);
+ struct vmctx *ctx = NULL;
+ int err;
+
+ ctx = test_initialize(test_suite_name);
+
+ err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+ if (err != 0) {
+ test_fail_errno(err, "Could not initialize vcpu0");
+ }
+
+ /* Shared across measurements so the guest resumes where it left off */
+ struct vm_entry ventry = { 0 };
+ struct vm_exit vexit = { 0 };
+
+ test_for_divisor(ctx, 2, &ventry, &vexit);
+ test_for_divisor(ctx, 4, &ventry, &vexit);
+ test_for_divisor(ctx, 16, &ventry, &vexit);
+ test_pass();
+ return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
new file mode 100644
index 0000000000..d5c6d8184c
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
@@ -0,0 +1,178 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+/* A guest-reported counter value and the host time at which it was received. */
+typedef struct reading {
+	hrtime_t when;
+	uint32_t value;
+} reading_t;
+
+/*
+ * Judge a pair of LAPIC timer readings reported by the guest after the
+ * periodic timer, clocked at LAPIC_FREQ / divisor, has gone through `loops`
+ * periods of LAPIC_TARGET_TICKS each.
+ *
+ * A tick count more than `tick_margin / divisor` above the target rejects the
+ * measurement.  The host-measured time between the readings must then agree
+ * with the duration the counted ticks represent to within `ppm_margin` parts
+ * per million.
+ *
+ * `divisor` must be non-zero.  Returns true when the measurement is
+ * acceptable, false otherwise.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor, uint_t loops,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/*
+	 * The 'before' measurement includes the ticks which occurred between
+	 * programming the timer and taking the first reading.  The 'after'
+	 * measurement includes the number of loops (each consisting of the
+	 * target tick count) plus however many ticks had transpired since the
+	 * most recent roll-over.
+	 */
+	const uint32_t tick_target = loops * LAPIC_TARGET_TICKS;
+	const uint32_t tick_delta = tick_target + before.value - after.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < tick_target) {
+		/*
+		 * NOTE(review): a shortfall beyond the margin is reported but,
+		 * unlike the overshoot case below, does not reject the
+		 * measurement.  Behaviour kept as-is; confirm whether a
+		 * `return (false)` was intended here.
+		 */
+		if ((tick_target - tick_delta) > tick_margin) {
+			(void) printf("%u ticks outside margin %u\n",
+			    tick_delta, tick_target - tick_margin);
+		}
+	} else if ((tick_delta - tick_target) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    tick_target + tick_margin);
+		return (false);
+	}
+
+	/* Widen explicitly so the nanosecond product is computed in 64 bits. */
+	const hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	const uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * Conversions are matched to the argument types: the margins and
+	 * divisor are uint_t (%u), while the 64-bit quantities are cast to
+	 * long long types so the specifiers are right on any data model.
+	 */
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+
+/*
+ * Run one periodic LAPIC timer measurement: hand the clock `divisor` and the
+ * number of timer periods (`loops`) to the guest when it reads
+ * IOP_TEST_PARAM0 and IOP_TEST_PARAM1 respectively, then collect the pair of
+ * counter values it writes to IOP_TEST_VALUE and check them with
+ * check_reading(). Returns once a pair passes; a failing pair is retried, and
+ * the third failure fails the test.
+ *
+ * `ventry` and `vexit` carry the vCPU entry/exit state across calls so that
+ * successive measurements continue the same guest execution.
+ *
+ * NOTE(review): relies on the test_fail_*() helpers not returning -- confirm
+ * against in_guest.c.
+ */
+static void
+run_test(struct vmctx *ctx, uint_t divisor, uint_t loops,
+ struct vm_entry *ventry, struct vm_exit *vexit)
+{
+ /* The two readings of the measurement currently being collected */
+ reading_t readings[2];
+ uint_t nread = 0;
+ /* Number of measurements rejected by check_reading() so far */
+ uint_t nrepeat = 0;
+
+ /* check_reading() scales the tick margin down by the divisor */
+ const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+ const uint_t margin_ppm = 400;
+
+ do {
+ const enum vm_exit_kind kind =
+ test_run_vcpu(ctx, 0, ventry, vexit);
+ if (kind == VEK_REENTR) {
+ continue;
+ } else if (kind != VEK_UNHANDLED) {
+ test_fail_vmexit(vexit);
+ }
+
+ /* input the divisor */
+ if (vexit_match_inout(vexit, true, IOP_TEST_PARAM0, 2, NULL)) {
+ ventry_fulfill_inout(vexit, ventry, divisor);
+ continue;
+ }
+ /* input the loop count */
+ if (vexit_match_inout(vexit, true, IOP_TEST_PARAM1, 2, NULL)) {
+ ventry_fulfill_inout(vexit, ventry, loops);
+ continue;
+ }
+
+ uint32_t v;
+ if (vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+ /* Timestamp first, to keep host-side latency minimal */
+ readings[nread].when = gethrtime();
+ readings[nread].value = v;
+ ventry_fulfill_inout(vexit, ventry, 0);
+
+ nread++;
+ if (nread != 2) {
+ continue;
+ }
+
+ if (check_reading(readings[0], readings[1], divisor,
+ loops, margin_ticks, margin_ppm)) {
+ (void) printf("good result\n");
+ return;
+ } else {
+ nrepeat++;
+ if (nrepeat < 3) {
+ /* start over with a fresh pair of readings */
+ nread = 0;
+ (void) printf("retry %u\n", nrepeat);
+ continue;
+ }
+ test_fail_msg("bad result after %u retries\n",
+ nrepeat);
+ }
+ } else {
+ test_fail_vmexit(vexit);
+ }
+ } while (true);
+}
+
+/*
+ * Host side of the periodic LAPIC timer frequency test: set up vCPU 0 with
+ * the guest payload, then run two measurements (divisor 4 over 3 periods, and
+ * divisor 2 over 4 periods). run_test() only returns on success, so reaching
+ * test_pass() means both produced an acceptable result.
+ */
+int
+main(int argc, char *argv[])
+{
+ const char *test_suite_name = basename(argv[0]);
+ struct vmctx *ctx = NULL;
+ int err;
+
+ ctx = test_initialize(test_suite_name);
+
+ err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+ if (err != 0) {
+ test_fail_errno(err, "Could not initialize vcpu0");
+ }
+
+ /* Shared across measurements so the guest resumes where it left off */
+ struct vm_entry ventry = { 0 };
+ struct vm_exit vexit = { 0 };
+
+ run_test(ctx, 4, 3, &ventry, &vexit);
+ run_test(ctx, 2, 4, &ventry, &vexit);
+ test_pass();
+ return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
new file mode 100644
index 0000000000..60541bf898
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
@@ -0,0 +1,151 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+/* A guest-reported counter value and the host time at which it was received. */
+typedef struct reading {
+	hrtime_t when;
+	uint32_t value;
+} reading_t;
+
+/*
+ * Judge a pair of PM timer readings reported by the guest.
+ *
+ * The number of ticks between the readings must be at least
+ * PMTMR_TARGET_TICKS (otherwise the test is failed outright through
+ * test_fail_msg()) and may exceed it by no more than `tick_margin`.  The
+ * host-measured time between the readings must then agree with the duration
+ * those ticks represent at PMTMR_FREQ to within `ppm_margin` parts per
+ * million.
+ *
+ * Returns true when both checks pass, false when either margin is exceeded.
+ */
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The reported value counts upward.  Unsigned 32-bit subtraction
+	 * yields the elapsed ticks whether or not the value rolled over
+	 * between readings; the explicit `(UINT32_MAX - before) + after`
+	 * form undercounts a rollover by one tick.
+	 */
+	const uint32_t tick_delta = after.value - before.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < PMTMR_TARGET_TICKS) {
+		/* presumably does not return -- TODO confirm in in_guest.c */
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, PMTMR_TARGET_TICKS);
+	} else if ((tick_delta - PMTMR_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    PMTMR_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	/* Widen explicitly so the nanosecond product is computed in 64 bits. */
+	const hrtime_t time_target =
+	    ((hrtime_t)tick_delta * NANOSEC) / PMTMR_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	const uint64_t ppm = (offset * 1000000) / time_target;
+
+	/*
+	 * Conversions are matched to the argument types: the margins are
+	 * uint_t (%u), while the 64-bit quantities are cast to long long
+	 * types so the specifiers are right on any data model.
+	 */
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %llu ppm)\n",
+	    tick_delta, (long long)time_delta, (unsigned long long)ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * Host side of the PM timer frequency test: place the PM timer at IOP_PMTMR,
+ * run the guest payload on vCPU 0, collect the pairs of counter values it
+ * writes to IOP_TEST_VALUE, and check each pair with check_reading().  A pair
+ * which fails the check is retried; the third failure fails the test.
+ *
+ * NOTE(review): the loop relies on test_pass() and the test_fail_*() helpers
+ * not returning (otherwise `nread` would run past the end of `readings`) --
+ * confirm against in_guest.c.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	/* Expose the PM timer at the port the payload reads from. */
+	err = vm_pmtmr_set_location(ctx, IOP_PMTMR);
+	if (err != 0) {
+		test_fail_errno(err, "Could not place pmtmr");
+	}
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	/* The two readings of the measurement currently being collected */
+	reading_t readings[2];
+	uint_t nread = 0;
+	/* Number of measurements rejected by check_reading() so far */
+	uint_t nrepeat = 0;
+
+	/* Allow 1/10000th of the target tick count (but at least one tick) */
+	const uint_t margin_ticks = MAX(1, PMTMR_TARGET_TICKS / 10000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			/* Timestamp first, to keep host-side latency minimal */
+			readings[nread].when = gethrtime();
+			/*
+			 * Use the value handed back by vexit_match_inout(),
+			 * as the sibling frequency tests do, rather than
+			 * reaching into the exit structure directly.
+			 */
+			readings[nread].value = v;
+
+			ventry_fulfill_inout(&vexit, &ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1],
+			    margin_ticks, margin_ppm)) {
+				test_pass();
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					/* start over with a fresh pair */
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(&vexit);
+		}
+
+	} while (true);
+
+	return (0);
+}
diff --git a/usr/src/uts/i86pc/io/viona/viona_impl.h b/usr/src/uts/i86pc/io/viona/viona_impl.h
index 4872720f79..760474e78b 100644
--- a/usr/src/uts/i86pc/io/viona/viona_impl.h
+++ b/usr/src/uts/i86pc/io/viona/viona_impl.h
@@ -109,6 +109,7 @@ typedef struct viona_vring {
/* Reference to guest pages holding virtqueue */
void **vr_map_pages;
+ vmm_page_t *vr_map_hold;
/* Per-ring error condition statistics */
struct viona_ring_stats {
@@ -293,15 +294,19 @@ void viona_ring_free(viona_vring_t *);
int viona_ring_reset(viona_vring_t *, boolean_t);
int viona_ring_init(viona_link_t *, uint16_t, uint16_t, uint64_t);
boolean_t viona_ring_lease_renew(viona_vring_t *);
-int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *);
+
+int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *,
+ vmm_page_t **);
void vq_pushchain(viona_vring_t *, uint32_t, uint16_t);
void vq_pushchain_many(viona_vring_t *, uint_t, used_elem_t *);
+
void viona_intr_ring(viona_vring_t *ring, boolean_t);
void viona_ring_set_no_notify(viona_vring_t *, boolean_t);
void viona_ring_disable_notify(viona_vring_t *);
void viona_ring_enable_notify(viona_vring_t *);
uint16_t viona_ring_num_avail(viona_vring_t *);
+
void viona_rx_init(void);
void viona_rx_fini(void);
int viona_rx_set(viona_link_t *);
diff --git a/usr/src/uts/i86pc/io/viona/viona_ring.c b/usr/src/uts/i86pc/io/viona/viona_ring.c
index 79094d3dc0..2d847dda09 100644
--- a/usr/src/uts/i86pc/io/viona/viona_ring.c
+++ b/usr/src/uts/i86pc/io/viona/viona_ring.c
@@ -81,17 +81,109 @@
P2ROUNDUP(LEGACY_USED_SZ(qsz), LEGACY_VQ_ALIGN))
#define LEGACY_VQ_PAGES(qsz) (LEGACY_VQ_SIZE(qsz) / PAGESIZE)
+/*
+ * Accumulator threaded through the descriptor-mapping routines while a
+ * descriptor chain is popped: it collects the iovec entries describing the
+ * guest buffers together with the chain of page holds which back them.
+ */
+struct vq_held_region {
+ /* Caller-supplied iovec array which receives the mapped segments */
+ struct iovec *vhr_iov;
+ /* First and last entries in the chain of held pages (NULL when empty) */
+ vmm_page_t *vhr_head;
+ vmm_page_t *vhr_tail;
+ /* Length of iovec array supplied in `vhr_iov` */
+ uint_t vhr_niov;
+ /*
+ * Index into vhr_iov, indicating the next "free" entry (following the
+ * last entry which has valid contents).
+ */
+ uint_t vhr_idx;
+};
+typedef struct vq_held_region vq_held_region_t;
+
static boolean_t viona_ring_map(viona_vring_t *);
static void viona_ring_unmap(viona_vring_t *);
static kthread_t *viona_create_worker(viona_vring_t *);
-static void *
-viona_hold_page(viona_vring_t *ring, uint64_t gpa)
+static vmm_page_t *
+vq_page_hold(viona_vring_t *ring, uint64_t gpa, bool writable)
{
ASSERT3P(ring->vr_lease, !=, NULL);
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
- return (vmm_drv_gpa2kva(ring->vr_lease, gpa, PAGESIZE));
+ int prot = PROT_READ;
+ if (writable) {
+ prot |= PROT_WRITE;
+ }
+
+ return (vmm_drv_page_hold(ring->vr_lease, gpa, prot));
+}
+
+/*
+ * Establish a hold on the page(s) which back the region of guest memory covered
+ * by [gpa, gpa + len). The host-kernel-virtual pointers to those pages are
+ * stored in the iovec array supplied in `region`, along with the chain of
+ * vmm_page_t entries representing the held pages. Since guest memory
+ * carries no guarantees of being physically contiguous (on the host), it is
+ * assumed that an iovec entry will be required for each PAGESIZE section
+ * covered by the specified `gpa` and `len` range. For each iovec entry
+ * successfully populated by holding a page, `vhr_idx` will be incremented so it
+ * references the next available iovec entry (or `vhr_niov`, if the iovec array
+ * is full). The responsibility for releasing the `vmm_page_t` chain (stored in
+ * `vhr_head` and `vhr_tail`) resides with the caller, regardless of the result.
+ */
+static int
+vq_region_hold(viona_vring_t *ring, uint64_t gpa, uint32_t len,
+ bool writable, vq_held_region_t *region)
+{
+ /* The first page may be entered part-way through */
+ const uint32_t front_offset = gpa & PAGEOFFSET;
+ const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
+ uint_t pages = 1;
+ vmm_page_t *vmp;
+ caddr_t buf;
+
+ ASSERT3U(region->vhr_idx, <, region->vhr_niov);
+
+ /* Count the additional pages touched beyond the first one */
+ if (front_len < len) {
+ pages += P2ROUNDUP((uint64_t)(len - front_len),
+ PAGESIZE) / PAGESIZE;
+ }
+ /* Refuse up front, before holding anything, if the iovec cannot fit it */
+ if (pages > (region->vhr_niov - region->vhr_idx)) {
+ return (E2BIG);
+ }
+
+ vmp = vq_page_hold(ring, gpa & PAGEMASK, writable);
+ if (vmp == NULL) {
+ return (EFAULT);
+ }
+ /*
+ * NOTE(review): the readable accessor is used even when `writable` is
+ * set, with the const-ness cast away -- confirm this is intended.
+ */
+ buf = (caddr_t)vmm_drv_page_readable(vmp);
+
+ region->vhr_iov[region->vhr_idx].iov_base = buf + front_offset;
+ region->vhr_iov[region->vhr_idx].iov_len = front_len;
+ region->vhr_idx++;
+ gpa += front_len;
+ len -= front_len;
+ /* Start the page chain, or append to one begun by an earlier call */
+ if (region->vhr_head == NULL) {
+ region->vhr_head = vmp;
+ region->vhr_tail = vmp;
+ } else {
+ vmm_drv_page_chain(region->vhr_tail, vmp);
+ region->vhr_tail = vmp;
+ }
+
+ /* Remaining pages all begin on a page boundary */
+ for (uint_t i = 1; i < pages; i++) {
+ ASSERT3U(gpa & PAGEOFFSET, ==, 0);
+
+ vmp = vq_page_hold(ring, gpa, writable);
+ if (vmp == NULL) {
+ /* Pages held so far stay on the chain for the caller */
+ return (EFAULT);
+ }
+ buf = (caddr_t)vmm_drv_page_readable(vmp);
+
+ const uint32_t chunk_len = MIN(len, PAGESIZE);
+ region->vhr_iov[region->vhr_idx].iov_base = buf;
+ region->vhr_iov[region->vhr_idx].iov_len = chunk_len;
+ region->vhr_idx++;
+ gpa += chunk_len;
+ len -= chunk_len;
+ vmm_drv_page_chain(region->vhr_tail, vmp);
+ region->vhr_tail = vmp;
+ }
+
+ return (0);
}
static boolean_t
@@ -310,14 +402,28 @@ viona_ring_map(viona_vring_t *ring)
const uint_t npages = LEGACY_VQ_PAGES(qsz);
ring->vr_map_pages = kmem_zalloc(npages * sizeof (void *), KM_SLEEP);
+ vmm_page_t *prev = NULL;
+
for (uint_t i = 0; i < npages; i++, pa += PAGESIZE) {
- void *page = viona_hold_page(ring, pa);
+ vmm_page_t *vmp;
- if (page == NULL) {
+ vmp = vq_page_hold(ring, pa, true);
+ if (vmp == NULL) {
viona_ring_unmap(ring);
return (B_FALSE);
}
- ring->vr_map_pages[i] = page;
+
+ /*
+ * Keep the first page as the head of the chain, appending all
+ * subsequent pages to the tail.
+ */
+ if (prev == NULL) {
+ ring->vr_map_hold = vmp;
+ } else {
+ vmm_drv_page_chain(prev, vmp);
+ }
+ prev = vmp;
+ ring->vr_map_pages[i] = vmm_drv_page_writable(vmp);
}
return (B_TRUE);
@@ -330,17 +436,14 @@ viona_ring_unmap(viona_vring_t *ring)
void **map = ring->vr_map_pages;
if (map != NULL) {
- /*
- * The bhyve page-hold mechanism does not currently require a
- * corresponding page-release action, given the simplicity of
- * the underlying virtual memory constructs.
- *
- * If/when those systems become more sophisticated, more than a
- * simple free of the page pointers will be required here.
- */
const uint_t npages = LEGACY_VQ_PAGES(ring->vr_size);
kmem_free(map, npages * sizeof (void *));
ring->vr_map_pages = NULL;
+
+ vmm_drv_page_release_chain(ring->vr_map_hold);
+ ring->vr_map_hold = NULL;
+ } else {
+ ASSERT3P(ring->vr_map_hold, ==, NULL);
}
}
@@ -520,14 +623,9 @@ vq_read_avail(viona_vring_t *ring, uint16_t idx)
*/
static int
vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
- uint64_t gpa = desc->vd_addr;
- uint32_t len = desc->vd_len;
- uint16_t lidx = *idxp;
- caddr_t buf;
-
- ASSERT3U(lidx, <, niov);
+ int err;
if (desc->vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
@@ -536,55 +634,22 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
return (EINVAL);
}
- const uint32_t front_offset = desc->vd_addr & PAGEOFFSET;
- const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
- uint_t pages = 1;
- if (front_len < len) {
- pages += P2ROUNDUP((uint64_t)(len - front_len),
- PAGESIZE) / PAGESIZE;
- }
-
- if (pages > (niov - lidx)) {
+ err = vq_region_hold(ring, desc->vd_addr, desc->vd_len,
+ (desc->vd_flags & VRING_DESC_F_WRITE) != 0, region);
+ switch (err) {
+ case E2BIG:
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
- }
-
- buf = viona_hold_page(ring, gpa & PAGEMASK);
- if (buf == NULL) {
+ break;
+ case EFAULT:
VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ break;
+ default:
+ break;
}
- iov[lidx].iov_base = buf + front_offset;
- iov[lidx].iov_len = front_len;
- gpa += front_len;
- len -= front_len;
- lidx++;
-
- for (uint_t i = 1; i < pages; i++) {
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
-
- buf = viona_hold_page(ring, gpa);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
- VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
- }
- const uint32_t region_len = MIN(len, PAGESIZE);
- iov[lidx].iov_base = buf;
- iov[lidx].iov_len = region_len;
- gpa += region_len;
- len -= region_len;
- lidx++;
- }
-
- ASSERT3U(len, ==, 0);
- ASSERT3U(gpa, ==, desc->vd_addr + desc->vd_len);
-
- *idxp = lidx;
- return (0);
+ return (err);
}
/*
@@ -593,7 +658,7 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
*/
static int
vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
const uint16_t indir_count = desc->vd_len / sizeof (struct virtio_desc);
@@ -607,8 +672,10 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
}
uint16_t indir_next = 0;
- caddr_t buf = NULL;
+ const uint8_t *buf = NULL;
uint64_t buf_gpa = UINT64_MAX;
+ vmm_page_t *vmp = NULL;
+ int err = 0;
for (;;) {
uint64_t indir_gpa =
@@ -621,13 +688,18 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
* resides in, if has not already been done.
*/
if (indir_page != buf_gpa) {
- buf = viona_hold_page(ring, indir_page);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ vmp = vq_page_hold(ring, indir_page, false);
+ if (vmp == NULL) {
+ VIONA_PROBE_BAD_RING_ADDR(ring, indir_page);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ err = EFAULT;
+ break;
}
buf_gpa = indir_page;
+ buf = vmm_drv_page_readable(vmp);
}
/*
@@ -641,27 +713,30 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
if (vp.vd_flags & VRING_DESC_F_INDIRECT) {
VIONA_PROBE1(indir_bad_nest, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, indir_bad_nest);
- return (EINVAL);
+ err = EINVAL;
+ break;
} else if (vp.vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
uint32_t, vp.vd_len);
VIONA_RING_STAT_INCR(ring, desc_bad_len);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
- int err = vq_map_desc_bufs(ring, &vp, iov, niov, idxp);
+ err = vq_map_desc_bufs(ring, &vp, region);
if (err != 0) {
- return (err);
+ break;
}
/* Successfully reach the end of the indir chain */
if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0) {
- return (0);
+ break;
}
- if (*idxp >= niov) {
+ if (region->vhr_idx >= region->vhr_niov) {
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
+ err = E2BIG;
+ break;
}
indir_next = vp.vd_next;
@@ -669,23 +744,31 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
VIONA_PROBE3(indir_bad_next, viona_vring_t *, ring,
uint16_t, indir_next, uint16_t, indir_count);
VIONA_RING_STAT_INCR(ring, indir_bad_next);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
}
- /* NOTREACHED */
- return (-1);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ return (err);
}
int
vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
- uint16_t *cookie)
+ uint16_t *cookie, vmm_page_t **chain)
{
- uint16_t i, ndesc, idx, head, next;
+ uint16_t ndesc, idx, head, next;
struct virtio_desc vdir;
+ vq_held_region_t region = {
+ .vhr_niov = niov,
+ .vhr_iov = iov,
+ };
ASSERT(iov != NULL);
ASSERT(niov > 0 && niov < INT_MAX);
+ ASSERT(*chain == NULL);
mutex_enter(&ring->vr_a_mutex);
idx = ring->vr_cur_aidx;
@@ -711,7 +794,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
head = vq_read_avail(ring, idx & ring->vr_mask);
next = head;
- for (i = 0; i < niov; next = vdir.vd_next) {
+ for (region.vhr_idx = 0; region.vhr_idx < niov; next = vdir.vd_next) {
if (next >= ring->vr_size) {
VIONA_PROBE2(bad_idx, viona_vring_t *, ring,
uint16_t, next);
@@ -721,7 +804,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
vq_read_desc(ring, next, &vdir);
if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) {
- if (vq_map_desc_bufs(ring, &vdir, iov, niov, &i) != 0) {
+ if (vq_map_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
} else {
@@ -738,21 +821,29 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
break;
}
- if (vq_map_indir_desc_bufs(ring, &vdir, iov, niov, &i)
- != 0) {
+ if (vq_map_indir_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
}
if ((vdir.vd_flags & VRING_DESC_F_NEXT) == 0) {
- *cookie = head;
ring->vr_cur_aidx++;
mutex_exit(&ring->vr_a_mutex);
- return (i);
+
+ *cookie = head;
+ *chain = region.vhr_head;
+ return (region.vhr_idx);
}
}
mutex_exit(&ring->vr_a_mutex);
+ if (region.vhr_head != NULL) {
+ /*
+ * If any pages were held prior to encountering an error, we
+ * must release them now.
+ */
+ vmm_drv_page_release_chain(region.vhr_head);
+ }
return (-1);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_rx.c b/usr/src/uts/i86pc/io/viona/viona_rx.c
index dc3feb10fe..2fbf6be972 100644
--- a/usr/src/uts/i86pc/io/viona/viona_rx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_rx.c
@@ -208,10 +208,11 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
caddr_t buf = NULL;
boolean_t end = B_FALSE;
const uint32_t features = ring->vr_link->l_features;
+ vmm_page_t *pages = NULL;
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
return (ENOSPC);
@@ -279,6 +280,7 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
}
/* Release this chain */
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, copied, cookie);
return (0);
@@ -287,6 +289,7 @@ bad_frame:
mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
return (EINVAL);
}
@@ -296,6 +299,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
struct iovec iov[VTNET_MAXSEGS];
used_elem_t uelem[VTNET_MAXSEGS];
+ vmm_page_t *pages = NULL, *hdr_pages = NULL;
int n, i = 0, buf_idx = 0, err = 0;
uint16_t cookie;
caddr_t buf;
@@ -307,7 +311,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
@@ -376,7 +380,12 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
err = EOVERFLOW;
break;
}
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ pages = NULL;
+ }
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
+ &pages);
if (n <= 0) {
/*
* Without more immediate space to perform the
@@ -453,6 +462,13 @@ done:
uint16_t, cookie, mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
}
+
+ if (hdr_pages != NULL) {
+ vmm_drv_page_release_chain(hdr_pages);
+ }
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ }
vq_pushchain_many(ring, buf_idx + 1, uelem);
return (err);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_tx.c b/usr/src/uts/i86pc/io/viona/viona_tx.c
index f8018692a5..424deee498 100644
--- a/usr/src/uts/i86pc/io/viona/viona_tx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_tx.c
@@ -70,6 +70,7 @@ struct viona_desb {
uint32_t d_len;
uint16_t d_cookie;
uchar_t *d_headers;
+ vmm_page_t *d_pages;
};
static void viona_tx(viona_link_t *, viona_vring_t *);
@@ -287,6 +288,14 @@ viona_desb_release(viona_desb_t *dp)
cookie = dp->d_cookie;
dp->d_len = 0;
dp->d_cookie = 0;
+ vmm_drv_page_release_chain(dp->d_pages);
+ dp->d_pages = NULL;
+
+ /*
+ * Ensure all other changes to the desb are visible prior to zeroing its
+ * refcount, signifying its readiness for reuse.
+ */
+ membar_exit();
dp->d_ref = 0;
viona_tx_done(ring, len, cookie);
@@ -484,12 +493,13 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
viona_desb_t *dp = NULL;
mac_client_handle_t link_mch = link->l_mch;
const struct virtio_net_hdr *hdr;
+ vmm_page_t *pages = NULL;
mp_head = mp_tail = NULL;
ASSERT(iov != NULL);
- n = vq_popchain(ring, iov, max_segs, &cookie);
+ n = vq_popchain(ring, iov, max_segs, &cookie, &pages);
if (n == 0) {
VIONA_PROBE1(tx_absent, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, tx_absent);
@@ -670,6 +680,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
if (dp != NULL) {
dp->d_len = len;
+ dp->d_pages = pages;
mutex_enter(&ring->vr_lock);
ring->vr_xfer_outstanding++;
mutex_exit(&ring->vr_lock);
@@ -679,6 +690,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
* be marked as 'used' now, rather than deferring that action
* until after successful packet transmission.
*/
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
@@ -731,5 +743,6 @@ drop_hook:
VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, len,
uint16_t, cookie);
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 501884e0df..3f137e1b4d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -90,8 +90,9 @@ struct channel {
bool ol_sel; /* read MSB from output latch */
bool fr_sel; /* read MSB from free-running timer */
- struct bintime load_bt; /* time when counter was loaded */
- struct bintime callout_bt; /* target time */
+ hrtime_t time_loaded; /* time when counter was loaded */
+ hrtime_t time_target; /* target time */
+ uint64_t total_target;
struct callout callout;
struct vatpit_callout_arg callout_arg;
@@ -101,8 +102,6 @@ struct vatpit {
struct vm *vm;
struct mtx mtx;
- struct bintime freq_bt;
-
struct channel channel[3];
};
@@ -111,16 +110,9 @@ static void pit_timer_start_cntr0(struct vatpit *vatpit);
static uint64_t
vatpit_delta_ticks(struct vatpit *vatpit, struct channel *c)
{
- struct bintime delta;
- uint64_t result;
-
- binuptime(&delta);
- bintime_sub(&delta, &c->load_bt);
+ const hrtime_t delta = gethrtime() - c->time_loaded;
- result = delta.sec * PIT_8254_FREQ;
- result += delta.frac / vatpit->freq_bt.frac;
-
- return (result);
+ return (hrt_freq_count(delta, PIT_8254_FREQ));
}
static int
@@ -183,32 +175,32 @@ done:
static void
pit_timer_start_cntr0(struct vatpit *vatpit)
{
- struct channel *c;
- struct bintime now, delta;
- sbintime_t precision;
+ struct channel *c = &vatpit->channel[0];
- c = &vatpit->channel[0];
- if (c->initial != 0) {
- delta.sec = 0;
- delta.frac = vatpit->freq_bt.frac * c->initial;
- bintime_add(&c->callout_bt, &delta);
- precision = bttosbt(delta) >> tc_precexp;
+ if (c->initial == 0) {
+ return;
+ }
- /*
- * Reset 'callout_bt' if the time that the callout
- * was supposed to fire is more than 'c->initial'
- * ticks in the past.
- */
- binuptime(&now);
- if (BINTIME_CMP(&c->callout_bt, <, &now)) {
- c->callout_bt = now;
- bintime_add(&c->callout_bt, &delta);
- }
+ c->total_target += c->initial;
+ c->time_target = c->time_loaded +
+ hrt_freq_interval(PIT_8254_FREQ, c->total_target);
- callout_reset_sbt(&c->callout, bttosbt(c->callout_bt),
- precision, vatpit_callout_handler, &c->callout_arg,
- C_ABSOLUTE);
+ /*
+ * If we are more than 'c->initial' ticks behind, reset the timer base
+ * to fire at the next 'c->initial' boundary (lag = now - time_target).
+ */
+ hrtime_t now = gethrtime();
+ if (c->time_target < now) {
+ const uint64_t ticks_behind =
+ hrt_freq_count(now - c->time_target, PIT_8254_FREQ);
+
+ c->total_target += roundup(ticks_behind, c->initial);
+ c->time_target = c->time_loaded +
+ hrt_freq_interval(PIT_8254_FREQ, c->total_target);
}
+
+ callout_reset_hrtime(&c->callout, c->time_target,
+ vatpit_callout_handler, &c->callout_arg, C_ABSOLUTE);
}
static uint16_t
@@ -223,15 +215,14 @@ pit_update_counter(struct vatpit *vatpit, struct channel *c, bool latch)
if (c->initial == 0) {
/*
- * This is possibly an o/s bug - reading the value of
- * the timer without having set up the initial value.
+ * This is possibly an OS bug - reading the value of the timer
+ * without having set up the initial value.
*
- * The original user-space version of this code set
- * the timer to 100hz in this condition; do the same
- * here.
+ * The original user-space version of this code set the timer to
+ * 100hz in this condition; do the same here.
*/
c->initial = TIMER_DIV(PIT_8254_FREQ, 100);
- binuptime(&c->load_bt);
+ c->time_loaded = gethrtime();
c->reg_status &= ~TIMER_STS_NULLCNT;
}
@@ -419,10 +410,11 @@ vatpit_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *eax)
c->reg_status &= ~TIMER_STS_NULLCNT;
c->fr_sel = false;
c->initial = c->reg_cr[0] | (uint16_t)c->reg_cr[1] << 8;
- binuptime(&c->load_bt);
+ c->time_loaded = gethrtime();
/* Start an interval timer for channel 0 */
if (port == TIMER_CNTR0) {
- c->callout_bt = c->load_bt;
+ c->time_target = c->time_loaded;
+ c->total_target = 0;
pit_timer_start_cntr0(vatpit);
}
if (c->initial == 0)
@@ -465,8 +457,6 @@ vatpit_init(struct vm *vm)
mtx_init(&vatpit->mtx, "vatpit lock", NULL, MTX_SPIN);
- FREQ2BT(PIT_8254_FREQ, &vatpit->freq_bt);
-
for (i = 0; i < 3; i++) {
callout_init(&vatpit->channel[i].callout, 1);
arg = &vatpit->channel[i].callout_arg;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
index 14418ff5fa..deb1417b71 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vhpet.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
@@ -76,32 +76,33 @@ struct vhpet_callout_arg {
int timer_num;
};
+struct vhpet_timer {
+ uint64_t cap_config; /* Configuration */
+ uint64_t msireg; /* FSB interrupt routing */
+ uint32_t compval; /* Comparator */
+ uint32_t comprate;
+ struct callout callout;
+ hrtime_t callout_expire; /* time when counter==compval */
+ struct vhpet_callout_arg arg;
+};
+
struct vhpet {
struct vm *vm;
struct mtx mtx;
- sbintime_t freq_sbt;
uint64_t config; /* Configuration */
uint64_t isr; /* Interrupt Status */
- uint32_t countbase; /* HPET counter base value */
- sbintime_t countbase_sbt; /* uptime corresponding to base value */
-
- struct {
- uint64_t cap_config; /* Configuration */
- uint64_t msireg; /* FSB interrupt routing */
- uint32_t compval; /* Comparator */
- uint32_t comprate;
- struct callout callout;
- sbintime_t callout_sbt; /* time when counter==compval */
- struct vhpet_callout_arg arg;
- } timer[VHPET_NUM_TIMERS];
+ uint32_t base_count; /* HPET counter base value */
+ hrtime_t base_time; /* uptime corresponding to base value */
+
+ struct vhpet_timer timer[VHPET_NUM_TIMERS];
};
#define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx))
#define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx))
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
- sbintime_t now);
+ hrtime_t now);
static uint64_t
vhpet_capabilities(void)
@@ -151,27 +152,22 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
}
static uint32_t
-vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
+vhpet_counter(struct vhpet *vhpet, hrtime_t *nowptr)
{
- uint32_t val;
- sbintime_t now, delta;
+ const hrtime_t now = gethrtime();
+ uint32_t val = vhpet->base_count;
- val = vhpet->countbase;
if (vhpet_counter_enabled(vhpet)) {
- now = sbinuptime();
- delta = now - vhpet->countbase_sbt;
- KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
- "%lx to %lx", vhpet->countbase_sbt, now));
- val += delta / vhpet->freq_sbt;
- if (nowptr != NULL)
- *nowptr = now;
+ const hrtime_t delta = now - vhpet->base_time;
+
+ ASSERT3S(delta, >=, 0);
+ val += hrt_freq_count(delta, HPET_FREQ);
} else {
- /*
- * The sbinuptime corresponding to the 'countbase' is
- * meaningless when the counter is disabled. Make sure
- * that the caller doesn't want to use it.
- */
- KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
+ /* Value of the counter is meaningless when it is disabled */
+ }
+
+ if (nowptr != NULL) {
+ *nowptr = now;
}
return (val);
}
@@ -284,7 +280,7 @@ vhpet_handler(void *a)
{
int n;
uint32_t counter;
- sbintime_t now;
+ hrtime_t now;
struct vhpet *vhpet;
struct callout *callout;
struct vhpet_callout_arg *arg;
@@ -317,7 +313,7 @@ done:
}
static void
-vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
+vhpet_stop_timer(struct vhpet *vhpet, int n, hrtime_t now)
{
VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
@@ -330,7 +326,7 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
* in the guest. This is especially bad in one-shot mode because
* the next interrupt has to wait for the counter to wrap around.
*/
- if (vhpet->timer[n].callout_sbt < now) {
+ if (vhpet->timer[n].callout_expire < now) {
VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
"stopping timer", n);
vhpet_timer_interrupt(vhpet, n);
@@ -338,11 +334,11 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
}
static void
-vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
+vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, hrtime_t now)
{
- sbintime_t delta, precision;
+ struct vhpet_timer *timer = &vhpet->timer[n];
- if (vhpet->timer[n].comprate != 0)
+ if (timer->comprate != 0)
vhpet_adjust_compval(vhpet, n, counter);
else {
/*
@@ -353,11 +349,11 @@ vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
*/
}
- delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
- precision = delta >> tc_precexp;
- vhpet->timer[n].callout_sbt = now + delta;
- callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
- precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
+ const hrtime_t delta = hrt_freq_interval(HPET_FREQ,
+ timer->compval - counter);
+ timer->callout_expire = now + delta;
+ callout_reset_hrtime(&timer->callout, timer->callout_expire,
+ vhpet_handler, &timer->arg, C_ABSOLUTE);
}
static void
@@ -365,23 +361,23 @@ vhpet_start_counting(struct vhpet *vhpet)
{
int i;
- vhpet->countbase_sbt = sbinuptime();
+ vhpet->base_time = gethrtime();
for (i = 0; i < VHPET_NUM_TIMERS; i++) {
/*
* Restart the timers based on the value of the main counter
* when it stopped counting.
*/
- vhpet_start_timer(vhpet, i, vhpet->countbase,
- vhpet->countbase_sbt);
+ vhpet_start_timer(vhpet, i, vhpet->base_count,
+ vhpet->base_time);
}
}
static void
-vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
+vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, hrtime_t now)
{
int i;
- vhpet->countbase = counter;
+ vhpet->base_count = counter;
for (i = 0; i < VHPET_NUM_TIMERS; i++)
vhpet_stop_timer(vhpet, i, now);
}
@@ -478,7 +474,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
struct vhpet *vhpet;
uint64_t data, mask, oldval, val64;
uint32_t isr_clear_mask, old_compval, old_comprate, counter;
- sbintime_t now, *nowptr;
+ hrtime_t now;
int i, offset;
vhpet = vm_hpet(vm);
@@ -517,11 +513,10 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
/*
* Get the most recent value of the counter before updating
* the 'config' register. If the HPET is going to be disabled
- * then we need to update 'countbase' with the value right
+ * then we need to update 'base_count' with the value right
* before it is disabled.
*/
- nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
- counter = vhpet_counter(vhpet, nowptr);
+ counter = vhpet_counter(vhpet, &now);
oldval = vhpet->config;
update_register(&vhpet->config, data, mask);
@@ -558,7 +553,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
/* Zero-extend the counter to 64-bits before updating it */
val64 = vhpet_counter(vhpet, NULL);
update_register(&val64, data, mask);
- vhpet->countbase = val64;
+ vhpet->base_count = val64;
if (vhpet_counter_enabled(vhpet))
vhpet_start_counting(vhpet);
goto done;
@@ -710,15 +705,11 @@ vhpet_init(struct vm *vm)
struct vhpet *vhpet;
uint64_t allowed_irqs;
struct vhpet_callout_arg *arg;
- struct bintime bt;
vhpet = malloc(sizeof (struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
vhpet->vm = vm;
mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
- FREQ2BT(HPET_FREQ, &bt);
- vhpet->freq_sbt = bttosbt(bt);
-
pincount = vioapic_pincount(vm);
if (pincount >= 32)
allowed_irqs = 0xff000000; /* irqs 24-31 */
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index 55f491b664..e88438da0d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$");
/*
* APIC timer frequency:
* - arbitrary but chosen to be in the ballpark of contemporary hardware.
- * - power-of-two to avoid loss of precision when converted to a bintime.
+ * - power-of-two to avoid loss of precision when calculating times
*/
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
@@ -215,7 +215,6 @@ vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
- struct bintime bt_now, bt_rem;
struct LAPIC *lapic;
uint32_t ccr;
@@ -228,12 +227,11 @@ vlapic_get_ccr(struct vlapic *vlapic)
* If the timer is scheduled to expire in the future then
* compute the value of 'ccr' based on the remaining time.
*/
- binuptime(&bt_now);
- if (BINTIME_CMP(&vlapic->timer_fire_bt, >, &bt_now)) {
- bt_rem = vlapic->timer_fire_bt;
- bintime_sub(&bt_rem, &bt_now);
- ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
- ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+
+ const hrtime_t now = gethrtime();
+ if (vlapic->timer_fire_when > now) {
+ ccr += hrt_freq_count(vlapic->timer_fire_when - now,
+ vlapic->timer_cur_freq);
}
}
KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, "
@@ -263,9 +261,9 @@ vlapic_dcr_write_handler(struct vlapic *vlapic)
* XXX changes to the frequency divider will not take effect until
* the timer is reloaded.
*/
- FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
- vlapic->timer_period_bt = vlapic->timer_freq_bt;
- bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+ vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor;
+ vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
+ lapic->icr_timer);
VLAPIC_TIMER_UNLOCK(vlapic);
}
@@ -729,20 +727,16 @@ vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
}
static void
-vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t)
+vlapic_callout_reset(struct vlapic *vlapic)
{
- callout_reset_sbt(&vlapic->callout, t, 0,
- vlapic_callout_handler, vlapic, 0);
+ callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when,
+ vlapic_callout_handler, vlapic, C_ABSOLUTE);
}
static void
vlapic_callout_handler(void *arg)
{
- struct vlapic *vlapic;
- struct bintime bt, btnow;
- sbintime_t rem_sbt;
-
- vlapic = arg;
+ struct vlapic *vlapic = arg;
VLAPIC_TIMER_LOCK(vlapic);
if (callout_pending(&vlapic->callout)) /* callout was reset */
@@ -756,42 +750,25 @@ vlapic_callout_handler(void *arg)
vlapic_fire_timer(vlapic);
if (vlapic_periodic_timer(vlapic)) {
- binuptime(&btnow);
-
- KASSERT(BINTIME_CMP(&btnow, >=, &vlapic->timer_fire_bt),
- ("vlapic callout at %lx.%lx, expected at %lx.%lx",
- btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
- vlapic->timer_fire_bt.frac));
-
/*
* Compute the delta between when the timer was supposed to
- * fire and the present time.
+ * fire and the present time. We can depend on the fact that
+ * cyclics (which underlie these callouts) will never be called
+ * early.
*/
- bt = btnow;
- bintime_sub(&bt, &vlapic->timer_fire_bt);
-
- rem_sbt = bttosbt(vlapic->timer_period_bt);
- if (BINTIME_CMP(&bt, <, &vlapic->timer_period_bt)) {
+ const hrtime_t now = gethrtime();
+ const hrtime_t delta = now - vlapic->timer_fire_when;
+ if (delta >= vlapic->timer_period) {
/*
- * Adjust the time until the next countdown downward
- * to account for the lost time.
+ * If we are so behind that we have missed an entire
+ * timer period, reset the time base rather than
+ * attempting to catch up.
*/
- rem_sbt -= bttosbt(bt);
+ vlapic->timer_fire_when = now + vlapic->timer_period;
} else {
- /*
- * If the delta is greater than the timer period then
- * just reset our time base instead of trying to catch
- * up.
- */
- vlapic->timer_fire_bt = btnow;
- VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
- "usecs, period is %lu usecs - resetting time base",
- bttosbt(bt) / SBT_1US,
- bttosbt(vlapic->timer_period_bt) / SBT_1US);
+ vlapic->timer_fire_when += vlapic->timer_period;
}
-
- bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
- vlapic_callout_reset(vlapic, rem_sbt);
+ vlapic_callout_reset(vlapic);
}
done:
VLAPIC_TIMER_UNLOCK(vlapic);
@@ -800,27 +777,18 @@ done:
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
- struct LAPIC *lapic;
- sbintime_t sbt;
- uint32_t icr_timer;
+ struct LAPIC *lapic = vlapic->apic_page;
VLAPIC_TIMER_LOCK(vlapic);
-
- lapic = vlapic->apic_page;
- icr_timer = lapic->icr_timer;
-
- vlapic->timer_period_bt = vlapic->timer_freq_bt;
- bintime_mul(&vlapic->timer_period_bt, icr_timer);
-
- if (icr_timer != 0) {
- binuptime(&vlapic->timer_fire_bt);
- bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
-
- sbt = bttosbt(vlapic->timer_period_bt);
- vlapic_callout_reset(vlapic, sbt);
- } else
+ vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq,
+ lapic->icr_timer);
+ if (vlapic->timer_period != 0) {
+ vlapic->timer_fire_when = gethrtime() + vlapic->timer_period;
+ vlapic_callout_reset(vlapic);
+ } else {
+ vlapic->timer_fire_when = 0;
callout_stop(&vlapic->callout);
-
+ }
VLAPIC_TIMER_UNLOCK(vlapic);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
index 8d739bcfcc..7f07665874 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h
@@ -170,9 +170,10 @@ struct vlapic {
uint32_t esr_pending;
struct callout callout; /* vlapic timer */
- struct bintime timer_fire_bt; /* callout expiry time */
- struct bintime timer_freq_bt; /* timer frequency */
- struct bintime timer_period_bt; /* timer period */
+ hrtime_t timer_fire_when;
+ hrtime_t timer_period;
+ uint32_t timer_cur_freq;
+
struct mtx timer_mtx;
uint64_t msr_apicbase;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 2644ee61d6..9a7d7d4253 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -64,9 +64,7 @@ struct vpmtmr {
struct vm *vm;
void *io_cookie;
uint16_t io_port;
- sbintime_t freq_sbt;
- sbintime_t baseuptime;
- uint32_t baseval;
+ hrtime_t base_time;
};
static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer");
@@ -75,15 +73,10 @@ struct vpmtmr *
vpmtmr_init(struct vm *vm)
{
struct vpmtmr *vpmtmr;
- struct bintime bt;
vpmtmr = malloc(sizeof (struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO);
vpmtmr->vm = vm;
- vpmtmr->baseuptime = sbinuptime();
- vpmtmr->baseval = 0;
-
- FREQ2BT(PMTMR_FREQ, &bt);
- vpmtmr->freq_sbt = bttosbt(bt);
+ vpmtmr->base_time = gethrtime();
return (vpmtmr);
}
@@ -149,20 +142,18 @@ int
vpmtmr_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val)
{
struct vpmtmr *vpmtmr = arg;
- sbintime_t now, delta;
if (!in || bytes != 4)
return (-1);
/*
- * No locking needed because 'baseuptime' and 'baseval' are
- * written only during initialization.
+ * No locking needed because 'base_time' is written only during
+ * initialization.
*/
- now = sbinuptime();
- delta = now - vpmtmr->baseuptime;
- KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: "
- "%lx to %lx", vpmtmr->baseuptime, now));
- *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt;
+ const hrtime_t delta = gethrtime() - vpmtmr->base_time;
+ ASSERT3S(delta, >=, 0);
+
+ *val = hrt_freq_count(delta, PMTMR_FREQ);
return (0);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index a67e82d156..2b3a5b5432 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -80,7 +80,7 @@ struct vrtc {
struct mtx mtx;
struct callout callout;
uint_t addr; /* RTC register to read or write */
- sbintime_t base_uptime;
+ hrtime_t base_uptime;
time_t base_rtctime;
struct rtcdev rtcdev;
};
@@ -147,23 +147,24 @@ update_enabled(struct vrtc *vrtc)
}
static time_t
-vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime)
+vrtc_curtime(struct vrtc *vrtc, hrtime_t *basetime)
{
- sbintime_t now, delta;
- time_t t, secs;
+ time_t t = vrtc->base_rtctime;
+ hrtime_t base = vrtc->base_uptime;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
- t = vrtc->base_rtctime;
- *basetime = vrtc->base_uptime;
if (update_enabled(vrtc)) {
- now = sbinuptime();
- delta = now - vrtc->base_uptime;
- KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
- "%lx to %lx", vrtc->base_uptime, now));
- secs = delta / SBT_1S;
- t += secs;
- *basetime += secs * SBT_1S;
+ const hrtime_t delta = gethrtime() - vrtc->base_uptime;
+ const time_t sec = delta / NANOSEC;
+
+ ASSERT3S(delta, >=, 0);
+
+ t += sec;
+ base += sec * NANOSEC;
+ }
+ if (basetime != NULL) {
+ *basetime = base;
}
return (t);
}
@@ -389,7 +390,7 @@ fail:
}
static int
-vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
+vrtc_time_update(struct vrtc *vrtc, time_t newtime, hrtime_t newbase)
{
struct rtcdev *rtc;
time_t oldtime;
@@ -463,28 +464,26 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
return (0);
}
-static sbintime_t
+static hrtime_t
vrtc_freq(struct vrtc *vrtc)
{
- int ratesel;
-
- static sbintime_t pf[16] = {
+ const hrtime_t rate_freq[16] = {
0,
- SBT_1S / 256,
- SBT_1S / 128,
- SBT_1S / 8192,
- SBT_1S / 4096,
- SBT_1S / 2048,
- SBT_1S / 1024,
- SBT_1S / 512,
- SBT_1S / 256,
- SBT_1S / 128,
- SBT_1S / 64,
- SBT_1S / 32,
- SBT_1S / 16,
- SBT_1S / 8,
- SBT_1S / 4,
- SBT_1S / 2,
+ NANOSEC / 256,
+ NANOSEC / 128,
+ NANOSEC / 8192,
+ NANOSEC / 4096,
+ NANOSEC / 2048,
+ NANOSEC / 1024,
+ NANOSEC / 512,
+ NANOSEC / 256,
+ NANOSEC / 128,
+ NANOSEC / 64,
+ NANOSEC / 32,
+ NANOSEC / 16,
+ NANOSEC / 8,
+ NANOSEC / 4,
+ NANOSEC / 2,
};
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -497,32 +496,32 @@ vrtc_freq(struct vrtc *vrtc)
* the update interrupt.
*/
if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
- ratesel = vrtc->rtcdev.reg_a & 0xf;
- return (pf[ratesel]);
+ uint_t sel = vrtc->rtcdev.reg_a & 0xf;
+ return (rate_freq[sel]);
} else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
- return (SBT_1S);
+ return (NANOSEC);
} else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
- return (SBT_1S);
+ return (NANOSEC);
} else {
return (0);
}
}
static void
-vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
+vrtc_callout_reset(struct vrtc *vrtc, hrtime_t freqhrt)
{
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
- if (freqsbt == 0) {
+ if (freqhrt == 0) {
if (callout_active(&vrtc->callout)) {
VM_CTR0(vrtc->vm, "RTC callout stopped");
callout_stop(&vrtc->callout);
}
return;
}
- VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
- callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
+ VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", NANOSEC / freqhrt);
+ callout_reset_hrtime(&vrtc->callout, freqhrt, vrtc_callout_handler,
vrtc, 0);
}
@@ -530,7 +529,6 @@ static void
vrtc_callout_handler(void *arg)
{
struct vrtc *vrtc = arg;
- sbintime_t freqsbt, basetime;
time_t rtctime;
int error;
@@ -552,28 +550,30 @@ vrtc_callout_handler(void *arg)
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
+ hrtime_t basetime;
+
rtctime = vrtc_curtime(vrtc, &basetime);
error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("%s: vrtc_time_update error %d",
__func__, error));
}
- freqsbt = vrtc_freq(vrtc);
- KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
- vrtc_callout_reset(vrtc, freqsbt);
+ hrtime_t freqhrt = vrtc_freq(vrtc);
+ KASSERT(freqhrt != 0, ("%s: vrtc frequency cannot be zero", __func__));
+ vrtc_callout_reset(vrtc, freqhrt);
done:
VRTC_UNLOCK(vrtc);
}
static __inline void
-vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
+vrtc_callout_check(struct vrtc *vrtc, hrtime_t freqhrt)
{
int active;
active = callout_active(&vrtc->callout) ? 1 : 0;
- KASSERT((freq == 0 && !active) || (freq != 0 && active),
- ("vrtc callout %s with frequency %lx",
- active ? "active" : "inactive", freq));
+ KASSERT((freqhrt == 0 && !active) || (freqhrt != 0 && active),
+ ("vrtc callout %s with period %llx",
+ active ? "active" : "inactive", freqhrt));
}
static void
@@ -618,7 +618,7 @@ static int
vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
- sbintime_t oldfreq, newfreq, basetime;
+ hrtime_t oldfreq, newfreq;
time_t curtime, rtctime;
int error;
uint8_t oldval, changed;
@@ -637,9 +637,11 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
}
if (changed & RTCSB_HALT) {
+ hrtime_t basetime;
+
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
- basetime = sbinuptime();
+ basetime = gethrtime();
if (rtctime == VRTC_BROKEN_TIME) {
if (rtc_flag_broken_time)
return (-1);
@@ -693,7 +695,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
static void
vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
{
- sbintime_t oldfreq, newfreq;
+ hrtime_t oldfreq, newfreq;
uint8_t oldval, changed;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -712,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
* maintain the illusion that the RTC date/time was frozen
* while the dividers were disabled.
*/
- vrtc->base_uptime = sbinuptime();
+ vrtc->base_uptime = gethrtime();
VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else {
@@ -744,7 +746,7 @@ vrtc_set_time(struct vm *vm, time_t secs)
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- error = vrtc_time_update(vrtc, secs, sbinuptime());
+ error = vrtc_time_update(vrtc, secs, gethrtime());
VRTC_UNLOCK(vrtc);
if (error) {
@@ -761,12 +763,11 @@ time_t
vrtc_get_time(struct vm *vm)
{
struct vrtc *vrtc;
- sbintime_t basetime;
time_t t;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- t = vrtc_curtime(vrtc, &basetime);
+ t = vrtc_curtime(vrtc, NULL);
VRTC_UNLOCK(vrtc);
return (t);
@@ -803,7 +804,6 @@ int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
struct vrtc *vrtc;
- sbintime_t basetime;
time_t curtime;
uint8_t *ptr;
@@ -820,7 +820,7 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
* Update RTC date/time fields if necessary.
*/
if (offset < 10 || offset == RTC_CENTURY) {
- curtime = vrtc_curtime(vrtc, &basetime);
+ curtime = vrtc_curtime(vrtc, NULL);
secs_to_rtc(curtime, vrtc, 0);
}
@@ -858,7 +858,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
{
struct vrtc *vrtc = arg;
struct rtcdev *rtc = &vrtc->rtcdev;
- sbintime_t basetime;
+ hrtime_t basetime;
time_t curtime;
int error, offset;
@@ -936,7 +936,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
*/
if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
curtime = rtc_to_secs(vrtc);
- error = vrtc_time_update(vrtc, curtime, sbinuptime());
+ error = vrtc_time_update(vrtc, curtime, gethrtime());
KASSERT(!error, ("vrtc_time_update error %d", error));
if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
error = -1;
@@ -990,7 +990,7 @@ vrtc_init(struct vm *vm)
VRTC_LOCK(vrtc);
vrtc->base_rtctime = VRTC_BROKEN_TIME;
- vrtc_time_update(vrtc, curtime, sbinuptime());
+ vrtc_time_update(vrtc, curtime, gethrtime());
secs_to_rtc(curtime, vrtc, 0);
VRTC_UNLOCK(vrtc);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.mapfile b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
index 0af4f090b4..fb1c9366de 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.mapfile
+++ b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
@@ -12,6 +12,7 @@
#
# Copyright 2019 Joyent, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -44,7 +45,13 @@ SYMBOL_VERSION ILLUMOSprivate {
vmm_drv_lease_sign;
vmm_drv_lease_break;
vmm_drv_lease_expired;
- vmm_drv_gpa2kva;
+ vmm_drv_page_hold;
+ vmm_drv_page_release;
+ vmm_drv_page_release_chain;
+ vmm_drv_page_readable;
+ vmm_drv_page_writable;
+ vmm_drv_page_chain;
+ vmm_drv_page_next;
vmm_drv_ioport_hook;
vmm_drv_ioport_unhook;
vmm_drv_msi;
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 24dd851831..d66778c55a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -1969,33 +1969,49 @@ vmm_drv_lease_expired(vmm_lease_t *lease)
return (lease->vml_expired);
}
-void *
-vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
+vmm_page_t *
+vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot)
{
- vm_page_t *vmp;
- void *res = NULL;
-
ASSERT(lease != NULL);
- ASSERT3U(sz, ==, PAGESIZE);
ASSERT0(gpa & PAGEOFFSET);
- vmp = vmc_hold(lease->vml_vmclient, gpa, PROT_READ | PROT_WRITE);
- /*
- * Break the rules for now and just extract the pointer. This is
- * nominally safe, since holding a driver lease on the VM read-locks it.
- *
- * A pointer which would otherwise be at risk of being a use-after-free
- * vector is made safe since actions such as vmspace_unmap() require
- * acquisition of the VM write-lock, (causing all driver leases to be
- * broken) allowing the consumers to cease their access prior to
- * modification of the vmspace.
- */
- if (vmp != NULL) {
- res = vmp_get_writable(vmp);
- vmp_release(vmp);
- }
+ return ((vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot));
+}
- return (res);
+/*
+ * vmm_drv wrapper around vmp_release(): the opaque vmm_page_t handle is cast
+ * back to the bhyve-internal vm_page_t and handed to vmp_release().
+ * Presumably the handle is one previously returned by vmm_drv_page_hold();
+ * confirm against callers.
+ */
+void
+vmm_drv_page_release(vmm_page_t *vmmp)
+{
+	vm_page_t *vmp = (vm_page_t *)vmmp;
+
+	vmp_release(vmp);
+}
+
+/*
+ * vmm_drv wrapper around vmp_release_chain(): the opaque vmm_page_t handle is
+ * cast back to the bhyve-internal vm_page_t and handed to
+ * vmp_release_chain().
+ */
+void
+vmm_drv_page_release_chain(vmm_page_t *vmmp)
+{
+	vm_page_t *head = (vm_page_t *)vmmp;
+
+	vmp_release_chain(head);
+}
+
+/*
+ * vmm_drv wrapper around vmp_get_readable(): the opaque page handle is passed
+ * to the bhyve-internal routine as a `const vm_page_t *`, and the pointer it
+ * yields is returned unchanged (as `const void *`).
+ */
+const void *
+vmm_drv_page_readable(const vmm_page_t *vmmp)
+{
+	const vm_page_t *vmp = (const vm_page_t *)vmmp;
+
+	return (vmp_get_readable(vmp));
+}
+
+/*
+ * vmm_drv wrapper around vmp_get_writable(): the opaque page handle is passed
+ * to the bhyve-internal routine as a `const vm_page_t *`, and the pointer it
+ * yields is returned unchanged (as `void *`).
+ */
+void *
+vmm_drv_page_writable(const vmm_page_t *vmmp)
+{
+	const vm_page_t *vmp = (const vm_page_t *)vmmp;
+
+	return (vmp_get_writable(vmp));
+}
+
+/*
+ * vmm_drv wrapper around vmp_chain(): both opaque handles are cast back to
+ * the bhyve-internal vm_page_t and passed along in the same order
+ * (vmmp first, to_chain second).
+ */
+void
+vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain)
+{
+	vm_page_t *vmp = (vm_page_t *)vmmp;
+	vm_page_t *vmp_to_chain = (vm_page_t *)to_chain;
+
+	vmp_chain(vmp, vmp_to_chain);
+}
+
+/*
+ * vmm_drv wrapper around vmp_next(): the opaque handle is cast to the
+ * bhyve-internal vm_page_t (dropping the const qualifier of the parameter),
+ * and whatever vmp_next() returns is cast back to the opaque vmm_page_t.
+ */
+vmm_page_t *
+vmm_drv_page_next(const vmm_page_t *vmmp)
+{
+	vm_page_t *vmp = (vm_page_t *)vmmp;
+
+	return ((vmm_page_t *)vmp_next(vmp));
}
int
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index cdcebc71d4..04bdb6a3d6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -349,19 +349,10 @@ vmm_glue_callout_init(struct callout *c, int mpsafe)
mutex_exit(&cpu_lock);
}
-static __inline hrtime_t
-sbttohrtime(sbintime_t sbt)
-{
- return (((sbt >> 32) * NANOSEC) +
- (((uint64_t)NANOSEC * (uint32_t)sbt) >> 32));
-}
-
-int
-vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
- void (*func)(void *), void *arg, int flags)
+void
+callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *),
+ void *arg, int flags)
{
- hrtime_t target = sbttohrtime(sbt);
-
ASSERT(c->c_cyc_id != CYCLIC_NONE);
if ((flags & C_ABSOLUTE) == 0) {
@@ -372,8 +363,6 @@ vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr,
c->c_arg = arg;
c->c_target = target;
cyclic_reprogram(c->c_cyc_id, target);
-
- return (0);
}
int
@@ -409,6 +398,34 @@ vmm_glue_callout_localize(struct callout *c)
mutex_exit(&cpu_lock);
}
+/*
+ * Convert an interval (in ns) into the number of "ticks" of a clock running
+ * at `freq` (in Hz) which cover that interval.
+ *
+ * The interval is split into whole seconds and leftover nanoseconds, and each
+ * part is converted separately; the fractional tick left over from the
+ * nanosecond part is discarded by the integer division.  The interval is
+ * asserted to be non-negative.
+ */
+uint64_t
+hrt_freq_count(hrtime_t interval, uint32_t freq)
+{
+	ASSERT3S(interval, >=, 0);
+	const uint64_t whole_secs = interval / NANOSEC;
+	const uint64_t rem_nsecs = interval % NANOSEC;
+	const uint64_t whole_ticks = whole_secs * freq;
+	const uint64_t rem_ticks = (rem_nsecs * freq) / NANOSEC;
+
+	return (whole_ticks + rem_ticks);
+}
+
+/*
+ * Convert a number of "ticks" of a clock running at `freq` (in Hz) into the
+ * interval (in ns) which those ticks cover.
+ *
+ * The count is split into whole seconds' worth of ticks and the leftover
+ * ticks, and each part is converted separately; any sub-nanosecond remainder
+ * is discarded by the integer division.  `freq` is used as a divisor and is
+ * not checked here, so it must be non-zero.
+ */
+hrtime_t
+hrt_freq_interval(uint32_t freq, uint64_t count)
+{
+	const uint64_t whole_secs = count / freq;
+	const uint64_t rem_ticks = count % freq;
+	const uint64_t whole_nsecs = NANOSEC * whole_secs;
+	const uint64_t rem_nsecs = (rem_ticks * NANOSEC) / freq;
+
+	return (whole_nsecs + rem_nsecs);
+}
+
+
uint_t cpu_high; /* Highest arg to CPUID */
uint_t cpu_exthigh; /* Highest arg to extended CPUID */
uint_t cpu_id; /* Stepping ID */
diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h
index 1f2b3d9254..0b7f622e53 100644
--- a/usr/src/uts/i86pc/sys/vmm_drv.h
+++ b/usr/src/uts/i86pc/sys/vmm_drv.h
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMM_DRV_H_
@@ -30,6 +30,14 @@ struct vmm_lease;
typedef struct vmm_lease vmm_lease_t;
/*
+ * This is effectively a synonym for the bhyve-internal 'struct vm_page' type.
+ * Use of `vmm_page_t *` instead allows us to keep those implementation details
+ * hidden from vmm_drv consumers.
+ */
+struct vmm_page;
+typedef struct vmm_page vmm_page_t;
+
+/*
* Because of tangled headers, this definition mirrors its ioport_handler_t
* counterpart in vmm_kernel.h.
*/
@@ -44,7 +52,14 @@ extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *),
extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
-extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t);
+extern vmm_page_t *vmm_drv_page_hold(vmm_lease_t *, uintptr_t, int);
+extern void vmm_drv_page_release(vmm_page_t *);
+extern void vmm_drv_page_release_chain(vmm_page_t *);
+extern const void *vmm_drv_page_readable(const vmm_page_t *);
+extern void *vmm_drv_page_writable(const vmm_page_t *);
+extern void vmm_drv_page_chain(vmm_page_t *, vmm_page_t *);
+extern vmm_page_t *vmm_drv_page_next(const vmm_page_t *);
+
extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
extern int vmm_drv_ioport_hook(vmm_hold_t *, uint16_t, vmm_drv_iop_cb_t, void *,