diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2022-03-02 12:44:01 -0600 |
---|---|---|
committer | Patrick Mooney <pmooney@oxide.computer> | 2022-03-30 15:50:42 +0000 |
commit | 5103e761e384621c5728a6d1f4b0bfdc1be233a4 (patch) | |
tree | 4902e230f96adbc93d7aab7b2408f748a774e7c8 | |
parent | db9aa506ce275f82ee72f31fc2e6e3c53d1212b7 (diff) | |
download | illumos-joyent-5103e761e384621c5728a6d1f4b0bfdc1be233a4.tar.gz |
14569 bhyve should consolidate on hrtime
14486 bhyve needs instruction emul tests
Reviewed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Luqman Aden <luqman@oxide.computer>
Approved by: Dan McDonald <danmcd@joyent.com>
33 files changed, 2348 insertions, 412 deletions
diff --git a/usr/src/compat/bhyve/sys/callout.h b/usr/src/compat/bhyve/sys/callout.h index 11823e6321..4156c2d4c6 100644 --- a/usr/src/compat/bhyve/sys/callout.h +++ b/usr/src/compat/bhyve/sys/callout.h @@ -45,8 +45,6 @@ struct callout { #define callout_pending(c) ((c)->c_target > (c)->c_fired) void vmm_glue_callout_init(struct callout *c, int mpsafe); -int vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, - sbintime_t pr, void (*func)(void *), void *arg, int flags); int vmm_glue_callout_stop(struct callout *c); int vmm_glue_callout_drain(struct callout *c); @@ -71,12 +69,10 @@ callout_drain(struct callout *c) return (vmm_glue_callout_drain(c)); } -static __inline int -callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr, - void (*func)(void *), void *arg, int flags) -{ - return (vmm_glue_callout_reset_sbt(c, sbt, pr, func, arg, flags)); -} +void callout_reset_hrtime(struct callout *c, hrtime_t target, + void (*func)(void *), void *arg, int flags); +uint64_t hrt_freq_count(hrtime_t interval, uint32_t freq); +hrtime_t hrt_freq_interval(uint32_t freq, uint64_t count); #endif /* _COMPAT_FREEBSD_SYS_CALLOUT_H_ */ diff --git a/usr/src/compat/bhyve/sys/time.h b/usr/src/compat/bhyve/sys/time.h deleted file mode 100644 index 48bdcc304e..0000000000 --- a/usr/src/compat/bhyve/sys/time.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2013 Pluribus Networks Inc. 
- * Copyright 2020 Oxide Computer Company - */ - -#ifndef _COMPAT_FREEBSD_SYS_TIME_H_ -#define _COMPAT_FREEBSD_SYS_TIME_H_ - -#include_next <sys/time.h> - -#define tc_precexp 0 - -struct bintime { - ulong_t sec; /* seconds */ - uint64_t frac; /* 64 bit fraction of a second */ -}; - -#define BT2FREQ(bt) \ - (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ - ((bt)->frac >> 1)) - -#define FREQ2BT(freq, bt) \ -{ \ - (bt)->sec = 0; \ - (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \ -} - -static __inline void -binuptime(struct bintime *bt) -{ - hrtime_t now = gethrtime(); - - bt->sec = now / 1000000000; - /* 18446744073 = int(2^64 / 1000000000) = 1ns in 64-bit fractions */ - bt->frac = (now % 1000000000) * (uint64_t)18446744073LL; -} - -#define bintime_cmp(a, b, cmp) \ - (((a)->sec == (b)->sec) ? \ - ((a)->frac cmp (b)->frac) : \ - ((a)->sec cmp (b)->sec)) - -/* - * The bintime_cmp() macro is problematic for a couple reasons: - * 1. Bearing a lowercase name suggests it is a function rather than a macro. - * 2. Placing the comparison operator as the last argument runs afoul of our - * cstyle rules, unlike cases such as VERIFY3*(). - * - * To remedy these issues in illumos bhyve, we provide a slightly modified - * version which addresses both problems. 
- */ -#define BINTIME_CMP(a, cmp, b) bintime_cmp((a), (b), cmp) - -#define SBT_1S ((sbintime_t)1 << 32) -#define SBT_1M (SBT_1S * 60) -#define SBT_1MS (SBT_1S / 1000) -#define SBT_1US (SBT_1S / 1000000) -#define SBT_1NS (SBT_1S / 1000000000) -#define SBT_MAX 0x7fffffffffffffffLL - - -static __inline void -bintime_add(struct bintime *bt, const struct bintime *bt2) -{ - uint64_t u; - - u = bt->frac; - bt->frac += bt2->frac; - if (u > bt->frac) - bt->sec++; - bt->sec += bt2->sec; -} - -static __inline void -bintime_sub(struct bintime *bt, const struct bintime *bt2) -{ - uint64_t u; - - u = bt->frac; - bt->frac -= bt2->frac; - if (u < bt->frac) - bt->sec--; - bt->sec -= bt2->sec; -} - -static __inline void -bintime_mul(struct bintime *bt, u_int x) -{ - uint64_t p1, p2; - - p1 = (bt->frac & 0xffffffffull) * x; - p2 = (bt->frac >> 32) * x + (p1 >> 32); - bt->sec *= x; - bt->sec += (p2 >> 32); - bt->frac = (p2 << 32) | (p1 & 0xffffffffull); -} - -static __inline sbintime_t -bttosbt(const struct bintime bt) -{ - return (((sbintime_t)bt.sec << 32) + (bt.frac >> 32)); -} - -static __inline struct bintime -sbttobt(sbintime_t _sbt) -{ - struct bintime _bt; - - _bt.sec = _sbt >> 32; - _bt.frac = _sbt << 32; - return (_bt); -} - -static __inline sbintime_t -sbinuptime(void) -{ - hrtime_t hrt = gethrtime(); - uint64_t sec = hrt / NANOSEC; - uint64_t nsec = hrt % NANOSEC; - - return (((sbintime_t)sec << 32) + - (nsec * (((uint64_t)1 << 63) / 500000000) >> 32)); -} - -#endif /* _COMPAT_FREEBSD_SYS_TIME_H_ */ diff --git a/usr/src/compat/bhyve/sys/types.h b/usr/src/compat/bhyve/sys/types.h index 63731da42e..baa4cad157 100644 --- a/usr/src/compat/bhyve/sys/types.h +++ b/usr/src/compat/bhyve/sys/types.h @@ -29,11 +29,6 @@ typedef __uint64_t u_int64_t; typedef __register_t register_t; #endif -#ifndef __SBINTIME_T_DEFINED -#define __SBINTIME_T_DEFINED -typedef __int64_t sbintime_t; -#endif - #ifndef __VM_MEMATTR_T_DEFINED #define __VM_MEMATTR_T_DEFINED typedef char vm_memattr_t; diff 
--git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m index d0d31a0190..4222b1d09b 100644 --- a/usr/src/pkg/manifests/system-bhyve-tests.p5m +++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m @@ -30,6 +30,12 @@ file path=opt/bhyve-tests/bin/bhyvetest mode=0555 dir path=opt/bhyve-tests/runfiles file path=opt/bhyve-tests/runfiles/default.run mode=0444 dir path=opt/bhyve-tests/tests +dir path=opt/bhyve-tests/tests/kdev +file path=opt/bhyve-tests/tests/kdev/vatpit_freq mode=0555 +file path=opt/bhyve-tests/tests/kdev/vhpet_freq mode=0555 +file path=opt/bhyve-tests/tests/kdev/vlapic_freq mode=0555 +file path=opt/bhyve-tests/tests/kdev/vlapic_freq_periodic mode=0555 +file path=opt/bhyve-tests/tests/kdev/vpmtmr_freq mode=0555 dir path=opt/bhyve-tests/tests/mevent file path=opt/bhyve-tests/tests/mevent/lists_delete mode=0555 file path=opt/bhyve-tests/tests/mevent/read_disable mode=0555 diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index 0aae1bcb46..c37bee591d 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -28,6 +28,16 @@ tests = [ 'mem_seg_map' ] +[/opt/bhyve-tests/tests/kdev] +user = root +tests = [ + 'vatpit_freq', + 'vhpet_freq', + 'vlapic_freq', + 'vlapic_freq_periodic', + 'vpmtmr_freq' + ] + # Tests of userspace mevent system, built from cmd/bhyve [/opt/bhyve-tests/tests/mevent] tests = ['lists_delete', 'read_disable', 'read_pause', 'read_requeue', diff --git a/usr/src/test/bhyve-tests/tests/Makefile b/usr/src/test/bhyve-tests/tests/Makefile index bf18b300ca..8d528c3f80 100644 --- a/usr/src/test/bhyve-tests/tests/Makefile +++ b/usr/src/test/bhyve-tests/tests/Makefile @@ -15,6 +15,6 @@ .PARALLEL: $(SUBDIRS) -SUBDIRS = vmm +SUBDIRS = kdev vmm include $(SRC)/test/Makefile.com diff --git a/usr/src/test/bhyve-tests/tests/Makefile.in_guest b/usr/src/test/bhyve-tests/tests/Makefile.in_guest new file mode 
100644 index 0000000000..7ce2b0d531 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/Makefile.in_guest @@ -0,0 +1,48 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# Copyright 2022 Oxide Computer Company + +PAYLOAD_CLEANFILES = payload_start.o \ + $(PAYLOADS:%=payload_%) \ + $(PAYLOADS:%=payload_%.o) \ + $(PAYLOADS:%=pobj_%.o) \ + $(PAYLOADS:%=pobj_%.s) + +$(PAYLOADS:%=payload_%.o) := AS_CPPFLAGS += -I../common + +payload_%: payload_start.o payload_%.o + $(LD) -dn -e _start -M ../common/Mapfile.payload -o $@ $^ + +pobj_%.s: payload_% + @echo " .data" > $@ + @echo " .globl payload_data" >> $@ + @echo "payload_data:" >> $@ + $(ELFEXTRACT) $^ >> $@ + @echo " .size payload_data, [.-payload_data]" >> $@ + @echo " .align 4" >> $@ + @echo " .globl payload_size" >> $@ + @echo " .size payload_size, 4" >> $@ + @echo "payload_size:" >> $@ + @echo " .data" >> $@ + @echo " .long [.-payload_data]" >> $@ + +pobj_%.o: pobj_%.s + $(COMPILE.s) -o $@ $^ + $(POST_PROCESS) + +%.o: ../common/%.s + $(COMPILE.s) -o $@ $^ + $(POST_PROCESS) + +%.o: ../common/%.c + $(COMPILE.c) -o $@ $^ + $(POST_PROCESS) diff --git a/usr/src/test/bhyve-tests/tests/common/Mapfile.payload b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload new file mode 100644 index 0000000000..ef69288c56 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/common/Mapfile.payload @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright 2022 Oxide Computer Company +# + +$mapfile_version 2 + +# The .eh_frame data was ending up in front of the .text segment, causing issues +# when the guest attempted to start its payload +NULL_SEGMENT discard { + ASSIGN_SECTION eh_discard { + IS_NAME = .eh_frame; + }; +}; + +LOAD_SEGMENT payload { + FLAGS = READ WRITE EXECUTE; + VADDR = 0x800000; + PADDR = 0x800000; + ALIGN = 0x1000; + + # Make sure that payload_start.s`_start is the first thing in .text segment, + # since when we "boot", that is where we want to begin running. + ASSIGN_SECTION is_start_text { + IS_NAME = .text; + FILE_BASENAME = payload_start.o; + }; + ASSIGN_SECTION is_text { + IS_NAME = .text; + }; + ASSIGN_SECTION is_alloc { + FLAGS = ALLOC; + }; + IS_ORDER = is_start_text is_text is_alloc; +}; diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c new file mode 100644 index 0000000000..31bebc0665 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c @@ -0,0 +1,532 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <assert.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/segments.h> +#include <sys/psw.h> +#include <sys/controlregs.h> +#include <sys/sysmacros.h> +#include <sys/varargs.h> +#include <sys/debug.h> + +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <vmmapi.h> + +#include "in_guest.h" + + +#define PT_VALID 0x01 +#define PT_WRITABLE 0x02 +#define PT_WRITETHRU 0x08 +#define PT_NOCACHE 0x10 +#define PT_PAGESIZE 0x80 + +#define SEG_ACCESS_TYPE_MASK 0x1f +#define SEG_ACCESS_DPL_MASK 0x60 +#define SEG_ACCESS_P (1 << 7) +#define SEG_ACCESS_AVL (1 << 12) +#define SEG_ACCESS_L (1 << 13) +#define SEG_ACCESS_D (1 << 14) +#define SEG_ACCESS_G (1 << 15) +#define SEG_ACCESS_UNUSABLE (1 << 16) + + +/* + * Keep the test name and VM context around so the consumer is not required to + * pass either of them to us for subsequent test-related operations after the + * initialization has been performed. + * + * The test code is not designed to be reentrant at this point. 
+ */ +static struct vmctx *test_vmctx = NULL; +static const char *test_name = NULL; + +static void +populate_identity_table(struct vmctx *ctx) +{ + uint64_t gpa, pte_loc; + + /* Set up 2MiB PTEs for everything up through 0xffffffff */ + for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M; + gpa < 0x100000000; + pte_loc += PAGE_SIZE) { + uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE); + + for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) { + *ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE; + /* Make traditional MMIO space uncachable */ + if (gpa >= 0xc0000000) { + *ptep |= PT_WRITETHRU | PT_NOCACHE; + } + } + } + assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G); + + uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE); + pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE; + pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE; + pdep[2] = + (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; + pdep[3] = + (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; + + pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE); + pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE; +} + +static void +populate_desc_tables(struct vmctx *ctx) +{ + +} + +static void +test_cleanup(bool is_failure) +{ + if (test_vmctx != NULL) { + bool keep_on_fail = false; + + const char *keep_var; + if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) { + if (strlen(keep_var) != 0 && + strcmp(keep_var, "0") != 0) { + keep_on_fail = true; + } + } + + /* + * Destroy the instance unless the test failed and it was + * requested that we keep it around. 
+ */ + if (!is_failure || !keep_on_fail) { + vm_destroy(test_vmctx); + } + test_vmctx = NULL; + } +} + +static void fail_finish(void) +{ + assert(test_name != NULL); + (void) printf("FAIL %s\n", test_name); + + test_cleanup(true); + exit(EXIT_FAILURE); +} + +void +test_fail_errno(int err, const char *msg) +{ + const char *err_str = strerror(err); + + (void) fprintf(stderr, "%s: %s\n", msg, err_str); + fail_finish(); +} + +void +test_fail_msg(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + (void) vfprintf(stderr, fmt, ap); + + fail_finish(); +} + +void +test_fail_vmexit(const struct vm_exit *vexit) +{ + const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n"; + + switch (vexit->exitcode) { + case VM_EXITCODE_INOUT: + (void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip); + (void) fprintf(stderr, + "\teax: %08x\n" + "\tport: %04x\n" + "\tbytes: %u\n" + "\tflags: %x\n", + vexit->u.inout.eax, + vexit->u.inout.port, + vexit->u.inout.bytes, + vexit->u.inout.flags); + break; + case VM_EXITCODE_MMIO: + (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); + (void) fprintf(stderr, + "\tbytes: %u\n" + "\ttype: %s\n" + "\tgpa: %x\n" + "\tdata: %016x\n", + vexit->u.mmio.bytes, + vexit->u.mmio.read == 0 ? "write" : "read", + vexit->u.mmio.gpa, + vexit->u.mmio.data); + break; + case VM_EXITCODE_VMX: + (void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip); + (void) fprintf(stderr, + "\tstatus: %x\n" + "\treason: %x\n" + "\tqualification: %lx\n" + "\tinst_type: %x\n" + "\tinst_error: %x\n", + vexit->u.vmx.status, + vexit->u.vmx.exit_reason, + vexit->u.vmx.exit_qualification, + vexit->u.vmx.inst_type, + vexit->u.vmx.inst_error); + break; + case VM_EXITCODE_SVM: + (void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip); + break; + case VM_EXITCODE_INST_EMUL: + (void) fprintf(stderr, hdr_fmt, "instruction emulation", + vexit->rip); + const uint_t len = vexit->u.inst_emul.num_valid > 0 ? 
+ vexit->u.inst_emul.num_valid : 15; + (void) fprintf(stderr, "\tinstruction bytes: ["); + for (uint_t i = 0; i < len; i++) { + (void) fprintf(stderr, "%s%02x", + i == 0 ? "" : ", ", + vexit->u.inst_emul.inst[i]); + } + (void) fprintf(stderr, "]\n"); + break; + case VM_EXITCODE_SUSPENDED: + (void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip); + switch (vexit->u.suspended.how) { + case VM_SUSPEND_RESET: + (void) fprintf(stderr, "\thow: reset"); + break; + case VM_SUSPEND_POWEROFF: + (void) fprintf(stderr, "\thow: poweroff"); + break; + case VM_SUSPEND_HALT: + (void) fprintf(stderr, "\thow: halt"); + break; + case VM_SUSPEND_TRIPLEFAULT: + (void) fprintf(stderr, "\thow: triple-fault"); + break; + default: + (void) fprintf(stderr, "\thow: unknown - %d", + vexit->u.suspended.how); + break; + } + break; + default: + (void) fprintf(stderr, "Unexpected code %d exit:\n" + "\t%%rip: %lx\n", vexit->exitcode, vexit->rip); + break; + } + fail_finish(); +} + +void +test_pass(void) +{ + assert(test_name != NULL); + (void) printf("PASS %s\n", test_name); + test_cleanup(false); + exit(EXIT_SUCCESS); +} + +static int +load_payload(struct vmctx *ctx) +{ + extern uint8_t payload_data; + extern uint32_t payload_size; + + const uint32_t len = payload_size; + const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD); + + if (len > cap) { + test_fail_msg("Payload size %u > capacity %u\n", len, cap); + } + + const size_t map_len = P2ROUNDUP(len, PAGE_SIZE); + void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len); + bcopy(&payload_data, outp, len); + + return (0); +} + +struct vmctx * +test_initialize(const char *tname) +{ + char vm_name[VM_MAX_NAMELEN]; + int err; + struct vmctx *ctx; + + assert(test_vmctx == NULL); + assert(test_name == NULL); + + test_name = strdup(tname); + (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", + test_name, getpid()); + + err = vm_create(vm_name, 0); + if (err != 0) { + test_fail_errno(err, "Could not create VM"); + } + + ctx = 
vm_open(vm_name); + if (ctx == NULL) { + test_fail_errno(errno, "Could not open VM"); + } + test_vmctx = ctx; + + err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL); + if (err != 0) { + test_fail_errno(err, "Could not set up VM memory"); + } + + populate_identity_table(ctx); + populate_desc_tables(ctx); + + err = load_payload(ctx); + if (err != 0) { + test_fail_errno(err, "Could not load payload"); + } + + return (ctx); +} + +int +test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp) +{ + int err; + + err = vm_activate_cpu(ctx, vcpu); + if (err != 0 && err != EBUSY) { + return (err); + } + + /* + * Granularity bit important here for VMX validity: + * "If any bit in the limit field in the range 31:20 is 1, G must be 1" + */ + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX, + SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G); + if (err != 0) { + return (err); + } + + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX, + SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L | + SEG_ACCESS_D | SEG_ACCESS_G); + if (err != 0) { + return (err); + } + + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX, + SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G); + if (err != 0) { + return (err); + } + + /* + * While SVM will happilly run with an otherwise unusable TR, VMX + * includes it among its entry checks. 
+ */ + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff, + SDT_SYSTSSBSY | SEG_ACCESS_P); + if (err != 0) { + return (err); + } + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0); + if (err != 0) { + return (err); + } + err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0); + if (err != 0) { + return (err); + } + + /* Mark unused segments as explicitly unusable (for VMX) */ + const int unsable_segs[] = { + VM_REG_GUEST_ES, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS, + VM_REG_GUEST_LDTR, + }; + for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) { + err = vm_set_desc(ctx, vcpu, unsable_segs[i], 0, 0, + SEG_ACCESS_UNUSABLE); + if (err != 0) { + return (err); + } + } + + /* Place CPU directly in long mode */ + const int regnums[] = { + VM_REG_GUEST_CR0, + VM_REG_GUEST_CR3, + VM_REG_GUEST_CR4, + VM_REG_GUEST_EFER, + VM_REG_GUEST_RFLAGS, + VM_REG_GUEST_RIP, + VM_REG_GUEST_RSP, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_TR, + }; + uint64_t regvals[] = { + CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS | + CR0_MP | CR0_PE, + MEM_LOC_PAGE_TABLE_512G, + CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE, + AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE, + /* start with interrupts disabled */ + PS_MB1, + rip, + rsp, + (GDT_KCODE << 3), + (GDT_KDATA << 3), + (GDT_KDATA << 3), + (GDT_KTSS << 3), + }; + assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals)); + + err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums, + regvals); + if (err != 0) { + return (err); + } + + err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0); + if (err != 0) { + return (err); + } + + return (0); +} + +static enum vm_exit_kind +which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) +{ + const struct vm_inout *inout = &vexit->u.inout; + + switch (vexit->exitcode) { + case VM_EXITCODE_BOGUS: + case VM_EXITCODE_REQIDLE: + bzero(ventry, sizeof (ventry)); + return (VEK_REENTR); + case 
VM_EXITCODE_INOUT: + if (inout->port == IOP_TEST_RESULT && + (inout->flags & INOUT_IN) == 0) { + if (inout->eax == 0) { + return (VEK_TEST_PASS); + } else { + return (VEK_TEST_FAIL); + } + } + break; + default: + break; + } + return (VEK_UNHANDLED); +} + +enum vm_exit_kind +test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry, + struct vm_exit *vexit) +{ + int err; + + err = vm_run(ctx, vcpu, ventry, vexit); + if (err != 0) { + test_fail_errno(err, "Failure during vcpu entry"); + } + + return (which_exit_kind(ventry, vexit)); +} + +void +ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry, + uint32_t data) +{ + VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT); + + ventry->cmd = VEC_FULFILL_INOUT; + bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout)); + if ((ventry->u.inout.flags & INOUT_IN) != 0) { + ventry->u.inout.eax = data; + } +} + +void +ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry, + uint64_t data) +{ + VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO); + + ventry->cmd = VEC_FULFILL_MMIO; + bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio)); + if (ventry->u.mmio.read != 0) { + ventry->u.mmio.data = data; + } +} + +bool +vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port, + uint_t len, uint32_t *valp) +{ + if (vexit->exitcode != VM_EXITCODE_INOUT) { + return (false); + } + + const uint_t flag = is_read ? 
INOUT_IN : 0; + if (vexit->u.inout.port != port || + vexit->u.inout.bytes != len || + (vexit->u.inout.flags & INOUT_IN) != flag) { + return (false); + } + + if (!is_read && valp != NULL) { + *valp = vexit->u.inout.eax; + } + return (true); +} + +bool +vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr, + uint_t len, uint64_t *valp) +{ + if (vexit->exitcode != VM_EXITCODE_MMIO) { + return (false); + } + + if (vexit->u.mmio.gpa != addr || + vexit->u.mmio.bytes != len || + (vexit->u.mmio.read != 0) != is_read) { + return (false); + } + + if (!is_read && valp != NULL) { + *valp = vexit->u.mmio.data; + } + return (true); +} diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.h b/usr/src/test/bhyve-tests/tests/common/in_guest.h new file mode 100644 index 0000000000..8d6e04a6da --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.h @@ -0,0 +1,51 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#ifndef _IN_GUEST_H_ +#define _IN_GUEST_H_ + +#include "payload_common.h" + +struct vmctx *test_initialize(const char *); +void test_fail_errno(int err, const char *msg); +void test_fail_msg(const char *fmt, ...); +void test_fail_vmexit(const struct vm_exit *vexit); +void test_pass(void); + +int test_setup_vcpu(struct vmctx *, int, uint64_t, uint64_t); + +enum vm_exit_kind { + /* Otherwise empty vmexit which should result in immediate re-entry */ + VEK_REENTR, + /* Write to IOP_TEST_RESULT port with success value (0) */ + VEK_TEST_PASS, + /* Write to IOP_TEST_RESULT port with failure value (non-zero) */ + VEK_TEST_FAIL, + /* Test specific logic must handle exit data */ + VEK_UNHANDLED, +}; + +enum vm_exit_kind test_run_vcpu(struct vmctx *, int, struct vm_entry *, + struct vm_exit *); + +void ventry_fulfill_inout(const struct vm_exit *, struct vm_entry *, uint32_t); +void ventry_fulfill_mmio(const struct vm_exit *, struct vm_entry *, uint64_t); + +bool vexit_match_inout(const struct vm_exit *, bool, uint16_t, uint_t, + uint32_t *); +bool vexit_match_mmio(const struct vm_exit *, bool, uint64_t, uint_t, + uint64_t *); + +#endif /* _IN_GUEST_H_ */ diff --git a/usr/src/test/bhyve-tests/tests/common/payload_common.h b/usr/src/test/bhyve-tests/tests/common/payload_common.h new file mode 100644 index 0000000000..895364f18e --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/common/payload_common.h @@ -0,0 +1,44 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#ifndef _PAYLOAD_COMMON_H_ +#define _PAYLOAD_COMMON_H_ + +#define MEM_TOTAL_SZ (64 * 1024 * 1024) + +/* 2MiB-page entries for identity-mapped table at 2MiB */ +#define MEM_LOC_PAGE_TABLE_2M 0x200000 +#define MEM_LOC_PAGE_TABLE_1G 0x204000 +#define MEM_LOC_PAGE_TABLE_512G 0x205000 +#define MEM_LOC_GDT 0x206000 +#define MEM_LOC_TSS 0x206200 +#define MEM_LOC_IDT 0x207000 +#define MEM_LOC_STACK 0x400000 +#define MEM_LOC_PAYLOAD 0x800000 + +/* IO port set aside for emitting test result */ +#define IOP_TEST_RESULT 0xef00U + +/* IO port set aside for emitting test value */ +#define IOP_TEST_VALUE 0xef10U + +/* IO port set aside for inputting test param(s) */ +#define IOP_TEST_PARAM IOP_TEST_PARAM0 +#define IOP_TEST_PARAM0 0xef20U +#define IOP_TEST_PARAM1 0xef21U +#define IOP_TEST_PARAM2 0xef22U +#define IOP_TEST_PARAM3 0xef23U + +#endif /* _PAYLOAD_COMMON_H_ */ diff --git a/usr/src/test/bhyve-tests/tests/common/payload_start.s b/usr/src/test/bhyve-tests/tests/common/payload_start.s new file mode 100644 index 0000000000..8a57e259de --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/common/payload_start.s @@ -0,0 +1,27 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <sys/asm_linkage.h> + +/* + .text + .globl _start +_start: + jmp start + */ + +ENTRY_NP(_start) + jmp start +SET_SIZE(_start) diff --git a/usr/src/test/bhyve-tests/tests/kdev/Makefile b/usr/src/test/bhyve-tests/tests/kdev/Makefile new file mode 100644 index 0000000000..52f3c2576c --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/Makefile @@ -0,0 +1,77 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# Copyright 2022 Oxide Computer Company + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/cmd/Makefile.cmd.64 +include $(SRC)/test/Makefile.com + +PROG = vpmtmr_freq \ + vhpet_freq \ + vlapic_freq \ + vlapic_freq_periodic \ + vatpit_freq + +PAYLOADS = $(PROG) +include ../Makefile.in_guest + +COMMON_OBJS = in_guest.o + +CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) payload_utils.o +CLOBBERFILES = $(PROG) + +ROOTOPTPKG = $(ROOT)/opt/bhyve-tests +TESTDIR = $(ROOTOPTPKG)/tests/kdev + +CMDS = $(PROG:%=$(TESTDIR)/%) +$(CMDS) := FILEMODE = 0555 + +CSTD= $(CSTD_GNU99) +CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ + -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ + $(CPPFLAGS.master) \ + -I$(SRC)/uts/i86pc/io/vmm \ + -I$(SRC)/uts/i86pc \ + -I../common + +ASFLAGS += -P -D__STDC__ -D_ASM + + +CFLAGS = -m64 +$(PROG) := LDLIBS += -lvmmapi + +all: $(PROG) + +install: all $(CMDS) + +clean: + -$(RM) $(CLEANFILES) +clobber: clean + -$(RM) $(CLOBBERFILES) + +$(CMDS): $(TESTDIR) $(PROG) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +%: %.c pobj_%.o $(COMMON_OBJS) + $(LINK.c) -o $@ $^ $(LDLIBS) + $(POST_PROCESS) + +%: %.o + 
$(LINK.c) -o $@ $^ $(LDLIBS) + $(POST_PROCESS) + +$(PAYLOADS:%=payload_%): payload_utils.o diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h new file mode 100644 index 0000000000..8bd51023df --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h @@ -0,0 +1,28 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#ifndef _PAYLOAD_UTILS_H_ +#define _PAYLOAD_UTILS_H_ + +#include <sys/types.h> + +void outb(uint16_t, uint8_t); +void outw(uint16_t, uint16_t); +void outl(uint16_t, uint32_t); +uint8_t inb(uint16_t); +uint16_t inw(uint16_t); +uint32_t inl(uint16_t); + +#endif /* _PAYLOAD_UTILS_H_ */ diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s new file mode 100644 index 0000000000..8c8e745c17 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s @@ -0,0 +1,55 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <sys/asm_linkage.h>
+
+ENTRY(outb)	/* void outb(uint16_t port, uint8_t val) */
+	movw	%di, %dx
+	movb	%sil, %al
+	outb	(%dx)
+	ret
+SET_SIZE(outb)
+
+ENTRY(outw)	/* void outw(uint16_t port, uint16_t val) */
+	movw	%di, %dx
+	movw	%si, %ax
+	outw	(%dx)
+	ret
+SET_SIZE(outw)
+
+ENTRY(outl)	/* void outl(uint16_t port, uint32_t val) */
+	movw	%di, %dx
+	movl	%esi, %eax
+	outl	(%dx)
+	ret
+SET_SIZE(outl)
+
+ENTRY(inb)	/* uint8_t inb(uint16_t port) */
+	movw	%di, %dx
+	inb	(%dx)
+	ret
+SET_SIZE(inb)
+
+ENTRY(inw)	/* uint16_t inw(uint16_t port) */
+	movw	%di, %dx
+	inw	(%dx)
+	ret
+SET_SIZE(inw)
+
+ENTRY(inl)	/* uint32_t inl(uint16_t port) */
+	movw	%di, %dx
+	inl	(%dx)
+	ret
+SET_SIZE(inl)
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
new file mode 100644
index 0000000000..d899dc449d
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vatpit_freq.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+
+void
+timer0_reset(void)
+{
+	/*
+	 * Configure timer 0 for interrupt-on-terminal-count mode, and prepare
+	 * it to be loaded with the low and then the high byte.
+	 */
+	outb(IOP_ATPIT_CMD, 0x30);
+
+	/* Load timer with max value (0xffff) */
+	outb(IOP_ATPIT_C0, 0xff);
+	outb(IOP_ATPIT_C0, 0xff);
+}
+
+uint16_t
+timer0_read(void)
+{
+	uint16_t val;
+
+	/* Latch timer0 */
+	outb(IOP_ATPIT_CMD, 0x00);
+
+	/* Read low and high bytes */
+	val = inb(IOP_ATPIT_C0);
+	val |= (uint16_t)inb(IOP_ATPIT_C0) << 8;
+
+	return (val);
+}
+
+void
+start(void)
+{
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		uint16_t start, end;
+
+		timer0_reset();
+
+		start = timer0_read();
+		outw(IOP_TEST_VALUE, start);
+
+		do {
+			end = timer0_read();
+			/* counter runs down; wait for enough ticks to pass */
+		} while (end > (start - ATPIT_TARGET_TICKS));
+		outw(IOP_TEST_VALUE, end);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
new file mode 100644
index 0000000000..7f74e72cbb
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vhpet_freq.c
@@ -0,0 +1,60 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define	HPET_OFF_CONFIG		0x10
+#define	HPET_OFF_MAIN_COUNT_LOW	0xf0
+
+#define	HPET_CONFIG_ENABLE	1
+
+/* Store a 32-bit value to the HPET register at byte offset 'reg' */
+static void
+write_hpet(uint_t reg, uint32_t value)
+{
+	volatile uint32_t *ptr = (uint32_t *)(MMIO_HPET_BASE + reg);
+	*ptr = value;
+}
+
+static uint32_t
+read_hpet_main_low(void)
+{
+	volatile uint32_t *ptr =
+	    (uint32_t *)(MMIO_HPET_BASE + HPET_OFF_MAIN_COUNT_LOW);
+	return (*ptr);
+}
+
+/* Enable the HPET, then report counter readings HPET_TARGET_TICKS apart */
+void
+start(void)
+{
+	write_hpet(HPET_OFF_CONFIG, HPET_CONFIG_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		uint32_t start, end;
+
+		start = read_hpet_main_low();
+		outl(IOP_TEST_VALUE, start);
+
+		do {
+			end = read_hpet_main_low();
+			/* unsigned delta stays correct if the counter wraps */
+		} while ((end - start) < HPET_TARGET_TICKS);
+		outl(IOP_TEST_VALUE, end);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
new file mode 100644
index 0000000000..cc4d72a9f7
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq.c
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define	LAPIC_OFF_SVR		0xf0
+#define	LAPIC_OFF_TIMER_ICR	0x380
+#define	LAPIC_OFF_TIMER_CCR	0x390
+#define	LAPIC_OFF_TIMER_DCR	0x3e0
+
+/* Value written to the SVR register by start() */
+#define	LAPIC_SVR_ENABLE	0x100
+
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg);
+	*ptr = value;
+}
+
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg);
+	return (*ptr);
+}
+
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	switch (inp) {
+	case 1:
+		return (0xb);
+	case 2:
+		return (0x0);
+	case 4:
+		return (0x1);
+	case 8:
+		return (0x2);
+	case 16:
+		return (0x3);
+	case 32:
+		return (0x8);
+	case 64:
+		return (0x9);
+	case 128:
+		return (0xa);
+	default:
+		/* report a bogus reading (1) if the divisor is unsupported */
+		outl(IOP_TEST_VALUE, 1);
+		return (0xff);
+	}
+}
+
+/* Report CCR readings LAPIC_TARGET_TICKS apart for each host divisor */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		uint32_t divisor;
+		uint32_t start, end;
+
+		divisor = inl(IOP_TEST_PARAM);
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, 0xffffffff);
+
+		start = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, start);
+
+		uint32_t target = start - LAPIC_TARGET_TICKS;
+		do {
+			end = read_vlapic(LAPIC_OFF_TIMER_CCR);
+			/* CCR counts down; wait for enough ticks to pass */
+		} while (end > target);
+		outl(IOP_TEST_VALUE, end);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
new file mode 100644
index 0000000000..969f708ada
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_freq_periodic.c
@@ -0,0 +1,110 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution
License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+#define	LAPIC_OFF_SVR		0xf0
+#define	LAPIC_OFF_LVT_TIMER	0x320
+#define	LAPIC_OFF_TIMER_ICR	0x380
+#define	LAPIC_OFF_TIMER_CCR	0x390
+#define	LAPIC_OFF_TIMER_DCR	0x3e0
+
+#define	LAPIC_LVT_MASKED	(1 << 16)
+#define	LAPIC_LVT_PERIODIC	(1 << 17)
+
+/* Value written to the SVR register by start() */
+#define	LAPIC_SVR_ENABLE	0x100
+
+static void
+write_vlapic(uint_t reg, uint32_t value)
+{
+	volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg);
+	*ptr = value;
+}
+
+static uint32_t
+read_vlapic(uint_t reg)
+{
+	volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg);
+	return (*ptr);
+}
+
+static uint32_t
+divisor_to_dcr(uint32_t inp)
+{
+	switch (inp) {
+	case 1:
+		return (0xb);
+	case 2:
+		return (0x0);
+	case 4:
+		return (0x1);
+	case 8:
+		return (0x2);
+	case 16:
+		return (0x3);
+	case 32:
+		return (0x8);
+	case 64:
+		return (0x9);
+	case 128:
+		return (0xa);
+	default:
+		/* report a bogus reading (1) if the divisor is unsupported */
+		outl(IOP_TEST_VALUE, 1);
+		return (0xff);
+	}
+}
+
+/* Report CCR readings taken 'loop_count' timer roll-overs apart */
+void
+start(void)
+{
+	write_vlapic(LAPIC_OFF_SVR, LAPIC_SVR_ENABLE);
+
+	/*
+	 * Configure the LAPIC timer for periodic operation, but leave the
+	 * interrupt itself masked.
+	 */
+	write_vlapic(LAPIC_OFF_LVT_TIMER,
+	    LAPIC_LVT_MASKED | LAPIC_LVT_PERIODIC);
+
+	/* loop for as long as the host wants */
+	for (;;) {
+		const uint16_t divisor = inw(IOP_TEST_PARAM0);
+		const uint16_t loop_count = inw(IOP_TEST_PARAM1);
+
+		write_vlapic(LAPIC_OFF_TIMER_DCR, divisor_to_dcr(divisor));
+		write_vlapic(LAPIC_OFF_TIMER_ICR, LAPIC_TARGET_TICKS);
+
+		uint32_t start, end, count = 0;
+		start = read_vlapic(LAPIC_OFF_TIMER_CCR);
+		outl(IOP_TEST_VALUE, start);
+
+		uint32_t prev = start;
+		do {
+			end = read_vlapic(LAPIC_OFF_TIMER_CCR);
+
+			/* a rising CCR means the timer period rolled over */
+			if (end > prev) {
+				count++;
+			}
+			prev = end;
+		} while (count < loop_count);
+		outl(IOP_TEST_VALUE, end);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
new file mode 100644
index 0000000000..d96bb2b8b1
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vpmtmr_freq.c
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include "payload_common.h"
+#include "payload_utils.h"
+#include "test_defs.h"
+
+void
+start(void)
+{
+	/* loop for as long as the host wants, reporting two readings per pass */
+	for (;;) {
+		uint32_t start, end;
+
+		start = inl(IOP_PMTMR);
+		outl(IOP_TEST_VALUE, start);
+
+		do {
+			end = inl(IOP_PMTMR);
+			/* unsigned delta stays correct if the counter wraps */
+		} while ((end - start) < PMTMR_TARGET_TICKS);
+		outl(IOP_TEST_VALUE, end);
+	}
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/test_defs.h b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
new file mode 100644
index 0000000000..acc9553274
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/test_defs.h
@@ -0,0 +1,38 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#ifndef _TEST_DEFS_H_
+#define	_TEST_DEFS_H_
+
+#define	IOP_PMTMR	0x408
+#define	IOP_ATPIT_C0	0x40
+#define	IOP_ATPIT_CMD	0x43
+
+#define	MMIO_HPET_BASE	0xfed00000UL
+#define	MMIO_LAPIC_BASE	0xfee00000UL
+
+#define	PMTMR_FREQ		3579545
+#define	PMTMR_TARGET_TICKS	(PMTMR_FREQ / 10)
+
+#define	HPET_FREQ		(1 << 24)
+#define	HPET_TARGET_TICKS	(HPET_FREQ / 10)
+
+#define	LAPIC_FREQ		(128 * 1024 * 1024)
+#define	LAPIC_TARGET_TICKS	(LAPIC_FREQ / 50)
+
+#define	ATPIT_FREQ		1193182
+#define	ATPIT_TARGET_TICKS	(ATPIT_FREQ / 50)
+
+#endif /* _TEST_DEFS_H_ */
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
new file mode 100644
index 0000000000..1f4051ef6a
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vatpit_freq.c
@@ -0,0 +1,145 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+	hrtime_t	when;	/* host gethrtime() when the value arrived */
+	uint16_t	value;	/* counter value written by the guest */
+} reading_t;
+
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint16_t tick_delta;
+
+	tick_delta = before.value - after.value;
+
+	/* is the number of ticks OK? */
+	if (tick_delta < ATPIT_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, ATPIT_TARGET_TICKS);
+	} else if ((tick_delta - ATPIT_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    ATPIT_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	hrtime_t time_target = (tick_delta * NANOSEC) / ATPIT_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %lu ppm)\n",
+	    tick_delta, time_delta, ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	/*
+	 * Since the PIT is slower to read back (requiring 3 emulated reads),
+	 * operate with a looser ticks margin.
+	 */
+	const uint_t margin_ticks = MAX(1, ATPIT_TARGET_TICKS / 2500);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 2, &v)) {
+			readings[nread].when = gethrtime();
+			readings[nread].value = v;
+
+			ventry_fulfill_inout(&vexit, &ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1],
+			    margin_ticks, margin_ppm)) {
+				test_pass();
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(&vexit);
+		}
+
+	} while (true);
+
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
new file mode 100644
index 0000000000..238596c739
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vhpet_freq.c
@@ -0,0 +1,146 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+	hrtime_t	when;	/* host gethrtime() when the value arrived */
+	uint32_t	value;	/* counter value written by the guest */
+} reading_t;
+
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint32_t tick_delta;
+
+	if (after.value < before.value) {
+		/* handle rollover, counting the tick that wraps to zero */
+		tick_delta = (UINT32_MAX - before.value) + after.value + 1;
+	} else {
+		tick_delta = after.value - before.value;
+	}
+
+	/* is the number of ticks OK? */
+	if (tick_delta < HPET_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, HPET_TARGET_TICKS);
+	} else if ((tick_delta - HPET_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    HPET_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	hrtime_t time_target = (tick_delta * NANOSEC) / HPET_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %lu ppm)\n",
+	    tick_delta, time_delta, ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	const uint_t margin_ticks = MAX(1, HPET_TARGET_TICKS / 10000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			readings[nread].when = gethrtime();
+			readings[nread].value = v;
+
+			ventry_fulfill_inout(&vexit, &ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1],
+			    margin_ticks, margin_ppm)) {
+				test_pass();
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(&vexit);
+		}
+
+	} while (true);
+
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
new file mode 100644
index 0000000000..cf462b1acc
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq.c
@@ -0,0 +1,169 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+	hrtime_t	when;	/* host gethrtime() when the value arrived */
+	uint32_t	value;	/* counter value written by the guest */
+} reading_t;
+
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint32_t tick_delta;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/* timer is counting down, so act appropriately */
+	if (after.value > before.value) {
+		/* handle rollover, counting the tick that wraps around */
+		tick_delta = (UINT32_MAX - after.value) + before.value + 1;
+	} else {
+		tick_delta = before.value - after.value;
+	}
+
+	/* is the number of ticks OK? */
+	if (tick_delta < LAPIC_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, LAPIC_TARGET_TICKS);
+	} else if ((tick_delta - LAPIC_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    LAPIC_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	hrtime_t time_target = (tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %lu ppm)\n",
+	    tick_delta, time_delta, ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/* Run the guest with 'divisor' until a reading pair passes or 3 tries fail */
+static void
+test_for_divisor(struct vmctx *ctx, uint_t divisor, struct vm_entry *ventry,
+    struct vm_exit *vexit)
+{
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, ventry, vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(vexit);
+		}
+
+		/* input the divisor */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM, 4, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, divisor);
+			continue;
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			readings[nread].when = gethrtime();
+			readings[nread].value = v;
+			ventry_fulfill_inout(vexit, ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1], divisor,
+			    margin_ticks, margin_ppm)) {
+				(void) printf("good result\n");
+				return;
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(vexit);
+		}
+	} while (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+
+	test_for_divisor(ctx, 2, &ventry, &vexit);
+	test_for_divisor(ctx, 4, &ventry, &vexit);
+	test_for_divisor(ctx, 16, &ventry, &vexit);
+	test_pass();
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
new file mode 100644
index 0000000000..d5c6d8184c
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_freq_periodic.c
@@ -0,0 +1,178 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+	hrtime_t	when;	/* host gethrtime() when the value arrived */
+	uint32_t	value;	/* counter value written by the guest */
+} reading_t;
+
+static bool
+check_reading(reading_t before, reading_t after, uint_t divisor, uint_t loops,
+    uint_t tick_margin, uint_t ppm_margin)
+{
+	const hrtime_t time_delta = after.when - before.when;
+
+	/*
+	 * The ticks margin should shrink proportionally to how coarsely the
+	 * timer clock is being divided.
+	 */
+	tick_margin /= divisor;
+
+	/*
+	 * The 'before' measurement includes the ticks which occurred between
+	 * programming the timer and taking the first reading. The 'after'
+	 * measurement includes the number of loops (each consisting of the
+	 * target tick count) plus however many ticks had transpired since the
+	 * most recent roll-over.
+	 */
+	const uint32_t tick_delta =
+	    loops * LAPIC_TARGET_TICKS + before.value - after.value;
+	const uint32_t tick_target = loops * LAPIC_TARGET_TICKS;
+
+	/* is the number of ticks OK? (reject a miss in either direction) */
+	if (tick_delta < tick_target) {
+		if ((tick_target - tick_delta) > tick_margin) {
+			(void) printf("%u ticks outside margin %u\n",
+			    tick_delta, tick_target - tick_margin);
+			return (false);
+		}
+	} else if ((tick_delta - tick_target) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    tick_target + tick_margin);
+		return (false);
+	}
+
+	hrtime_t time_target = (tick_delta * NANOSEC * divisor) / LAPIC_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+	(void) printf("params: tick_margin=%u ppm_margin=%u divisor=%u\n",
+	    tick_margin, ppm_margin, divisor);
+	(void) printf("%u ticks in %lld ns (error %lu ppm)\n",
+	    tick_delta, time_delta, ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+/* Run the guest with 'divisor'/'loops' until a pass or 3 failed tries */
+static void
+run_test(struct vmctx *ctx, uint_t divisor, uint_t loops,
+    struct vm_entry *ventry, struct vm_exit *vexit)
+{
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	const uint_t margin_ticks = MAX(1, LAPIC_TARGET_TICKS / 5000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, ventry, vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(vexit);
+		}
+
+		/* input the divisor */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM0, 2, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, divisor);
+			continue;
+		}
+		/* input the loop count */
+		if (vexit_match_inout(vexit, true, IOP_TEST_PARAM1, 2, NULL)) {
+			ventry_fulfill_inout(vexit, ventry, loops);
+			continue;
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			readings[nread].when = gethrtime();
+			readings[nread].value = v;
+			ventry_fulfill_inout(vexit, ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1], divisor,
+			    loops, margin_ticks, margin_ppm)) {
+				(void) printf("good result\n");
+				return;
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(vexit);
+		}
+	} while (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+
+	run_test(ctx, 4, 3, &ventry, &vexit);
+	run_test(ctx, 2, 4, &ventry, &vexit);
+	test_pass();
+	return (0);
+}
diff --git a/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
new file mode 100644
index 0000000000..60541bf898
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/kdev/vpmtmr_freq.c
@@ -0,0 +1,151 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <libgen.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "in_guest.h"
+#include "test_defs.h"
+
+typedef struct reading {
+	hrtime_t	when;	/* host gethrtime() when the value arrived */
+	uint32_t	value;	/* counter value written by the guest */
+} reading_t;
+
+static bool
+check_reading(reading_t before, reading_t after, uint_t tick_margin,
+    uint_t ppm_margin)
+{
+	hrtime_t time_delta = after.when - before.when;
+	uint32_t tick_delta;
+
+	if (after.value < before.value) {
+		/* handle rollover, counting the tick that wraps to zero */
+		tick_delta = (UINT32_MAX - before.value) + after.value + 1;
+	} else {
+		tick_delta = after.value - before.value;
+	}
+
+	/* is the number of ticks OK? */
+	if (tick_delta < PMTMR_TARGET_TICKS) {
+		test_fail_msg("inadequate passage of ticks %u < %u\n",
+		    tick_delta, PMTMR_TARGET_TICKS);
+	} else if ((tick_delta - PMTMR_TARGET_TICKS) > tick_margin) {
+		(void) printf("%u ticks outside margin %u\n", tick_delta,
+		    PMTMR_TARGET_TICKS + tick_margin);
+		return (false);
+	}
+
+	hrtime_t time_target = (tick_delta * NANOSEC) / PMTMR_FREQ;
+
+	hrtime_t offset;
+	if (time_delta < time_target) {
+		offset = time_target - time_delta;
+	} else {
+		offset = time_delta - time_target;
+	}
+	uint64_t ppm = (offset * 1000000) / time_target;
+	(void) printf("margin limits: ticks=%u ppm=%u\n",
+	    tick_margin, ppm_margin);
+	(void) printf("%u ticks in %lld ns (error %lu ppm)\n",
+	    tick_delta, time_delta, ppm);
+	if (ppm > ppm_margin) {
+		(void) printf("UNACCEPTABLE!\n");
+		return (false);
+	}
+	return (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+	const char *test_suite_name = basename(argv[0]);
+	struct vmctx *ctx = NULL;
+	int err;
+
+	ctx = test_initialize(test_suite_name);
+
+	err = vm_pmtmr_set_location(ctx, IOP_PMTMR);
+	if (err != 0) {
+		test_fail_errno(err, "Could not place pmtmr");
+	}
+
+	err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
+	if (err != 0) {
+		test_fail_errno(err, "Could not initialize vcpu0");
+	}
+
+	struct vm_entry ventry = { 0 };
+	struct vm_exit vexit = { 0 };
+	reading_t readings[2];
+	uint_t nread = 0;
+	uint_t nrepeat = 0;
+
+	const uint_t margin_ticks = MAX(1, PMTMR_TARGET_TICKS / 10000);
+	const uint_t margin_ppm = 400;
+
+	do {
+		const enum vm_exit_kind kind =
+		    test_run_vcpu(ctx, 0, &ventry, &vexit);
+		if (kind == VEK_REENTR) {
+			continue;
+		} else if (kind != VEK_UNHANDLED) {
+			test_fail_vmexit(&vexit);
+		}
+
+		uint32_t v;
+		if (vexit_match_inout(&vexit, false, IOP_TEST_VALUE, 4, &v)) {
+			readings[nread].when = gethrtime();
+			readings[nread].value = v;
+
+			ventry_fulfill_inout(&vexit, &ventry, 0);
+
+			nread++;
+			if (nread != 2) {
+				continue;
+			}
+
+			if (check_reading(readings[0], readings[1],
+			    margin_ticks, margin_ppm)) {
+				test_pass();
+			} else {
+				nrepeat++;
+				if (nrepeat < 3) {
+					nread = 0;
+					(void) printf("retry %u\n", nrepeat);
+					continue;
+				}
+				test_fail_msg("bad result after %u retries\n",
+				    nrepeat);
+			}
+		} else {
+			test_fail_vmexit(&vexit);
+		}
+
+	} while (true);
+
+	return (0);
+}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 501884e0df..3f137e1b4d 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -90,8 +90,9 @@ struct channel {
 	bool		ol_sel;		/* read MSB from output latch */
 	bool		fr_sel;		/* read MSB from free-running timer */
 
-	struct bintime	load_bt;	/* time when counter was loaded */
-	struct bintime	callout_bt;	/* target time */
+	hrtime_t	time_loaded;	/* time when counter was loaded */
+	hrtime_t	time_target;	/* target time */
+	uint64_t	total_target;	/* target, in ticks since load */
 
 	struct callout	callout;
 	struct vatpit_callout_arg callout_arg;
@@ -101,8 +102,6 @@ struct vatpit {
 	struct vm	*vm;
 	struct mtx	mtx;
 
-	struct bintime	freq_bt;
-
 	struct channel	channel[3];
 };
 
@@ -111,16 +110,9 @@ static void
pit_timer_start_cntr0(struct vatpit *vatpit); static uint64_t vatpit_delta_ticks(struct vatpit *vatpit, struct channel *c) { - struct bintime delta; - uint64_t result; - - binuptime(&delta); - bintime_sub(&delta, &c->load_bt); + const hrtime_t delta = gethrtime() - c->time_loaded; - result = delta.sec * PIT_8254_FREQ; - result += delta.frac / vatpit->freq_bt.frac; - - return (result); + return (hrt_freq_count(delta, PIT_8254_FREQ)); } static int @@ -183,32 +175,32 @@ done: static void pit_timer_start_cntr0(struct vatpit *vatpit) { - struct channel *c; - struct bintime now, delta; - sbintime_t precision; + struct channel *c = &vatpit->channel[0]; - c = &vatpit->channel[0]; - if (c->initial != 0) { - delta.sec = 0; - delta.frac = vatpit->freq_bt.frac * c->initial; - bintime_add(&c->callout_bt, &delta); - precision = bttosbt(delta) >> tc_precexp; + if (c->initial == 0) { + return; + } - /* - * Reset 'callout_bt' if the time that the callout - * was supposed to fire is more than 'c->initial' - * ticks in the past. - */ - binuptime(&now); - if (BINTIME_CMP(&c->callout_bt, <, &now)) { - c->callout_bt = now; - bintime_add(&c->callout_bt, &delta); - } + c->total_target += c->initial; + c->time_target = c->time_loaded + + hrt_freq_interval(PIT_8254_FREQ, c->total_target); - callout_reset_sbt(&c->callout, bttosbt(c->callout_bt), - precision, vatpit_callout_handler, &c->callout_arg, - C_ABSOLUTE); + /* + * If we are more than 'c->initial' ticks behind, reset the timer base + * to fire at the next 'c->initial' interval boundary. 
+ */ + hrtime_t now = gethrtime(); + if (c->time_target < now) { + const uint64_t ticks_behind = + hrt_freq_count(now - c->time_target, PIT_8254_FREQ); + + c->total_target += roundup(ticks_behind, c->initial); + c->time_target = c->time_loaded + + hrt_freq_interval(PIT_8254_FREQ, c->total_target); } + + callout_reset_hrtime(&c->callout, c->time_target, + vatpit_callout_handler, &c->callout_arg, C_ABSOLUTE); } static uint16_t @@ -223,15 +215,14 @@ pit_update_counter(struct vatpit *vatpit, struct channel *c, bool latch) if (c->initial == 0) { /* - * This is possibly an o/s bug - reading the value of - * the timer without having set up the initial value. + * This is possibly an OS bug - reading the value of the timer + * without having set up the initial value. * - * The original user-space version of this code set - * the timer to 100hz in this condition; do the same - * here. + * The original user-space version of this code set the timer to + * 100hz in this condition; do the same here. */ c->initial = TIMER_DIV(PIT_8254_FREQ, 100); - binuptime(&c->load_bt); + c->time_loaded = gethrtime(); c->reg_status &= ~TIMER_STS_NULLCNT; } @@ -419,10 +410,11 @@ vatpit_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *eax) c->reg_status &= ~TIMER_STS_NULLCNT; c->fr_sel = false; c->initial = c->reg_cr[0] | (uint16_t)c->reg_cr[1] << 8; - binuptime(&c->load_bt); + c->time_loaded = gethrtime(); /* Start an interval timer for channel 0 */ if (port == TIMER_CNTR0) { - c->callout_bt = c->load_bt; + c->time_target = c->time_loaded; + c->total_target = 0; pit_timer_start_cntr0(vatpit); } if (c->initial == 0) @@ -465,8 +457,6 @@ vatpit_init(struct vm *vm) mtx_init(&vatpit->mtx, "vatpit lock", NULL, MTX_SPIN); - FREQ2BT(PIT_8254_FREQ, &vatpit->freq_bt); - for (i = 0; i < 3; i++) { callout_init(&vatpit->channel[i].callout, 1); arg = &vatpit->channel[i].callout_arg; diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c index 
14418ff5fa..deb1417b71 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vhpet.c +++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c @@ -76,32 +76,33 @@ struct vhpet_callout_arg { int timer_num; }; +struct vhpet_timer { + uint64_t cap_config; /* Configuration */ + uint64_t msireg; /* FSB interrupt routing */ + uint32_t compval; /* Comparator */ + uint32_t comprate; + struct callout callout; + hrtime_t callout_expire; /* time when counter==compval */ + struct vhpet_callout_arg arg; +}; + struct vhpet { struct vm *vm; struct mtx mtx; - sbintime_t freq_sbt; uint64_t config; /* Configuration */ uint64_t isr; /* Interrupt Status */ - uint32_t countbase; /* HPET counter base value */ - sbintime_t countbase_sbt; /* uptime corresponding to base value */ - - struct { - uint64_t cap_config; /* Configuration */ - uint64_t msireg; /* FSB interrupt routing */ - uint32_t compval; /* Comparator */ - uint32_t comprate; - struct callout callout; - sbintime_t callout_sbt; /* time when counter==compval */ - struct vhpet_callout_arg arg; - } timer[VHPET_NUM_TIMERS]; + uint32_t base_count; /* HPET counter base value */ + hrtime_t base_time; /* uptime corresponding to base value */ + + struct vhpet_timer timer[VHPET_NUM_TIMERS]; }; #define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx)) #define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx)) static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, - sbintime_t now); + hrtime_t now); static uint64_t vhpet_capabilities(void) @@ -151,27 +152,22 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n) } static uint32_t -vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr) +vhpet_counter(struct vhpet *vhpet, hrtime_t *nowptr) { - uint32_t val; - sbintime_t now, delta; + const hrtime_t now = gethrtime(); + uint32_t val = vhpet->base_count; - val = vhpet->countbase; if (vhpet_counter_enabled(vhpet)) { - now = sbinuptime(); - delta = now - vhpet->countbase_sbt; - KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: " - "%lx to %lx", 
vhpet->countbase_sbt, now)); - val += delta / vhpet->freq_sbt; - if (nowptr != NULL) - *nowptr = now; + const hrtime_t delta = now - vhpet->base_time; + + ASSERT3S(delta, >=, 0); + val += hrt_freq_count(delta, HPET_FREQ); } else { - /* - * The sbinuptime corresponding to the 'countbase' is - * meaningless when the counter is disabled. Make sure - * that the caller doesn't want to use it. - */ - KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL")); + /* Value of the counter is meaningless when it is disabled */ + } + + if (nowptr != NULL) { + *nowptr = now; } return (val); } @@ -284,7 +280,7 @@ vhpet_handler(void *a) { int n; uint32_t counter; - sbintime_t now; + hrtime_t now; struct vhpet *vhpet; struct callout *callout; struct vhpet_callout_arg *arg; @@ -317,7 +313,7 @@ done: } static void -vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now) +vhpet_stop_timer(struct vhpet *vhpet, int n, hrtime_t now) { VM_CTR1(vhpet->vm, "hpet t%d stopped", n); @@ -330,7 +326,7 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now) * in the guest. This is especially bad in one-shot mode because * the next interrupt has to wait for the counter to wrap around. 
*/ - if (vhpet->timer[n].callout_sbt < now) { + if (vhpet->timer[n].callout_expire < now) { VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after " "stopping timer", n); vhpet_timer_interrupt(vhpet, n); @@ -338,11 +334,11 @@ vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now) } static void -vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now) +vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, hrtime_t now) { - sbintime_t delta, precision; + struct vhpet_timer *timer = &vhpet->timer[n]; - if (vhpet->timer[n].comprate != 0) + if (timer->comprate != 0) vhpet_adjust_compval(vhpet, n, counter); else { /* @@ -353,11 +349,11 @@ vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now) */ } - delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt; - precision = delta >> tc_precexp; - vhpet->timer[n].callout_sbt = now + delta; - callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt, - precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE); + const hrtime_t delta = hrt_freq_interval(HPET_FREQ, + timer->compval - counter); + timer->callout_expire = now + delta; + callout_reset_hrtime(&timer->callout, timer->callout_expire, + vhpet_handler, &timer->arg, C_ABSOLUTE); } static void @@ -365,23 +361,23 @@ vhpet_start_counting(struct vhpet *vhpet) { int i; - vhpet->countbase_sbt = sbinuptime(); + vhpet->base_time = gethrtime(); for (i = 0; i < VHPET_NUM_TIMERS; i++) { /* * Restart the timers based on the value of the main counter * when it stopped counting. 
*/ - vhpet_start_timer(vhpet, i, vhpet->countbase, - vhpet->countbase_sbt); + vhpet_start_timer(vhpet, i, vhpet->base_count, + vhpet->base_time); } } static void -vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now) +vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, hrtime_t now) { int i; - vhpet->countbase = counter; + vhpet->base_count = counter; for (i = 0; i < VHPET_NUM_TIMERS; i++) vhpet_stop_timer(vhpet, i, now); } @@ -478,7 +474,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val, struct vhpet *vhpet; uint64_t data, mask, oldval, val64; uint32_t isr_clear_mask, old_compval, old_comprate, counter; - sbintime_t now, *nowptr; + hrtime_t now; int i, offset; vhpet = vm_hpet(vm); @@ -517,11 +513,10 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val, /* * Get the most recent value of the counter before updating * the 'config' register. If the HPET is going to be disabled - * then we need to update 'countbase' with the value right + * then we need to update 'base_count' with the value right * before it is disabled. */ - nowptr = vhpet_counter_enabled(vhpet) ? 
&now : NULL; - counter = vhpet_counter(vhpet, nowptr); + counter = vhpet_counter(vhpet, &now); oldval = vhpet->config; update_register(&vhpet->config, data, mask); @@ -558,7 +553,7 @@ vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val, /* Zero-extend the counter to 64-bits before updating it */ val64 = vhpet_counter(vhpet, NULL); update_register(&val64, data, mask); - vhpet->countbase = val64; + vhpet->base_count = val64; if (vhpet_counter_enabled(vhpet)) vhpet_start_counting(vhpet); goto done; @@ -710,15 +705,11 @@ vhpet_init(struct vm *vm) struct vhpet *vhpet; uint64_t allowed_irqs; struct vhpet_callout_arg *arg; - struct bintime bt; vhpet = malloc(sizeof (struct vhpet), M_VHPET, M_WAITOK | M_ZERO); vhpet->vm = vm; mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF); - FREQ2BT(HPET_FREQ, &bt); - vhpet->freq_sbt = bttosbt(bt); - pincount = vioapic_pincount(vm); if (pincount >= 32) allowed_irqs = 0xff000000; /* irqs 24-31 */ diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index 55f491b664..e88438da0d 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$"); /* * APIC timer frequency: * - arbitrary but chosen to be in the ballpark of contemporary hardware. - * - power-of-two to avoid loss of precision when converted to a bintime. + * - power-of-two to avoid loss of precision when calculating times */ #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) @@ -215,7 +215,6 @@ vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) static uint32_t vlapic_get_ccr(struct vlapic *vlapic) { - struct bintime bt_now, bt_rem; struct LAPIC *lapic; uint32_t ccr; @@ -228,12 +227,11 @@ vlapic_get_ccr(struct vlapic *vlapic) * If the timer is scheduled to expire in the future then * compute the value of 'ccr' based on the remaining time. 
*/ - binuptime(&bt_now); - if (BINTIME_CMP(&vlapic->timer_fire_bt, >, &bt_now)) { - bt_rem = vlapic->timer_fire_bt; - bintime_sub(&bt_rem, &bt_now); - ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); - ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; + + const hrtime_t now = gethrtime(); + if (vlapic->timer_fire_when > now) { + ccr += hrt_freq_count(vlapic->timer_fire_when - now, + vlapic->timer_cur_freq); } } KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %x, " @@ -263,9 +261,9 @@ vlapic_dcr_write_handler(struct vlapic *vlapic) * XXX changes to the frequency divider will not take effect until * the timer is reloaded. */ - FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); - vlapic->timer_period_bt = vlapic->timer_freq_bt; - bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); + vlapic->timer_cur_freq = VLAPIC_BUS_FREQ / divisor; + vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq, + lapic->icr_timer); VLAPIC_TIMER_UNLOCK(vlapic); } @@ -729,20 +727,16 @@ vlapic_trigger_lvt(struct vlapic *vlapic, int vector) } static void -vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t) +vlapic_callout_reset(struct vlapic *vlapic) { - callout_reset_sbt(&vlapic->callout, t, 0, - vlapic_callout_handler, vlapic, 0); + callout_reset_hrtime(&vlapic->callout, vlapic->timer_fire_when, + vlapic_callout_handler, vlapic, C_ABSOLUTE); } static void vlapic_callout_handler(void *arg) { - struct vlapic *vlapic; - struct bintime bt, btnow; - sbintime_t rem_sbt; - - vlapic = arg; + struct vlapic *vlapic = arg; VLAPIC_TIMER_LOCK(vlapic); if (callout_pending(&vlapic->callout)) /* callout was reset */ @@ -756,42 +750,25 @@ vlapic_callout_handler(void *arg) vlapic_fire_timer(vlapic); if (vlapic_periodic_timer(vlapic)) { - binuptime(&btnow); - - KASSERT(BINTIME_CMP(&btnow, >=, &vlapic->timer_fire_bt), - ("vlapic callout at %lx.%lx, expected at %lx.%lx", - btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, - vlapic->timer_fire_bt.frac)); - /* * 
Compute the delta between when the timer was supposed to - * fire and the present time. + * fire and the present time. We can depend on the fact that + * cyclics (which underly these callouts) will never be called + * early. */ - bt = btnow; - bintime_sub(&bt, &vlapic->timer_fire_bt); - - rem_sbt = bttosbt(vlapic->timer_period_bt); - if (BINTIME_CMP(&bt, <, &vlapic->timer_period_bt)) { + const hrtime_t now = gethrtime(); + const hrtime_t delta = now - vlapic->timer_fire_when; + if (delta >= vlapic->timer_period) { /* - * Adjust the time until the next countdown downward - * to account for the lost time. + * If we are so behind that we have missed an entire + * timer period, reset the time base rather than + * attempting to catch up. */ - rem_sbt -= bttosbt(bt); + vlapic->timer_fire_when = now + vlapic->timer_period; } else { - /* - * If the delta is greater than the timer period then - * just reset our time base instead of trying to catch - * up. - */ - vlapic->timer_fire_bt = btnow; - VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " - "usecs, period is %lu usecs - resetting time base", - bttosbt(bt) / SBT_1US, - bttosbt(vlapic->timer_period_bt) / SBT_1US); + vlapic->timer_fire_when += vlapic->timer_period; } - - bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); - vlapic_callout_reset(vlapic, rem_sbt); + vlapic_callout_reset(vlapic); } done: VLAPIC_TIMER_UNLOCK(vlapic); @@ -800,27 +777,18 @@ done: void vlapic_icrtmr_write_handler(struct vlapic *vlapic) { - struct LAPIC *lapic; - sbintime_t sbt; - uint32_t icr_timer; + struct LAPIC *lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); - - lapic = vlapic->apic_page; - icr_timer = lapic->icr_timer; - - vlapic->timer_period_bt = vlapic->timer_freq_bt; - bintime_mul(&vlapic->timer_period_bt, icr_timer); - - if (icr_timer != 0) { - binuptime(&vlapic->timer_fire_bt); - bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); - - sbt = bttosbt(vlapic->timer_period_bt); - vlapic_callout_reset(vlapic, 
sbt); - } else + vlapic->timer_period = hrt_freq_interval(vlapic->timer_cur_freq, + lapic->icr_timer); + if (vlapic->timer_period != 0) { + vlapic->timer_fire_when = gethrtime() + vlapic->timer_period; + vlapic_callout_reset(vlapic); + } else { + vlapic->timer_fire_when = 0; callout_stop(&vlapic->callout); - + } VLAPIC_TIMER_UNLOCK(vlapic); } diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h index 8d739bcfcc..7f07665874 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic_priv.h @@ -170,9 +170,10 @@ struct vlapic { uint32_t esr_pending; struct callout callout; /* vlapic timer */ - struct bintime timer_fire_bt; /* callout expiry time */ - struct bintime timer_freq_bt; /* timer frequency */ - struct bintime timer_period_bt; /* timer period */ + hrtime_t timer_fire_when; + hrtime_t timer_period; + uint32_t timer_cur_freq; + struct mtx timer_mtx; uint64_t msr_apicbase; diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c index 2644ee61d6..9a7d7d4253 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c @@ -64,9 +64,7 @@ struct vpmtmr { struct vm *vm; void *io_cookie; uint16_t io_port; - sbintime_t freq_sbt; - sbintime_t baseuptime; - uint32_t baseval; + hrtime_t base_time; }; static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer"); @@ -75,15 +73,10 @@ struct vpmtmr * vpmtmr_init(struct vm *vm) { struct vpmtmr *vpmtmr; - struct bintime bt; vpmtmr = malloc(sizeof (struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO); vpmtmr->vm = vm; - vpmtmr->baseuptime = sbinuptime(); - vpmtmr->baseval = 0; - - FREQ2BT(PMTMR_FREQ, &bt); - vpmtmr->freq_sbt = bttosbt(bt); + vpmtmr->base_time = gethrtime(); return (vpmtmr); } @@ -149,20 +142,18 @@ int vpmtmr_handler(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val) { struct vpmtmr *vpmtmr = arg; - sbintime_t now, delta; if (!in || bytes != 4) return (-1); 
/* - * No locking needed because 'baseuptime' and 'baseval' are - * written only during initialization. + * No locking needed because 'base_time' is written only during + * initialization. */ - now = sbinuptime(); - delta = now - vpmtmr->baseuptime; - KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: " - "%lx to %lx", vpmtmr->baseuptime, now)); - *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt; + const hrtime_t delta = gethrtime() - vpmtmr->base_time; + ASSERT3S(delta, >=, 0); + + *val = hrt_freq_count(delta, PMTMR_FREQ); return (0); } diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c index a67e82d156..2b3a5b5432 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c @@ -80,7 +80,7 @@ struct vrtc { struct mtx mtx; struct callout callout; uint_t addr; /* RTC register to read or write */ - sbintime_t base_uptime; + hrtime_t base_uptime; time_t base_rtctime; struct rtcdev rtcdev; }; @@ -147,23 +147,24 @@ update_enabled(struct vrtc *vrtc) } static time_t -vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime) +vrtc_curtime(struct vrtc *vrtc, hrtime_t *basetime) { - sbintime_t now, delta; - time_t t, secs; + time_t t = vrtc->base_rtctime; + hrtime_t base = vrtc->base_uptime; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); - t = vrtc->base_rtctime; - *basetime = vrtc->base_uptime; if (update_enabled(vrtc)) { - now = sbinuptime(); - delta = now - vrtc->base_uptime; - KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " - "%lx to %lx", vrtc->base_uptime, now)); - secs = delta / SBT_1S; - t += secs; - *basetime += secs * SBT_1S; + const hrtime_t delta = gethrtime() - vrtc->base_uptime; + const time_t sec = delta / NANOSEC; + + ASSERT3S(delta, >=, 0); + + t += sec; + base += sec * NANOSEC; + } + if (basetime != NULL) { + *basetime = base; } return (t); } @@ -389,7 +390,7 @@ fail: } static int -vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) 
+vrtc_time_update(struct vrtc *vrtc, time_t newtime, hrtime_t newbase) { struct rtcdev *rtc; time_t oldtime; @@ -463,28 +464,26 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) return (0); } -static sbintime_t +static hrtime_t vrtc_freq(struct vrtc *vrtc) { - int ratesel; - - static sbintime_t pf[16] = { + const hrtime_t rate_freq[16] = { 0, - SBT_1S / 256, - SBT_1S / 128, - SBT_1S / 8192, - SBT_1S / 4096, - SBT_1S / 2048, - SBT_1S / 1024, - SBT_1S / 512, - SBT_1S / 256, - SBT_1S / 128, - SBT_1S / 64, - SBT_1S / 32, - SBT_1S / 16, - SBT_1S / 8, - SBT_1S / 4, - SBT_1S / 2, + NANOSEC / 256, + NANOSEC / 128, + NANOSEC / 8192, + NANOSEC / 4096, + NANOSEC / 2048, + NANOSEC / 1024, + NANOSEC / 512, + NANOSEC / 256, + NANOSEC / 128, + NANOSEC / 64, + NANOSEC / 32, + NANOSEC / 16, + NANOSEC / 8, + NANOSEC / 4, + NANOSEC / 2, }; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); @@ -497,32 +496,32 @@ vrtc_freq(struct vrtc *vrtc) * the update interrupt. */ if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) { - ratesel = vrtc->rtcdev.reg_a & 0xf; - return (pf[ratesel]); + uint_t sel = vrtc->rtcdev.reg_a & 0xf; + return (rate_freq[sel]); } else if (aintr_enabled(vrtc) && update_enabled(vrtc)) { - return (SBT_1S); + return (NANOSEC); } else if (uintr_enabled(vrtc) && update_enabled(vrtc)) { - return (SBT_1S); + return (NANOSEC); } else { return (0); } } static void -vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt) +vrtc_callout_reset(struct vrtc *vrtc, hrtime_t freqhrt) { KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); - if (freqsbt == 0) { + if (freqhrt == 0) { if (callout_active(&vrtc->callout)) { VM_CTR0(vrtc->vm, "RTC callout stopped"); callout_stop(&vrtc->callout); } return; } - VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt); - callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler, + VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", NANOSEC / freqhrt); + 
callout_reset_hrtime(&vrtc->callout, freqhrt, vrtc_callout_handler, vrtc, 0); } @@ -530,7 +529,6 @@ static void vrtc_callout_handler(void *arg) { struct vrtc *vrtc = arg; - sbintime_t freqsbt, basetime; time_t rtctime; int error; @@ -552,28 +550,30 @@ vrtc_callout_handler(void *arg) vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD); if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) { + hrtime_t basetime; + rtctime = vrtc_curtime(vrtc, &basetime); error = vrtc_time_update(vrtc, rtctime, basetime); KASSERT(error == 0, ("%s: vrtc_time_update error %d", __func__, error)); } - freqsbt = vrtc_freq(vrtc); - KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__)); - vrtc_callout_reset(vrtc, freqsbt); + hrtime_t freqhrt = vrtc_freq(vrtc); + KASSERT(freqhrt != 0, ("%s: vrtc frequency cannot be zero", __func__)); + vrtc_callout_reset(vrtc, freqhrt); done: VRTC_UNLOCK(vrtc); } static __inline void -vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq) +vrtc_callout_check(struct vrtc *vrtc, hrtime_t freqhrt) { int active; active = callout_active(&vrtc->callout) ? 1 : 0; - KASSERT((freq == 0 && !active) || (freq != 0 && active), - ("vrtc callout %s with frequency %lx", - active ? "active" : "inactive", freq)); + KASSERT((freqhrt == 0 && !active) || (freqhrt != 0 && active), + ("vrtc callout %s with frequency %llx", + active ? 
"active" : "inactive", NANOSEC / freqhrt)); } static void @@ -618,7 +618,7 @@ static int vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) { struct rtcdev *rtc; - sbintime_t oldfreq, newfreq, basetime; + hrtime_t oldfreq, newfreq; time_t curtime, rtctime; int error; uint8_t oldval, changed; @@ -637,9 +637,11 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) } if (changed & RTCSB_HALT) { + hrtime_t basetime; + if ((newval & RTCSB_HALT) == 0) { rtctime = rtc_to_secs(vrtc); - basetime = sbinuptime(); + basetime = gethrtime(); if (rtctime == VRTC_BROKEN_TIME) { if (rtc_flag_broken_time) return (-1); @@ -693,7 +695,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) static void vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) { - sbintime_t oldfreq, newfreq; + hrtime_t oldfreq, newfreq; uint8_t oldval, changed; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); @@ -712,7 +714,7 @@ vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) * maintain the illusion that the RTC date/time was frozen * while the dividers were disabled. */ - vrtc->base_uptime = sbinuptime(); + vrtc->base_uptime = gethrtime(); VM_CTR2(vrtc->vm, "RTC divider out of reset at %lx/%lx", vrtc->base_rtctime, vrtc->base_uptime); } else { @@ -744,7 +746,7 @@ vrtc_set_time(struct vm *vm, time_t secs) vrtc = vm_rtc(vm); VRTC_LOCK(vrtc); - error = vrtc_time_update(vrtc, secs, sbinuptime()); + error = vrtc_time_update(vrtc, secs, gethrtime()); VRTC_UNLOCK(vrtc); if (error) { @@ -761,12 +763,11 @@ time_t vrtc_get_time(struct vm *vm) { struct vrtc *vrtc; - sbintime_t basetime; time_t t; vrtc = vm_rtc(vm); VRTC_LOCK(vrtc); - t = vrtc_curtime(vrtc, &basetime); + t = vrtc_curtime(vrtc, NULL); VRTC_UNLOCK(vrtc); return (t); @@ -803,7 +804,6 @@ int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) { struct vrtc *vrtc; - sbintime_t basetime; time_t curtime; uint8_t *ptr; @@ -820,7 +820,7 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) * Update RTC date/time fields if necessary. 
*/ if (offset < 10 || offset == RTC_CENTURY) { - curtime = vrtc_curtime(vrtc, &basetime); + curtime = vrtc_curtime(vrtc, NULL); secs_to_rtc(curtime, vrtc, 0); } @@ -858,7 +858,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes, { struct vrtc *vrtc = arg; struct rtcdev *rtc = &vrtc->rtcdev; - sbintime_t basetime; + hrtime_t basetime; time_t curtime; int error, offset; @@ -936,7 +936,7 @@ vrtc_data_handler(void *arg, bool in, uint16_t port, uint8_t bytes, */ if (offset == RTC_CENTURY && !rtc_halted(vrtc)) { curtime = rtc_to_secs(vrtc); - error = vrtc_time_update(vrtc, curtime, sbinuptime()); + error = vrtc_time_update(vrtc, curtime, gethrtime()); KASSERT(!error, ("vrtc_time_update error %d", error)); if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time) error = -1; @@ -990,7 +990,7 @@ vrtc_init(struct vm *vm) VRTC_LOCK(vrtc); vrtc->base_rtctime = VRTC_BROKEN_TIME; - vrtc_time_update(vrtc, curtime, sbinuptime()); + vrtc_time_update(vrtc, curtime, gethrtime()); secs_to_rtc(curtime, vrtc, 0); VRTC_UNLOCK(vrtc); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index cdcebc71d4..04bdb6a3d6 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -349,19 +349,10 @@ vmm_glue_callout_init(struct callout *c, int mpsafe) mutex_exit(&cpu_lock); } -static __inline hrtime_t -sbttohrtime(sbintime_t sbt) -{ - return (((sbt >> 32) * NANOSEC) + - (((uint64_t)NANOSEC * (uint32_t)sbt) >> 32)); -} - -int -vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr, - void (*func)(void *), void *arg, int flags) +void +callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *), + void *arg, int flags) { - hrtime_t target = sbttohrtime(sbt); - ASSERT(c->c_cyc_id != CYCLIC_NONE); if ((flags & C_ABSOLUTE) == 0) { @@ -372,8 +363,6 @@ vmm_glue_callout_reset_sbt(struct callout *c, sbintime_t sbt, sbintime_t pr, c->c_arg = arg; c->c_target = 
target; cyclic_reprogram(c->c_cyc_id, target); - - return (0); } int @@ -409,6 +398,34 @@ vmm_glue_callout_localize(struct callout *c) mutex_exit(&cpu_lock); } +/* + * Given an interval (in ns) and a frequency (in hz), calculate the number of + * "ticks" at that frequency which cover the interval. + */ +uint64_t +hrt_freq_count(hrtime_t interval, uint32_t freq) +{ + ASSERT3S(interval, >=, 0); + const uint64_t sec = interval / NANOSEC; + const uint64_t nsec = interval % NANOSEC; + + return ((sec * freq) + ((nsec * freq) / NANOSEC)); +} + +/* + * Given a frequency (in hz) and number of "ticks", calculate the interval + * (in ns) which would be covered by those ticks. + */ +hrtime_t +hrt_freq_interval(uint32_t freq, uint64_t count) +{ + const uint64_t sec = count / freq; + const uint64_t frac = count % freq; + + return ((NANOSEC * sec) + ((frac * NANOSEC) / freq)); +} + + uint_t cpu_high; /* Highest arg to CPUID */ uint_t cpu_exthigh; /* Highest arg to extended CPUID */ uint_t cpu_id; /* Stepping ID */ |