summaryrefslogtreecommitdiff
path: root/usr/src/uts
diff options
context:
space:
mode:
author: Michael Zeller <mike@mikezeller.net> 2020-03-11 16:55:43 -0400
committer: Patrick Mooney <pmooney@pfmooney.com> 2020-05-22 23:32:53 +0000
commit: 84659b24a533984de271059abf9a1092835d15a9 (patch)
tree: a5b46d9b98d0d88ee35aeef492b48c1e84d82035 /usr/src/uts
parent: cf3ec608f736765ec9852eed5e611848a25de9a4 (diff)
download: illumos-gate-84659b24a533984de271059abf9a1092835d15a9.tar.gz
12735 bhyve upstream sync 2019 Sept
Reviewed by: Dan McDonald <danmcd@kebe.com>
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Patrick Mooney <pmooney@oxide.computer>
Approved by: Robert Mustacchi <rm@fingolfin.org>
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- usr/src/uts/i86pc/io/vmm/README.sync 32
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/svm.c 48
-rw-r--r-- usr/src/uts/i86pc/io/vmm/intel/vmx.c 46
-rw-r--r-- usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c 21
-rw-r--r-- usr/src/uts/i86pc/io/vmm/intel/vtd.c 123
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vatpit.c 11
-rw-r--r-- usr/src/uts/i86pc/io/vmm/io/vlapic.c 5
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vm/vm_page.h 6
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm.c 32
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_host.h 7
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c 95
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_lapic.c 14
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_lapic.h 2
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c 28
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_util.c 20
-rw-r--r-- usr/src/uts/i86pc/io/vmm/vmm_util.h 6
-rw-r--r-- usr/src/uts/i86pc/sys/vmm.h 39
17 files changed, 352 insertions, 183 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync
index 1cddfd829e..e8aeaaffcf 100644
--- a/usr/src/uts/i86pc/io/vmm/README.sync
+++ b/usr/src/uts/i86pc/io/vmm/README.sync
@@ -1,18 +1,30 @@
The bhyve kernel module and its associated userland consumers have been updated
to the latest upstream FreeBSD sources as of:
+commit 37e8a0e0058c226e6bd0ed5c3a07ee15b1146122
+Author: mav <mav@FreeBSD.org>
+Date: Mon Sep 23 17:53:47 2019 +0000
-commit 3b9cb80b242682690203709aaff4eafae41c138f
-Author: jhb <jhb@FreeBSD.org>
-Date: Mon Jun 3 23:17:35 2019 +0000
+ Make nvme(4) driver some more NUMA aware.
- Emulate the AMD MSR_LS_CFG MSR used for various Ryzen errata.
+ - For each queue pair precalculate CPU and domain it is bound to.
+ If queue pairs are not per-CPU, then use the domain of the device.
+ - Allocate most of queue pair memory from the domain it is bound to.
+ - Bind callouts to the same CPUs as queue pair to avoid migrations.
+ - Do not assign queue pairs to each SMT thread. It just wasted
+ resources and increased lock congestions.
+ - Remove fixed multiplier of CPUs per queue pair, spread them even.
+ This allows to use more queue pairs in some hardware configurations.
+ - If queue pair serves multiple CPUs, bind different NVMe devices to
+ different CPUs.
- Writes are ignored and reads always return zero.
+ MFC after: 1 month
+ Sponsored by: iXsystems, Inc.
- Submitted by: José Albornoz <jojo@eljojo.net> (write-only version)
- Reviewed by: Patrick Mooney, cem
- MFC after: 2 weeks
- Differential Revision: https://reviews.freebsd.org/D19506
+Which corresponds to SVN revision: 352630
-Which corresponds to SVN revision: 348592
+
+NOTE:
+This sync ignores commit c8edafdabc27533d9c51eddc2896e772c16d965c.
+There are big changes to the virtio net devices that we haven't synced up yet
+because SmartOS relies heavily on viona instead.
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 80d76ab640..c194e3d818 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -112,11 +112,6 @@ SYSCTL_INT(_hw_vmm_svm, OID_AUTO, vmcb_clean, CTLFLAG_RDTUN, &vmcb_clean,
static MALLOC_DEFINE(M_SVM, "svm", "svm");
static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
-#ifdef __FreeBSD__
-/* Per-CPU context area. */
-extern struct pcpu __pcpu[];
-#endif
-
static uint32_t svm_feature = ~0U; /* AMD SVM features. */
SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RDTUN, &svm_feature, 0,
"SVM features advertised by CPUID.8000000AH:EDX");
@@ -2160,11 +2155,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/* Launch Virtual Machine. */
VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip);
svm_dr_enter_guest(gctx);
-#ifdef __FreeBSD__
- svm_launch(vmcb_pa, gctx, &__pcpu[curcpu]);
-#else
- svm_launch(vmcb_pa, gctx, CPU);
-#endif
+ svm_launch(vmcb_pa, gctx, get_pcpu());
svm_dr_leave_guest(gctx);
CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
@@ -2420,25 +2411,24 @@ svm_restorectx(void *arg, int vcpu)
#endif /* __FreeBSD__ */
struct vmm_ops vmm_ops_amd = {
- svm_init,
- svm_cleanup,
- svm_restore,
- svm_vminit,
- svm_vmrun,
- svm_vmcleanup,
- svm_getreg,
- svm_setreg,
- vmcb_getdesc,
- vmcb_setdesc,
- svm_getcap,
- svm_setcap,
- svm_npt_alloc,
- svm_npt_free,
- svm_vlapic_init,
- svm_vlapic_cleanup,
-
+ .init = svm_init,
+ .cleanup = svm_cleanup,
+ .resume = svm_restore,
+ .vminit = svm_vminit,
+ .vmrun = svm_vmrun,
+ .vmcleanup = svm_vmcleanup,
+ .vmgetreg = svm_getreg,
+ .vmsetreg = svm_setreg,
+ .vmgetdesc = vmcb_getdesc,
+ .vmsetdesc = vmcb_setdesc,
+ .vmgetcap = svm_getcap,
+ .vmsetcap = svm_setcap,
+ .vmspace_alloc = svm_npt_alloc,
+ .vmspace_free = svm_npt_free,
+ .vlapic_init = svm_vlapic_init,
+ .vlapic_cleanup = svm_vlapic_cleanup,
#ifndef __FreeBSD__
- svm_savectx,
- svm_restorectx,
+ .vmsavectx = svm_savectx,
+ .vmrestorectx = svm_restorectx,
#endif
};
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index eea036b253..ede3a54d66 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -2356,20 +2356,20 @@ ept_fault_type(uint64_t ept_qual)
return (fault_type);
}
-static boolean_t
+static bool
ept_emulation_fault(uint64_t ept_qual)
{
int read, write;
/* EPT fault on an instruction fetch doesn't make sense here */
if (ept_qual & EPT_VIOLATION_INST_FETCH)
- return (FALSE);
+ return (false);
/* EPT fault must be a read fault or a write fault */
read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
if ((read | write) == 0)
- return (FALSE);
+ return (false);
/*
* The EPT violation must have been caused by accessing a
@@ -2378,10 +2378,10 @@ ept_emulation_fault(uint64_t ept_qual)
*/
if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
(ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
- return (FALSE);
+ return (false);
}
- return (TRUE);
+ return (true);
}
static __inline int
@@ -4284,26 +4284,26 @@ vmx_restorectx(void *arg, int vcpu)
#endif /* __FreeBSD__ */
struct vmm_ops vmm_ops_intel = {
- vmx_init,
- vmx_cleanup,
- vmx_restore,
- vmx_vminit,
- vmx_run,
- vmx_vmcleanup,
- vmx_getreg,
- vmx_setreg,
- vmx_getdesc,
- vmx_setdesc,
- vmx_getcap,
- vmx_setcap,
- ept_vmspace_alloc,
- ept_vmspace_free,
- vmx_vlapic_init,
- vmx_vlapic_cleanup,
+ .init = vmx_init,
+ .cleanup = vmx_cleanup,
+ .resume = vmx_restore,
+ .vminit = vmx_vminit,
+ .vmrun = vmx_run,
+ .vmcleanup = vmx_vmcleanup,
+ .vmgetreg = vmx_getreg,
+ .vmsetreg = vmx_setreg,
+ .vmgetdesc = vmx_getdesc,
+ .vmsetdesc = vmx_setdesc,
+ .vmgetcap = vmx_getcap,
+ .vmsetcap = vmx_setcap,
+ .vmspace_alloc = ept_vmspace_alloc,
+ .vmspace_free = ept_vmspace_free,
+ .vlapic_init = vmx_vlapic_init,
+ .vlapic_cleanup = vmx_vlapic_cleanup,
#ifndef __FreeBSD__
- vmx_savectx,
- vmx_restorectx,
+ .vmsavectx = vmx_savectx,
+ .vmrestorectx = vmx_restorectx,
#endif
};
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index 4a1a2cd358..9121e46b40 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -48,24 +48,18 @@ __FBSDID("$FreeBSD$");
#include "vmx.h"
#include "vmx_msr.h"
-static boolean_t
+static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{
- if (msr_val & (1UL << (bitpos + 32)))
- return (TRUE);
- else
- return (FALSE);
+ return ((msr_val & (1UL << (bitpos + 32))) != 0);
}
-static boolean_t
+static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{
- if ((msr_val & (1UL << bitpos)) == 0)
- return (TRUE);
- else
- return (FALSE);
+ return ((msr_val & (1UL << bitpos)) == 0);
}
uint32_t
@@ -92,16 +86,13 @@ vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
{
int i;
uint64_t val, trueval;
- boolean_t true_ctls_avail, one_allowed, zero_allowed;
+ bool true_ctls_avail, one_allowed, zero_allowed;
/* We cannot ask the same bit to be set to both '1' and '0' */
if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
return (EINVAL);
- if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
- true_ctls_avail = TRUE;
- else
- true_ctls_avail = FALSE;
+ true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
val = rdmsr(ctl_reg);
if (true_ctls_avail)
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
index 902080e34c..41c2c5b2f8 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
* Architecture Spec, September 2008.
*/
+#define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1)
+
/* Section 10.4 "Register Descriptions" */
struct vtdmap {
volatile uint32_t version;
@@ -118,10 +120,11 @@ struct domain {
static SLIST_HEAD(, domain) domhead;
#define DRHD_MAX_UNITS 8
-static int drhd_num;
-static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
-static int max_domains;
-typedef int (*drhd_ident_func_t)(void);
+static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS];
+static int drhd_num;
+static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
+static int max_domains;
+typedef int (*drhd_ident_func_t)(void);
#ifndef __FreeBSD__
static dev_info_t *vtddips[DRHD_MAX_UNITS];
#endif
@@ -180,6 +183,69 @@ domain_id(void)
return (id);
}
+static struct vtdmap *
+vtd_device_scope(uint16_t rid)
+{
+ int i, remaining, pathremaining;
+ char *end, *pathend;
+ struct vtdmap *vtdmap;
+ ACPI_DMAR_HARDWARE_UNIT *drhd;
+ ACPI_DMAR_DEVICE_SCOPE *device_scope;
+ ACPI_DMAR_PCI_PATH *path;
+
+ for (i = 0; i < drhd_num; i++) {
+ drhd = drhds[i];
+
+ if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
+ /*
+ * From Intel VT-d arch spec, version 3.0:
+ * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
+ * for a Segment, it must be enumerated by BIOS after all other
+ * DRHD structures for the same Segment.
+ */
+ vtdmap = vtdmaps[i];
+ return(vtdmap);
+ }
+
+ end = (char *)drhd + drhd->Header.Length;
+ remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
+ while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
+ device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
+ remaining -= device_scope->Length;
+
+ switch (device_scope->EntryType){
+ /* 0x01 and 0x02 are PCI device entries */
+ case 0x01:
+ case 0x02:
+ break;
+ default:
+ continue;
+ }
+
+ if (PCI_RID2BUS(rid) != device_scope->Bus)
+ continue;
+
+ pathend = (char *)device_scope + device_scope->Length;
+ pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
+ while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
+ path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
+ pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
+
+ if (PCI_RID2SLOT(rid) != path->Device)
+ continue;
+ if (PCI_RID2FUNC(rid) != path->Function)
+ continue;
+
+ vtdmap = vtdmaps[i];
+ return (vtdmap);
+ }
+ }
+ }
+
+ /* No matching scope */
+ return (NULL);
+}
+
static void
vtd_wbflush(struct vtdmap *vtdmap)
{
@@ -285,7 +351,7 @@ extern dev_info_t *vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *, int);
static int
vtd_init(void)
{
- int i, units, remaining;
+ int i, units, remaining, tmp;
struct vtdmap *vtdmap;
vm_paddr_t ctx_paddr;
char *end;
@@ -342,16 +408,16 @@ vtd_init(void)
break;
drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
+ drhds[units] = drhd;
#ifdef __FreeBSD__
- vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
+ vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
#else
vtddips[units] = vtd_get_dip(drhd, units);
vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]);
if (vtdmaps[units] == NULL)
goto fail;
- units++;
#endif
- if (units >= DRHD_MAX_UNITS)
+ if (++units >= DRHD_MAX_UNITS)
break;
remaining -= hdr->Length;
}
@@ -363,12 +429,18 @@ vtd_init(void)
skip_dmar:
#endif
drhd_num = units;
- vtdmap = vtdmaps[0];
- if (VTD_CAP_CM(vtdmap->cap) != 0)
- panic("vtd_init: invalid caching mode");
+ max_domains = 64 * 1024; /* maximum valid value */
+ for (i = 0; i < drhd_num; i++){
+ vtdmap = vtdmaps[i];
+
+ if (VTD_CAP_CM(vtdmap->cap) != 0)
+ panic("vtd_init: invalid caching mode");
- max_domains = vtd_max_domains(vtdmap);
+ /* take most compatible (minimum) value */
+ if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
+ max_domains = tmp;
+ }
/*
* Set up the root-table to point to the context-entry tables
@@ -459,7 +531,6 @@ vtd_add_device(void *arg, uint16_t rid)
struct vtdmap *vtdmap;
uint8_t bus;
- vtdmap = vtdmaps[0];
bus = PCI_RID2BUS(rid);
ctxp = ctx_tables[bus];
pt_paddr = vtophys(dom->ptp);
@@ -471,6 +542,10 @@ vtd_add_device(void *arg, uint16_t rid)
(uint16_t)(ctxp[idx + 1] >> 8));
}
+ if ((vtdmap = vtd_device_scope(rid)) == NULL)
+ panic("vtd_add_device: device %x is not in scope for "
+ "any DMA remapping unit", rid);
+
/*
* Order is important. The 'present' bit is set only after all fields
* of the context pointer are initialized.
@@ -654,8 +729,6 @@ vtd_create_domain(vm_paddr_t maxaddr)
if (drhd_num <= 0)
panic("vtd_create_domain: no dma remapping hardware available");
- vtdmap = vtdmaps[0];
-
/*
* Calculate AGAW.
* Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
@@ -680,7 +753,14 @@ vtd_create_domain(vm_paddr_t maxaddr)
pt_levels = 2;
sagaw = 30;
addrwidth = 0;
- tmp = VTD_CAP_SAGAW(vtdmap->cap);
+
+ tmp = ~0;
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ /* take most compatible value */
+ tmp &= VTD_CAP_SAGAW(vtdmap->cap);
+ }
+
for (i = 0; i < 5; i++) {
if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
break;
@@ -692,8 +772,8 @@ vtd_create_domain(vm_paddr_t maxaddr)
}
if (i >= 5) {
- panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
- VTD_CAP_SAGAW(vtdmap->cap), agaw);
+ panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
+ tmp, agaw);
}
dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
@@ -721,7 +801,12 @@ vtd_create_domain(vm_paddr_t maxaddr)
* There is not any code to deal with the demotion at the moment
* so we disable superpage mappings altogether.
*/
- dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
+ dom->spsmask = ~0;
+ for (i = 0; i < drhd_num; i++) {
+ vtdmap = vtdmaps[i];
+ /* take most compatible value */
+ dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
+ }
#endif
#else
/*
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 9b3e7376d5..03f63798e7 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -3,6 +3,7 @@
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2018 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -180,20 +181,20 @@ static void
pit_timer_start_cntr0(struct vatpit *vatpit)
{
struct channel *c;
+ struct bintime now, delta;
+ sbintime_t precision;
c = &vatpit->channel[0];
if (c->initial != 0) {
- sbintime_t precision;
- struct bintime now, delta;
-
delta.sec = 0;
delta.frac = vatpit->freq_bt.frac * c->initial;
bintime_add(&c->callout_bt, &delta);
precision = bttosbt(delta) >> tc_precexp;
/*
- * Reset 'callout_bt' if the time that the callout was supposed
- * to fire is more than 'c->initial' ticks in the past.
+ * Reset 'callout_bt' if the time that the callout
+ * was supposed to fire is more than 'c->initial'
+ * ticks in the past.
*/
binuptime(&now);
if (bintime_cmp(&c->callout_bt, &now, <)) {
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index 4e58249c8d..687e0e6a8e 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -339,7 +339,7 @@ vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
return (&lapic->lvt_cmci);
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
- return ((&lapic->lvt_timer) + i);;
+ return ((&lapic->lvt_timer) + i);
default:
panic("vlapic_get_lvt: invalid LVT\n");
}
@@ -854,7 +854,8 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
*/
CPU_ZERO(dmask);
vcpuid = vm_apicid2vcpuid(vm, dest);
- if (vcpuid < vm_get_maxcpus(vm))
+ amask = vm_active_cpus(vm);
+ if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
CPU_SET(vcpuid, dmask);
} else {
/*
diff --git a/usr/src/uts/i86pc/io/vmm/vm/vm_page.h b/usr/src/uts/i86pc/io/vmm/vm/vm_page.h
index 4559fe6d4c..deb25a6cc0 100644
--- a/usr/src/uts/i86pc/io/vmm/vm/vm_page.h
+++ b/usr/src/uts/i86pc/io/vmm/vm/vm_page.h
@@ -19,9 +19,9 @@
#include "vm_glue.h"
-void vm_page_lock(vm_page_t);
-void vm_page_unhold(vm_page_t);
-void vm_page_unlock(vm_page_t);
+#define PQ_ACTIVE 1
+
+void vm_page_unwire(vm_page_t, uint8_t);
#define VM_PAGE_TO_PHYS(page) (mmu_ptob((uintptr_t)((page)->vmp_pfn)))
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 47a5f26cb7..2238536121 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -1008,7 +1008,7 @@ vmm_sysmem_maxaddr(struct vm *vm)
}
static void
-vm_iommu_modify(struct vm *vm, boolean_t map)
+vm_iommu_modify(struct vm *vm, bool map)
{
int i, sz;
vm_paddr_t gpa, hpa;
@@ -1083,8 +1083,8 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
#endif
}
-#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE)
-#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE)
+#define vm_iommu_unmap(vm) vm_iommu_modify((vm), false)
+#define vm_iommu_map(vm) vm_iommu_modify((vm), true)
#ifdef __FreeBSD__
int
@@ -1193,9 +1193,7 @@ vm_gpa_release(void *cookie)
{
vm_page_t m = cookie;
- vm_page_lock(m);
- vm_page_unhold(m);
- vm_page_unlock(m);
+ vm_page_unwire(m, PQ_ACTIVE);
}
int
@@ -1234,20 +1232,20 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
return (0);
}
-static boolean_t
+static bool
is_descriptor_table(int reg)
{
switch (reg) {
case VM_REG_GUEST_IDTR:
case VM_REG_GUEST_GDTR:
- return (TRUE);
+ return (true);
default:
- return (FALSE);
+ return (false);
}
}
-static boolean_t
+static bool
is_segment_register(int reg)
{
@@ -1260,9 +1258,9 @@ is_segment_register(int reg)
case VM_REG_GUEST_GS:
case VM_REG_GUEST_TR:
case VM_REG_GUEST_LDTR:
- return (TRUE);
+ return (true);
default:
- return (FALSE);
+ return (false);
}
}
@@ -2622,12 +2620,12 @@ vm_hpet(struct vm *vm)
}
#ifdef __FreeBSD__
-boolean_t
+bool
vmm_is_pptdev(int bus, int slot, int func)
{
- int found, i, n;
- int b, s, f;
+ int b, f, i, n, s;
char *val, *cp, *cp2;
+ bool found;
/*
* XXX
@@ -2641,7 +2639,7 @@ vmm_is_pptdev(int bus, int slot, int func)
const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
- found = 0;
+ found = false;
for (i = 0; names[i] != NULL && !found; i++) {
cp = val = kern_getenv(names[i]);
while (cp != NULL && *cp != '\0') {
@@ -2650,7 +2648,7 @@ vmm_is_pptdev(int bus, int slot, int func)
n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
if (n == 3 && bus == b && slot == s && func == f) {
- found = 1;
+ found = true;
break;
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.h b/usr/src/uts/i86pc/io/vmm/vmm_host.h
index f12047819d..e0ea1ec927 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_host.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_host.h
@@ -100,17 +100,12 @@ vmm_get_host_gdtrbase(void)
#endif
}
-#ifdef __FreeBSD__
-struct pcpu;
-extern struct pcpu __pcpu[];
-#endif
-
static __inline uint64_t
vmm_get_host_gsbase(void)
{
#ifdef __FreeBSD__
- return ((uint64_t)&__pcpu[curcpu]);
+ return ((uint64_t)get_pcpu());
#else
return (rdmsr(MSR_GSBASE));
#endif
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
index ea96cd8db0..4a4fb07eba 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -91,6 +91,7 @@ enum {
VIE_OP_TYPE_BITTEST,
VIE_OP_TYPE_TWOB_GRP15,
VIE_OP_TYPE_ADD,
+ VIE_OP_TYPE_TEST,
VIE_OP_TYPE_LAST
};
@@ -235,6 +236,12 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_byte = 0x8F,
.op_type = VIE_OP_TYPE_POP,
},
+ [0xF7] = {
+ /* XXX Group 3 extended opcode - not just TEST */
+ .op_byte = 0xF7,
+ .op_type = VIE_OP_TYPE_TEST,
+ .op_flags = VIE_OP_F_IMM,
+ },
[0xFF] = {
/* XXX Group 5 extended opcode - not just PUSH */
.op_byte = 0xFF,
@@ -465,6 +472,41 @@ getaddflags(int opsize, uint64_t x, uint64_t y)
return (getaddflags64(x, y));
}
+/*
+ * Return the status flags that would result from doing (x & y).
+ */
+#define GETANDFLAGS(sz) \
+static u_long \
+getandflags##sz(uint##sz##_t x, uint##sz##_t y) \
+{ \
+ u_long rflags; \
+ \
+ __asm __volatile("and %2,%1; pushfq; popq %0" : \
+ "=r" (rflags), "+r" (x) : "m" (y)); \
+ return (rflags); \
+} struct __hack
+
+GETANDFLAGS(8);
+GETANDFLAGS(16);
+GETANDFLAGS(32);
+GETANDFLAGS(64);
+
+static u_long
+getandflags(int opsize, uint64_t x, uint64_t y)
+{
+ KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
+ ("getandflags: invalid operand size %d", opsize));
+
+ if (opsize == 1)
+ return (getandflags8(x, y));
+ else if (opsize == 2)
+ return (getandflags16(x, y));
+ else if (opsize == 4)
+ return (getandflags32(x, y));
+ else
+ return (getandflags64(x, y));
+}
+
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -1234,6 +1276,55 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
+emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ uint64_t op1, rflags, rflags2;
+
+ size = vie->opsize;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0xF7:
+ /*
+ * F7 /0 test r/m16, imm16
+ * F7 /0 test r/m32, imm32
+ * REX.W + F7 /0 test r/m64, imm32 sign-extended to 64
+ *
+ * Test mem (ModRM:r/m) with immediate and set status
+ * flags according to the results. The comparison is
+ * performed by ANDing the immediate with the first
+ * operand and then setting the status flags.
+ */
+ if ((vie->reg & 7) != 0)
+ return (EINVAL);
+
+ error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ if (error)
+ return (error);
+
+ rflags2 = getandflags(size, op1, vie->immediate);
+ break;
+ default:
+ return (EINVAL);
+ }
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ */
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+ return (error);
+}
+
+static int
emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
@@ -1658,6 +1749,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_add(vm, vcpuid, gpa, vie, memread,
memwrite, memarg);
break;
+ case VIE_OP_TYPE_TEST:
+ error = emulate_test(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
index 43b2bebe97..57e4cfddf3 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c
@@ -149,13 +149,10 @@ lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
return (0);
}
-static boolean_t
+static bool
x2apic_msr(u_int msr)
{
- if (msr >= 0x800 && msr <= 0xBFF)
- return (TRUE);
- else
- return (FALSE);
+ return (msr >= 0x800 && msr <= 0xBFF);
}
static u_int
@@ -165,14 +162,11 @@ x2apic_msr_to_regoff(u_int msr)
return ((msr - 0x800) << 4);
}
-boolean_t
+bool
lapic_msr(u_int msr)
{
- if (x2apic_msr(msr) || (msr == MSR_APICBASE))
- return (TRUE);
- else
- return (FALSE);
+ return (x2apic_msr(msr) || msr == MSR_APICBASE);
}
int
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.h b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h
index da3b0ff660..58508ad70b 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h
@@ -45,7 +45,7 @@
struct vm;
-boolean_t lapic_msr(u_int num);
+bool lapic_msr(u_int num);
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval,
bool *retu);
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval,
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
index 66a67d9529..ddae4202b7 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
@@ -982,35 +982,19 @@ vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp,
}
void
-vm_page_lock(vm_page_t vmp)
+vm_page_unwire(vm_page_t vmp, uint8_t nqueue __unused)
{
ASSERT(!MUTEX_HELD(&vmp->vmp_lock));
-
mutex_enter(&vmp->vmp_lock);
-}
-
-void
-vm_page_unlock(vm_page_t vmp)
-{
- boolean_t purge = (vmp->vmp_pfn == PFN_INVALID);
-
- ASSERT(MUTEX_HELD(&vmp->vmp_lock));
-
- mutex_exit(&vmp->vmp_lock);
- if (purge) {
- mutex_destroy(&vmp->vmp_lock);
- kmem_free(vmp, sizeof (*vmp));
- }
-}
-
-void
-vm_page_unhold(vm_page_t vmp)
-{
- ASSERT(MUTEX_HELD(&vmp->vmp_lock));
VERIFY(vmp->vmp_pfn != PFN_INVALID);
vm_object_deallocate(vmp->vmp_obj_held);
vmp->vmp_obj_held = NULL;
vmp->vmp_pfn = PFN_INVALID;
+
+ mutex_exit(&vmp->vmp_lock);
+
+ mutex_destroy(&vmp->vmp_lock);
+ kmem_free(vmp, sizeof (*vmp));
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.c b/usr/src/uts/i86pc/io/vmm/vmm_util.c
index 3eadfe57e5..b8acff9bbc 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_util.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_util.c
@@ -50,26 +50,20 @@ __FBSDID("$FreeBSD$");
#include "vmm_util.h"
-boolean_t
+bool
vmm_is_intel(void)
{
- if (strcmp(cpu_vendor, "GenuineIntel") == 0)
- return (TRUE);
- else
- return (FALSE);
+ return (strcmp(cpu_vendor, "GenuineIntel") == 0);
}
-boolean_t
+bool
vmm_is_amd(void)
{
- if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
- return (TRUE);
- else
- return (FALSE);
+ return (strcmp(cpu_vendor, "AuthenticAMD") == 0);
}
-boolean_t
+bool
vmm_supports_1G_pages(void)
{
unsigned int regs[4];
@@ -82,9 +76,9 @@ vmm_supports_1G_pages(void)
if (cpu_exthigh >= 0x80000001) {
do_cpuid(0x80000001, regs);
if (regs[3] & (1 << 26))
- return (TRUE);
+ return (true);
}
- return (FALSE);
+ return (false);
}
#ifdef __FreeBSD__
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.h b/usr/src/uts/i86pc/io/vmm/vmm_util.h
index fc7e7364c7..8c65e7e3a6 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_util.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_util.h
@@ -33,9 +33,9 @@
struct trapframe;
-boolean_t vmm_is_intel(void);
-boolean_t vmm_is_amd(void);
-boolean_t vmm_supports_1G_pages(void);
+bool vmm_is_intel(void);
+bool vmm_is_amd(void);
+bool vmm_supports_1G_pages(void);
void dump_trapframe(struct trapframe *tf);
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index ac8f14b042..0bbc219b7f 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -127,10 +127,39 @@ enum x2apic_state {
#define VM_INTINFO_HWEXCEPTION (3 << 8)
#define VM_INTINFO_SWINTR (4 << 8)
-
-#define VM_MAX_NAMELEN 32
+#ifndef __FreeBSD__
+/*
+ * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
+ * Instead of picking an arbitrary value we will just rely on the same
+ * calculation that's made below. If this calculation ever changes we need to
+ * update the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file.
+ */
+#else
+/*
+ * The VM name has to fit into the pathname length constraints of devfs,
+ * governed primarily by SPECNAMELEN. The length is the total number of
+ * characters in the full path, relative to the mount point and not
+ * including any leading '/' characters.
+ * A prefix and a suffix are added to the name specified by the user.
+ * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
+ * longer for future use.
+ * The suffix is a string that identifies a bootrom image or some similar
+ * image that is attached to the VM. A separator character gets added to
+ * the suffix automatically when generating the full path, so it must be
+ * accounted for, reducing the effective length by 1.
+ * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
+ * bytes for FreeBSD 12. A minimum length is set for safety and supports
+ * a SPECNAMELEN as small as 32 on old systems.
+ */
+#endif
+#define VM_MAX_PREFIXLEN 10
+#define VM_MAX_SUFFIXLEN 15
+#define VM_MIN_NAMELEN 6
+#define VM_MAX_NAMELEN \
+ (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1)
#ifdef _KERNEL
+CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN);
struct vm;
struct vm_exception;
@@ -309,12 +338,12 @@ vcpu_reqidle(struct vm_eventinfo *info)
int vcpu_debugged(struct vm *vm, int vcpuid);
/*
- * Return 1 if device indicated by bus/slot/func is supposed to be a
+ * Return true if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
*
- * Return 0 otherwise.
+ * Return false otherwise.
*/
-int vmm_is_pptdev(int bus, int slot, int func);
+bool vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);