diff options
author | Michael Zeller <mike@mikezeller.net> | 2020-03-11 16:55:43 -0400 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2020-05-22 23:32:53 +0000 |
commit | 84659b24a533984de271059abf9a1092835d15a9 (patch) | |
tree | a5b46d9b98d0d88ee35aeef492b48c1e84d82035 /usr/src/uts | |
parent | cf3ec608f736765ec9852eed5e611848a25de9a4 (diff) | |
download | illumos-gate-84659b24a533984de271059abf9a1092835d15a9.tar.gz |
12735 bhyve upstream sync 2019 Sept
Reviewed by: Dan McDonald <danmcd@kebe.com>
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Patrick Mooney <pmooney@oxide.computer>
Approved by: Robert Mustacchi <rm@fingolfin.org>
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/README.sync | 32 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/svm.c | 48 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.c | 46 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c | 21 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vtd.c | 123 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/io/vatpit.c | 11 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/io/vlapic.c | 5 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vm/vm_page.h | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm.c | 32 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_host.h | 7 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c | 95 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_lapic.c | 14 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_lapic.h | 2 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c | 28 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_util.c | 20 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_util.h | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/sys/vmm.h | 39 |
17 files changed, 352 insertions, 183 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index 1cddfd829e..e8aeaaffcf 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,18 +1,30 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: +commit 37e8a0e0058c226e6bd0ed5c3a07ee15b1146122 +Author: mav <mav@FreeBSD.org> +Date: Mon Sep 23 17:53:47 2019 +0000 -commit 3b9cb80b242682690203709aaff4eafae41c138f -Author: jhb <jhb@FreeBSD.org> -Date: Mon Jun 3 23:17:35 2019 +0000 + Make nvme(4) driver some more NUMA aware. - Emulate the AMD MSR_LS_CFG MSR used for various Ryzen errata. + - For each queue pair precalculate CPU and domain it is bound to. + If queue pairs are not per-CPU, then use the domain of the device. + - Allocate most of queue pair memory from the domain it is bound to. + - Bind callouts to the same CPUs as queue pair to avoid migrations. + - Do not assign queue pairs to each SMT thread. It just wasted + resources and increased lock congestions. + - Remove fixed multiplier of CPUs per queue pair, spread them even. + This allows to use more queue pairs in some hardware configurations. + - If queue pair serves multiple CPUs, bind different NVMe devices to + different CPUs. - Writes are ignored and reads always return zero. + MFC after: 1 month + Sponsored by: iXsystems, Inc. - Submitted by: José Albornoz <jojo@eljojo.net> (write-only version) - Reviewed by: Patrick Mooney, cem - MFC after: 2 weeks - Differential Revision: https://reviews.freebsd.org/D19506 +Which corresponds to SVN revision: 352630 -Which corresponds to SVN revision: 348592 + +NOTE: +This sync ignores commit c8edafdabc27533d9c51eddc2896e772c16d965c. +There are big changes to the virtio net devices that we haven't synced up yet +because SmartOS relies heavily on viona instead. 
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 80d76ab640..c194e3d818 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -112,11 +112,6 @@ SYSCTL_INT(_hw_vmm_svm, OID_AUTO, vmcb_clean, CTLFLAG_RDTUN, &vmcb_clean, static MALLOC_DEFINE(M_SVM, "svm", "svm"); static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic"); -#ifdef __FreeBSD__ -/* Per-CPU context area. */ -extern struct pcpu __pcpu[]; -#endif - static uint32_t svm_feature = ~0U; /* AMD SVM features. */ SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RDTUN, &svm_feature, 0, "SVM features advertised by CPUID.8000000AH:EDX"); @@ -2160,11 +2155,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, /* Launch Virtual Machine. */ VCPU_CTR1(vm, vcpu, "Resume execution at %#lx", state->rip); svm_dr_enter_guest(gctx); -#ifdef __FreeBSD__ - svm_launch(vmcb_pa, gctx, &__pcpu[curcpu]); -#else - svm_launch(vmcb_pa, gctx, CPU); -#endif + svm_launch(vmcb_pa, gctx, get_pcpu()); svm_dr_leave_guest(gctx); CPU_CLR_ATOMIC(curcpu, &pmap->pm_active); @@ -2420,25 +2411,24 @@ svm_restorectx(void *arg, int vcpu) #endif /* __FreeBSD__ */ struct vmm_ops vmm_ops_amd = { - svm_init, - svm_cleanup, - svm_restore, - svm_vminit, - svm_vmrun, - svm_vmcleanup, - svm_getreg, - svm_setreg, - vmcb_getdesc, - vmcb_setdesc, - svm_getcap, - svm_setcap, - svm_npt_alloc, - svm_npt_free, - svm_vlapic_init, - svm_vlapic_cleanup, - + .init = svm_init, + .cleanup = svm_cleanup, + .resume = svm_restore, + .vminit = svm_vminit, + .vmrun = svm_vmrun, + .vmcleanup = svm_vmcleanup, + .vmgetreg = svm_getreg, + .vmsetreg = svm_setreg, + .vmgetdesc = vmcb_getdesc, + .vmsetdesc = vmcb_setdesc, + .vmgetcap = svm_getcap, + .vmsetcap = svm_setcap, + .vmspace_alloc = svm_npt_alloc, + .vmspace_free = svm_npt_free, + .vlapic_init = svm_vlapic_init, + .vlapic_cleanup = svm_vlapic_cleanup, #ifndef __FreeBSD__ - svm_savectx, - svm_restorectx, + .vmsavectx = 
svm_savectx, + .vmrestorectx = svm_restorectx, #endif }; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index eea036b253..ede3a54d66 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -2356,20 +2356,20 @@ ept_fault_type(uint64_t ept_qual) return (fault_type); } -static boolean_t +static bool ept_emulation_fault(uint64_t ept_qual) { int read, write; /* EPT fault on an instruction fetch doesn't make sense here */ if (ept_qual & EPT_VIOLATION_INST_FETCH) - return (FALSE); + return (false); /* EPT fault must be a read fault or a write fault */ read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0; write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; if ((read | write) == 0) - return (FALSE); + return (false); /* * The EPT violation must have been caused by accessing a @@ -2378,10 +2378,10 @@ ept_emulation_fault(uint64_t ept_qual) */ if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { - return (FALSE); + return (false); } - return (TRUE); + return (true); } static __inline int @@ -4284,26 +4284,26 @@ vmx_restorectx(void *arg, int vcpu) #endif /* __FreeBSD__ */ struct vmm_ops vmm_ops_intel = { - vmx_init, - vmx_cleanup, - vmx_restore, - vmx_vminit, - vmx_run, - vmx_vmcleanup, - vmx_getreg, - vmx_setreg, - vmx_getdesc, - vmx_setdesc, - vmx_getcap, - vmx_setcap, - ept_vmspace_alloc, - ept_vmspace_free, - vmx_vlapic_init, - vmx_vlapic_cleanup, + .init = vmx_init, + .cleanup = vmx_cleanup, + .resume = vmx_restore, + .vminit = vmx_vminit, + .vmrun = vmx_run, + .vmcleanup = vmx_vmcleanup, + .vmgetreg = vmx_getreg, + .vmsetreg = vmx_setreg, + .vmgetdesc = vmx_getdesc, + .vmsetdesc = vmx_setdesc, + .vmgetcap = vmx_getcap, + .vmsetcap = vmx_setcap, + .vmspace_alloc = ept_vmspace_alloc, + .vmspace_free = ept_vmspace_free, + .vlapic_init = vmx_vlapic_init, + .vlapic_cleanup = vmx_vlapic_cleanup, #ifndef __FreeBSD__ - vmx_savectx, - vmx_restorectx, + 
.vmsavectx = vmx_savectx, + .vmrestorectx = vmx_restorectx, #endif }; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 4a1a2cd358..9121e46b40 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -48,24 +48,18 @@ __FBSDID("$FreeBSD$"); #include "vmx.h" #include "vmx_msr.h" -static boolean_t +static bool vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) { - if (msr_val & (1UL << (bitpos + 32))) - return (TRUE); - else - return (FALSE); + return ((msr_val & (1UL << (bitpos + 32))) != 0); } -static boolean_t +static bool vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) { - if ((msr_val & (1UL << bitpos)) == 0) - return (TRUE); - else - return (FALSE); + return ((msr_val & (1UL << bitpos)) == 0); } uint32_t @@ -92,16 +86,13 @@ vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, { int i; uint64_t val, trueval; - boolean_t true_ctls_avail, one_allowed, zero_allowed; + bool true_ctls_avail, one_allowed, zero_allowed; /* We cannot ask the same bit to be set to both '1' and '0' */ if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) return (EINVAL); - if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) - true_ctls_avail = TRUE; - else - true_ctls_avail = FALSE; + true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0; val = rdmsr(ctl_reg); if (true_ctls_avail) diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 902080e34c..41c2c5b2f8 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); * Architecture Spec, September 2008. 
*/ +#define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1) + /* Section 10.4 "Register Descriptions" */ struct vtdmap { volatile uint32_t version; @@ -118,10 +120,11 @@ struct domain { static SLIST_HEAD(, domain) domhead; #define DRHD_MAX_UNITS 8 -static int drhd_num; -static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; -static int max_domains; -typedef int (*drhd_ident_func_t)(void); +static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS]; +static int drhd_num; +static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; +static int max_domains; +typedef int (*drhd_ident_func_t)(void); #ifndef __FreeBSD__ static dev_info_t *vtddips[DRHD_MAX_UNITS]; #endif @@ -180,6 +183,69 @@ domain_id(void) return (id); } +static struct vtdmap * +vtd_device_scope(uint16_t rid) +{ + int i, remaining, pathremaining; + char *end, *pathend; + struct vtdmap *vtdmap; + ACPI_DMAR_HARDWARE_UNIT *drhd; + ACPI_DMAR_DEVICE_SCOPE *device_scope; + ACPI_DMAR_PCI_PATH *path; + + for (i = 0; i < drhd_num; i++) { + drhd = drhds[i]; + + if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) { + /* + * From Intel VT-d arch spec, version 3.0: + * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported + * for a Segment, it must be enumerated by BIOS after all other + * DRHD structures for the same Segment. 
+ */ + vtdmap = vtdmaps[i]; + return(vtdmap); + } + + end = (char *)drhd + drhd->Header.Length; + remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT); + while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) { + device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining); + remaining -= device_scope->Length; + + switch (device_scope->EntryType){ + /* 0x01 and 0x02 are PCI device entries */ + case 0x01: + case 0x02: + break; + default: + continue; + } + + if (PCI_RID2BUS(rid) != device_scope->Bus) + continue; + + pathend = (char *)device_scope + device_scope->Length; + pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE); + while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) { + path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining); + pathremaining -= sizeof(ACPI_DMAR_PCI_PATH); + + if (PCI_RID2SLOT(rid) != path->Device) + continue; + if (PCI_RID2FUNC(rid) != path->Function) + continue; + + vtdmap = vtdmaps[i]; + return (vtdmap); + } + } + } + + /* No matching scope */ + return (NULL); +} + static void vtd_wbflush(struct vtdmap *vtdmap) { @@ -285,7 +351,7 @@ extern dev_info_t *vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *, int); static int vtd_init(void) { - int i, units, remaining; + int i, units, remaining, tmp; struct vtdmap *vtdmap; vm_paddr_t ctx_paddr; char *end; @@ -342,16 +408,16 @@ vtd_init(void) break; drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; + drhds[units] = drhd; #ifdef __FreeBSD__ - vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); + vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); #else vtddips[units] = vtd_get_dip(drhd, units); vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]); if (vtdmaps[units] == NULL) goto fail; - units++; #endif - if (units >= DRHD_MAX_UNITS) + if (++units >= DRHD_MAX_UNITS) break; remaining -= hdr->Length; } @@ -363,12 +429,18 @@ vtd_init(void) skip_dmar: #endif drhd_num = units; - vtdmap = vtdmaps[0]; - if (VTD_CAP_CM(vtdmap->cap) != 0) - panic("vtd_init: invalid caching 
mode"); + max_domains = 64 * 1024; /* maximum valid value */ + for (i = 0; i < drhd_num; i++){ + vtdmap = vtdmaps[i]; + + if (VTD_CAP_CM(vtdmap->cap) != 0) + panic("vtd_init: invalid caching mode"); - max_domains = vtd_max_domains(vtdmap); + /* take most compatible (minimum) value */ + if ((tmp = vtd_max_domains(vtdmap)) < max_domains) + max_domains = tmp; + } /* * Set up the root-table to point to the context-entry tables @@ -459,7 +531,6 @@ vtd_add_device(void *arg, uint16_t rid) struct vtdmap *vtdmap; uint8_t bus; - vtdmap = vtdmaps[0]; bus = PCI_RID2BUS(rid); ctxp = ctx_tables[bus]; pt_paddr = vtophys(dom->ptp); @@ -471,6 +542,10 @@ vtd_add_device(void *arg, uint16_t rid) (uint16_t)(ctxp[idx + 1] >> 8)); } + if ((vtdmap = vtd_device_scope(rid)) == NULL) + panic("vtd_add_device: device %x is not in scope for " + "any DMA remapping unit", rid); + /* * Order is important. The 'present' bit is set only after all fields * of the context pointer are initialized. @@ -654,8 +729,6 @@ vtd_create_domain(vm_paddr_t maxaddr) if (drhd_num <= 0) panic("vtd_create_domain: no dma remapping hardware available"); - vtdmap = vtdmaps[0]; - /* * Calculate AGAW. * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec. 
@@ -680,7 +753,14 @@ vtd_create_domain(vm_paddr_t maxaddr) pt_levels = 2; sagaw = 30; addrwidth = 0; - tmp = VTD_CAP_SAGAW(vtdmap->cap); + + tmp = ~0; + for (i = 0; i < drhd_num; i++) { + vtdmap = vtdmaps[i]; + /* take most compatible value */ + tmp &= VTD_CAP_SAGAW(vtdmap->cap); + } + for (i = 0; i < 5; i++) { if ((tmp & (1 << i)) != 0 && sagaw >= agaw) break; @@ -692,8 +772,8 @@ vtd_create_domain(vm_paddr_t maxaddr) } if (i >= 5) { - panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d", - VTD_CAP_SAGAW(vtdmap->cap), agaw); + panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d", + tmp, agaw); } dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK); @@ -721,7 +801,12 @@ vtd_create_domain(vm_paddr_t maxaddr) * There is not any code to deal with the demotion at the moment * so we disable superpage mappings altogether. */ - dom->spsmask = VTD_CAP_SPS(vtdmap->cap); + dom->spsmask = ~0; + for (i = 0; i < drhd_num; i++) { + vtdmap = vtdmaps[i]; + /* take most compatible value */ + dom->spsmask &= VTD_CAP_SPS(vtdmap->cap); + } #endif #else /* diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c index 9b3e7376d5..03f63798e7 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c +++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c @@ -3,6 +3,7 @@ * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2018 Joyent, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -180,20 +181,20 @@ static void pit_timer_start_cntr0(struct vatpit *vatpit) { struct channel *c; + struct bintime now, delta; + sbintime_t precision; c = &vatpit->channel[0]; if (c->initial != 0) { - sbintime_t precision; - struct bintime now, delta; - delta.sec = 0; delta.frac = vatpit->freq_bt.frac * c->initial; bintime_add(&c->callout_bt, &delta); precision = bttosbt(delta) >> tc_precexp; /* - * Reset 'callout_bt' if the time that the callout was supposed - * to fire is more than 'c->initial' ticks in the past. + * Reset 'callout_bt' if the time that the callout + * was supposed to fire is more than 'c->initial' + * ticks in the past. */ binuptime(&now); if (bintime_cmp(&c->callout_bt, &now, <)) { diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index 4e58249c8d..687e0e6a8e 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -339,7 +339,7 @@ vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) return (&lapic->lvt_cmci); case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; - return ((&lapic->lvt_timer) + i);; + return ((&lapic->lvt_timer) + i); default: panic("vlapic_get_lvt: invalid LVT\n"); } @@ -854,7 +854,8 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, */ CPU_ZERO(dmask); vcpuid = vm_apicid2vcpuid(vm, dest); - if (vcpuid < vm_get_maxcpus(vm)) + amask = vm_active_cpus(vm); + if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask)) CPU_SET(vcpuid, dmask); } else { /* diff --git a/usr/src/uts/i86pc/io/vmm/vm/vm_page.h b/usr/src/uts/i86pc/io/vmm/vm/vm_page.h index 4559fe6d4c..deb25a6cc0 100644 --- a/usr/src/uts/i86pc/io/vmm/vm/vm_page.h +++ b/usr/src/uts/i86pc/io/vmm/vm/vm_page.h @@ -19,9 +19,9 @@ #include "vm_glue.h" -void vm_page_lock(vm_page_t); -void vm_page_unhold(vm_page_t); -void vm_page_unlock(vm_page_t); +#define PQ_ACTIVE 1 + +void vm_page_unwire(vm_page_t, uint8_t); #define VM_PAGE_TO_PHYS(page) (mmu_ptob((uintptr_t)((page)->vmp_pfn))) diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 47a5f26cb7..2238536121 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -1008,7 +1008,7 @@ vmm_sysmem_maxaddr(struct vm *vm) } static void -vm_iommu_modify(struct vm *vm, boolean_t map) +vm_iommu_modify(struct vm *vm, bool map) { int i, sz; vm_paddr_t gpa, hpa; @@ -1083,8 +1083,8 @@ vm_iommu_modify(struct vm *vm, boolean_t map) #endif } -#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) -#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) +#define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) +#define vm_iommu_map(vm) vm_iommu_modify((vm), true) #ifdef __FreeBSD__ int @@ -1193,9 +1193,7 @@ vm_gpa_release(void *cookie) { vm_page_t m = cookie; - vm_page_lock(m); - vm_page_unhold(m); - vm_page_unlock(m); + vm_page_unwire(m, PQ_ACTIVE); } int @@ -1234,20 +1232,20 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) return (0); } -static boolean_t +static 
bool is_descriptor_table(int reg) { switch (reg) { case VM_REG_GUEST_IDTR: case VM_REG_GUEST_GDTR: - return (TRUE); + return (true); default: - return (FALSE); + return (false); } } -static boolean_t +static bool is_segment_register(int reg) { @@ -1260,9 +1258,9 @@ is_segment_register(int reg) case VM_REG_GUEST_GS: case VM_REG_GUEST_TR: case VM_REG_GUEST_LDTR: - return (TRUE); + return (true); default: - return (FALSE); + return (false); } } @@ -2622,12 +2620,12 @@ vm_hpet(struct vm *vm) } #ifdef __FreeBSD__ -boolean_t +bool vmm_is_pptdev(int bus, int slot, int func) { - int found, i, n; - int b, s, f; + int b, f, i, n, s; char *val, *cp, *cp2; + bool found; /* * XXX @@ -2641,7 +2639,7 @@ vmm_is_pptdev(int bus, int slot, int func) const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ - found = 0; + found = false; for (i = 0; names[i] != NULL && !found; i++) { cp = val = kern_getenv(names[i]); while (cp != NULL && *cp != '\0') { @@ -2650,7 +2648,7 @@ vmm_is_pptdev(int bus, int slot, int func) n = sscanf(cp, "%d/%d/%d", &b, &s, &f); if (n == 3 && bus == b && slot == s && func == f) { - found = 1; + found = true; break; } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.h b/usr/src/uts/i86pc/io/vmm/vmm_host.h index f12047819d..e0ea1ec927 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_host.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_host.h @@ -100,17 +100,12 @@ vmm_get_host_gdtrbase(void) #endif } -#ifdef __FreeBSD__ -struct pcpu; -extern struct pcpu __pcpu[]; -#endif - static __inline uint64_t vmm_get_host_gsbase(void) { #ifdef __FreeBSD__ - return ((uint64_t)&__pcpu[curcpu]); + return ((uint64_t)get_pcpu()); #else return (rdmsr(MSR_GSBASE)); #endif diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index ea96cd8db0..4a4fb07eba 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -91,6 +91,7 @@ enum 
{ VIE_OP_TYPE_BITTEST, VIE_OP_TYPE_TWOB_GRP15, VIE_OP_TYPE_ADD, + VIE_OP_TYPE_TEST, VIE_OP_TYPE_LAST }; @@ -235,6 +236,12 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x8F, .op_type = VIE_OP_TYPE_POP, }, + [0xF7] = { + /* XXX Group 3 extended opcode - not just TEST */ + .op_byte = 0xF7, + .op_type = VIE_OP_TYPE_TEST, + .op_flags = VIE_OP_F_IMM, + }, [0xFF] = { /* XXX Group 5 extended opcode - not just PUSH */ .op_byte = 0xFF, @@ -465,6 +472,41 @@ getaddflags(int opsize, uint64_t x, uint64_t y) return (getaddflags64(x, y)); } +/* + * Return the status flags that would result from doing (x & y). + */ +#define GETANDFLAGS(sz) \ +static u_long \ +getandflags##sz(uint##sz##_t x, uint##sz##_t y) \ +{ \ + u_long rflags; \ + \ + __asm __volatile("and %2,%1; pushfq; popq %0" : \ + "=r" (rflags), "+r" (x) : "m" (y)); \ + return (rflags); \ +} struct __hack + +GETANDFLAGS(8); +GETANDFLAGS(16); +GETANDFLAGS(32); +GETANDFLAGS(64); + +static u_long +getandflags(int opsize, uint64_t x, uint64_t y) +{ + KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, + ("getandflags: invalid operand size %d", opsize)); + + if (opsize == 1) + return (getandflags8(x, y)); + else if (opsize == 2) + return (getandflags16(x, y)); + else if (opsize == 4) + return (getandflags32(x, y)); + else + return (getandflags64(x, y)); +} + static int emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) @@ -1234,6 +1276,55 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int +emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +{ + int error, size; + uint64_t op1, rflags, rflags2; + + size = vie->opsize; + error = EINVAL; + + switch (vie->op.op_byte) { + case 0xF7: + /* + * F7 /0 test r/m16, imm16 + * F7 /0 test r/m32, imm32 + * REX.W + F7 /0 test r/m64, imm32 sign-extended to 64 + * + * 
Test mem (ModRM:r/m) with immediate and set status + * flags according to the results. The comparison is + * performed by ANDing the immediate with the first + * operand and then setting the status flags. + */ + if ((vie->reg & 7) != 0) + return (EINVAL); + + error = memread(vm, vcpuid, gpa, &op1, size, arg); + if (error) + return (error); + + rflags2 = getandflags(size, op1, vie->immediate); + break; + default: + return (EINVAL); + } + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return (error); + + /* + * OF and CF are cleared; the SF, ZF and PF flags are set according + * to the result; AF is undefined. + */ + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); + + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + return (error); +} + +static int emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1658,6 +1749,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_add(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_TEST: + error = emulate_test(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c index 43b2bebe97..57e4cfddf3 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c @@ -149,13 +149,10 @@ lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) return (0); } -static boolean_t +static bool x2apic_msr(u_int msr) { - if (msr >= 0x800 && msr <= 0xBFF) - return (TRUE); - else - return (FALSE); + return (msr >= 0x800 && msr <= 0xBFF); } static u_int @@ -165,14 +162,11 @@ x2apic_msr_to_regoff(u_int msr) return ((msr - 0x800) << 4); } -boolean_t +bool lapic_msr(u_int msr) { - if (x2apic_msr(msr) || (msr == MSR_APICBASE)) - return (TRUE); - else - 
return (FALSE); + return (x2apic_msr(msr) || msr == MSR_APICBASE); } int diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.h b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h index da3b0ff660..58508ad70b 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.h @@ -45,7 +45,7 @@ struct vm; -boolean_t lapic_msr(u_int num); +bool lapic_msr(u_int num); int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu); int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval, diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c index 66a67d9529..ddae4202b7 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c @@ -982,35 +982,19 @@ vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp, } void -vm_page_lock(vm_page_t vmp) +vm_page_unwire(vm_page_t vmp, uint8_t nqueue __unused) { ASSERT(!MUTEX_HELD(&vmp->vmp_lock)); - mutex_enter(&vmp->vmp_lock); -} - -void -vm_page_unlock(vm_page_t vmp) -{ - boolean_t purge = (vmp->vmp_pfn == PFN_INVALID); - - ASSERT(MUTEX_HELD(&vmp->vmp_lock)); - - mutex_exit(&vmp->vmp_lock); - if (purge) { - mutex_destroy(&vmp->vmp_lock); - kmem_free(vmp, sizeof (*vmp)); - } -} - -void -vm_page_unhold(vm_page_t vmp) -{ - ASSERT(MUTEX_HELD(&vmp->vmp_lock)); VERIFY(vmp->vmp_pfn != PFN_INVALID); vm_object_deallocate(vmp->vmp_obj_held); vmp->vmp_obj_held = NULL; vmp->vmp_pfn = PFN_INVALID; + + mutex_exit(&vmp->vmp_lock); + + mutex_destroy(&vmp->vmp_lock); + kmem_free(vmp, sizeof (*vmp)); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.c b/usr/src/uts/i86pc/io/vmm/vmm_util.c index 3eadfe57e5..b8acff9bbc 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_util.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_util.c @@ -50,26 +50,20 @@ __FBSDID("$FreeBSD$"); #include "vmm_util.h" -boolean_t +bool vmm_is_intel(void) { - if (strcmp(cpu_vendor, "GenuineIntel") == 0) - return (TRUE); - else - return (FALSE); + return (strcmp(cpu_vendor, 
"GenuineIntel") == 0); } -boolean_t +bool vmm_is_amd(void) { - if (strcmp(cpu_vendor, "AuthenticAMD") == 0) - return (TRUE); - else - return (FALSE); + return (strcmp(cpu_vendor, "AuthenticAMD") == 0); } -boolean_t +bool vmm_supports_1G_pages(void) { unsigned int regs[4]; @@ -82,9 +76,9 @@ vmm_supports_1G_pages(void) if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); if (regs[3] & (1 << 26)) - return (TRUE); + return (true); } - return (FALSE); + return (false); } #ifdef __FreeBSD__ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.h b/usr/src/uts/i86pc/io/vmm/vmm_util.h index fc7e7364c7..8c65e7e3a6 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_util.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_util.h @@ -33,9 +33,9 @@ struct trapframe; -boolean_t vmm_is_intel(void); -boolean_t vmm_is_amd(void); -boolean_t vmm_supports_1G_pages(void); +bool vmm_is_intel(void); +bool vmm_is_amd(void); +bool vmm_supports_1G_pages(void); void dump_trapframe(struct trapframe *tf); diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index ac8f14b042..0bbc219b7f 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -127,10 +127,39 @@ enum x2apic_state { #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) - -#define VM_MAX_NAMELEN 32 +#ifndef __FreeBSD__ +/* + * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does. + * Instead of picking an arbitrary value we will just rely on the same + * calculation that's made below. If this calculation ever changes we need to + * update the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file. + */ +#else +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. 
+ * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#endif +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) #ifdef _KERNEL +CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); struct vm; struct vm_exception; @@ -309,12 +338,12 @@ vcpu_reqidle(struct vm_eventinfo *info) int vcpu_debugged(struct vm *vm, int vcpuid); /* - * Return 1 if device indicated by bus/slot/func is supposed to be a + * Return true if device indicated by bus/slot/func is supposed to be a * pci passthrough device. * - * Return 0 otherwise. + * Return false otherwise. */ -int vmm_is_pptdev(int bus, int slot, int func); +bool vmm_is_pptdev(int bus, int slot, int func); void *vm_iommu_domain(struct vm *vm); |