author     Patrick Mooney <pmooney@pfmooney.com>    2018-11-27 01:52:56 +0000
committer  Patrick Mooney <pmooney@pfmooney.com>    2018-12-05 19:16:29 +0000
commit     5b87e03ada692fb585a6a15a0cccfff5eb39e8ea (patch)
tree       8ece2c3fcf45af7b9b912989bef258635495036a
parent     35384f5aac82fb54e996c2526a0fc6064466cf05 (diff)
OS-7397 reduce lock contention in bhyve instr emul
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vm/vm_glue.h   10
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c   73
2 files changed, 59 insertions, 24 deletions
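
The hunks below replace the mutex-guarded reference count on vm_object with lockless atomics. As a quick illustration of the pattern (not code from this commit), here is a minimal sketch using the illumos atomic_inc_uint_nv()/atomic_dec_uint_nv() primitives; the obj_t type and obj_hold()/obj_rele() names are hypothetical stand-ins for vm_object_reference()/vm_object_deallocate():

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/debug.h>
#include <limits.h>	/* UINT_MAX; kernel code gets it via its own headers */

/* Hypothetical object whose refcount is manipulated only with atomic ops. */
typedef struct obj {
	uint_t	o_refcnt;
	/* ... immutable and lock-protected fields elided ... */
} obj_t;

static void
obj_hold(obj_t *op)
{
	/* A new value of 0 means the counter wrapped: overflow. */
	uint_t ref = atomic_inc_uint_nv(&op->o_refcnt);
	VERIFY3U(ref, !=, 0);
}

static void
obj_rele(obj_t *op)
{
	/* A new value of UINT_MAX means a decrement from 0: underflow. */
	uint_t ref = atomic_dec_uint_nv(&op->o_refcnt);
	VERIFY3U(ref, !=, UINT_MAX);
	if (ref == 0) {
		/* last reference dropped; tear the object down here */
	}
}

Because hold/release no longer take vmo_lock, the struct change in vm_glue.h also regroups the remaining fields so the lock only covers what still needs it.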
diff --git a/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h b/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
index cb6d4c358e..c5620a91f6 100644
--- a/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
+++ b/usr/src/uts/i86pc/io/vmm/vm/vm_glue.h
@@ -44,6 +44,7 @@ struct vmspace {
/* Implementation private */
kmutex_t vms_lock;
+ boolean_t vms_map_changing;
struct pmap vms_pmap;
uintptr_t vms_size; /* fixed after creation */
@@ -53,13 +54,16 @@ struct vmspace {
typedef pfn_t (*vm_pager_fn_t)(vm_object_t, uintptr_t, pfn_t *, uint_t *);
struct vm_object {
- kmutex_t vmo_lock;
- uint_t vmo_refcnt;
+ uint_t vmo_refcnt; /* manipulated with atomic ops */
+
+ /* This group of fields is fixed at creation time */
objtype_t vmo_type;
size_t vmo_size;
- vm_memattr_t vmo_attr;
vm_pager_fn_t vmo_pager;
void *vmo_data;
+
+ kmutex_t vmo_lock; /* protects fields below */
+ vm_memattr_t vmo_attr;
};
struct vm_page {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
index 95b0e13a95..58c10d9da0 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_vm.c
@@ -136,7 +136,8 @@ static pfn_t eptable_mapin(eptable_map_t *, uintptr_t, pfn_t, uint_t, uint_t,
vm_memattr_t);
static void eptable_mapout(eptable_map_t *, uintptr_t);
static int eptable_find(eptable_map_t *, uintptr_t, pfn_t *, uint_t *);
-static vmspace_mapping_t *vm_mapping_find(struct vmspace *, uintptr_t, size_t);
+static vmspace_mapping_t *vm_mapping_find(struct vmspace *, uintptr_t, size_t,
+ boolean_t);
static void vm_mapping_remove(struct vmspace *, vmspace_mapping_t *);
static kmutex_t eptable_map_lock;
@@ -230,7 +231,7 @@ vmspace_find_kva(struct vmspace *vms, uintptr_t addr, size_t size)
void *result = NULL;
mutex_enter(&vms->vms_lock);
- vmsm = vm_mapping_find(vms, addr, size);
+ vmsm = vm_mapping_find(vms, addr, size, B_FALSE);
if (vmsm != NULL) {
struct vm_object *vmo = vmsm->vmsm_object;
@@ -1041,11 +1042,10 @@ vm_object_deallocate(vm_object_t vmo)
{
ASSERT(vmo != NULL);
- mutex_enter(&vmo->vmo_lock);
- VERIFY(vmo->vmo_refcnt);
- vmo->vmo_refcnt--;
- if (vmo->vmo_refcnt != 0) {
- mutex_exit(&vmo->vmo_lock);
+ uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt);
+ /* underflow would be a deadly serious mistake */
+ VERIFY3U(ref, !=, UINT_MAX);
+ if (ref != 0) {
return;
}
@@ -1064,7 +1064,6 @@ vm_object_deallocate(vm_object_t vmo)
vmo->vmo_pager = vm_object_pager_none;
vmo->vmo_data = NULL;
vmo->vmo_size = 0;
- mutex_exit(&vmo->vmo_lock);
mutex_destroy(&vmo->vmo_lock);
kmem_free(vmo, sizeof (*vmo));
}
@@ -1090,21 +1089,32 @@ vm_object_reference(vm_object_t vmo)
{
ASSERT(vmo != NULL);
- mutex_enter(&vmo->vmo_lock);
- vmo->vmo_refcnt++;
- mutex_exit(&vmo->vmo_lock);
+ uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt);
+ /* overflow would be a deadly serious mistake */
+ VERIFY3U(ref, !=, 0);
}
static vmspace_mapping_t *
-vm_mapping_find(struct vmspace *vms, uintptr_t addr, size_t size)
+vm_mapping_find(struct vmspace *vms, uintptr_t addr, size_t size,
+ boolean_t no_lock)
{
vmspace_mapping_t *vmsm;
list_t *ml = &vms->vms_maplist;
const uintptr_t range_end = addr + size;
- ASSERT(MUTEX_HELD(&vms->vms_lock));
ASSERT(addr <= range_end);
+ if (no_lock) {
+ /*
+ * This check should be superfluous with the protections
+ * promised by the bhyve logic which calls into the VM shim.
+ * All the same, it is cheap to be paranoid.
+ */
+ VERIFY(!vms->vms_map_changing);
+ } else {
+ VERIFY(MUTEX_HELD(&vms->vms_lock));
+ }
+
if (addr >= vms->vms_size) {
return (NULL);
}
@@ -1148,6 +1158,7 @@ vm_mapping_remove(struct vmspace *vms, vmspace_mapping_t *vmsm)
list_t *ml = &vms->vms_maplist;
ASSERT(MUTEX_HELD(&vms->vms_lock));
+ ASSERT(vms->vms_map_changing);
list_remove(ml, vmsm);
vm_object_deallocate(vmsm->vmsm_object);
@@ -1186,7 +1197,7 @@ vm_fault(vm_map_t map, vm_offset_t off, vm_prot_t type, int flag)
}
/* Try to wire up the address */
- if ((vmsm = vm_mapping_find(vms, addr, 0)) == NULL) {
+ if ((vmsm = vm_mapping_find(vms, addr, 0, B_FALSE)) == NULL) {
mutex_exit(&vms->vms_lock);
return (FC_NOMAP);
}
@@ -1224,10 +1235,26 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
ASSERT(len == PAGESIZE);
ASSERT(max_count == 1);
- mutex_enter(&vms->vms_lock);
- if ((vmsm = vm_mapping_find(vms, vaddr, PAGESIZE)) == NULL ||
+ /*
+ * Unlike practically all of the other logic that queries or
+ * manipulates vmspace objects, vm_fault_quick_hold_pages() does so
+ * without holding vms_lock. This is safe because bhyve ensures that
+ * changes to the vmspace map occur only when all other threads have
+ * been excluded from running.
+ *
+ * Since this task can count on vms_maplist remaining static and does
+ * not need to modify the pmap (like vm_fault might), it can proceed
+ * without the lock. The vm_object has independent refcount and lock
+ * protection, while the vmo_pager methods do not rely on vms_lock for
+ * safety.
+ *
+ * Performing this work without locks is critical in cases where
+ * multiple vCPUs require simultaneous instruction emulation, such as
+ * for frequent guest APIC accesses on a host that lacks hardware
+ * acceleration for that behavior.
+ */
+ if ((vmsm = vm_mapping_find(vms, vaddr, PAGESIZE, B_TRUE)) == NULL ||
(prot & ~vmsm->vmsm_prot) != 0) {
- mutex_exit(&vms->vms_lock);
return (-1);
}
@@ -1239,7 +1266,6 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vmp->vmp_pfn = vmo->vmo_pager(vmo, VMSM_OFFSET(vmsm, vaddr), NULL,
NULL);
- mutex_exit(&vms->vms_lock);
*ma = vmp;
return (1);
}
@@ -1275,6 +1301,7 @@ vm_map_find(vm_map_t map, vm_object_t vmo, vm_ooffset_t off, vm_offset_t *addr,
vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP);
mutex_enter(&vms->vms_lock);
+ vms->vms_map_changing = B_TRUE;
if (!vm_mapping_gap(vms, base, size)) {
res = ENOMEM;
goto out;
@@ -1292,6 +1319,7 @@ vm_map_find(vm_map_t map, vm_object_t vmo, vm_ooffset_t off, vm_offset_t *addr,
*addr = (vm_offset_t)base;
}
out:
+ vms->vms_map_changing = B_FALSE;
mutex_exit(&vms->vms_lock);
if (res != 0) {
kmem_free(vmsm, sizeof (*vmsm));
@@ -1311,9 +1339,11 @@ vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
ASSERT(start < end);
mutex_enter(&vms->vms_lock);
+ vms->vms_map_changing = B_TRUE;
/* expect to match existing mapping exactly */
- if ((vmsm = vm_mapping_find(vms, addr, size)) == NULL ||
+ if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL ||
vmsm->vmsm_addr != addr || vmsm->vmsm_len != size) {
+ vms->vms_map_changing = B_FALSE;
mutex_exit(&vms->vms_lock);
return (ENOENT);
}
@@ -1331,6 +1361,7 @@ vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
}
vm_mapping_remove(vms, vmsm);
+ vms->vms_map_changing = B_FALSE;
mutex_exit(&vms->vms_lock);
return (0);
}
@@ -1348,7 +1379,7 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
mutex_enter(&vms->vms_lock);
/* For the time being, only exact-match mappings are expected */
- if ((vmsm = vm_mapping_find(vms, addr, size)) == NULL) {
+ if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL) {
mutex_exit(&vms->vms_lock);
return (FC_NOMAP);
}
@@ -1424,7 +1455,7 @@ vm_segmap_space(struct vmspace *vms, off_t off, struct as *as, caddr_t *addrp,
}
mutex_enter(&vms->vms_lock);
- if ((vmsm = vm_mapping_find(vms, addr, size)) == NULL) {
+ if ((vmsm = vm_mapping_find(vms, addr, size, B_FALSE)) == NULL) {
mutex_exit(&vms->vms_lock);
return (ENXIO);
}
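
The block comment added in vm_fault_quick_hold_pages() above spells out the contract that makes the lockless lookup safe: map mutations happen only with vms_lock held while bhyve has excluded all other threads from running, and vms_map_changing exists solely so the unlocked path can assert that contract cheaply. A minimal sketch of that writer/reader pairing (not code from this commit; map_t, map_insert(), and map_lookup_nolock() are hypothetical stand-ins for the vmspace, vm_map_find()/vm_map_remove(), and the no_lock variant of vm_mapping_find()):

#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/debug.h>

/* Hypothetical container mirroring the vmspace lock/flag arrangement. */
typedef struct map {
	kmutex_t	m_lock;		/* initialized elsewhere with mutex_init() */
	boolean_t	m_changing;	/* set only while m_lock is held */
	/* ... map list elided ... */
} map_t;

/* Writer: every mutation happens under the lock with the flag raised. */
static void
map_insert(map_t *mp /* , entry ... */)
{
	mutex_enter(&mp->m_lock);
	mp->m_changing = B_TRUE;
	/* ... modify the map list ... */
	mp->m_changing = B_FALSE;
	mutex_exit(&mp->m_lock);
}

/*
 * Lock-free reader: callers guarantee no concurrent mutation (in bhyve,
 * vCPUs are held out of the map-changing paths), so the flag check is a
 * cheap paranoia assertion rather than a synchronization mechanism.
 */
static void *
map_lookup_nolock(map_t *mp /* , addr ... */)
{
	VERIFY(!mp->m_changing);
	/* ... walk the map list without taking m_lock ... */
	return (NULL);
}

Skipping the mutex on this path is what removes the contention when several vCPUs need instruction emulation at once, e.g. frequent guest APIC accesses on hosts without hardware acceleration for them.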