summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/vm
diff options
context:
space:
mode:
authorMichael Corcoran <Michael.Corcoran@Sun.COM>2010-04-16 16:34:51 -0700
committerMichael Corcoran <Michael.Corcoran@Sun.COM>2010-04-16 16:34:51 -0700
commit23d9e5ac1800241d2b06a5418bd3fdc6ec3b48b5 (patch)
tree2523c5b99667b7092fb39adda66f5e816a9ff17f /usr/src/uts/common/vm
parent85bcc4e57d6d451b2647973b01b8ab11c489351a (diff)
downloadillumos-joyent-23d9e5ac1800241d2b06a5418bd3fdc6ec3b48b5.tar.gz
6322069 Unscalability of AH_LOCK_SIZE causes anonhash_lock contention on larger systems
Diffstat (limited to 'usr/src/uts/common/vm')
-rw-r--r-- usr/src/uts/common/vm/anon.h    | 14
-rw-r--r-- usr/src/uts/common/vm/vm_anon.c | 84
-rw-r--r-- usr/src/uts/common/vm/vm_swap.c | 16
3 files changed, 67 insertions, 47 deletions
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h
index 13672d5c0b..652fcc0951 100644
--- a/usr/src/uts/common/vm/anon.h
+++ b/usr/src/uts/common/vm/anon.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -39,8 +38,6 @@
#ifndef _VM_ANON_H
#define _VM_ANON_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/cred.h>
#include <sys/zone.h>
#include <vm/seg.h>
@@ -116,7 +113,7 @@ struct anon {
*/
extern kmutex_t anoninfo_lock;
extern kmutex_t swapinfo_lock;
-extern kmutex_t anonhash_lock[];
+extern pad_mutex_t *anonhash_lock;
extern pad_mutex_t anon_array_lock[];
extern kcondvar_t anon_array_cv[];
@@ -130,8 +127,11 @@ extern struct anon **anon_hash;
#define ANON_HASH(VP, OFF) \
((((uintptr_t)(VP) >> 7) ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1))
-#define AH_LOCK_SIZE 64
-#define AH_LOCK(vp, off) (ANON_HASH((vp), (off)) & (AH_LOCK_SIZE -1))
+#define AH_LOCK_SIZE (2 << NCPU_LOG2)
+
+#define AH_MUTEX(vp, off) \
+ (&anonhash_lock[(ANON_HASH((vp), (off)) & \
+ (AH_LOCK_SIZE - 1))].pad_mutex)
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index 5018c41c2e..6ded5d7192 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -143,6 +143,19 @@ struct anon **anon_hash;
static struct kmem_cache *anon_cache;
static struct kmem_cache *anonmap_cache;
+pad_mutex_t *anonhash_lock;
+
+/*
+ * Used to make the increment of all refcnts of all anon slots of a large
+ * page appear to be atomic. The lock is grabbed for the first anon slot of
+ * a large page.
+ */
+pad_mutex_t *anonpages_hash_lock;
+
+#define APH_MUTEX(vp, off) \
+ (&anonpages_hash_lock[(ANON_HASH((vp), (off)) & \
+ (AH_LOCK_SIZE - 1))].pad_mutex)
+
#ifdef VM_STATS
static struct anonvmstats_str {
ulong_t getpages[30];
@@ -179,19 +192,32 @@ anonmap_cache_destructor(void *buf, void *cdrarg)
mutex_destroy(&amp->a_purgemtx);
}
-kmutex_t anonhash_lock[AH_LOCK_SIZE];
-kmutex_t anonpages_hash_lock[AH_LOCK_SIZE];
-
void
anon_init(void)
{
int i;
+ pad_mutex_t *tmp;
- anon_hash_size = 1L << highbit(physmem / ANON_HASHAVELEN);
+ /* These both need to be powers of 2 so round up to the next power */
+ anon_hash_size = 1L << highbit((physmem / ANON_HASHAVELEN) - 1);
+
+ /*
+ * We need to align the anonhash_lock and anonpages_hash_lock arrays
+ * to a 64B boundary to avoid false sharing. We add 63B to our
+ * allocation so that we can get a 64B aligned address to use.
+ * We allocate both of these together to avoid wasting an additional
+ * 63B.
+ */
+ tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63,
+ KM_SLEEP);
+ anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64);
+ anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE;
for (i = 0; i < AH_LOCK_SIZE; i++) {
- mutex_init(&anonhash_lock[i], NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&anonpages_hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT,
+ NULL);
+ mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL,
+ MUTEX_DEFAULT, NULL);
}
for (i = 0; i < ANON_LOCKSIZE; i++) {
@@ -225,7 +251,7 @@ anon_addhash(struct anon *ap)
{
int index;
- ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]));
+ ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
index = ANON_HASH(ap->an_vp, ap->an_off);
ap->an_hash = anon_hash[index];
anon_hash[index] = ap;
@@ -236,7 +262,7 @@ anon_rmhash(struct anon *ap)
{
struct anon **app;
- ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]));
+ ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)];
*app; app = &((*app)->an_hash)) {
@@ -942,7 +968,7 @@ anon_alloc(struct vnode *vp, anoff_t off)
ap->an_refcnt = 1;
ap->an_pvp = NULL;
ap->an_poff = 0;
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
anon_addhash(ap);
mutex_exit(ahm);
@@ -975,7 +1001,7 @@ anon_swap_free(struct anon *ap, page_t *pp)
return;
page_io_lock(pp);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
ASSERT(ap->an_refcnt != 0);
@@ -1007,7 +1033,7 @@ anon_decref(struct anon *ap)
anoff_t off;
kmutex_t *ahm;
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
ASSERT(ap->an_refcnt != 0);
if (ap->an_refcnt == 0)
@@ -1063,7 +1089,7 @@ anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index)
if (ap == NULL)
return (0);
- ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahmpages);
ASSERT(ap->an_refcnt >= 1);
if (ap->an_refcnt == 1) {
@@ -1128,7 +1154,7 @@ anon_decref_pages(
VM_STAT_ADD(anonvmstats.decrefpages[0]);
if (ap != NULL) {
- ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahmpages);
ASSERT((refcnt = ap->an_refcnt) != 0);
VM_STAT_ADD(anonvmstats.decrefpages[1]);
@@ -1156,8 +1182,7 @@ anon_decref_pages(
pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
if (pp == NULL || pp->p_szc == 0) {
VM_STAT_ADD(anonvmstats.decrefpages[3]);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp,
- ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
(void) anon_set_ptr(ahp, an_idx + i, NULL,
ANON_SLEEP);
mutex_enter(ahm);
@@ -1224,8 +1249,7 @@ anon_decref_pages(
ap = anon_get_ptr(ahp, an_idx + j);
ASSERT(ap != NULL &&
ap->an_refcnt == 1);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp,
- ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
(void) anon_set_ptr(ahp, an_idx + j,
NULL, ANON_SLEEP);
mutex_enter(ahm);
@@ -1262,7 +1286,7 @@ anon_decref_pages(
} else {
VM_STAT_ADD(anonvmstats.decrefpages[8]);
(void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
ap->an_refcnt--;
mutex_exit(ahm);
@@ -1305,7 +1329,7 @@ anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new,
break;
(void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
ap->an_refcnt++;
@@ -1397,11 +1421,9 @@ anon_dup_fill_holes(
* getting an anonpages_hash_lock for the
* first anon slot of a large page.
*/
- int hash = AH_LOCK(ap->an_vp, ap->an_off);
-
VM_STAT_ADD(anonvmstats.dupfillholes[2]);
- ahmpages = &anonpages_hash_lock[hash];
+ ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahmpages);
/*LINTED*/
ASSERT(refcnt = ap->an_refcnt);
@@ -1411,7 +1433,7 @@ anon_dup_fill_holes(
}
(void) anon_set_ptr(new, new_idx + off + i, ap,
ANON_SLEEP);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
ASSERT(i == 0 || ahmpages == NULL ||
@@ -1680,7 +1702,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size)
continue;
}
- ahm = &anonhash_lock[AH_LOCK(vp, off)];
+ ahm = AH_MUTEX(vp, off);
mutex_enter(ahm);
ASSERT(ap->an_refcnt != 0);
/*
@@ -1767,7 +1789,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size)
if (ap == NULL)
break;
swap_xlate(ap, &vp, &off);
- ahm = &anonhash_lock[AH_LOCK(vp, off)];
+ ahm = AH_MUTEX(vp, off);
mutex_enter(ahm);
ASSERT(ap->an_refcnt != 0);
@@ -1830,7 +1852,7 @@ anon_getpage(
* routine does.
*/
if (pl != NULL && (pp = page_lookup(vp, (u_offset_t)off, SE_SHARED))) {
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
if (ap->an_refcnt == 1)
*protp = PROT_ALL;
@@ -1854,7 +1876,7 @@ anon_getpage(
seg, addr, rw, cred, NULL);
if (err == 0 && pl != NULL) {
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
if (ap->an_refcnt != 1)
*protp &= ~PROT_WRITE; /* make read-only */
@@ -2511,8 +2533,7 @@ anon_map_privatepages(
* first anon slot of a large page.
*/
if (ap != NULL) {
- ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp,
- ap->an_off)];
+ ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahmpages);
if (ap->an_refcnt == 1) {
VM_STAT_ADD(anonvmstats.privatepages[4]);
@@ -2820,8 +2841,7 @@ anon_map_createpages(
*/
if (ap->an_pvp != NULL) {
page_io_lock(pp);
- ahm = &anonhash_lock[AH_LOCK(ap->an_vp,
- ap->an_off)];
+ ahm = AH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahm);
if (ap->an_pvp != NULL) {
swap_phys_free(ap->an_pvp,
@@ -3007,7 +3027,7 @@ anon_try_demote_pages(
ap = anon_get_ptr(ahp, sidx);
if (ap != NULL && private) {
VM_STAT_ADD(anonvmstats.demotepages[1]);
- ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+ ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
mutex_enter(ahmpages);
}
diff --git a/usr/src/uts/common/vm/vm_swap.c b/usr/src/uts/common/vm/vm_swap.c
index 61210201aa..ed325d18d4 100644
--- a/usr/src/uts/common/vm/vm_swap.c
+++ b/usr/src/uts/common/vm/vm_swap.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -357,7 +356,7 @@ swap_anon(struct vnode *vp, u_offset_t off)
{
struct anon *ap;
- ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(vp, off)]));
+ ASSERT(MUTEX_HELD(AH_MUTEX(vp, off)));
for (ap = anon_hash[ANON_HASH(vp, off)]; ap != NULL; ap = ap->an_hash) {
if (ap->an_vp == vp && ap->an_off == off)
@@ -1452,7 +1451,8 @@ swapdel(
* may change under us.
*/
for (app = anon_hash; app < &anon_hash[ANON_HASH_SIZE]; app++) {
- ahm = &anonhash_lock[(app-anon_hash) & (AH_LOCK_SIZE - 1)];
+ ahm = &anonhash_lock[(app - anon_hash) &
+ (AH_LOCK_SIZE - 1)].pad_mutex;
mutex_enter(ahm);
top:
for (ap = *app; ap != NULL; ap = ap->an_hash) {
@@ -1612,7 +1612,7 @@ again:
*/
if (!alloc_pg)
page_io_lock(pp);
- ahm = &anonhash_lock[AH_LOCK(vp, off)];
+ ahm = AH_MUTEX(vp, off);
mutex_enter(ahm);
ap = swap_anon(vp, off);
if ((ap == NULL || ap->an_pvp == NULL) && alloc_pg) {
@@ -1681,7 +1681,7 @@ swap_newphysname(
* No swap available so return error unless requested
* offset is already backed in which case return that.
*/
- ahm = &anonhash_lock[AH_LOCK(vp, offset)];
+ ahm = AH_MUTEX(vp, offset);
mutex_enter(ahm);
if ((ap = swap_anon(vp, offset)) == NULL) {
error = SE_NOANON;
@@ -1709,7 +1709,7 @@ swap_newphysname(
for (off = start, poff = pstart; poff < pstart + plen;
off += PAGESIZE, poff += PAGESIZE) {
- ahm = &anonhash_lock[AH_LOCK(vp, off)];
+ ahm = AH_MUTEX(vp, off);
mutex_enter(ahm);
if ((ap = swap_anon(vp, off)) != NULL) {
/* Free old slot if any, and assign new one */
@@ -1779,7 +1779,7 @@ swap_getphysname(
int error = 0;
kmutex_t *ahm;
- ahm = &anonhash_lock[AH_LOCK(vp, off)];
+ ahm = AH_MUTEX(vp, off);
mutex_enter(ahm);
/* Get anon slot for vp, off */