diff options
| author | Michael Corcoran <Michael.Corcoran@Sun.COM> | 2010-04-16 16:34:51 -0700 |
|---|---|---|
| committer | Michael Corcoran <Michael.Corcoran@Sun.COM> | 2010-04-16 16:34:51 -0700 |
| commit | 23d9e5ac1800241d2b06a5418bd3fdc6ec3b48b5 (patch) | |
| tree | 2523c5b99667b7092fb39adda66f5e816a9ff17f /usr/src/uts/common/vm | |
| parent | 85bcc4e57d6d451b2647973b01b8ab11c489351a (diff) | |
| download | illumos-joyent-23d9e5ac1800241d2b06a5418bd3fdc6ec3b48b5.tar.gz | |
6322069 Unscalability of AH_LOCK_SIZE causes anonhash_lock contention on larger systems
Diffstat (limited to 'usr/src/uts/common/vm')
| -rw-r--r-- | usr/src/uts/common/vm/anon.h | 14 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_anon.c | 84 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_swap.c | 16 |
3 files changed, 67 insertions, 47 deletions
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h index 13672d5c0b..652fcc0951 100644 --- a/usr/src/uts/common/vm/anon.h +++ b/usr/src/uts/common/vm/anon.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -39,8 +38,6 @@ #ifndef _VM_ANON_H #define _VM_ANON_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/cred.h> #include <sys/zone.h> #include <vm/seg.h> @@ -116,7 +113,7 @@ struct anon { */ extern kmutex_t anoninfo_lock; extern kmutex_t swapinfo_lock; -extern kmutex_t anonhash_lock[]; +extern pad_mutex_t *anonhash_lock; extern pad_mutex_t anon_array_lock[]; extern kcondvar_t anon_array_cv[]; @@ -130,8 +127,11 @@ extern struct anon **anon_hash; #define ANON_HASH(VP, OFF) \ ((((uintptr_t)(VP) >> 7) ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1)) -#define AH_LOCK_SIZE 64 -#define AH_LOCK(vp, off) (ANON_HASH((vp), (off)) & (AH_LOCK_SIZE -1)) +#define AH_LOCK_SIZE (2 << NCPU_LOG2) + +#define AH_MUTEX(vp, off) \ + (&anonhash_lock[(ANON_HASH((vp), (off)) & \ + (AH_LOCK_SIZE - 1))].pad_mutex) #endif /* _KERNEL */ diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c index 5018c41c2e..6ded5d7192 100644 --- a/usr/src/uts/common/vm/vm_anon.c +++ b/usr/src/uts/common/vm/vm_anon.c @@ -143,6 +143,19 @@ struct anon **anon_hash; static struct kmem_cache *anon_cache; static struct kmem_cache *anonmap_cache; +pad_mutex_t *anonhash_lock; + +/* + * Used to make the increment of all refcnts of all anon slots of a large + * page appear to be atomic. The lock is grabbed for the first anon slot of + * a large page. + */ +pad_mutex_t *anonpages_hash_lock; + +#define APH_MUTEX(vp, off) \ + (&anonpages_hash_lock[(ANON_HASH((vp), (off)) & \ + (AH_LOCK_SIZE - 1))].pad_mutex) + #ifdef VM_STATS static struct anonvmstats_str { ulong_t getpages[30]; @@ -179,19 +192,32 @@ anonmap_cache_destructor(void *buf, void *cdrarg) mutex_destroy(&->a_purgemtx); } -kmutex_t anonhash_lock[AH_LOCK_SIZE]; -kmutex_t anonpages_hash_lock[AH_LOCK_SIZE]; - void anon_init(void) { int i; + pad_mutex_t *tmp; - anon_hash_size = 1L << highbit(physmem / ANON_HASHAVELEN); + /* These both need to be powers of 2 so round up to the next power */ + anon_hash_size = 1L << highbit((physmem / ANON_HASHAVELEN) - 1); + + /* + * We need to align the anonhash_lock and anonpages_hash_lock arrays + * to a 64B boundary to avoid false sharing. We add 63B to our + * allocation so that we can get a 64B aligned address to use. + * We allocate both of these together to avoid wasting an additional + * 63B. + */ + tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63, + KM_SLEEP); + anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64); + anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE; for (i = 0; i < AH_LOCK_SIZE; i++) { - mutex_init(&anonhash_lock[i], NULL, MUTEX_DEFAULT, NULL); - mutex_init(&anonpages_hash_lock[i], NULL, MUTEX_DEFAULT, NULL); + mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT, + NULL); + mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL, + MUTEX_DEFAULT, NULL); } for (i = 0; i < ANON_LOCKSIZE; i++) { @@ -225,7 +251,7 @@ anon_addhash(struct anon *ap) { int index; - ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)])); + ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off))); index = ANON_HASH(ap->an_vp, ap->an_off); ap->an_hash = anon_hash[index]; anon_hash[index] = ap; @@ -236,7 +262,7 @@ anon_rmhash(struct anon *ap) { struct anon **app; - ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)])); + ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off))); for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)]; *app; app = &((*app)->an_hash)) { @@ -942,7 +968,7 @@ anon_alloc(struct vnode *vp, anoff_t off) ap->an_refcnt = 1; ap->an_pvp = NULL; ap->an_poff = 0; - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); anon_addhash(ap); mutex_exit(ahm); @@ -975,7 +1001,7 @@ anon_swap_free(struct anon *ap, page_t *pp) return; page_io_lock(pp); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); ASSERT(ap->an_refcnt != 0); @@ -1007,7 +1033,7 @@ anon_decref(struct anon *ap) anoff_t off; kmutex_t *ahm; - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); ASSERT(ap->an_refcnt != 0); if (ap->an_refcnt == 0) @@ -1063,7 +1089,7 @@ anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index) if (ap == NULL) return (0); - ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahmpages); ASSERT(ap->an_refcnt >= 1); if (ap->an_refcnt == 1) { @@ -1128,7 +1154,7 @@ anon_decref_pages( VM_STAT_ADD(anonvmstats.decrefpages[0]); if (ap != NULL) { - ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahmpages); ASSERT((refcnt = ap->an_refcnt) != 0); VM_STAT_ADD(anonvmstats.decrefpages[1]); @@ -1156,8 +1182,7 @@ anon_decref_pages( pp = page_lookup(vp, (u_offset_t)off, SE_EXCL); if (pp == NULL || pp->p_szc == 0) { VM_STAT_ADD(anonvmstats.decrefpages[3]); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, - ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); (void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP); mutex_enter(ahm); @@ -1224,8 +1249,7 @@ anon_decref_pages( ap = anon_get_ptr(ahp, an_idx + j); ASSERT(ap != NULL && ap->an_refcnt == 1); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, - ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); (void) anon_set_ptr(ahp, an_idx + j, NULL, ANON_SLEEP); mutex_enter(ahm); @@ -1262,7 +1286,7 @@ anon_decref_pages( } else { VM_STAT_ADD(anonvmstats.decrefpages[8]); (void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); ap->an_refcnt--; mutex_exit(ahm); @@ -1305,7 +1329,7 @@ anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new, break; (void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); ap->an_refcnt++; @@ -1397,11 +1421,9 @@ anon_dup_fill_holes( * getting an anonpages_hash_lock for the * first anon slot of a large page. */ - int hash = AH_LOCK(ap->an_vp, ap->an_off); - VM_STAT_ADD(anonvmstats.dupfillholes[2]); - ahmpages = &anonpages_hash_lock[hash]; + ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahmpages); /*LINTED*/ ASSERT(refcnt = ap->an_refcnt); @@ -1411,7 +1433,7 @@ anon_dup_fill_holes( } (void) anon_set_ptr(new, new_idx + off + i, ap, ANON_SLEEP); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); ASSERT(ahmpages != NULL || ap->an_refcnt == 1); ASSERT(i == 0 || ahmpages == NULL || @@ -1680,7 +1702,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size) continue; } - ahm = &anonhash_lock[AH_LOCK(vp, off)]; + ahm = AH_MUTEX(vp, off); mutex_enter(ahm); ASSERT(ap->an_refcnt != 0); /* @@ -1767,7 +1789,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size) if (ap == NULL) break; swap_xlate(ap, &vp, &off); - ahm = &anonhash_lock[AH_LOCK(vp, off)]; + ahm = AH_MUTEX(vp, off); mutex_enter(ahm); ASSERT(ap->an_refcnt != 0); @@ -1830,7 +1852,7 @@ anon_getpage( * routine does. */ if (pl != NULL && (pp = page_lookup(vp, (u_offset_t)off, SE_SHARED))) { - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); if (ap->an_refcnt == 1) *protp = PROT_ALL; @@ -1854,7 +1876,7 @@ anon_getpage( seg, addr, rw, cred, NULL); if (err == 0 && pl != NULL) { - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); if (ap->an_refcnt != 1) *protp &= ~PROT_WRITE; /* make read-only */ @@ -2511,8 +2533,7 @@ anon_map_privatepages( * first anon slot of a large page. */ if (ap != NULL) { - ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, - ap->an_off)]; + ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahmpages); if (ap->an_refcnt == 1) { VM_STAT_ADD(anonvmstats.privatepages[4]); @@ -2820,8 +2841,7 @@ anon_map_createpages( */ if (ap->an_pvp != NULL) { page_io_lock(pp); - ahm = &anonhash_lock[AH_LOCK(ap->an_vp, - ap->an_off)]; + ahm = AH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahm); if (ap->an_pvp != NULL) { swap_phys_free(ap->an_pvp, @@ -3007,7 +3027,7 @@ anon_try_demote_pages( ap = anon_get_ptr(ahp, sidx); if (ap != NULL && private) { VM_STAT_ADD(anonvmstats.demotepages[1]); - ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)]; + ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); mutex_enter(ahmpages); } diff --git a/usr/src/uts/common/vm/vm_swap.c b/usr/src/uts/common/vm/vm_swap.c index 61210201aa..ed325d18d4 100644 --- a/usr/src/uts/common/vm/vm_swap.c +++ b/usr/src/uts/common/vm/vm_swap.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -357,7 +356,7 @@ swap_anon(struct vnode *vp, u_offset_t off) { struct anon *ap; - ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(vp, off)])); + ASSERT(MUTEX_HELD(AH_MUTEX(vp, off))); for (ap = anon_hash[ANON_HASH(vp, off)]; ap != NULL; ap = ap->an_hash) { if (ap->an_vp == vp && ap->an_off == off) @@ -1452,7 +1451,8 @@ swapdel( * may change under us. */ for (app = anon_hash; app < &anon_hash[ANON_HASH_SIZE]; app++) { - ahm = &anonhash_lock[(app-anon_hash) & (AH_LOCK_SIZE - 1)]; + ahm = &anonhash_lock[(app - anon_hash) & + (AH_LOCK_SIZE - 1)].pad_mutex; mutex_enter(ahm); top: for (ap = *app; ap != NULL; ap = ap->an_hash) { @@ -1612,7 +1612,7 @@ again: */ if (!alloc_pg) page_io_lock(pp); - ahm = &anonhash_lock[AH_LOCK(vp, off)]; + ahm = AH_MUTEX(vp, off); mutex_enter(ahm); ap = swap_anon(vp, off); if ((ap == NULL || ap->an_pvp == NULL) && alloc_pg) { @@ -1681,7 +1681,7 @@ swap_newphysname( * No swap available so return error unless requested * offset is already backed in which case return that. */ - ahm = &anonhash_lock[AH_LOCK(vp, offset)]; + ahm = AH_MUTEX(vp, offset); mutex_enter(ahm); if ((ap = swap_anon(vp, offset)) == NULL) { error = SE_NOANON; @@ -1709,7 +1709,7 @@ swap_newphysname( for (off = start, poff = pstart; poff < pstart + plen; off += PAGESIZE, poff += PAGESIZE) { - ahm = &anonhash_lock[AH_LOCK(vp, off)]; + ahm = AH_MUTEX(vp, off); mutex_enter(ahm); if ((ap = swap_anon(vp, off)) != NULL) { /* Free old slot if any, and assign new one */ @@ -1779,7 +1779,7 @@ swap_getphysname( int error = 0; kmutex_t *ahm; - ahm = &anonhash_lock[AH_LOCK(vp, off)]; + ahm = AH_MUTEX(vp, off); mutex_enter(ahm); /* Get anon slot for vp, off */ |
