summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/vm
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/vm')
-rw-r--r--  usr/src/uts/common/vm/anon.h         22
-rw-r--r--  usr/src/uts/common/vm/page.h         98
-rw-r--r--  usr/src/uts/common/vm/page_lock.c    15
-rw-r--r--  usr/src/uts/common/vm/seg_vn.c       27
-rw-r--r--  usr/src/uts/common/vm/vm_anon.c       4
-rw-r--r--  usr/src/uts/common/vm/vm_page.c      58
-rw-r--r--  usr/src/uts/common/vm/vm_pagelist.c   4
7 files changed, 169 insertions, 59 deletions
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h
index 652fcc0951..a2e07d0b18 100644
--- a/usr/src/uts/common/vm/anon.h
+++ b/usr/src/uts/common/vm/anon.h
@@ -91,6 +91,11 @@ struct anon {
int an_refcnt; /* # of people sharing slot */
};
+#define AN_CACHE_ALIGN_LOG2 4 /* log2(AN_CACHE_ALIGN) */
+#define AN_CACHE_ALIGN (1U << AN_CACHE_ALIGN_LOG2) /* anon address aligned */
+ /* 16 bytes */
+
+
#ifdef _KERNEL
/*
* The swapinfo_lock protects:
@@ -121,11 +126,24 @@ extern kcondvar_t anon_array_cv[];
* Global hash table to provide a function from (vp, off) -> ap
*/
extern size_t anon_hash_size;
+extern unsigned int anon_hash_shift;
extern struct anon **anon_hash;
#define ANON_HASH_SIZE anon_hash_size
#define ANON_HASHAVELEN 4
-#define ANON_HASH(VP, OFF) \
-((((uintptr_t)(VP) >> 7) ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1))
+/*
+ * Try to use as many bits of randomness from both vp and off as we can.
+ * This should help spreading evenly for a variety of workloads. See comments
+ * for PAGE_HASH_FUNC for more explanation.
+ */
+#define ANON_HASH(vp, off) \
+ (((((uintptr_t)(off) >> PAGESHIFT) ^ \
+ ((uintptr_t)(off) >> (PAGESHIFT + anon_hash_shift))) ^ \
+ (((uintptr_t)(vp) >> 3) ^ \
+ ((uintptr_t)(vp) >> (3 + anon_hash_shift)) ^ \
+ ((uintptr_t)(vp) >> (3 + 2 * anon_hash_shift)) ^ \
+ ((uintptr_t)(vp) << \
+ (anon_hash_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \
+ (anon_hash_size - 1))
#define AH_LOCK_SIZE (2 << NCPU_LOG2)
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 026ea7c29b..7fa4af9a4a 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -102,10 +102,37 @@ typedef int selock_t;
#ifdef _KERNEL
/*
- * Macros to acquire and release the page logical lock.
+ * PAGE_LLOCK_SIZE is 2 * NCPU, but no smaller than 128.
+ * PAGE_LLOCK_SHIFT is log2(PAGE_LLOCK_SIZE).
*/
-#define page_struct_lock(pp) mutex_enter(&page_llock)
-#define page_struct_unlock(pp) mutex_exit(&page_llock)
+#if ((2*NCPU_P2) > 128)
+#define PAGE_LLOCK_SHIFT ((unsigned)(NCPU_LOG2 + 1))
+#else
+#define PAGE_LLOCK_SHIFT 7U
+#endif
+#define PAGE_LLOCK_SIZE (1 << PAGE_LLOCK_SHIFT)
+
+/*
+ * The number of low order 0 bits in the page_t address.
+ */
+#define PP_SHIFT 7
+
+/*
+ * pp may be the root of a large page, and many low order bits will be 0.
+ * Shift and XOR multiple times to capture the good bits across the range of
+ * possible page sizes.
+ */
+#define PAGE_LLOCK_HASH(pp) \
+ (((((uintptr_t)(pp) >> PP_SHIFT) ^ \
+ ((uintptr_t)(pp) >> (PAGE_LLOCK_SHIFT + PP_SHIFT))) ^ \
+ ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 2) + PP_SHIFT)) ^ \
+ ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 3) + PP_SHIFT))) & \
+ (PAGE_LLOCK_SIZE - 1))
+
+#define page_struct_lock(pp) \
+ mutex_enter(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex)
+#define page_struct_unlock(pp) \
+ mutex_exit(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex)
#endif /* _KERNEL */
@@ -171,7 +198,7 @@ struct as;
* p_next
* p_prev
*
- * The following fields are protected by the global page_llock:
+ * The following fields are protected by the global page_llocks[]:
*
* p_lckcnt
* p_cowcnt
@@ -348,8 +375,11 @@ struct as;
* sleep while holding
* this lock.
* =====================================================================
- * p_lckcnt p_selock(E,S) p_selock(E) &&
- * p_cowcnt page_llock
+ * p_lckcnt p_selock(E,S) p_selock(E)
+ * OR
+ * p_selock(S) &&
+ * page_llocks[]
+ * p_cowcnt
* =====================================================================
* p_nrm hat layer lock hat layer lock
* p_mapping
@@ -535,44 +565,61 @@ typedef page_t devpage_t;
* resulting hashed value. Note that this will perform quickly, since the
* shifting/summing are fast register to register operations with no additional
* memory references).
+ *
+ * PH_SHIFT_SIZE is the amount to use for the successive shifts in the hash
+ * function below. The actual value is LOG2(PH_TABLE_SIZE), so that as many
+ * bits as possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX().
*/
#if defined(_LP64)
#if NCPU < 4
#define PH_TABLE_SIZE 128
-#define VP_SHIFT 7
+#define PH_SHIFT_SIZE 7
#else
-#define PH_TABLE_SIZE 1024
-#define VP_SHIFT 9
+#define PH_TABLE_SIZE (2 * NCPU_P2)
+#define PH_SHIFT_SIZE (NCPU_LOG2 + 1)
#endif
#else /* 32 bits */
#if NCPU < 4
#define PH_TABLE_SIZE 16
-#define VP_SHIFT 7
+#define PH_SHIFT_SIZE 4
#else
#define PH_TABLE_SIZE 128
-#define VP_SHIFT 9
+#define PH_SHIFT_SIZE 7
#endif
#endif /* _LP64 */
/*
- * The amount to use for the successive shifts in the hash function below.
- * The actual value is LOG2(PH_TABLE_SIZE), so that as many bits as
- * possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX().
+ *
+ * We take care to get as much randomness as possible from both the vp and
+ * the offset. Workloads can have few vnodes with many offsets, many vnodes
+ * with few offsets or a moderate mix of both. This hash should perform
+ * equally well for each of these possibilities and for all types of memory
+ * allocations.
+ *
+ * vnodes representing files are created over a long period of time and
+ * have good variation in the upper vp bits, and the right shifts below
+ * capture these bits. However, swap vnodes are created quickly in a
+ * narrow vp* range. Refer to comments at swap_alloc: vnum has exactly
+ * AN_VPSHIFT bits, so the kmem_alloc'd vnode addresses have approximately
+ * AN_VPSHIFT bits of variation above their VNODE_ALIGN low order 0 bits.
+ * Spread swap vnodes widely in the hash table by XOR'ing a term with the
+ * vp bits of variation left shifted to the top of the range.
*/
-#define PH_SHIFT_SIZE (7)
#define PAGE_HASHSZ page_hashsz
#define PAGE_HASHAVELEN 4
#define PAGE_HASH_FUNC(vp, off) \
- ((((uintptr_t)(off) >> PAGESHIFT) + \
- ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE)) + \
- ((uintptr_t)(vp) >> 3) + \
- ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) + \
- ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE))) & \
+ (((((uintptr_t)(off) >> PAGESHIFT) ^ \
+ ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE))) ^ \
+ (((uintptr_t)(vp) >> 3) ^ \
+ ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) ^ \
+ ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE)) ^ \
+ ((uintptr_t)(vp) << \
+ (page_hashsz_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \
(PAGE_HASHSZ - 1))
#ifdef _KERNEL
@@ -588,16 +635,10 @@ typedef page_t devpage_t;
* Since sizeof (kmutex_t) is 8, we shift an additional 3 to skew to a different
* 64 byte sub-block.
*/
-typedef struct pad_mutex {
- kmutex_t pad_mutex;
-#ifdef _LP64
- char pad_pad[64 - sizeof (kmutex_t)];
-#endif
-} pad_mutex_t;
extern pad_mutex_t ph_mutex[];
#define PAGE_HASH_MUTEX(x) \
- &(ph_mutex[((x) + ((x) >> VP_SHIFT) + ((x) << 3)) & \
+ &(ph_mutex[((x) ^ ((x) >> PH_SHIFT_SIZE) + ((x) << 3)) & \
(PH_TABLE_SIZE - 1)].pad_mutex)
/*
@@ -626,9 +667,10 @@ extern pad_mutex_t ph_mutex[];
((se) == SE_EXCL ? PAGE_EXCL(pp) : PAGE_SHARED(pp))
extern long page_hashsz;
+extern unsigned int page_hashsz_shift;
extern page_t **page_hash;
-extern kmutex_t page_llock; /* page logical lock mutex */
+extern pad_mutex_t page_llocks[]; /* page logical lock mutex */
extern kmutex_t freemem_lock; /* freemem lock */
extern pgcnt_t total_pages; /* total pages in the system */
diff --git a/usr/src/uts/common/vm/page_lock.c b/usr/src/uts/common/vm/page_lock.c
index 8003884652..7e48602189 100644
--- a/usr/src/uts/common/vm/page_lock.c
+++ b/usr/src/uts/common/vm/page_lock.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -42,14 +41,14 @@
#include <vm/seg_kmem.h>
/*
- * This global mutex is for logical page locking.
+ * This global mutex array is for logical page locking.
* The following fields in the page structure are protected
* by this lock:
*
* p_lckcnt
* p_cowcnt
*/
-kmutex_t page_llock;
+pad_mutex_t page_llocks[8 * NCPU_P2];
/*
* This is a global lock for the logical page free list. The
@@ -127,14 +126,10 @@ static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE];
* an address of a vnode.
*/
-/*
- * XX64 VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
- * Need to review again.
- */
#if defined(_LP64)
-#define VPH_TABLE_SIZE (1 << (VP_SHIFT + 3))
+#define VPH_TABLE_SIZE (8 * NCPU_P2)
#else /* 32 bits */
-#define VPH_TABLE_SIZE (2 << VP_SHIFT)
+#define VPH_TABLE_SIZE (2 * NCPU_P2)
#endif
#define VP_HASH_FUNC(vp) \
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 666b98f389..31c293d416 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -6483,10 +6482,26 @@ segvn_claim_pages(
ASSERT(pg_idx <= pgcnt);
ppa[pg_idx] = NULL;
- if (prot & PROT_WRITE)
- err = page_addclaim_pages(ppa);
- else
- err = page_subclaim_pages(ppa);
+
+ /* Find each large page within ppa, and adjust its claim */
+
+ /* Does ppa cover a single large page? */
+ if (ppa[0]->p_szc == seg->s_szc) {
+ if (prot & PROT_WRITE)
+ err = page_addclaim_pages(ppa);
+ else
+ err = page_subclaim_pages(ppa);
+ } else {
+ for (i = 0; ppa[i]; i += pgcnt) {
+ ASSERT(IS_P2ALIGNED(page_pptonum(ppa[i]), pgcnt));
+ if (prot & PROT_WRITE)
+ err = page_addclaim_pages(&ppa[i]);
+ else
+ err = page_subclaim_pages(&ppa[i]);
+ if (err == 0)
+ break;
+ }
+ }
for (i = 0; i < pg_idx; i++) {
ASSERT(ppa[i] != NULL);
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index 6ded5d7192..4916f5d376 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -138,6 +138,7 @@ kcondvar_t anon_array_cv[ANON_LOCKSIZE];
*/
extern int swap_maxcontig;
size_t anon_hash_size;
+unsigned int anon_hash_shift;
struct anon **anon_hash;
static struct kmem_cache *anon_cache;
@@ -199,7 +200,8 @@ anon_init(void)
pad_mutex_t *tmp;
/* These both need to be powers of 2 so round up to the next power */
- anon_hash_size = 1L << highbit((physmem / ANON_HASHAVELEN) - 1);
+ anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1);
+ anon_hash_size = 1L << anon_hash_shift;
/*
* We need to align the anonhash_lock and anonpages_hash_lock arrays
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index a35f7cc196..169b9c84e7 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -3977,11 +3977,27 @@ page_pp_useclaim(
uint_t write_perm) /* set if vpage has PROT_WRITE */
{
int payback = 0;
+ int nidx, oidx;
ASSERT(PAGE_LOCKED(opp));
ASSERT(PAGE_LOCKED(npp));
- page_struct_lock(opp);
+ /*
+ * Since we have two pages we probably have two locks. We need to take
+ * them in a defined order to avoid deadlocks. It's also possible they
+ * both hash to the same lock in which case this is a non-issue.
+ */
+ nidx = PAGE_LLOCK_HASH(PP_PAGEROOT(npp));
+ oidx = PAGE_LLOCK_HASH(PP_PAGEROOT(opp));
+ if (nidx < oidx) {
+ page_struct_lock(npp);
+ page_struct_lock(opp);
+ } else if (oidx < nidx) {
+ page_struct_lock(opp);
+ page_struct_lock(npp);
+ } else { /* The pages hash to the same lock */
+ page_struct_lock(npp);
+ }
ASSERT(npp->p_cowcnt == 0);
ASSERT(npp->p_lckcnt == 0);
@@ -4017,7 +4033,16 @@ page_pp_useclaim(
pages_useclaim--;
mutex_exit(&freemem_lock);
}
- page_struct_unlock(opp);
+
+ if (nidx < oidx) {
+ page_struct_unlock(opp);
+ page_struct_unlock(npp);
+ } else if (oidx < nidx) {
+ page_struct_unlock(npp);
+ page_struct_unlock(opp);
+ } else { /* The pages hash to the same lock */
+ page_struct_unlock(npp);
+ }
}
/*
@@ -4103,21 +4128,27 @@ page_subclaim(page_t *pp)
return (r);
}
+/*
+ * Variant of page_addclaim(), where ppa[] contains the pages of a single large
+ * page.
+ */
int
page_addclaim_pages(page_t **ppa)
{
-
pgcnt_t lckpgs = 0, pg_idx;
VM_STAT_ADD(pagecnt.pc_addclaim_pages);
- mutex_enter(&page_llock);
+ /*
+ * Only need to take the page struct lock on the large page root.
+ */
+ page_struct_lock(ppa[0]);
for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {
ASSERT(PAGE_LOCKED(ppa[pg_idx]));
ASSERT(ppa[pg_idx]->p_lckcnt != 0);
if (ppa[pg_idx]->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
- mutex_exit(&page_llock);
+ page_struct_unlock(ppa[0]);
return (0);
}
if (ppa[pg_idx]->p_lckcnt > 1)
@@ -4131,7 +4162,7 @@ page_addclaim_pages(page_t **ppa)
pages_claimed += lckpgs;
} else {
mutex_exit(&freemem_lock);
- mutex_exit(&page_llock);
+ page_struct_unlock(ppa[0]);
return (0);
}
mutex_exit(&freemem_lock);
@@ -4141,10 +4172,14 @@ page_addclaim_pages(page_t **ppa)
ppa[pg_idx]->p_lckcnt--;
ppa[pg_idx]->p_cowcnt++;
}
- mutex_exit(&page_llock);
+ page_struct_unlock(ppa[0]);
return (1);
}
+/*
+ * Variant of page_subclaim(), where ppa[] contains the pages of a single large
+ * page.
+ */
int
page_subclaim_pages(page_t **ppa)
{
@@ -4152,13 +4187,16 @@ page_subclaim_pages(page_t **ppa)
VM_STAT_ADD(pagecnt.pc_subclaim_pages);
- mutex_enter(&page_llock);
+ /*
+ * Only need to take the page struct lock on the large page root.
+ */
+ page_struct_lock(ppa[0]);
for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {
ASSERT(PAGE_LOCKED(ppa[pg_idx]));
ASSERT(ppa[pg_idx]->p_cowcnt != 0);
if (ppa[pg_idx]->p_lckcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
- mutex_exit(&page_llock);
+ page_struct_unlock(ppa[0]);
return (0);
}
if (ppa[pg_idx]->p_lckcnt != 0)
@@ -4177,7 +4215,7 @@ page_subclaim_pages(page_t **ppa)
ppa[pg_idx]->p_lckcnt++;
}
- mutex_exit(&page_llock);
+ page_struct_unlock(ppa[0]);
return (1);
}
diff --git a/usr/src/uts/common/vm/vm_pagelist.c b/usr/src/uts/common/vm/vm_pagelist.c
index 7b761da108..eda3552c03 100644
--- a/usr/src/uts/common/vm/vm_pagelist.c
+++ b/usr/src/uts/common/vm/vm_pagelist.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -60,6 +59,7 @@
#include <sys/mem_cage.h>
#include <sys/sdt.h>
#include <sys/dumphdr.h>
+#include <sys/swap.h>
extern uint_t vac_colors;