Diffstat (limited to 'usr/src/uts/common/vm/page.h')
-rw-r--r--	usr/src/uts/common/vm/page.h	98
1 file changed, 70 insertions(+), 28 deletions(-)
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 026ea7c29b..7fa4af9a4a 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -102,10 +102,37 @@ typedef int selock_t;
#ifdef _KERNEL
/*
- * Macros to acquire and release the page logical lock.
+ * PAGE_LLOCK_SIZE is 2 * NCPU_P2 (NCPU rounded up to a power of two), but
+ * no smaller than 128.  PAGE_LLOCK_SHIFT is log2(PAGE_LLOCK_SIZE).
*/
-#define page_struct_lock(pp) mutex_enter(&page_llock)
-#define page_struct_unlock(pp) mutex_exit(&page_llock)
+#if ((2*NCPU_P2) > 128)
+#define PAGE_LLOCK_SHIFT ((unsigned)(NCPU_LOG2 + 1))
+#else
+#define PAGE_LLOCK_SHIFT 7U
+#endif
+#define PAGE_LLOCK_SIZE (1 << PAGE_LLOCK_SHIFT)
+
+/*
+ * The number of low order 0 bits in the page_t address.
+ */
+#define PP_SHIFT 7
+
+/*
+ * pp may be the root of a large page, and many low order bits will be 0.
+ * Shift and XOR multiple times to capture the good bits across the range of
+ * possible page sizes.
+ */
+#define PAGE_LLOCK_HASH(pp) \
+ (((((uintptr_t)(pp) >> PP_SHIFT) ^ \
+ ((uintptr_t)(pp) >> (PAGE_LLOCK_SHIFT + PP_SHIFT))) ^ \
+ ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 2) + PP_SHIFT)) ^ \
+ ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 3) + PP_SHIFT))) & \
+ (PAGE_LLOCK_SIZE - 1))
+
+#define page_struct_lock(pp) \
+ mutex_enter(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex)
+#define page_struct_unlock(pp) \
+ mutex_exit(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex)
#endif /* _KERNEL */
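
To see how PAGE_LLOCK_HASH spreads page_t addresses across the lock array,
here is a minimal user-space sketch. It assumes an LP64 build with
NCPU_P2 = 64 (so PAGE_LLOCK_SHIFT = 7 and PAGE_LLOCK_SIZE = 128); the
page_t addresses are made up, and PP_PAGEROOT() is skipped, so each
address stands in for a large-page root.

#include <stdio.h>
#include <stdint.h>

#define	PAGE_LLOCK_SHIFT	7U	/* assumed: 2 * NCPU_P2 <= 128 */
#define	PAGE_LLOCK_SIZE		(1 << PAGE_LLOCK_SHIFT)
#define	PP_SHIFT		7	/* low order 0 bits of a page_t */

/* Same shift-and-XOR mix as the patch. */
#define	PAGE_LLOCK_HASH(pp) \
	(((((uintptr_t)(pp) >> PP_SHIFT) ^ \
	((uintptr_t)(pp) >> (PAGE_LLOCK_SHIFT + PP_SHIFT))) ^ \
	((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 2) + PP_SHIFT)) ^ \
	((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 3) + PP_SHIFT))) & \
	(PAGE_LLOCK_SIZE - 1))

int
main(void)
{
	/* Hypothetical 128-byte-aligned page_t addresses. */
	uintptr_t pps[] = {
		0xfffff00012340080UL,
		0xfffff00012340100UL,
		0xfffff00098760080UL
	};
	for (size_t i = 0; i < sizeof (pps) / sizeof (pps[0]); i++)
		printf("%#lx -> lock %lu\n", (unsigned long)pps[i],
		    (unsigned long)PAGE_LLOCK_HASH(pps[i]));
	return (0);
}

Because bits well above PP_SHIFT feed the index, page_t allocations that
share their low-order bits do not all pile onto one lock.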
@@ -171,7 +198,7 @@ struct as;
* p_next
* p_prev
*
- * The following fields are protected by the global page_llock:
+ * The following fields are protected by the global page_llocks[]:
*
* p_lckcnt
* p_cowcnt
@@ -348,8 +375,11 @@ struct as;
* sleep while holding
* this lock.
* =====================================================================
- * p_lckcnt	p_selock(E,S)	p_selock(E) &&
- * p_cowcnt			page_llock
+ * p_lckcnt	p_selock(E,S)	p_selock(E)
+ *				    OR
+ *				p_selock(S) &&
+ *				    page_llocks[]
+ * p_cowcnt
* =====================================================================
* p_nrm	hat layer lock	hat layer lock
* p_mapping
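
The new rule reads: an exclusive p_selock is enough on its own to change
p_lckcnt/p_cowcnt, while a shared holder must also take the hashed logical
lock. A hedged sketch of a conforming writer (the function is hypothetical;
PAGE_LOCKED(), PAGE_EXCL(), page_struct_lock() and the p_lckcnt field are
the real ones):

/*
 * Sketch only: bump p_lckcnt under the rules in the table above.
 */
static int
page_lckcnt_incr_sketch(page_t *pp)
{
	int shared = !PAGE_EXCL(pp);	/* shared holders need page_llocks[] */
	int r;

	ASSERT(PAGE_LOCKED(pp));	/* p_selock held, shared or exclusive */
	if (shared)
		page_struct_lock(pp);
	r = (pp->p_lckcnt != (ushort_t)-1);	/* don't wrap the count */
	if (r)
		pp->p_lckcnt++;
	if (shared)
		page_struct_unlock(pp);
	return (r);
}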
@@ -535,44 +565,61 @@ typedef page_t devpage_t;
resulting hashed value. (Note that this will perform quickly, since the
* shifting/summing are fast register to register operations with no additional
* memory references).
+ *
+ * PH_SHIFT_SIZE is the amount to use for the successive shifts in the hash
+ * function below. The actual value is LOG2(PH_TABLE_SIZE), so that as many
+ * bits as possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX().
*/
#if defined(_LP64)
#if NCPU < 4
#define PH_TABLE_SIZE 128
-#define VP_SHIFT 7
+#define PH_SHIFT_SIZE 7
#else
-#define PH_TABLE_SIZE 1024
-#define VP_SHIFT 9
+#define PH_TABLE_SIZE (2 * NCPU_P2)
+#define PH_SHIFT_SIZE (NCPU_LOG2 + 1)
#endif
#else /* 32 bits */
#if NCPU < 4
#define PH_TABLE_SIZE 16
-#define VP_SHIFT 7
+#define PH_SHIFT_SIZE 4
#else
#define PH_TABLE_SIZE 128
-#define VP_SHIFT 9
+#define PH_SHIFT_SIZE 7
#endif
#endif /* _LP64 */
/*
- * The amount to use for the successive shifts in the hash function below.
- * The actual value is LOG2(PH_TABLE_SIZE), so that as many bits as
- * possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX().
+ *
+ * We take care to get as much randomness as possible from both the vp and
+ * the offset. Workloads can have few vnodes with many offsets, many vnodes
+ * with few offsets or a moderate mix of both. This hash should perform
+ * equally well for each of these possibilities and for all types of memory
+ * allocations.
+ *
+ * vnodes representing files are created over a long period of time and
+ * have good variation in the upper vp bits, and the right shifts below
+ * capture these bits. However, swap vnodes are created quickly in a
+ * narrow vp* range. Refer to comments at swap_alloc: vnum has exactly
+ * AN_VPSHIFT bits, so the kmem_alloc'd vnode addresses have approximately
+ * AN_VPSHIFT bits of variation above their VNODE_ALIGN low order 0 bits.
+ * Spread swap vnodes widely in the hash table by XOR'ing a term with the
+ * vp bits of variation left shifted to the top of the range.
*/
-#define PH_SHIFT_SIZE (7)
#define PAGE_HASHSZ page_hashsz
#define PAGE_HASHAVELEN 4
#define PAGE_HASH_FUNC(vp, off) \
- ((((uintptr_t)(off) >> PAGESHIFT) + \
- ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE)) + \
- ((uintptr_t)(vp) >> 3) + \
- ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) + \
- ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE))) & \
+ (((((uintptr_t)(off) >> PAGESHIFT) ^ \
+ ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE))) ^ \
+ (((uintptr_t)(vp) >> 3) ^ \
+ ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) ^ \
+ ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE)) ^ \
+ ((uintptr_t)(vp) << \
+ (page_hashsz_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \
(PAGE_HASHSZ - 1))
#ifdef _KERNEL
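
With stand-in constants, the whole hash can be evaluated in user space to
watch the XOR mix and the new left-shifted vp term at work. PAGESHIFT = 12,
PH_SHIFT_SIZE = 7, AN_VPSHIFT = 5, VNODE_ALIGN_LOG2 = 6 and
page_hashsz = 8192 (page_hashsz_shift = 13) are illustrative numbers here,
not the kernel's:

#include <stdio.h>
#include <stdint.h>

#define	PAGESHIFT		12
#define	PH_SHIFT_SIZE		7
#define	AN_VPSHIFT		5	/* stand-in */
#define	VNODE_ALIGN_LOG2	6	/* stand-in */

static uintptr_t page_hashsz = 8192;		/* must be a power of two */
static unsigned int page_hashsz_shift = 13;	/* log2(page_hashsz) */

#define	PAGE_HASHSZ	page_hashsz
#define	PAGE_HASH_FUNC(vp, off) \
	(((((uintptr_t)(off) >> PAGESHIFT) ^ \
	((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE))) ^ \
	(((uintptr_t)(vp) >> 3) ^ \
	((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) ^ \
	((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE)) ^ \
	((uintptr_t)(vp) << \
	(page_hashsz_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \
	(PAGE_HASHSZ - 1))

int
main(void)
{
	/* Two hypothetical vnode addresses 64 bytes apart, as for swap. */
	uintptr_t vp1 = 0xfffff000deadb000UL;
	uintptr_t vp2 = 0xfffff000deadb040UL;
	uintptr_t off;

	for (off = 0; off < 3UL << PAGESHIFT; off += 1UL << PAGESHIFT)
		printf("vp1/%#lx -> %lu, vp2/%#lx -> %lu\n",
		    (unsigned long)off,
		    (unsigned long)PAGE_HASH_FUNC(vp1, off),
		    (unsigned long)off,
		    (unsigned long)PAGE_HASH_FUNC(vp2, off));
	return (0);
}

With only about AN_VPSHIFT bits of variation above their VNODE_ALIGN_LOG2
zero bits, swap vnodes would otherwise crowd into a narrow band of buckets;
the left-shift term lifts those bits toward the top of the index, spreading
the band across the whole table.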
@@ -588,16 +635,10 @@ typedef page_t devpage_t;
* Since sizeof (kmutex_t) is 8, we shift an additional 3 to skew to a different
* 64 byte sub-block.
*/
-typedef struct pad_mutex {
- kmutex_t pad_mutex;
-#ifdef _LP64
- char pad_pad[64 - sizeof (kmutex_t)];
-#endif
-} pad_mutex_t;
extern pad_mutex_t ph_mutex[];
#define PAGE_HASH_MUTEX(x) \
- &(ph_mutex[((x) + ((x) >> VP_SHIFT) + ((x) << 3)) & \
+ &(ph_mutex[((x) ^ (((x) >> PH_SHIFT_SIZE) + ((x) << 3))) & \
(PH_TABLE_SIZE - 1)].pad_mutex)
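
Together the two macros support the usual lookup pattern; a simplified
outline follows (roughly what page_lookup() in vm_page.c does, not that
code itself):

static page_t *
page_find_sketch(vnode_t *vp, u_offset_t off)
{
	ulong_t index = PAGE_HASH_FUNC(vp, off);
	kmutex_t *phm = PAGE_HASH_MUTEX(index);
	page_t *pp;

	mutex_enter(phm);
	for (pp = page_hash[index]; pp != NULL; pp = pp->p_hash)
		if (pp->p_vnode == vp && pp->p_offset == off)
			break;
	mutex_exit(phm);
	return (pp);	/* a real caller must still acquire p_selock */
}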
/*
@@ -626,9 +667,10 @@ extern pad_mutex_t ph_mutex[];
((se) == SE_EXCL ? PAGE_EXCL(pp) : PAGE_SHARED(pp))
extern long page_hashsz;
+extern unsigned int page_hashsz_shift;
extern page_t **page_hash;
-extern kmutex_t page_llock; /* page logical lock mutex */
+extern pad_mutex_t page_llocks[]; /* page logical lock mutexes */
extern kmutex_t freemem_lock; /* freemem lock */
extern pgcnt_t total_pages; /* total pages in the system */
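
The new page_hashsz_shift extern exists because PAGE_HASH_FUNC's left-shift
term needs log2(page_hashsz) at hash time. The invariant, sketched with the
usual sysmacros.h helpers (illustrative; the actual enforcement happens
where page_hashsz is sized at startup):

	ASSERT(ISP2(page_hashsz));
	ASSERT(page_hashsz_shift == highbit(page_hashsz) - 1);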