diff options
Diffstat (limited to 'usr/src/uts/common/vm/page.h')
-rw-r--r-- | usr/src/uts/common/vm/page.h | 98 |
1 files changed, 70 insertions, 28 deletions
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 026ea7c29b..7fa4af9a4a 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -102,10 +102,37 @@ typedef int selock_t; #ifdef _KERNEL /* - * Macros to acquire and release the page logical lock. + * PAGE_LLOCK_SIZE is 2 * NCPU, but no smaller than 128. + * PAGE_LLOCK_SHIFT is log2(PAGE_LLOCK_SIZE). */ -#define page_struct_lock(pp) mutex_enter(&page_llock) -#define page_struct_unlock(pp) mutex_exit(&page_llock) +#if ((2*NCPU_P2) > 128) +#define PAGE_LLOCK_SHIFT ((unsigned)(NCPU_LOG2 + 1)) +#else +#define PAGE_LLOCK_SHIFT 7U +#endif +#define PAGE_LLOCK_SIZE (1 << PAGE_LLOCK_SHIFT) + +/* + * The number of low order 0 bits in the page_t address. + */ +#define PP_SHIFT 7 + +/* + * pp may be the root of a large page, and many low order bits will be 0. + * Shift and XOR multiple times to capture the good bits across the range of + * possible page sizes. + */ +#define PAGE_LLOCK_HASH(pp) \ + (((((uintptr_t)(pp) >> PP_SHIFT) ^ \ + ((uintptr_t)(pp) >> (PAGE_LLOCK_SHIFT + PP_SHIFT))) ^ \ + ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 2) + PP_SHIFT)) ^ \ + ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 3) + PP_SHIFT))) & \ + (PAGE_LLOCK_SIZE - 1)) + +#define page_struct_lock(pp) \ + mutex_enter(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex) +#define page_struct_unlock(pp) \ + mutex_exit(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex) #endif /* _KERNEL */ @@ -171,7 +198,7 @@ struct as; * p_next * p_prev * - * The following fields are protected by the global page_llock: + * The following fields are protected by the global page_llocks[]: * * p_lckcnt * p_cowcnt @@ -348,8 +375,11 @@ struct as; * sleep while holding * this lock. * ===================================================================== - * p_lckcnt p_selock(E,S) p_selock(E) && - * p_cowcnt page_llock + * p_lckcnt p_selock(E,S) p_selock(E) + * OR + * p_selock(S) && + * page_llocks[] + * p_cowcnt * ===================================================================== * p_nrm hat layer lock hat layer lock * p_mapping @@ -535,44 +565,61 @@ typedef page_t devpage_t; * resulting hashed value. Note that this will perform quickly, since the * shifting/summing are fast register to register operations with no additional * memory references). + * + * PH_SHIFT_SIZE is the amount to use for the successive shifts in the hash + * function below. The actual value is LOG2(PH_TABLE_SIZE), so that as many + * bits as possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX(). */ #if defined(_LP64) #if NCPU < 4 #define PH_TABLE_SIZE 128 -#define VP_SHIFT 7 +#define PH_SHIFT_SIZE 7 #else -#define PH_TABLE_SIZE 1024 -#define VP_SHIFT 9 +#define PH_TABLE_SIZE (2 * NCPU_P2) +#define PH_SHIFT_SIZE (NCPU_LOG2 + 1) #endif #else /* 32 bits */ #if NCPU < 4 #define PH_TABLE_SIZE 16 -#define VP_SHIFT 7 +#define PH_SHIFT_SIZE 4 #else #define PH_TABLE_SIZE 128 -#define VP_SHIFT 9 +#define PH_SHIFT_SIZE 7 #endif #endif /* _LP64 */ /* - * The amount to use for the successive shifts in the hash function below. - * The actual value is LOG2(PH_TABLE_SIZE), so that as many bits as - * possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX(). + * + * We take care to get as much randomness as possible from both the vp and + * the offset. Workloads can have few vnodes with many offsets, many vnodes + * with few offsets or a moderate mix of both. This hash should perform + * equally well for each of these possibilities and for all types of memory + * allocations. + * + * vnodes representing files are created over a long period of time and + * have good variation in the upper vp bits, and the right shifts below + * capture these bits. However, swap vnodes are created quickly in a + * narrow vp* range. Refer to comments at swap_alloc: vnum has exactly + * AN_VPSHIFT bits, so the kmem_alloc'd vnode addresses have approximately + * AN_VPSHIFT bits of variation above their VNODE_ALIGN low order 0 bits. + * Spread swap vnodes widely in the hash table by XOR'ing a term with the + * vp bits of variation left shifted to the top of the range. */ -#define PH_SHIFT_SIZE (7) #define PAGE_HASHSZ page_hashsz #define PAGE_HASHAVELEN 4 #define PAGE_HASH_FUNC(vp, off) \ - ((((uintptr_t)(off) >> PAGESHIFT) + \ - ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE)) + \ - ((uintptr_t)(vp) >> 3) + \ - ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) + \ - ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE))) & \ + (((((uintptr_t)(off) >> PAGESHIFT) ^ \ + ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE))) ^ \ + (((uintptr_t)(vp) >> 3) ^ \ + ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) ^ \ + ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE)) ^ \ + ((uintptr_t)(vp) << \ + (page_hashsz_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \ (PAGE_HASHSZ - 1)) #ifdef _KERNEL @@ -588,16 +635,10 @@ typedef page_t devpage_t; * Since sizeof (kmutex_t) is 8, we shift an additional 3 to skew to a different * 64 byte sub-block. */ -typedef struct pad_mutex { - kmutex_t pad_mutex; -#ifdef _LP64 - char pad_pad[64 - sizeof (kmutex_t)]; -#endif -} pad_mutex_t; extern pad_mutex_t ph_mutex[]; #define PAGE_HASH_MUTEX(x) \ - &(ph_mutex[((x) + ((x) >> VP_SHIFT) + ((x) << 3)) & \ + &(ph_mutex[((x) ^ ((x) >> PH_SHIFT_SIZE) + ((x) << 3)) & \ (PH_TABLE_SIZE - 1)].pad_mutex) /* @@ -626,9 +667,10 @@ extern pad_mutex_t ph_mutex[]; ((se) == SE_EXCL ? PAGE_EXCL(pp) : PAGE_SHARED(pp)) extern long page_hashsz; +extern unsigned int page_hashsz_shift; extern page_t **page_hash; -extern kmutex_t page_llock; /* page logical lock mutex */ +extern pad_mutex_t page_llocks[]; /* page logical lock mutex */ extern kmutex_t freemem_lock; /* freemem lock */ extern pgcnt_t total_pages; /* total pages in the system */ |