diff options
author | Peter Rival <Frank.Rival@oracle.com> | 2010-04-23 13:26:05 -0400 |
---|---|---|
committer | Peter Rival <Frank.Rival@oracle.com> | 2010-04-23 13:26:05 -0400 |
commit | cb15d5d96b3b2730714c28bfe06cfe7421758b8c (patch) | |
tree | 7fd5c3cf5bb49647be8b2eb022e8d75a7d78eab5 | |
parent | 03c76a6ef5c04e818b6badeeb6155961505af45c (diff) | |
download | illumos-joyent-cb15d5d96b3b2730714c28bfe06cfe7421758b8c.tar.gz |
6778289 vm locks need to scale with the size of system (strands/memory size)
-rw-r--r-- | usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/conf/param.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/vnode.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mutex.h | 15 | ||||
-rw-r--r-- | usr/src/uts/common/sys/param.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/swap.h | 56 | ||||
-rw-r--r-- | usr/src/uts/common/sys/vnode.h | 7 | ||||
-rw-r--r-- | usr/src/uts/common/vm/anon.h | 22 | ||||
-rw-r--r-- | usr/src/uts/common/vm/page.h | 98 | ||||
-rw-r--r-- | usr/src/uts/common/vm/page_lock.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 27 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_anon.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_page.c | 58 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_pagelist.c | 4 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/startup.c | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/sys/machparam.h | 3 | ||||
-rw-r--r-- | usr/src/uts/i86pc/vm/vm_machdep.c | 4 | ||||
-rw-r--r-- | usr/src/uts/sfmmu/vm/hat_sfmmu.c | 24 | ||||
-rw-r--r-- | usr/src/uts/sun4/os/startup.c | 12 | ||||
-rw-r--r-- | usr/src/uts/sun4u/sys/machparam.h | 3 | ||||
-rw-r--r-- | usr/src/uts/sun4v/sys/machparam.h | 3 |
21 files changed, 265 insertions, 117 deletions
diff --git a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c index c400b16ef3..ea9e747ba7 100644 --- a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c +++ b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -53,6 +52,7 @@ #include <sys/cpuvar.h> #include <sys/dlpi.h> #include <sys/clock_impl.h> +#include <sys/swap.h> #include <errno.h> #include <vm/seg_vn.h> @@ -628,9 +628,11 @@ uintptr_t mdb_page_lookup(uintptr_t vp, u_offset_t offset) { long page_hashsz, ndx; + int page_hashsz_shift; /* Needed for PAGE_HASH_FUNC */ uintptr_t page_hash, pp; if (mdb_readvar(&page_hashsz, "page_hashsz") == -1 || + mdb_readvar(&page_hashsz_shift, "page_hashsz_shift") == -1 || mdb_readvar(&page_hash, "page_hash") == -1) return (NULL); diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c index bb0cb4de1e..f13030b4cb 100644 --- a/usr/src/uts/common/conf/param.c +++ b/usr/src/uts/common/conf/param.c @@ -114,6 +114,7 @@ const unsigned int _maxslp = (unsigned int)MAXSLP; const unsigned long _maxhandspreadpages = (unsigned long)MAXHANDSPREADPAGES; const int _ncpu = (int)NCPU; const int _ncpu_log2 = (int)NCPU_LOG2; +const int _ncpu_p2 = (int)NCPU_P2; const unsigned long _defaultstksz = (unsigned long)DEFAULTSTKSZ; const unsigned int _nbpg = (unsigned int)MMU_PAGESIZE; diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index f1a69479b8..8ba373ad19 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -2284,8 +2283,11 @@ vn_cache_destructor(void *buf, void *cdrarg) void vn_create_cache(void) { - vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), 64, - vn_cache_constructor, vn_cache_destructor, NULL, NULL, + /* LINTED */ + ASSERT((1 << VNODE_ALIGN_LOG2) == + P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN)); + vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), + VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL, NULL, 0); } diff --git a/usr/src/uts/common/sys/mutex.h b/usr/src/uts/common/sys/mutex.h index 5c11b4afe0..db34243dcc 100644 --- a/usr/src/uts/common/sys/mutex.h +++ b/usr/src/uts/common/sys/mutex.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_MUTEX_H @@ -71,6 +70,18 @@ typedef struct mutex { #ifdef _KERNEL +/* + * A padded mutex, one per 64 byte cache line. Use when false sharing is + * an issue but beware of the extra memory it uses. Consumers may want to + * consider aligning their pad_mutex_t's to a cache line boundary as well. + */ +typedef struct pad_mutex { + kmutex_t pad_mutex; +#ifdef _LP64 + char pad_pad[64 - sizeof (kmutex_t)]; +#endif +} pad_mutex_t; + #define MUTEX_HELD(x) (mutex_owned(x)) #define MUTEX_NOT_HELD(x) (!mutex_owned(x) || panicstr || quiesce_active) diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h index 40af8ce04d..e795131fe4 100644 --- a/usr/src/uts/common/sys/param.h +++ b/usr/src/uts/common/sys/param.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -380,6 +380,7 @@ extern const unsigned long _defaultstksz; extern const unsigned int _nbpg; extern const int _ncpu; extern const int _ncpu_log2; +extern const int _ncpu_p2; extern const int _clsize; #endif /* defined(_KERNEL) && !defined(_ASM) */ @@ -399,6 +400,7 @@ extern const int _clsize; #define DEFAULTSTKSZ _defaultstksz #define NCPU _ncpu #define NCPU_LOG2 _ncpu_log2 +#define NCPU_P2 _ncpu_p2 #endif /* defined(_MACHDEP) */ diff --git a/usr/src/uts/common/sys/swap.h b/usr/src/uts/common/sys/swap.h index 9bba487ec1..bfe5fe3349 100644 --- a/usr/src/uts/common/sys/swap.h +++ b/usr/src/uts/common/sys/swap.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -40,8 +38,6 @@ #ifndef _SYS_SWAP_H #define _SYS_SWAP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/isa_defs.h> #include <sys/feature_tests.h> #include <vm/anon.h> @@ -159,23 +155,17 @@ struct swapinfo { /* * Stuff to convert an anon slot pointer to a page name. * Because the address of the slot (ap) is a unique identifier, we - * use it to generate a unique (vp,off), as shown below. - * - * |<-- 11 bits -->|<------32 - 11 --------->| - * vp index bits off bits + * use it to generate a unique (vp,off), as shown in the comment for + * swap_alloc(). * * The off bits are shifted PAGESHIFT to directly form a page aligned * offset; the vp index bits map 1-1 to a vnode. * - * Note: if we go to 64 bit offsets, we could use all the bits as the - * unique offset and just have one vnode. */ -#define AN_OFFSHIFT 11 /* vnum # bits */ -#define AN_VPSHIFT 21 /* 32 - 11 */ -#define AN_VPSIZEMASK 0x7FF /* vp index mask */ -#define MAX_SWAP_VNODES 2048 /* 1 << AN_OFFSHIFT */ -#define AN_CACHE_ALIGN 16 /* anon address aligned */ - /* 16 bytes */ +#define MAX_SWAP_VNODES_LOG2 11 /* log2(MAX_SWAP_VNODES) */ +#define MAX_SWAP_VNODES (1U << MAX_SWAP_VNODES_LOG2) /* max # swap vnodes */ +#define AN_VPMASK (MAX_SWAP_VNODES - 1) /* vp index mask */ +#define AN_VPSHIFT MAX_SWAP_VNODES_LOG2 /* * Convert from an anon slot to associated vnode and offset. */ @@ -189,24 +179,24 @@ struct swapinfo { /* * Get a vnode name for an anon slot. * The vnum, offset are derived from anon struct address which is - * 16 bytes aligned. To get swap offset the anon address is shifted - * by additional 11 bits which yields 32K aligned swap offset - * (11 bits plus 4 bits alignment). - * The vnum (vnode index) is created from bits 31-21. - * The 64 bit swap offset is created from bits 63-32 and 20-4. - * The 32 bit offset is created from bits 20-4. + * 16 bytes aligned. anon structs may be kmem_cache_alloc'd concurrently by + * multiple threads and come from a small range of addresses (same slab), in + * which case high order AP bits do not vary much, so choose vnum from low + * order bits which vary the most. Different threads will thus get different + * vnums and vnodes, which avoids vph_mutex_contention on the subsequent + * page_hashin(). * - * +-----------...----------+--------+-----------------------+----+ - * | swap offset | vnum | swap offset |0000| - * +-----------...----------+--------+-----------------------+----+ - * 63 32 31 21 20 4 3 0 + * +-----------...-------------------+-----------------------+----+ + * | swap offset | vnum |0000| + * +-----------...-------------------+-----------------------+----+ + * 63 15 14 4 3 0 */ #define swap_alloc(AP) \ { \ - (AP)->an_vp = swapfs_getvp(((uintptr_t)(AP) >> AN_VPSHIFT) \ - & AN_VPSIZEMASK); \ - (AP)->an_off = (anoff_t)(((uintptr_t)(AP) & ~(uintptr_t)0xFFFFFFFF) \ - | (((uintptr_t)(AP) << AN_OFFSHIFT) & (uintptr_t)0xFFFFFFFF)); \ + (AP)->an_vp = swapfs_getvp(((uintptr_t)(AP) >> AN_CACHE_ALIGN_LOG2) \ + & AN_VPMASK); \ + (AP)->an_off = (anoff_t)((((uintptr_t)(AP)) >> \ + AN_VPSHIFT + AN_CACHE_ALIGN_LOG2) << PAGESHIFT); \ } /* diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index 8b75225a64..e9f247ae1a 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -261,6 +260,10 @@ typedef struct vnode { #define IS_DEVVP(vp) \ ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO) +#define VNODE_ALIGN 64 +/* Count of low-order 0 bits in a vnode *, based on size and alignment. */ +#define VNODE_ALIGN_LOG2 8 + /* * vnode flags. */ diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h index 652fcc0951..a2e07d0b18 100644 --- a/usr/src/uts/common/vm/anon.h +++ b/usr/src/uts/common/vm/anon.h @@ -91,6 +91,11 @@ struct anon { int an_refcnt; /* # of people sharing slot */ }; +#define AN_CACHE_ALIGN_LOG2 4 /* log2(AN_CACHE_ALIGN) */ +#define AN_CACHE_ALIGN (1U << AN_CACHE_ALIGN_LOG2) /* anon address aligned */ + /* 16 bytes */ + + #ifdef _KERNEL /* * The swapinfo_lock protects: @@ -121,11 +126,24 @@ extern kcondvar_t anon_array_cv[]; * Global hash table to provide a function from (vp, off) -> ap */ extern size_t anon_hash_size; +extern unsigned int anon_hash_shift; extern struct anon **anon_hash; #define ANON_HASH_SIZE anon_hash_size #define ANON_HASHAVELEN 4 -#define ANON_HASH(VP, OFF) \ -((((uintptr_t)(VP) >> 7) ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1)) +/* + * Try to use as many bits of randomness from both vp and off as we can. + * This should help spreading evenly for a variety of workloads. See comments + * for PAGE_HASH_FUNC for more explanation. + */ +#define ANON_HASH(vp, off) \ + (((((uintptr_t)(off) >> PAGESHIFT) ^ \ + ((uintptr_t)(off) >> (PAGESHIFT + anon_hash_shift))) ^ \ + (((uintptr_t)(vp) >> 3) ^ \ + ((uintptr_t)(vp) >> (3 + anon_hash_shift)) ^ \ + ((uintptr_t)(vp) >> (3 + 2 * anon_hash_shift)) ^ \ + ((uintptr_t)(vp) << \ + (anon_hash_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \ + (anon_hash_size - 1)) #define AH_LOCK_SIZE (2 << NCPU_LOG2) diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 026ea7c29b..7fa4af9a4a 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -102,10 +102,37 @@ typedef int selock_t; #ifdef _KERNEL /* - * Macros to acquire and release the page logical lock. + * PAGE_LLOCK_SIZE is 2 * NCPU, but no smaller than 128. + * PAGE_LLOCK_SHIFT is log2(PAGE_LLOCK_SIZE). */ -#define page_struct_lock(pp) mutex_enter(&page_llock) -#define page_struct_unlock(pp) mutex_exit(&page_llock) +#if ((2*NCPU_P2) > 128) +#define PAGE_LLOCK_SHIFT ((unsigned)(NCPU_LOG2 + 1)) +#else +#define PAGE_LLOCK_SHIFT 7U +#endif +#define PAGE_LLOCK_SIZE (1 << PAGE_LLOCK_SHIFT) + +/* + * The number of low order 0 bits in the page_t address. + */ +#define PP_SHIFT 7 + +/* + * pp may be the root of a large page, and many low order bits will be 0. + * Shift and XOR multiple times to capture the good bits across the range of + * possible page sizes. + */ +#define PAGE_LLOCK_HASH(pp) \ + (((((uintptr_t)(pp) >> PP_SHIFT) ^ \ + ((uintptr_t)(pp) >> (PAGE_LLOCK_SHIFT + PP_SHIFT))) ^ \ + ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 2) + PP_SHIFT)) ^ \ + ((uintptr_t)(pp) >> ((PAGE_LLOCK_SHIFT * 3) + PP_SHIFT))) & \ + (PAGE_LLOCK_SIZE - 1)) + +#define page_struct_lock(pp) \ + mutex_enter(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex) +#define page_struct_unlock(pp) \ + mutex_exit(&page_llocks[PAGE_LLOCK_HASH(PP_PAGEROOT(pp))].pad_mutex) #endif /* _KERNEL */ @@ -171,7 +198,7 @@ struct as; * p_next * p_prev * - * The following fields are protected by the global page_llock: + * The following fields are protected by the global page_llocks[]: * * p_lckcnt * p_cowcnt @@ -348,8 +375,11 @@ struct as; * sleep while holding * this lock. * ===================================================================== - * p_lckcnt p_selock(E,S) p_selock(E) && - * p_cowcnt page_llock + * p_lckcnt p_selock(E,S) p_selock(E) + * OR + * p_selock(S) && + * page_llocks[] + * p_cowcnt * ===================================================================== * p_nrm hat layer lock hat layer lock * p_mapping @@ -535,44 +565,61 @@ typedef page_t devpage_t; * resulting hashed value. Note that this will perform quickly, since the * shifting/summing are fast register to register operations with no additional * memory references). + * + * PH_SHIFT_SIZE is the amount to use for the successive shifts in the hash + * function below. The actual value is LOG2(PH_TABLE_SIZE), so that as many + * bits as possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX(). */ #if defined(_LP64) #if NCPU < 4 #define PH_TABLE_SIZE 128 -#define VP_SHIFT 7 +#define PH_SHIFT_SIZE 7 #else -#define PH_TABLE_SIZE 1024 -#define VP_SHIFT 9 +#define PH_TABLE_SIZE (2 * NCPU_P2) +#define PH_SHIFT_SIZE (NCPU_LOG2 + 1) #endif #else /* 32 bits */ #if NCPU < 4 #define PH_TABLE_SIZE 16 -#define VP_SHIFT 7 +#define PH_SHIFT_SIZE 4 #else #define PH_TABLE_SIZE 128 -#define VP_SHIFT 9 +#define PH_SHIFT_SIZE 7 #endif #endif /* _LP64 */ /* - * The amount to use for the successive shifts in the hash function below. - * The actual value is LOG2(PH_TABLE_SIZE), so that as many bits as - * possible will filter thru PAGE_HASH_FUNC() and PAGE_HASH_MUTEX(). + * + * We take care to get as much randomness as possible from both the vp and + * the offset. Workloads can have few vnodes with many offsets, many vnodes + * with few offsets or a moderate mix of both. This hash should perform + * equally well for each of these possibilities and for all types of memory + * allocations. + * + * vnodes representing files are created over a long period of time and + * have good variation in the upper vp bits, and the right shifts below + * capture these bits. However, swap vnodes are created quickly in a + * narrow vp* range. Refer to comments at swap_alloc: vnum has exactly + * AN_VPSHIFT bits, so the kmem_alloc'd vnode addresses have approximately + * AN_VPSHIFT bits of variation above their VNODE_ALIGN low order 0 bits. + * Spread swap vnodes widely in the hash table by XOR'ing a term with the + * vp bits of variation left shifted to the top of the range. */ -#define PH_SHIFT_SIZE (7) #define PAGE_HASHSZ page_hashsz #define PAGE_HASHAVELEN 4 #define PAGE_HASH_FUNC(vp, off) \ - ((((uintptr_t)(off) >> PAGESHIFT) + \ - ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE)) + \ - ((uintptr_t)(vp) >> 3) + \ - ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) + \ - ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE))) & \ + (((((uintptr_t)(off) >> PAGESHIFT) ^ \ + ((uintptr_t)(off) >> (PAGESHIFT + PH_SHIFT_SIZE))) ^ \ + (((uintptr_t)(vp) >> 3) ^ \ + ((uintptr_t)(vp) >> (3 + PH_SHIFT_SIZE)) ^ \ + ((uintptr_t)(vp) >> (3 + 2 * PH_SHIFT_SIZE)) ^ \ + ((uintptr_t)(vp) << \ + (page_hashsz_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \ (PAGE_HASHSZ - 1)) #ifdef _KERNEL @@ -588,16 +635,10 @@ typedef page_t devpage_t; * Since sizeof (kmutex_t) is 8, we shift an additional 3 to skew to a different * 64 byte sub-block. */ -typedef struct pad_mutex { - kmutex_t pad_mutex; -#ifdef _LP64 - char pad_pad[64 - sizeof (kmutex_t)]; -#endif -} pad_mutex_t; extern pad_mutex_t ph_mutex[]; #define PAGE_HASH_MUTEX(x) \ - &(ph_mutex[((x) + ((x) >> VP_SHIFT) + ((x) << 3)) & \ + &(ph_mutex[((x) ^ ((x) >> PH_SHIFT_SIZE) + ((x) << 3)) & \ (PH_TABLE_SIZE - 1)].pad_mutex) /* @@ -626,9 +667,10 @@ extern pad_mutex_t ph_mutex[]; ((se) == SE_EXCL ? PAGE_EXCL(pp) : PAGE_SHARED(pp)) extern long page_hashsz; +extern unsigned int page_hashsz_shift; extern page_t **page_hash; -extern kmutex_t page_llock; /* page logical lock mutex */ +extern pad_mutex_t page_llocks[]; /* page logical lock mutex */ extern kmutex_t freemem_lock; /* freemem lock */ extern pgcnt_t total_pages; /* total pages in the system */ diff --git a/usr/src/uts/common/vm/page_lock.c b/usr/src/uts/common/vm/page_lock.c index 8003884652..7e48602189 100644 --- a/usr/src/uts/common/vm/page_lock.c +++ b/usr/src/uts/common/vm/page_lock.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -42,14 +41,14 @@ #include <vm/seg_kmem.h> /* - * This global mutex is for logical page locking. + * This global mutex array is for logical page locking. * The following fields in the page structure are protected * by this lock: * * p_lckcnt * p_cowcnt */ -kmutex_t page_llock; +pad_mutex_t page_llocks[8 * NCPU_P2]; /* * This is a global lock for the logical page free list. The @@ -127,14 +126,10 @@ static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE]; * an address of a vnode. */ -/* - * XX64 VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world. - * Need to review again. - */ #if defined(_LP64) -#define VPH_TABLE_SIZE (1 << (VP_SHIFT + 3)) +#define VPH_TABLE_SIZE (8 * NCPU_P2) #else /* 32 bits */ -#define VPH_TABLE_SIZE (2 << VP_SHIFT) +#define VPH_TABLE_SIZE (2 * NCPU_P2) #endif #define VP_HASH_FUNC(vp) \ diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 666b98f389..31c293d416 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -6483,10 +6482,26 @@ segvn_claim_pages( ASSERT(pg_idx <= pgcnt); ppa[pg_idx] = NULL; - if (prot & PROT_WRITE) - err = page_addclaim_pages(ppa); - else - err = page_subclaim_pages(ppa); + + /* Find each large page within ppa, and adjust its claim */ + + /* Does ppa cover a single large page? */ + if (ppa[0]->p_szc == seg->s_szc) { + if (prot & PROT_WRITE) + err = page_addclaim_pages(ppa); + else + err = page_subclaim_pages(ppa); + } else { + for (i = 0; ppa[i]; i += pgcnt) { + ASSERT(IS_P2ALIGNED(page_pptonum(ppa[i]), pgcnt)); + if (prot & PROT_WRITE) + err = page_addclaim_pages(&ppa[i]); + else + err = page_subclaim_pages(&ppa[i]); + if (err == 0) + break; + } + } for (i = 0; i < pg_idx; i++) { ASSERT(ppa[i] != NULL); diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c index 6ded5d7192..4916f5d376 100644 --- a/usr/src/uts/common/vm/vm_anon.c +++ b/usr/src/uts/common/vm/vm_anon.c @@ -138,6 +138,7 @@ kcondvar_t anon_array_cv[ANON_LOCKSIZE]; */ extern int swap_maxcontig; size_t anon_hash_size; +unsigned int anon_hash_shift; struct anon **anon_hash; static struct kmem_cache *anon_cache; @@ -199,7 +200,8 @@ anon_init(void) pad_mutex_t *tmp; /* These both need to be powers of 2 so round up to the next power */ - anon_hash_size = 1L << highbit((physmem / ANON_HASHAVELEN) - 1); + anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1); + anon_hash_size = 1L << anon_hash_shift; /* * We need to align the anonhash_lock and anonpages_hash_lock arrays diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c index a35f7cc196..169b9c84e7 100644 --- a/usr/src/uts/common/vm/vm_page.c +++ b/usr/src/uts/common/vm/vm_page.c @@ -3977,11 +3977,27 @@ page_pp_useclaim( uint_t write_perm) /* set if vpage has PROT_WRITE */ { int payback = 0; + int nidx, oidx; ASSERT(PAGE_LOCKED(opp)); ASSERT(PAGE_LOCKED(npp)); - page_struct_lock(opp); + /* + * Since we have two pages we probably have two locks. We need to take + * them in a defined order to avoid deadlocks. It's also possible they + * both hash to the same lock in which case this is a non-issue. + */ + nidx = PAGE_LLOCK_HASH(PP_PAGEROOT(npp)); + oidx = PAGE_LLOCK_HASH(PP_PAGEROOT(opp)); + if (nidx < oidx) { + page_struct_lock(npp); + page_struct_lock(opp); + } else if (oidx < nidx) { + page_struct_lock(opp); + page_struct_lock(npp); + } else { /* The pages hash to the same lock */ + page_struct_lock(npp); + } ASSERT(npp->p_cowcnt == 0); ASSERT(npp->p_lckcnt == 0); @@ -4017,7 +4033,16 @@ page_pp_useclaim( pages_useclaim--; mutex_exit(&freemem_lock); } - page_struct_unlock(opp); + + if (nidx < oidx) { + page_struct_unlock(opp); + page_struct_unlock(npp); + } else if (oidx < nidx) { + page_struct_unlock(npp); + page_struct_unlock(opp); + } else { /* The pages hash to the same lock */ + page_struct_unlock(npp); + } } /* @@ -4103,21 +4128,27 @@ page_subclaim(page_t *pp) return (r); } +/* + * Variant of page_addclaim(), where ppa[] contains the pages of a single large + * page. + */ int page_addclaim_pages(page_t **ppa) { - pgcnt_t lckpgs = 0, pg_idx; VM_STAT_ADD(pagecnt.pc_addclaim_pages); - mutex_enter(&page_llock); + /* + * Only need to take the page struct lock on the large page root. + */ + page_struct_lock(ppa[0]); for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) { ASSERT(PAGE_LOCKED(ppa[pg_idx])); ASSERT(ppa[pg_idx]->p_lckcnt != 0); if (ppa[pg_idx]->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) { - mutex_exit(&page_llock); + page_struct_unlock(ppa[0]); return (0); } if (ppa[pg_idx]->p_lckcnt > 1) @@ -4131,7 +4162,7 @@ page_addclaim_pages(page_t **ppa) pages_claimed += lckpgs; } else { mutex_exit(&freemem_lock); - mutex_exit(&page_llock); + page_struct_unlock(ppa[0]); return (0); } mutex_exit(&freemem_lock); @@ -4141,10 +4172,14 @@ page_addclaim_pages(page_t **ppa) ppa[pg_idx]->p_lckcnt--; ppa[pg_idx]->p_cowcnt++; } - mutex_exit(&page_llock); + page_struct_unlock(ppa[0]); return (1); } +/* + * Variant of page_subclaim(), where ppa[] contains the pages of a single large + * page. + */ int page_subclaim_pages(page_t **ppa) { @@ -4152,13 +4187,16 @@ page_subclaim_pages(page_t **ppa) VM_STAT_ADD(pagecnt.pc_subclaim_pages); - mutex_enter(&page_llock); + /* + * Only need to take the page struct lock on the large page root. + */ + page_struct_lock(ppa[0]); for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) { ASSERT(PAGE_LOCKED(ppa[pg_idx])); ASSERT(ppa[pg_idx]->p_cowcnt != 0); if (ppa[pg_idx]->p_lckcnt == (ushort_t)PAGE_LOCK_MAXIMUM) { - mutex_exit(&page_llock); + page_struct_unlock(ppa[0]); return (0); } if (ppa[pg_idx]->p_lckcnt != 0) @@ -4177,7 +4215,7 @@ page_subclaim_pages(page_t **ppa) ppa[pg_idx]->p_lckcnt++; } - mutex_exit(&page_llock); + page_struct_unlock(ppa[0]); return (1); } diff --git a/usr/src/uts/common/vm/vm_pagelist.c b/usr/src/uts/common/vm/vm_pagelist.c index 7b761da108..eda3552c03 100644 --- a/usr/src/uts/common/vm/vm_pagelist.c +++ b/usr/src/uts/common/vm/vm_pagelist.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -60,6 +59,7 @@ #include <sys/mem_cage.h> #include <sys/sdt.h> #include <sys/dumphdr.h> +#include <sys/swap.h> extern uint_t vac_colors; diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index ad719dfcdd..f69b37a9f2 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -289,6 +289,7 @@ int segzio_fromheap = 1; * VM data structures */ long page_hashsz; /* Size of page hash table (power of two) */ +unsigned int page_hashsz_shift; /* log2(page_hashsz) */ struct page *pp_base; /* Base of initial system page struct array */ struct page **page_hash; /* Page hash table */ pad_mutex_t *pse_mutex; /* Locks protecting pp->p_selock */ @@ -1126,12 +1127,15 @@ startup_memlist(void) ADD_TO_ALLOCATIONS(bios_rsvd, rsvdmemlist_sz); PRM_DEBUG(rsvdmemlist_sz); + /* LINTED */ + ASSERT(P2SAMEHIGHBIT((1 << PP_SHIFT), sizeof (struct page))); /* * The page structure hash table size is a power of 2 * such that the average hash chain length is PAGE_HASHAVELEN. */ page_hashsz = npages / PAGE_HASHAVELEN; - page_hashsz = 1 << highbit(page_hashsz); + page_hashsz_shift = highbit(page_hashsz); + page_hashsz = 1 << page_hashsz_shift; pagehash_sz = sizeof (struct page *) * page_hashsz; ADD_TO_ALLOCATIONS(page_hash, pagehash_sz); PRM_DEBUG(pagehash_sz); diff --git a/usr/src/uts/i86pc/sys/machparam.h b/usr/src/uts/i86pc/sys/machparam.h index ed3c5d5fcc..a0fa08db16 100644 --- a/usr/src/uts/i86pc/sys/machparam.h +++ b/usr/src/uts/i86pc/sys/machparam.h @@ -60,6 +60,9 @@ extern "C" { #define NCPU_LOG2 5 #endif +/* NCPU_P2 is NCPU rounded to a power of 2 */ +#define NCPU_P2 (1 << NCPU_LOG2) + /* * The value defined below could grow to 16. hat structure and * page_t have room for 16 nodes. diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index 79c0ee073e..dfdca87e1c 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * Copyright (c) 2010, Intel Corporation. @@ -58,6 +57,7 @@ #include <sys/exechdr.h> #include <sys/debug.h> #include <sys/vmsystm.h> +#include <sys/swap.h> #include <vm/hat.h> #include <vm/as.h> diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c index 6156017a5e..baaf08d7e5 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -775,15 +774,26 @@ kpm_shlk_t *kpmp_stable; uint_t kpmp_stable_sz; /* must be a power of 2 */ /* - * SPL_HASH was improved to avoid false cache line sharing + * SPL_TABLE_SIZE is 2 * NCPU, but no smaller than 128. + * SPL_SHIFT is log2(SPL_TABLE_SIZE). */ -#define SPL_TABLE_SIZE 128 +#if ((2*NCPU_P2) > 128) +#define SPL_SHIFT ((unsigned)(NCPU_LOG2 + 1)) +#else +#define SPL_SHIFT 7U +#endif +#define SPL_TABLE_SIZE (1U << SPL_SHIFT) #define SPL_MASK (SPL_TABLE_SIZE - 1) -#define SPL_SHIFT 7 /* log2(SPL_TABLE_SIZE) */ +/* + * We shift by PP_SHIFT to take care of the low-order 0 bits of a page_t + * and by multiples of SPL_SHIFT to get as many varied bits as we can. + */ #define SPL_INDEX(pp) \ - ((((uintptr_t)(pp) >> SPL_SHIFT) ^ \ - ((uintptr_t)(pp) >> (SPL_SHIFT << 1))) & \ + ((((uintptr_t)(pp) >> PP_SHIFT) ^ \ + ((uintptr_t)(pp) >> (PP_SHIFT + SPL_SHIFT)) ^ \ + ((uintptr_t)(pp) >> (PP_SHIFT + SPL_SHIFT * 2)) ^ \ + ((uintptr_t)(pp) >> (PP_SHIFT + SPL_SHIFT * 3))) & \ (SPL_TABLE_SIZE - 1)) #define SPL_HASH(pp) \ diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c index 914451cf4f..fe1dffc468 100644 --- a/usr/src/uts/sun4/os/startup.c +++ b/usr/src/uts/sun4/os/startup.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/machsystm.h> @@ -62,6 +61,7 @@ #include <sys/memnode.h> #include <sys/mem_cage.h> #include <sys/mmu.h> +#include <sys/swap.h> extern void setup_trap_table(void); extern int cpu_intrq_setup(struct cpu *); @@ -174,6 +174,7 @@ pgcnt_t obp_pages; /* Physical pages used by OBP */ * VM data structures */ long page_hashsz; /* Size of page hash table (power of two) */ +unsigned int page_hashsz_shift; /* log2(page_hashsz) */ struct page *pp_base; /* Base of system page struct array */ size_t pp_sz; /* Size in bytes of page struct array */ struct page **page_hash; /* Page hash table */ @@ -748,13 +749,16 @@ calc_kpmpp_sz(pgcnt_t npages) size_t calc_pagehash_sz(pgcnt_t npages) { - + /* LINTED */ + ASSERT(P2SAMEHIGHBIT((1 << PP_SHIFT), (sizeof (struct page)))); /* * The page structure hash table size is a power of 2 * such that the average hash chain length is PAGE_HASHAVELEN. */ page_hashsz = npages / PAGE_HASHAVELEN; - page_hashsz = 1 << highbit(page_hashsz); + page_hashsz_shift = MAX((AN_VPSHIFT + VNODE_ALIGN_LOG2 + 1), + highbit(page_hashsz)); + page_hashsz = 1 << page_hashsz_shift; return (page_hashsz * sizeof (struct page *)); } diff --git a/usr/src/uts/sun4u/sys/machparam.h b/usr/src/uts/sun4u/sys/machparam.h index e60d02a2cc..b0130af21f 100644 --- a/usr/src/uts/sun4u/sys/machparam.h +++ b/usr/src/uts/sun4u/sys/machparam.h @@ -103,6 +103,9 @@ extern "C" { #error "add test for larger NCPU" #endif +/* NCPU_P2 is NCPU rounded to a power of 2 */ +#define NCPU_P2 (1 << NCPU_LOG2) + /* * Maximum number of processors that we support. With CMP processors, the * portid may not be equal to cpuid. MAX_CPU_CHIPID can be defined in a diff --git a/usr/src/uts/sun4v/sys/machparam.h b/usr/src/uts/sun4v/sys/machparam.h index 4fe46b985b..b47b33c370 100644 --- a/usr/src/uts/sun4v/sys/machparam.h +++ b/usr/src/uts/sun4v/sys/machparam.h @@ -63,6 +63,9 @@ extern "C" { #error "Must define NCPU_LOG2 together with NCPU" #endif +/* NCPU_P2 is NCPU rounded to a power of 2 */ +#define NCPU_P2 (1 << NCPU_LOG2) + /* * Maximum number of processors that we support. With CMP processors, the * portid may not be equal to cpuid. MAX_CPU_CHIPID can be defined in a |