| author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
|---|---|---|
| committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
| commit | 7c478bd95313f5f23a4c958a745db2134aa03244 | |
| tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/vm/page_lock.c | |
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/vm/page_lock.c')
| mode | path | insertions |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/vm/page_lock.c | 861 |
1 file changed, 861 insertions, 0 deletions
diff --git a/usr/src/uts/common/vm/page_lock.c b/usr/src/uts/common/vm/page_lock.c
new file mode 100644
index 0000000000..9a2d12dd8e
--- /dev/null
+++ b/usr/src/uts/common/vm/page_lock.c
@@ -0,0 +1,861 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PSE_TABLE_SIZE,
 *	PSE_SHIFT, PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
#define	PSE_SHIFT	7		/* log2(PSE_TABLE_SIZE) */

#define	PSE_TABLE_SIZE	128		/* number of mutexes to have */

#define	PIO_SHIFT	PSE_SHIFT	/* next power of 2 bigger than page_t */
#define	PIO_TABLE_SIZE	PSE_TABLE_SIZE	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
pad_mutex_t	pse_mutex[PSE_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_SE_MUTEX(pp) \
	    &pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
		((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
		(PSE_TABLE_SIZE - 1))].pad_mutex

#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
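
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * every p_selock access in this file is bracketed by the striped mutex
 * that the page's address hashes to, along these lines.  The guard
 * macro and function name are hypothetical.
 */
#ifdef EXAMPLE_SKETCH	/* hypothetical guard, never defined */
static void
example_selock_stripe(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);	/* stripe picked by address */

	mutex_enter(pse);
	/* pp->p_selock may be read or written only while pse is held */
	mutex_exit(pse);
}
#endif	/* EXAMPLE_SKETCH */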

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern	struct vnode	kvp;

kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1
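
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * a hypothetical debug helper decoding the encoding above -- a negative
 * value means a writer holds the lock, a positive value is the count of
 * readers.
 */
#ifdef EXAMPLE_SKETCH
static const char *
example_selock_state(page_t *pp)
{
	selock_t sel = pp->p_selock;	/* unstable unless pse is held */

	if ((sel & ~SE_EWANTED) == 0)
		return ("unlocked");
	else if (sel < 0)
		return ("write-locked");
	else
		return ("read-locked");
}
#endif	/* EXAMPLE_SKETCH */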

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)

#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 *	   0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}
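
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * the usual caller pattern.  `phm' stands for a hash mutex the caller
 * held while it identified `pp'; the names are hypothetical.
 */
#ifdef EXAMPLE_SKETCH
static int
example_lock_found_page(page_t *pp, kmutex_t *phm)
{
	if (page_lock(pp, SE_SHARED, phm, P_NO_RECLAIM))
		return (1);		/* locked; phm was never dropped */
	/*
	 * page_lock() blocked: phm was dropped and reacquired, so the
	 * page identity [vp, off] may have changed and the lookup must
	 * be redone from scratch.
	 */
	return (0);
}
#endif	/* EXAMPLE_SKETCH */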

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.
 * This bit, along with the se and es parameters, is used to decide
 * if the requested lock should be granted:
 *
 * Lock wanted	SE_EXCL_WANTED	p_selock/SE_EWANTED	Action
 * ----------	--------------	-------------------	---------
 * SE_EXCL	no		don't care/1		deny lock
 * SE_EXCL	any (see note)	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	yes		any lock/any		deny, set SE_EWANTED
 * SE_EXCL	no		any lock/any		deny
 * SE_SHARED	not applicable	shared/0		grant
 * SE_SHARED	not applicable	unlocked/0		grant
 * SE_SHARED	not applicable	shared/1		deny
 * SE_SHARED	not applicable	unlocked/1		deny
 * SE_SHARED	not applicable	excl/any		deny
 *
 * Note: the code grants an exclusive lock to the caller and clears
 * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 * bit's value.  This was deemed acceptable as we are not concerned about
 * exclusive-lock starvation.  If this ever becomes an issue, a priority or
 * fifo mechanism should also be implemented.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	/*
	 * Current uses of 'es':
	 * es == 1		page_lookup_create will attempt page relocation
	 * es == SE_EXCL_WANTED	caller wants SE_EWANTED set (e.g., delete
	 * memory thread); this prevents reader-starvation of waiting
	 * writer thread(s).
	 */

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock cannot change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it; force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if ((es != SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es == SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			/* readers are not allowed when excl wanted */
			if (!(pp->p_selock & SE_EWANTED)) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (i.e., it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}
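
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * a writer that must not starve behind readers, modeled on the
 * delete-memory usage described above.  page_lock_clr_exclwanted() is
 * defined below; `ntries' is hypothetical.
 */
#ifdef EXAMPLE_SKETCH
static int
example_excl_lock_no_starve(page_t *pp, int ntries)
{
	while (ntries-- > 0) {
		/* on failure, SE_EWANTED is set and new readers are denied */
		if (page_lock_es(pp, SE_EXCL, (kmutex_t *)NULL,
		    P_NO_RECLAIM, SE_EXCL_WANTED))
			return (1);
	}
	/* giving up: clear the bit so blocked readers may proceed again */
	page_lock_clr_exclwanted(pp);
	return (0);
}
#endif	/* EXAMPLE_SKETCH */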

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (e.g., memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/* readers are not allowed when excl wanted */
				if (!(old & SE_EWANTED)) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es == SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es == SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED) {
		/* fail if a thread wants exclusive access */
		mutex_exit(pse);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}
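
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * the classic use of the non-blocking variant when locks must be taken
 * out of their usual order.  `other' stands for some lock already held;
 * both names are hypothetical.
 */
#ifdef EXAMPLE_SKETCH
static void
example_trylock_backoff(page_t *pp, kmutex_t *other)
{
	ASSERT(MUTEX_HELD(other));

	if (!page_trylock(pp, SE_EXCL)) {
		mutex_exit(other);	/* drop to avoid deadlock */
		(void) page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM);
		mutex_enter(other);	/* caller must revalidate state */
	}
}
#endif	/* EXAMPLE_SKETCH */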

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);
	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", pp);
	}
	mutex_exit(pse);
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_signal(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;

	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}
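
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * the i/o lock brackets a pagein/pageout on a page independently of
 * p_selock.
 */
#ifdef EXAMPLE_SKETCH
static void
example_page_io(page_t *pp)
{
	page_io_lock(pp);	/* blocks while PAGE_IO_INUSE is set */
	/* ... issue the i/o and wait for it to complete ... */
	page_io_unlock(pp);	/* wakes one waiter via cv_signal() */
}
#endif	/* EXAMPLE_SKETCH */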

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif
/*
 * Find, take, and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before the hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote(),
 * i.e., non-swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * The hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned, pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease if pp == rootpp,
	 * rootpp will always be the same, i.e., we have the right root
	 * regardless of rootpp->p_szc.
	 * If the location of pp's root didn't change after we took
	 * the lock, we have the right root; return the mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * The root location changed because the page got demoted;
	 * locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * The current hat_page_demote() is not done yet;
	 * wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}
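
The retry logic in page_szc_lock() is easiest to follow from the caller's side. A minimal sketch of a hypothetical caller (per the ASSERTs above, pp must be a locked, non-free, non-swapfs file system page):

	kmutex_t *mtx;

	if ((mtx = page_szc_lock(pp)) != NULL) {
		/* pp->p_szc is now stable; hat_page_demote() is held off */
		/* ... operate on the large page ... */
		mutex_exit(mtx);
	} else {
		/* a NULL return guarantees pp->p_szc == 0 */
	}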
