diff options
| author | Pavel Filipensky <Pavel.Filipensky@Sun.COM> | 2010-03-11 07:11:09 +0000 |
|---|---|---|
| committer | Pavel Filipensky <Pavel.Filipensky@Sun.COM> | 2010-03-11 07:11:09 +0000 |
| commit | f8bbc5716bd2f470114db82bab03cedb9ce9d72e (patch) | |
| tree | f2628c4db9db057b54d602ed9759b4dbcf0ae5aa /usr/src/uts/common/vm | |
| parent | a1ec3a853d53c2a2d23f24cea187c3df87adf49f (diff) | |
| download | illumos-joyent-f8bbc5716bd2f470114db82bab03cedb9ce9d72e.tar.gz | |
6213799 VMODSORT support for NFS. AKA "close()/fsync() slow on clients with lots of memory and cached pages"
Diffstat (limited to 'usr/src/uts/common/vm')
| -rw-r--r-- | usr/src/uts/common/vm/pvn.h | 16 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_pvn.c | 77 |
2 files changed, 75 insertions, 18 deletions
diff --git a/usr/src/uts/common/vm/pvn.h b/usr/src/uts/common/vm/pvn.h index 0467589ae6..db8171170f 100644 --- a/usr/src/uts/common/vm/pvn.h +++ b/usr/src/uts/common/vm/pvn.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -40,8 +39,6 @@ #ifndef _VM_PVN_H #define _VM_PVN_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/buf.h> #include <vm/seg.h> @@ -73,6 +70,7 @@ int pvn_vplist_dirty(struct vnode *vp, u_offset_t off, int (*putapage)(vnode_t *, struct page *, u_offset_t *, size_t *, int, cred_t *), int flags, struct cred *cred); +void pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *)); int pvn_getdirty(struct page *pp, int flags); void pvn_vpzero(struct vnode *vp, u_offset_t vplen, size_t zbytes); int pvn_getpages( @@ -88,6 +86,12 @@ void pvn_plist_init(struct page *pp, struct page **pl, size_t plsz, void pvn_init(void); /* + * The value is put in p_hash to identify marker pages. It is safe to + * test p_hash ==(!=) PVN_VPLIST_HASH_TAG even without holding p_selock. + */ +#define PVN_VPLIST_HASH_TAG ((page_t *)-1) + +/* * When requesting pages from the getpage routines, pvn_getpages will * allocate space to return PVN_GETPAGE_NUM pages which map PVN_GETPAGE_SZ * worth of bytes. These numbers are chosen to be the minimum of the max's diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c index 186235767d..22e7a89a34 100644 --- a/usr/src/uts/common/vm/vm_pvn.c +++ b/usr/src/uts/common/vm/vm_pvn.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -217,8 +217,8 @@ pvn_read_kluster( } } TRACE_3(TR_FAC_VM, TR_PVN_READ_KLUSTER, - "pvn_read_kluster:seg %p addr %x isra %x", - seg, addr, isra); + "pvn_read_kluster:seg %p addr %x isra %x", + seg, addr, isra); return (plist); } @@ -460,7 +460,7 @@ pvn_write_done(page_t *plist, int flags) pgout = 1; pgpgout++; TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT, - "page_ws_out:pp %p", pp); + "page_ws_out:pp %p", pp); /* * The page_struct_lock need not be acquired to @@ -483,7 +483,7 @@ pvn_write_done(page_t *plist, int flags) * to avoid having to flush the cache. */ ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO | - HAT_SYNC_STOPON_MOD); + HAT_SYNC_STOPON_MOD); ck_refmod: if (!(ppattr & (P_REF | P_MOD))) { if (hat_page_is_mapped(pp)) { @@ -502,9 +502,9 @@ pvn_write_done(page_t *plist, int flags) * lock on the page. */ (void) hat_pageunload(pp, - HAT_FORCE_PGUNLOAD); + HAT_FORCE_PGUNLOAD); ppattr = hat_page_getattr(pp, - P_REF | P_MOD); + P_REF | P_MOD); goto ck_refmod; } /* @@ -525,7 +525,7 @@ pvn_write_done(page_t *plist, int flags) } /*LINTED: constant in conditional ctx*/ VN_DISPOSE(pp, B_FREE, - (flags & B_DONTNEED), kcred); + (flags & B_DONTNEED), kcred); dfree++; } else { page_unlock(pp); @@ -567,10 +567,10 @@ pvn_write_done(page_t *plist, int flags) /* Kernel probe */ TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */, - tnf_opaque, vnode, vp, - tnf_ulong, pages_pageout, pgpgout, - tnf_ulong, pages_freed, dfree, - tnf_ulong, pages_reclaimed, pgrec); + tnf_opaque, vnode, vp, + tnf_ulong, pages_pageout, pgpgout, + tnf_ulong, pages_freed, dfree, + tnf_ulong, pages_reclaimed, pgrec); } /* @@ -699,6 +699,7 @@ marker_constructor(void *buf, void *cdrarg, int kmflags) { page_t *mark = buf; bzero(mark, sizeof (page_t)); + mark->p_hash = PVN_VPLIST_HASH_TAG; return (0); } @@ -993,6 +994,58 @@ leave: } /* + * Walk the vp->v_pages list, for every page call the callback function + * pointed by *page_check. If page_check returns non-zero, then mark the + * page as modified and if VMODSORT is set, move it to the end of v_pages + * list. Moving makes sense only if we have at least two pages - this also + * avoids having v_pages temporarily being NULL after calling page_vpsub() + * if there was just one page. + */ +void +pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *)) +{ + page_t *pp, *next, *end; + kmutex_t *vphm; + int shuffle; + + vphm = page_vnode_mutex(vp); + mutex_enter(vphm); + + if (vp->v_pages == NULL) { + mutex_exit(vphm); + return; + } + + end = vp->v_pages->p_vpprev; + shuffle = IS_VMODSORT(vp) && (vp->v_pages != end); + pp = vp->v_pages; + + for (;;) { + next = pp->p_vpnext; + if (pp->p_hash != PVN_VPLIST_HASH_TAG && page_check(pp)) { + /* + * hat_setmod_only() in contrast to hat_setmod() does + * not shuffle the pages and does not grab the mutex + * page_vnode_mutex. Exactly what we need. + */ + hat_setmod_only(pp); + if (shuffle) { + page_vpsub(&vp->v_pages, pp); + ASSERT(vp->v_pages != NULL); + page_vpadd(&vp->v_pages->p_vpprev->p_vpnext, + pp); + } + } + /* Stop if we have just processed the last page. */ + if (pp == end) + break; + pp = next; + } + + mutex_exit(vphm); +} + +/* * Zero out zbytes worth of data. Caller should be aware that this * routine may enter back into the fs layer (xxx_getpage). Locks * that the xxx_getpage routine may need should not be held while |
