summary refs log tree commit diff
path: root/usr/src/uts/common/vm
diff options
context:
space:
mode:
author	Pavel Filipensky <Pavel.Filipensky@Sun.COM>	2010-03-11 07:11:09 +0000
committer	Pavel Filipensky <Pavel.Filipensky@Sun.COM>	2010-03-11 07:11:09 +0000
commitf8bbc5716bd2f470114db82bab03cedb9ce9d72e (patch)
treef2628c4db9db057b54d602ed9759b4dbcf0ae5aa /usr/src/uts/common/vm
parenta1ec3a853d53c2a2d23f24cea187c3df87adf49f (diff)
downloadillumos-joyent-f8bbc5716bd2f470114db82bab03cedb9ce9d72e.tar.gz
6213799 VMODSORT support for NFS. AKA "close()/fsync() slow on clients with lots of memory and cached pages"
Diffstat (limited to 'usr/src/uts/common/vm')
-rw-r--r--usr/src/uts/common/vm/pvn.h16
-rw-r--r--usr/src/uts/common/vm/vm_pvn.c77
2 files changed, 75 insertions, 18 deletions
diff --git a/usr/src/uts/common/vm/pvn.h b/usr/src/uts/common/vm/pvn.h
index 0467589ae6..db8171170f 100644
--- a/usr/src/uts/common/vm/pvn.h
+++ b/usr/src/uts/common/vm/pvn.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,8 +39,6 @@
#ifndef _VM_PVN_H
#define _VM_PVN_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/buf.h>
#include <vm/seg.h>
@@ -73,6 +70,7 @@ int pvn_vplist_dirty(struct vnode *vp, u_offset_t off,
int (*putapage)(vnode_t *, struct page *, u_offset_t *,
size_t *, int, cred_t *),
int flags, struct cred *cred);
+void pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *));
int pvn_getdirty(struct page *pp, int flags);
void pvn_vpzero(struct vnode *vp, u_offset_t vplen, size_t zbytes);
int pvn_getpages(
@@ -88,6 +86,12 @@ void pvn_plist_init(struct page *pp, struct page **pl, size_t plsz,
void pvn_init(void);
/*
+ * The value is put in p_hash to identify marker pages. It is safe to
+ * test p_hash ==(!=) PVN_VPLIST_HASH_TAG even without holding p_selock.
+ */
+#define PVN_VPLIST_HASH_TAG ((page_t *)-1)
+
+/*
* When requesting pages from the getpage routines, pvn_getpages will
* allocate space to return PVN_GETPAGE_NUM pages which map PVN_GETPAGE_SZ
* worth of bytes. These numbers are chosen to be the minimum of the max's
diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c
index 186235767d..22e7a89a34 100644
--- a/usr/src/uts/common/vm/vm_pvn.c
+++ b/usr/src/uts/common/vm/vm_pvn.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -217,8 +217,8 @@ pvn_read_kluster(
}
}
TRACE_3(TR_FAC_VM, TR_PVN_READ_KLUSTER,
- "pvn_read_kluster:seg %p addr %x isra %x",
- seg, addr, isra);
+ "pvn_read_kluster:seg %p addr %x isra %x",
+ seg, addr, isra);
return (plist);
}
@@ -460,7 +460,7 @@ pvn_write_done(page_t *plist, int flags)
pgout = 1;
pgpgout++;
TRACE_1(TR_FAC_VM, TR_PAGE_WS_OUT,
- "page_ws_out:pp %p", pp);
+ "page_ws_out:pp %p", pp);
/*
* The page_struct_lock need not be acquired to
@@ -483,7 +483,7 @@ pvn_write_done(page_t *plist, int flags)
* to avoid having to flush the cache.
*/
ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
- HAT_SYNC_STOPON_MOD);
+ HAT_SYNC_STOPON_MOD);
ck_refmod:
if (!(ppattr & (P_REF | P_MOD))) {
if (hat_page_is_mapped(pp)) {
@@ -502,9 +502,9 @@ pvn_write_done(page_t *plist, int flags)
* lock on the page.
*/
(void) hat_pageunload(pp,
- HAT_FORCE_PGUNLOAD);
+ HAT_FORCE_PGUNLOAD);
ppattr = hat_page_getattr(pp,
- P_REF | P_MOD);
+ P_REF | P_MOD);
goto ck_refmod;
}
/*
@@ -525,7 +525,7 @@ pvn_write_done(page_t *plist, int flags)
}
/*LINTED: constant in conditional ctx*/
VN_DISPOSE(pp, B_FREE,
- (flags & B_DONTNEED), kcred);
+ (flags & B_DONTNEED), kcred);
dfree++;
} else {
page_unlock(pp);
@@ -567,10 +567,10 @@ pvn_write_done(page_t *plist, int flags)
/* Kernel probe */
TNF_PROBE_4(pageout, "vm pageio io", /* CSTYLED */,
- tnf_opaque, vnode, vp,
- tnf_ulong, pages_pageout, pgpgout,
- tnf_ulong, pages_freed, dfree,
- tnf_ulong, pages_reclaimed, pgrec);
+ tnf_opaque, vnode, vp,
+ tnf_ulong, pages_pageout, pgpgout,
+ tnf_ulong, pages_freed, dfree,
+ tnf_ulong, pages_reclaimed, pgrec);
}
/*
@@ -699,6 +699,7 @@ marker_constructor(void *buf, void *cdrarg, int kmflags)
{
page_t *mark = buf;
bzero(mark, sizeof (page_t));
+ mark->p_hash = PVN_VPLIST_HASH_TAG;
return (0);
}
@@ -993,6 +994,58 @@ leave:
}
/*
+ * Walk the vp->v_pages list, for every page call the callback function
+ * pointed by *page_check. If page_check returns non-zero, then mark the
+ * page as modified and if VMODSORT is set, move it to the end of v_pages
+ * list. Moving makes sense only if we have at least two pages - this also
+ * avoids having v_pages temporarily being NULL after calling page_vpsub()
+ * if there was just one page.
+ */
+void
+pvn_vplist_setdirty(vnode_t *vp, int (*page_check)(page_t *))
+{
+ page_t *pp, *next, *end;
+ kmutex_t *vphm;
+ int shuffle;
+
+ vphm = page_vnode_mutex(vp);
+ mutex_enter(vphm);
+
+ if (vp->v_pages == NULL) {
+ mutex_exit(vphm);
+ return;
+ }
+
+ end = vp->v_pages->p_vpprev;
+ shuffle = IS_VMODSORT(vp) && (vp->v_pages != end);
+ pp = vp->v_pages;
+
+ for (;;) {
+ next = pp->p_vpnext;
+ if (pp->p_hash != PVN_VPLIST_HASH_TAG && page_check(pp)) {
+ /*
+ * hat_setmod_only() in contrast to hat_setmod() does
+ * not shuffle the pages and does not grab the mutex
+ * page_vnode_mutex. Exactly what we need.
+ */
+ hat_setmod_only(pp);
+ if (shuffle) {
+ page_vpsub(&vp->v_pages, pp);
+ ASSERT(vp->v_pages != NULL);
+ page_vpadd(&vp->v_pages->p_vpprev->p_vpnext,
+ pp);
+ }
+ }
+ /* Stop if we have just processed the last page. */
+ if (pp == end)
+ break;
+ pp = next;
+ }
+
+ mutex_exit(vphm);
+}
+
+/*
* Zero out zbytes worth of data. Caller should be aware that this
* routine may enter back into the fs layer (xxx_getpage). Locks
* that the xxx_getpage routine may need should not be held while