summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcel Telka <marcel@telka.sk>2018-04-25 00:28:01 +0200
committerDan McDonald <danmcd@joyent.com>2018-07-18 20:10:13 -0400
commite010bda94b034e413b6fe35fd45bca0afaf1a0df (patch)
tree252bdfc74d19e9790fb82af170be1a0d9ac0db65
parent5882b622b7e2afa5385d4601dd82f81066f62d67 (diff)
downloadillumos-joyent-e010bda94b034e413b6fe35fd45bca0afaf1a0df.tar.gz
9447 NFS unmount is slow
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Arne Jansen <arne@die-jansens.de>
Reviewed by: Ken Mays <kmays2000@gmail.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_vfsops.c6
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_client.c2
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_rnode.c187
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vfsops.c4
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_client.c19
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_subr.c169
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_vfsops.c6
-rw-r--r--usr/src/uts/common/nfs/nfs4_clnt.h5
-rw-r--r--usr/src/uts/common/nfs/nfs_clnt.h5
-rw-r--r--usr/src/uts/common/nfs/rnode.h2
-rw-r--r--usr/src/uts/common/nfs/rnode4.h8
11 files changed, 268 insertions, 145 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
index 207a708771..d6a88a97c3 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
@@ -1008,7 +1008,7 @@ static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO;
static int
nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
- int flags, cred_t *cr, zone_t *zone)
+ int flags, cred_t *cr, zone_t *zone)
{
vnode_t *rtvp;
mntinfo_t *mi;
@@ -1075,6 +1075,10 @@ nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
if (flags & NFSMNT_DIRECTIO)
mi->mi_flags |= MI_DIRECTIO;
+ mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&mi->mi_rnodes, sizeof (rnode_t),
+ offsetof(rnode_t, r_mi_link));
+
/*
* Make a vfs struct for nfs. We do this here instead of below
* because rtvp needs a vfs before we can do a getattr on it.
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c
index 1734853f57..5456fc7c63 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_client.c
@@ -3116,6 +3116,7 @@ nfs_free_mi4(mntinfo4_t *mi)
mutex_destroy(&mi->mi_lock);
mutex_destroy(&mi->mi_async_lock);
mutex_destroy(&mi->mi_msg_list_lock);
+ mutex_destroy(&mi->mi_rnodes_lock);
nfs_rw_destroy(&mi->mi_recovlock);
nfs_rw_destroy(&mi->mi_rename_lock);
nfs_rw_destroy(&mi->mi_fh_lock);
@@ -3152,6 +3153,7 @@ nfs_free_mi4(mntinfo4_t *mi)
list_destroy(&mi->mi_foo_list);
list_destroy(&mi->mi_bseqid_list);
list_destroy(&mi->mi_lost_state);
+ list_destroy(&mi->mi_rnodes);
avl_destroy(&mi->mi_filehandles);
kmem_free(mi, sizeof (*mi));
}
diff --git a/usr/src/uts/common/fs/nfs/nfs4_rnode.c b/usr/src/uts/common/fs/nfs/nfs4_rnode.c
index 14abf17329..5f7be519e6 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_rnode.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_rnode.c
@@ -969,6 +969,8 @@ rp4_rmfree(rnode4_t *rp)
void
rp4_addhash(rnode4_t *rp)
{
+ mntinfo4_t *mi;
+
ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
ASSERT(!(rp->r_flags & R4HASHED));
@@ -984,6 +986,11 @@ rp4_addhash(rnode4_t *rp)
mutex_enter(&rp->r_statelock);
rp->r_flags |= R4HASHED;
mutex_exit(&rp->r_statelock);
+
+ mi = VTOMI4(RTOV4(rp));
+ mutex_enter(&mi->mi_rnodes_lock);
+ list_insert_tail(&mi->mi_rnodes, rp);
+ mutex_exit(&mi->mi_rnodes_lock);
}
/*
@@ -994,6 +1001,8 @@ rp4_addhash(rnode4_t *rp)
void
rp4_rmhash_locked(rnode4_t *rp)
{
+ mntinfo4_t *mi;
+
ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
ASSERT(rp->r_flags & R4HASHED);
@@ -1003,6 +1012,12 @@ rp4_rmhash_locked(rnode4_t *rp)
mutex_enter(&rp->r_statelock);
rp->r_flags &= ~R4HASHED;
mutex_exit(&rp->r_statelock);
+
+ mi = VTOMI4(RTOV4(rp));
+ mutex_enter(&mi->mi_rnodes_lock);
+ if (list_link_active(&rp->r_mi_link))
+ list_remove(&mi->mi_rnodes, rp);
+ mutex_exit(&mi->mi_rnodes_lock);
}
/*
@@ -1100,11 +1115,11 @@ r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
}
/*
- * Return >0 if there is a active vnode belonging to this vfs in the
+ * Return 1 if there is an active vnode belonging to this vfs in the
* rtable4 cache.
*
* Several of these checks are done without holding the usual
- * locks. This is safe because destroy_rtable(), rp_addfree(),
+ * locks. This is safe because destroy_rtable4(), rp4_addfree(),
* etc. will redo the necessary checks before actually destroying
* any rnodes.
*/
@@ -1113,45 +1128,26 @@ check_rtable4(struct vfs *vfsp)
{
rnode4_t *rp;
vnode_t *vp;
- int busy = NFSV4_RTABLE4_OK;
- int index;
-
- for (index = 0; index < rtable4size; index++) {
- rw_enter(&rtable4[index].r_lock, RW_READER);
-
- for (rp = rtable4[index].r_hashf;
- rp != (rnode4_t *)(&rtable4[index]);
- rp = rp->r_hashf) {
+ mntinfo4_t *mi;
- vp = RTOV4(rp);
- if (vp->v_vfsp == vfsp) {
- if (rp->r_freef == NULL) {
- busy = NFSV4_RTABLE4_NOT_FREE_LIST;
- } else if (nfs4_has_pages(vp) &&
- (rp->r_flags & R4DIRTY)) {
- busy = NFSV4_RTABLE4_DIRTY_PAGES;
- } else if (rp->r_count > 0) {
- busy = NFSV4_RTABLE4_POS_R_COUNT;
- }
+ ASSERT(vfsp != NULL);
+ mi = VFTOMI4(vfsp);
- if (busy != NFSV4_RTABLE4_OK) {
-#ifdef DEBUG
- char *path;
+ mutex_enter(&mi->mi_rnodes_lock);
+ for (rp = list_head(&mi->mi_rnodes); rp != NULL;
+ rp = list_next(&mi->mi_rnodes, rp)) {
+ vp = RTOV4(rp);
- path = fn_path(rp->r_svnode.sv_name);
- DTRACE_NFSV4_3(rnode__e__debug,
- int, busy, char *, path,
- rnode4_t *, rp);
- kmem_free(path, strlen(path)+1);
-#endif
- rw_exit(&rtable4[index].r_lock);
- return (busy);
- }
- }
+ if (rp->r_freef == NULL ||
+ (nfs4_has_pages(vp) && (rp->r_flags & R4DIRTY)) ||
+ rp->r_count > 0) {
+ mutex_exit(&mi->mi_rnodes_lock);
+ return (1);
}
- rw_exit(&rtable4[index].r_lock);
}
- return (busy);
+ mutex_exit(&mi->mi_rnodes_lock);
+
+ return (0);
}
/*
@@ -1164,45 +1160,42 @@ check_rtable4(struct vfs *vfsp)
void
destroy_rtable4(struct vfs *vfsp, cred_t *cr)
{
- int index;
- vnode_t *vp;
- rnode4_t *rp, *r_hashf, *rlist;
+ rnode4_t *rp;
+ mntinfo4_t *mi;
- rlist = NULL;
+ ASSERT(vfsp != NULL);
- for (index = 0; index < rtable4size; index++) {
- rw_enter(&rtable4[index].r_lock, RW_WRITER);
- for (rp = rtable4[index].r_hashf;
- rp != (rnode4_t *)(&rtable4[index]);
- rp = r_hashf) {
- /* save the hash pointer before destroying */
- r_hashf = rp->r_hashf;
+ mi = VFTOMI4(vfsp);
- vp = RTOV4(rp);
- if (vp->v_vfsp == vfsp) {
- mutex_enter(&rp4freelist_lock);
- if (rp->r_freef != NULL) {
- rp4_rmfree(rp);
- mutex_exit(&rp4freelist_lock);
- rp4_rmhash_locked(rp);
- rp->r_hashf = rlist;
- rlist = rp;
- } else
- mutex_exit(&rp4freelist_lock);
- }
- }
- rw_exit(&rtable4[index].r_lock);
- }
+ mutex_enter(&rp4freelist_lock);
+ mutex_enter(&mi->mi_rnodes_lock);
+ while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) {
+ /*
+ * If the rnode is no longer on the freelist it is not
+ * ours and it will be handled by some other thread, so
+ * skip it.
+ */
+ if (rp->r_freef == NULL)
+ continue;
+ mutex_exit(&mi->mi_rnodes_lock);
+
+ rp4_rmfree(rp);
+ mutex_exit(&rp4freelist_lock);
+
+ rp4_rmhash(rp);
- for (rp = rlist; rp != NULL; rp = r_hashf) {
- r_hashf = rp->r_hashf;
/*
* This call to rp4_addfree will end up destroying the
* rnode, but in a safe way with the appropriate set
* of checks done.
*/
rp4_addfree(rp, cr);
+
+ mutex_enter(&rp4freelist_lock);
+ mutex_enter(&mi->mi_rnodes_lock);
}
+ mutex_exit(&mi->mi_rnodes_lock);
+ mutex_exit(&rp4freelist_lock);
}
/*
@@ -1297,6 +1290,53 @@ r4flush(struct vfs *vfsp, cred_t *cr)
cnt = 0;
/*
+ * If the vfs is known we can do a fast path by iterating all rnodes that
+ * belong to this vfs. This is much faster than the traditional way
+ * of iterating rtable4 (below) in case there are a lot of rnodes that
+ * do not belong to our vfs.
+ */
+ if (vfsp != NULL) {
+ mntinfo4_t *mi = VFTOMI4(vfsp);
+
+ mutex_enter(&mi->mi_rnodes_lock);
+ for (rp = list_head(&mi->mi_rnodes); rp != NULL;
+ rp = list_next(&mi->mi_rnodes, rp)) {
+ vp = RTOV4(rp);
+ /*
+ * Don't bother sync'ing a vp if it
+ * is part of virtual swap device or
+ * if VFS is read-only
+ */
+ if (IS_SWAPVP(vp) || vn_is_readonly(vp))
+ continue;
+ /*
+ * If the vnode has pages and is marked as either dirty
+ * or mmap'd, hold and add this vnode to the list of
+ * vnodes to flush.
+ */
+ ASSERT(vp->v_vfsp == vfsp);
+ if (nfs4_has_pages(vp) &&
+ ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
+ VN_HOLD(vp);
+ vplist[cnt++] = vp;
+ if (cnt == num) {
+ /*
+ * The vplist is full because there are
+ * too many rnodes. We are done for
+ * now.
+ */
+ break;
+ }
+ }
+ }
+ mutex_exit(&mi->mi_rnodes_lock);
+
+ goto done;
+ }
+
+ ASSERT(vfsp == NULL);
+
+ /*
* Walk the hash queues looking for rnodes with page
* lists associated with them. Make a list of these
* files.
@@ -1315,26 +1355,29 @@ r4flush(struct vfs *vfsp, cred_t *cr)
if (IS_SWAPVP(vp) || vn_is_readonly(vp))
continue;
/*
- * If flushing all mounted file systems or
- * the vnode belongs to this vfs, has pages
- * and is marked as either dirty or mmap'd,
- * hold and add this vnode to the list of
+ * If the vnode has pages and is marked as either dirty
+ * or mmap'd, hold and add this vnode to the list of
* vnodes to flush.
*/
- if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
- nfs4_has_pages(vp) &&
+ if (nfs4_has_pages(vp) &&
((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
VN_HOLD(vp);
vplist[cnt++] = vp;
if (cnt == num) {
rw_exit(&rtable4[index].r_lock);
- goto toomany;
+ /*
+ * The vplist is full because there are
+ * too many rnodes. We are done for
+ * now.
+ */
+ goto done;
}
}
}
rw_exit(&rtable4[index].r_lock);
}
-toomany:
+
+done:
/*
* Flush and release all of the files on the list.
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
index 8efaf20602..f0320aaee0 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
@@ -2348,6 +2348,10 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head,
mi->mi_flags |= MI4_MOUNTING;
+ mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&mi->mi_rnodes, sizeof (rnode4_t),
+ offsetof(rnode4_t, r_mi_link));
+
/*
* Make a vfs struct for nfs. We do this here instead of below
* because rtvp needs a vfs before we can do a getattr on it.
diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c
index 7a6c545d9a..b034aa4a77 100644
--- a/usr/src/uts/common/fs/nfs/nfs_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs_client.c
@@ -1354,8 +1354,8 @@ nfs_async_manager_stop(vfs_t *vfsp)
int
nfs_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr,
- struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *,
- u_offset_t, caddr_t, struct seg *, cred_t *))
+ struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *,
+ u_offset_t, caddr_t, struct seg *, cred_t *))
{
rnode_t *rp;
mntinfo_t *mi;
@@ -1454,8 +1454,8 @@ noasync:
int
nfs_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
- int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *,
- u_offset_t, size_t, int, cred_t *))
+ int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *,
+ u_offset_t, size_t, int, cred_t *))
{
rnode_t *rp;
mntinfo_t *mi;
@@ -1576,8 +1576,8 @@ noasync:
int
nfs_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
- int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
- size_t, int, cred_t *))
+ int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
+ size_t, int, cred_t *))
{
rnode_t *rp;
mntinfo_t *mi;
@@ -1709,7 +1709,7 @@ noasync:
void
nfs_async_readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr,
- int (*readdir)(vnode_t *, rddir_cache *, cred_t *))
+ int (*readdir)(vnode_t *, rddir_cache *, cred_t *))
{
rnode_t *rp;
mntinfo_t *mi;
@@ -1806,8 +1806,7 @@ noasync:
void
nfs_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count,
- cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3,
- cred_t *))
+ cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3, cred_t *))
{
rnode_t *rp;
mntinfo_t *mi;
@@ -3223,11 +3222,13 @@ nfs_free_mi(mntinfo_t *mi)
mutex_destroy(&mi->mi_lock);
mutex_destroy(&mi->mi_remap_lock);
mutex_destroy(&mi->mi_async_lock);
+ mutex_destroy(&mi->mi_rnodes_lock);
cv_destroy(&mi->mi_failover_cv);
cv_destroy(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE]);
cv_destroy(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE]);
cv_destroy(&mi->mi_async_reqs_cv);
cv_destroy(&mi->mi_async_cv);
+ list_destroy(&mi->mi_rnodes);
zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFS);
kmem_free(mi, sizeof (*mi));
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c
index cda666c27e..68cd0df081 100644
--- a/usr/src/uts/common/fs/nfs/nfs_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs_subr.c
@@ -2821,6 +2821,7 @@ rp_rmfree(rnode_t *rp)
static void
rp_addhash(rnode_t *rp)
{
+ mntinfo_t *mi;
ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
ASSERT(!(rp->r_flags & RHASHED));
@@ -2833,6 +2834,11 @@ rp_addhash(rnode_t *rp)
mutex_enter(&rp->r_statelock);
rp->r_flags |= RHASHED;
mutex_exit(&rp->r_statelock);
+
+ mi = VTOMI(RTOV(rp));
+ mutex_enter(&mi->mi_rnodes_lock);
+ list_insert_tail(&mi->mi_rnodes, rp);
+ mutex_exit(&mi->mi_rnodes_lock);
}
/*
@@ -2843,6 +2849,7 @@ rp_addhash(rnode_t *rp)
static void
rp_rmhash_locked(rnode_t *rp)
{
+ mntinfo_t *mi;
ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
ASSERT(rp->r_flags & RHASHED);
@@ -2853,6 +2860,12 @@ rp_rmhash_locked(rnode_t *rp)
mutex_enter(&rp->r_statelock);
rp->r_flags &= ~RHASHED;
mutex_exit(&rp->r_statelock);
+
+ mi = VTOMI(RTOV(rp));
+ mutex_enter(&mi->mi_rnodes_lock);
+ if (list_link_active(&rp->r_mi_link))
+ list_remove(&mi->mi_rnodes, rp);
+ mutex_exit(&mi->mi_rnodes_lock);
}
/*
@@ -2914,7 +2927,7 @@ rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
}
/*
- * Return 1 if there is a active vnode belonging to this vfs in the
+ * Return 1 if there is an active vnode belonging to this vfs in the
* rtable cache.
*
* Several of these checks are done without holding the usual
@@ -2925,28 +2938,27 @@ rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
int
check_rtable(struct vfs *vfsp)
{
- int index;
rnode_t *rp;
vnode_t *vp;
+ mntinfo_t *mi;
- for (index = 0; index < rtablesize; index++) {
- rw_enter(&rtable[index].r_lock, RW_READER);
- for (rp = rtable[index].r_hashf;
- rp != (rnode_t *)(&rtable[index]);
- rp = rp->r_hashf) {
- vp = RTOV(rp);
- if (vp->v_vfsp == vfsp) {
- if (rp->r_freef == NULL ||
- (vn_has_cached_data(vp) &&
- (rp->r_flags & RDIRTY)) ||
- rp->r_count > 0) {
- rw_exit(&rtable[index].r_lock);
- return (1);
- }
- }
+ ASSERT(vfsp != NULL);
+ mi = VFTOMI(vfsp);
+
+ mutex_enter(&mi->mi_rnodes_lock);
+ for (rp = list_head(&mi->mi_rnodes); rp != NULL;
+ rp = list_next(&mi->mi_rnodes, rp)) {
+ vp = RTOV(rp);
+
+ if (rp->r_freef == NULL ||
+ (vn_has_cached_data(vp) && (rp->r_flags & RDIRTY)) ||
+ rp->r_count > 0) {
+ mutex_exit(&mi->mi_rnodes_lock);
+ return (1);
}
- rw_exit(&rtable[index].r_lock);
}
+ mutex_exit(&mi->mi_rnodes_lock);
+
return (0);
}
@@ -2958,47 +2970,42 @@ check_rtable(struct vfs *vfsp)
void
destroy_rtable(struct vfs *vfsp, cred_t *cr)
{
- int index;
rnode_t *rp;
- rnode_t *rlist;
- rnode_t *r_hashf;
- vnode_t *vp;
+ mntinfo_t *mi;
- rlist = NULL;
+ ASSERT(vfsp != NULL);
- for (index = 0; index < rtablesize; index++) {
- rw_enter(&rtable[index].r_lock, RW_WRITER);
- for (rp = rtable[index].r_hashf;
- rp != (rnode_t *)(&rtable[index]);
- rp = r_hashf) {
- /* save the hash pointer before destroying */
- r_hashf = rp->r_hashf;
- vp = RTOV(rp);
- if (vp->v_vfsp == vfsp) {
- mutex_enter(&rpfreelist_lock);
- if (rp->r_freef != NULL) {
- rp_rmfree(rp);
- mutex_exit(&rpfreelist_lock);
- rp_rmhash_locked(rp);
- rp->r_hashf = rlist;
- rlist = rp;
- } else
- mutex_exit(&rpfreelist_lock);
- }
- }
- rw_exit(&rtable[index].r_lock);
- }
+ mi = VFTOMI(vfsp);
+
+ mutex_enter(&rpfreelist_lock);
+ mutex_enter(&mi->mi_rnodes_lock);
+ while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) {
+ /*
+ * If the rnode is no longer on the freelist it is not
+ * ours and it will be handled by some other thread, so
+ * skip it.
+ */
+ if (rp->r_freef == NULL)
+ continue;
+ mutex_exit(&mi->mi_rnodes_lock);
+
+ rp_rmfree(rp);
+ mutex_exit(&rpfreelist_lock);
+
+ rp_rmhash(rp);
- for (rp = rlist; rp != NULL; rp = rlist) {
- rlist = rp->r_hashf;
/*
* This call to rp_addfree will end up destroying the
* rnode, but in a safe way with the appropriate set
* of checks done.
*/
rp_addfree(rp, cr);
- }
+ mutex_enter(&rpfreelist_lock);
+ mutex_enter(&mi->mi_rnodes_lock);
+ }
+ mutex_exit(&mi->mi_rnodes_lock);
+ mutex_exit(&rpfreelist_lock);
}
/*
@@ -3066,6 +3073,53 @@ rflush(struct vfs *vfsp, cred_t *cr)
cnt = 0;
/*
+ * If the vfs is known we can do a fast path by iterating all rnodes that
+ * belong to this vfs. This is much faster than the traditional way
+ * of iterating rtable (below) in case there are a lot of rnodes that
+ * do not belong to our vfs.
+ */
+ if (vfsp != NULL) {
+ mntinfo_t *mi = VFTOMI(vfsp);
+
+ mutex_enter(&mi->mi_rnodes_lock);
+ for (rp = list_head(&mi->mi_rnodes); rp != NULL;
+ rp = list_next(&mi->mi_rnodes, rp)) {
+ vp = RTOV(rp);
+ /*
+ * Don't bother sync'ing a vp if it
+ * is part of virtual swap device or
+ * if VFS is read-only
+ */
+ if (IS_SWAPVP(vp) || vn_is_readonly(vp))
+ continue;
+ /*
+ * If the vnode has pages and is marked as either dirty
+ * or mmap'd, hold and add this vnode to the list of
+ * vnodes to flush.
+ */
+ ASSERT(vp->v_vfsp == vfsp);
+ if (vn_has_cached_data(vp) &&
+ ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
+ VN_HOLD(vp);
+ vplist[cnt++] = vp;
+ if (cnt == num) {
+ /*
+ * The vplist is full because there is
+ * too many rnodes. We are done for
+ * now.
+ */
+ break;
+ }
+ }
+ }
+ mutex_exit(&mi->mi_rnodes_lock);
+
+ goto done;
+ }
+
+ ASSERT(vfsp == NULL);
+
+ /*
* Walk the hash queues looking for rnodes with page
* lists associated with them. Make a list of these
* files.
@@ -3084,26 +3138,29 @@ rflush(struct vfs *vfsp, cred_t *cr)
if (IS_SWAPVP(vp) || vn_is_readonly(vp))
continue;
/*
- * If flushing all mounted file systems or
- * the vnode belongs to this vfs, has pages
- * and is marked as either dirty or mmap'd,
- * hold and add this vnode to the list of
+ * If the vnode has pages and is marked as either dirty
+ * or mmap'd, hold and add this vnode to the list of
* vnodes to flush.
*/
- if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
- vn_has_cached_data(vp) &&
+ if (vn_has_cached_data(vp) &&
((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
VN_HOLD(vp);
vplist[cnt++] = vp;
if (cnt == num) {
rw_exit(&rtable[index].r_lock);
- goto toomany;
+ /*
+ * The vplist is full because there is
+ * too many rnodes. We are done for
+ * now.
+ */
+ goto done;
}
}
}
rw_exit(&rtable[index].r_lock);
}
-toomany:
+
+done:
/*
* Flush and release all of the files on the list.
diff --git a/usr/src/uts/common/fs/nfs/nfs_vfsops.c b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
index 57b21778b4..c9cc306f95 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
@@ -1142,7 +1142,7 @@ static uint_t nfs_cots_timeo = NFS_COTS_TIMEO;
static int
nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
- int flags, cred_t *cr, zone_t *zone)
+ int flags, cred_t *cr, zone_t *zone)
{
vnode_t *rtvp;
mntinfo_t *mi;
@@ -1204,6 +1204,10 @@ nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
if (flags & NFSMNT_DIRECTIO)
mi->mi_flags |= MI_DIRECTIO;
+ mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&mi->mi_rnodes, sizeof (rnode_t),
+ offsetof(rnode_t, r_mi_link));
+
/*
* Make a vfs struct for nfs. We do this here instead of below
* because rtvp needs a vfs before we can do a getattr on it.
diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h
index 8f8cb7d78e..68c810f7c7 100644
--- a/usr/src/uts/common/nfs/nfs4_clnt.h
+++ b/usr/src/uts/common/nfs/nfs4_clnt.h
@@ -1099,6 +1099,11 @@ typedef struct mntinfo4 {
* Referral related info.
*/
int mi_vfs_referral_loop_cnt;
+ /*
+ * List of rnode4_t structures that belong to this mntinfo4
+ */
+ kmutex_t mi_rnodes_lock; /* protects the mi_rnodes list */
+ list_t mi_rnodes; /* the list */
} mntinfo4_t;
/*
diff --git a/usr/src/uts/common/nfs/nfs_clnt.h b/usr/src/uts/common/nfs/nfs_clnt.h
index ae2948ed12..391640c14c 100644
--- a/usr/src/uts/common/nfs/nfs_clnt.h
+++ b/usr/src/uts/common/nfs/nfs_clnt.h
@@ -428,6 +428,11 @@ typedef struct mntinfo {
* before acquiring any other rnode lock.
*/
kmutex_t mi_remap_lock;
+ /*
+ * List of rnode_t structures that belong to this mntinfo
+ */
+ kmutex_t mi_rnodes_lock; /* protects the mi_rnodes list */
+ list_t mi_rnodes; /* the list */
} mntinfo_t;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/nfs/rnode.h b/usr/src/uts/common/nfs/rnode.h
index 33091445c4..a9433b9496 100644
--- a/usr/src/uts/common/nfs/rnode.h
+++ b/usr/src/uts/common/nfs/rnode.h
@@ -303,6 +303,8 @@ typedef struct rnode {
kthread_t *r_serial; /* id of purging thread */
list_t r_indelmap; /* list of delmap callers */
uint_t r_inmap; /* to serialize read/write and mmap */
+ list_node_t r_mi_link; /* linkage into list of rnodes for */
+ /* this mntinfo */
} rnode_t;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/nfs/rnode4.h b/usr/src/uts/common/nfs/rnode4.h
index 0e0090e24d..ca2e38c8e8 100644
--- a/usr/src/uts/common/nfs/rnode4.h
+++ b/usr/src/uts/common/nfs/rnode4.h
@@ -336,6 +336,8 @@ typedef struct rnode4 {
nfs4_stub_type_t r_stub_type;
/* e.g. mirror-mount or referral */
uint_t r_inmap; /* to serialize read/write and mmap */
+ list_node_t r_mi_link; /* linkage into list of rnodes for */
+ /* this mntinfo */
} rnode4_t;
#define r_vnode r_svnode.sv_r_vnode
@@ -393,12 +395,6 @@ extern long nrnode;
#define NFS4_INITIAL_DELAY_INTERVAL 1
#define NFS4_MAX_DELAY_INTERVAL 20
-/* Used for check_rtable4 */
-#define NFSV4_RTABLE4_OK 0
-#define NFSV4_RTABLE4_NOT_FREE_LIST 1
-#define NFSV4_RTABLE4_DIRTY_PAGES 2
-#define NFSV4_RTABLE4_POS_R_COUNT 3
-
extern rnode4_t *r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *);
extern rnode4_t *r4find_unlocked(nfs4_sharedfh_t *, struct vfs *);
extern void r4flush(struct vfs *, cred_t *);