diff options
author | Marcel Telka <marcel@telka.sk> | 2018-04-25 00:28:01 +0200 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2018-07-18 20:10:13 -0400 |
commit | e010bda94b034e413b6fe35fd45bca0afaf1a0df (patch) | |
tree | 252bdfc74d19e9790fb82af170be1a0d9ac0db65 | |
parent | 5882b622b7e2afa5385d4601dd82f81066f62d67 (diff) | |
download | illumos-joyent-e010bda94b034e413b6fe35fd45bca0afaf1a0df.tar.gz |
9447 NFS unmount is slow
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Arne Jansen <arne@die-jansens.de>
Reviewed by: Ken Mays <kmays2000@gmail.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs3_vfsops.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_client.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_rnode.c | 187 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_vfsops.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_client.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_subr.c | 169 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_vfsops.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/nfs/nfs4_clnt.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/nfs/nfs_clnt.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/nfs/rnode.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/nfs/rnode4.h | 8 |
11 files changed, 268 insertions, 145 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c index 207a708771..d6a88a97c3 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c @@ -1008,7 +1008,7 @@ static uint_t nfs3_cots_timeo = NFS_COTS_TIMEO; static int nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, - int flags, cred_t *cr, zone_t *zone) + int flags, cred_t *cr, zone_t *zone) { vnode_t *rtvp; mntinfo_t *mi; @@ -1075,6 +1075,10 @@ nfs3rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, if (flags & NFSMNT_DIRECTIO) mi->mi_flags |= MI_DIRECTIO; + mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&mi->mi_rnodes, sizeof (rnode_t), + offsetof(rnode_t, r_mi_link)); + /* * Make a vfs struct for nfs. We do this here instead of below * because rtvp needs a vfs before we can do a getattr on it. diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c index 1734853f57..5456fc7c63 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client.c @@ -3116,6 +3116,7 @@ nfs_free_mi4(mntinfo4_t *mi) mutex_destroy(&mi->mi_lock); mutex_destroy(&mi->mi_async_lock); mutex_destroy(&mi->mi_msg_list_lock); + mutex_destroy(&mi->mi_rnodes_lock); nfs_rw_destroy(&mi->mi_recovlock); nfs_rw_destroy(&mi->mi_rename_lock); nfs_rw_destroy(&mi->mi_fh_lock); @@ -3152,6 +3153,7 @@ nfs_free_mi4(mntinfo4_t *mi) list_destroy(&mi->mi_foo_list); list_destroy(&mi->mi_bseqid_list); list_destroy(&mi->mi_lost_state); + list_destroy(&mi->mi_rnodes); avl_destroy(&mi->mi_filehandles); kmem_free(mi, sizeof (*mi)); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_rnode.c b/usr/src/uts/common/fs/nfs/nfs4_rnode.c index 14abf17329..5f7be519e6 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_rnode.c +++ b/usr/src/uts/common/fs/nfs/nfs4_rnode.c @@ -969,6 +969,8 @@ rp4_rmfree(rnode4_t *rp) void rp4_addhash(rnode4_t *rp) { + mntinfo4_t *mi; + ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); ASSERT(!(rp->r_flags & R4HASHED)); @@ -984,6 +986,11 @@ rp4_addhash(rnode4_t *rp) mutex_enter(&rp->r_statelock); rp->r_flags |= R4HASHED; mutex_exit(&rp->r_statelock); + + mi = VTOMI4(RTOV4(rp)); + mutex_enter(&mi->mi_rnodes_lock); + list_insert_tail(&mi->mi_rnodes, rp); + mutex_exit(&mi->mi_rnodes_lock); } /* @@ -994,6 +1001,8 @@ rp4_addhash(rnode4_t *rp) void rp4_rmhash_locked(rnode4_t *rp) { + mntinfo4_t *mi; + ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); ASSERT(rp->r_flags & R4HASHED); @@ -1003,6 +1012,12 @@ rp4_rmhash_locked(rnode4_t *rp) mutex_enter(&rp->r_statelock); rp->r_flags &= ~R4HASHED; mutex_exit(&rp->r_statelock); + + mi = VTOMI4(RTOV4(rp)); + mutex_enter(&mi->mi_rnodes_lock); + if (list_link_active(&rp->r_mi_link)) + list_remove(&mi->mi_rnodes, rp); + mutex_exit(&mi->mi_rnodes_lock); } /* @@ -1100,11 +1115,11 @@ r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp) } /* - * Return >0 if there is a active vnode belonging to this vfs in the + * Return 1 if there is an active vnode belonging to this vfs in the * rtable4 cache. * * Several of these checks are done without holding the usual - * locks. This is safe because destroy_rtable(), rp_addfree(), + * locks. This is safe because destroy_rtable4(), rp4_addfree(), * etc. will redo the necessary checks before actually destroying * any rnodes. */ @@ -1113,45 +1128,26 @@ check_rtable4(struct vfs *vfsp) { rnode4_t *rp; vnode_t *vp; - int busy = NFSV4_RTABLE4_OK; - int index; - - for (index = 0; index < rtable4size; index++) { - rw_enter(&rtable4[index].r_lock, RW_READER); - - for (rp = rtable4[index].r_hashf; - rp != (rnode4_t *)(&rtable4[index]); - rp = rp->r_hashf) { + mntinfo4_t *mi; - vp = RTOV4(rp); - if (vp->v_vfsp == vfsp) { - if (rp->r_freef == NULL) { - busy = NFSV4_RTABLE4_NOT_FREE_LIST; - } else if (nfs4_has_pages(vp) && - (rp->r_flags & R4DIRTY)) { - busy = NFSV4_RTABLE4_DIRTY_PAGES; - } else if (rp->r_count > 0) { - busy = NFSV4_RTABLE4_POS_R_COUNT; - } + ASSERT(vfsp != NULL); + mi = VFTOMI4(vfsp); - if (busy != NFSV4_RTABLE4_OK) { -#ifdef DEBUG - char *path; + mutex_enter(&mi->mi_rnodes_lock); + for (rp = list_head(&mi->mi_rnodes); rp != NULL; + rp = list_next(&mi->mi_rnodes, rp)) { + vp = RTOV4(rp); - path = fn_path(rp->r_svnode.sv_name); - DTRACE_NFSV4_3(rnode__e__debug, - int, busy, char *, path, - rnode4_t *, rp); - kmem_free(path, strlen(path)+1); -#endif - rw_exit(&rtable4[index].r_lock); - return (busy); - } - } + if (rp->r_freef == NULL || + (nfs4_has_pages(vp) && (rp->r_flags & R4DIRTY)) || + rp->r_count > 0) { + mutex_exit(&mi->mi_rnodes_lock); + return (1); } - rw_exit(&rtable4[index].r_lock); } - return (busy); + mutex_exit(&mi->mi_rnodes_lock); + + return (0); } /* @@ -1164,45 +1160,42 @@ check_rtable4(struct vfs *vfsp) void destroy_rtable4(struct vfs *vfsp, cred_t *cr) { - int index; - vnode_t *vp; - rnode4_t *rp, *r_hashf, *rlist; + rnode4_t *rp; + mntinfo4_t *mi; - rlist = NULL; + ASSERT(vfsp != NULL); - for (index = 0; index < rtable4size; index++) { - rw_enter(&rtable4[index].r_lock, RW_WRITER); - for (rp = rtable4[index].r_hashf; - rp != (rnode4_t *)(&rtable4[index]); - rp = r_hashf) { - /* save the hash pointer before destroying */ - r_hashf = rp->r_hashf; + mi = VFTOMI4(vfsp); - vp = RTOV4(rp); - if (vp->v_vfsp == vfsp) { - mutex_enter(&rp4freelist_lock); - if (rp->r_freef != NULL) { - rp4_rmfree(rp); - mutex_exit(&rp4freelist_lock); - rp4_rmhash_locked(rp); - rp->r_hashf = rlist; - rlist = rp; - } else - mutex_exit(&rp4freelist_lock); - } - } - rw_exit(&rtable4[index].r_lock); - } + mutex_enter(&rp4freelist_lock); + mutex_enter(&mi->mi_rnodes_lock); + while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) { + /* + * If the rnode is no longer on the freelist it is not + * ours and it will be handled by some other thread, so + * skip it. + */ + if (rp->r_freef == NULL) + continue; + mutex_exit(&mi->mi_rnodes_lock); + + rp4_rmfree(rp); + mutex_exit(&rp4freelist_lock); + + rp4_rmhash(rp); - for (rp = rlist; rp != NULL; rp = r_hashf) { - r_hashf = rp->r_hashf; /* * This call to rp4_addfree will end up destroying the * rnode, but in a safe way with the appropriate set * of checks done. */ rp4_addfree(rp, cr); + + mutex_enter(&rp4freelist_lock); + mutex_enter(&mi->mi_rnodes_lock); } + mutex_exit(&mi->mi_rnodes_lock); + mutex_exit(&rp4freelist_lock); } /* @@ -1297,6 +1290,53 @@ r4flush(struct vfs *vfsp, cred_t *cr) cnt = 0; /* + * If the vfs is known we can do fast path by iterating all rnodes that + * belongs to this vfs. This is much faster than the traditional way + * of iterating rtable4 (below) in a case there is a lot of rnodes that + * does not belong to our vfs. + */ + if (vfsp != NULL) { + mntinfo4_t *mi = VFTOMI4(vfsp); + + mutex_enter(&mi->mi_rnodes_lock); + for (rp = list_head(&mi->mi_rnodes); rp != NULL; + rp = list_next(&mi->mi_rnodes, rp)) { + vp = RTOV4(rp); + /* + * Don't bother sync'ing a vp if it + * is part of virtual swap device or + * if VFS is read-only + */ + if (IS_SWAPVP(vp) || vn_is_readonly(vp)) + continue; + /* + * If the vnode has pages and is marked as either dirty + * or mmap'd, hold and add this vnode to the list of + * vnodes to flush. + */ + ASSERT(vp->v_vfsp == vfsp); + if (nfs4_has_pages(vp) && + ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) { + VN_HOLD(vp); + vplist[cnt++] = vp; + if (cnt == num) { + /* + * The vplist is full because there is + * too many rnodes. We are done for + * now. + */ + break; + } + } + } + mutex_exit(&mi->mi_rnodes_lock); + + goto done; + } + + ASSERT(vfsp == NULL); + + /* * Walk the hash queues looking for rnodes with page * lists associated with them. Make a list of these * files. @@ -1315,26 +1355,29 @@ r4flush(struct vfs *vfsp, cred_t *cr) if (IS_SWAPVP(vp) || vn_is_readonly(vp)) continue; /* - * If flushing all mounted file systems or - * the vnode belongs to this vfs, has pages - * and is marked as either dirty or mmap'd, - * hold and add this vnode to the list of + * If the vnode has pages and is marked as either dirty + * or mmap'd, hold and add this vnode to the list of * vnodes to flush. */ - if ((vfsp == NULL || vp->v_vfsp == vfsp) && - nfs4_has_pages(vp) && + if (nfs4_has_pages(vp) && ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) { VN_HOLD(vp); vplist[cnt++] = vp; if (cnt == num) { rw_exit(&rtable4[index].r_lock); - goto toomany; + /* + * The vplist is full because there is + * too many rnodes. We are done for + * now. + */ + goto done; } } } rw_exit(&rtable4[index].r_lock); } -toomany: + +done: /* * Flush and release all of the files on the list. diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c index 8efaf20602..f0320aaee0 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c @@ -2348,6 +2348,10 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, mi->mi_flags |= MI4_MOUNTING; + mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&mi->mi_rnodes, sizeof (rnode4_t), + offsetof(rnode4_t, r_mi_link)); + /* * Make a vfs struct for nfs. We do this here instead of below * because rtvp needs a vfs before we can do a getattr on it. diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c index 7a6c545d9a..b034aa4a77 100644 --- a/usr/src/uts/common/fs/nfs/nfs_client.c +++ b/usr/src/uts/common/fs/nfs/nfs_client.c @@ -1354,8 +1354,8 @@ nfs_async_manager_stop(vfs_t *vfsp) int nfs_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, - struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *, - u_offset_t, caddr_t, struct seg *, cred_t *)) + struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *, + u_offset_t, caddr_t, struct seg *, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1454,8 +1454,8 @@ noasync: int nfs_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len, - int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *, - u_offset_t, size_t, int, cred_t *)) + int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *, + u_offset_t, size_t, int, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1576,8 +1576,8 @@ noasync: int nfs_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, - int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t, - size_t, int, cred_t *)) + int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t, + size_t, int, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1709,7 +1709,7 @@ noasync: void nfs_async_readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr, - int (*readdir)(vnode_t *, rddir_cache *, cred_t *)) + int (*readdir)(vnode_t *, rddir_cache *, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1806,8 +1806,7 @@ noasync: void nfs_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count, - cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3, - cred_t *)) + cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -3223,11 +3222,13 @@ nfs_free_mi(mntinfo_t *mi) mutex_destroy(&mi->mi_lock); mutex_destroy(&mi->mi_remap_lock); mutex_destroy(&mi->mi_async_lock); + mutex_destroy(&mi->mi_rnodes_lock); cv_destroy(&mi->mi_failover_cv); cv_destroy(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE]); cv_destroy(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE]); cv_destroy(&mi->mi_async_reqs_cv); cv_destroy(&mi->mi_async_cv); + list_destroy(&mi->mi_rnodes); zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFS); kmem_free(mi, sizeof (*mi)); } diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c index cda666c27e..68cd0df081 100644 --- a/usr/src/uts/common/fs/nfs/nfs_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs_subr.c @@ -2821,6 +2821,7 @@ rp_rmfree(rnode_t *rp) static void rp_addhash(rnode_t *rp) { + mntinfo_t *mi; ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); ASSERT(!(rp->r_flags & RHASHED)); @@ -2833,6 +2834,11 @@ rp_addhash(rnode_t *rp) mutex_enter(&rp->r_statelock); rp->r_flags |= RHASHED; mutex_exit(&rp->r_statelock); + + mi = VTOMI(RTOV(rp)); + mutex_enter(&mi->mi_rnodes_lock); + list_insert_tail(&mi->mi_rnodes, rp); + mutex_exit(&mi->mi_rnodes_lock); } /* @@ -2843,6 +2849,7 @@ rp_addhash(rnode_t *rp) static void rp_rmhash_locked(rnode_t *rp) { + mntinfo_t *mi; ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock)); ASSERT(rp->r_flags & RHASHED); @@ -2853,6 +2860,12 @@ rp_rmhash_locked(rnode_t *rp) mutex_enter(&rp->r_statelock); rp->r_flags &= ~RHASHED; mutex_exit(&rp->r_statelock); + + mi = VTOMI(RTOV(rp)); + mutex_enter(&mi->mi_rnodes_lock); + if (list_link_active(&rp->r_mi_link)) + list_remove(&mi->mi_rnodes, rp); + mutex_exit(&mi->mi_rnodes_lock); } /* @@ -2914,7 +2927,7 @@ rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp) } /* - * Return 1 if there is a active vnode belonging to this vfs in the + * Return 1 if there is an active vnode belonging to this vfs in the * rtable cache. * * Several of these checks are done without holding the usual @@ -2925,28 +2938,27 @@ rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp) int check_rtable(struct vfs *vfsp) { - int index; rnode_t *rp; vnode_t *vp; + mntinfo_t *mi; - for (index = 0; index < rtablesize; index++) { - rw_enter(&rtable[index].r_lock, RW_READER); - for (rp = rtable[index].r_hashf; - rp != (rnode_t *)(&rtable[index]); - rp = rp->r_hashf) { - vp = RTOV(rp); - if (vp->v_vfsp == vfsp) { - if (rp->r_freef == NULL || - (vn_has_cached_data(vp) && - (rp->r_flags & RDIRTY)) || - rp->r_count > 0) { - rw_exit(&rtable[index].r_lock); - return (1); - } - } + ASSERT(vfsp != NULL); + mi = VFTOMI(vfsp); + + mutex_enter(&mi->mi_rnodes_lock); + for (rp = list_head(&mi->mi_rnodes); rp != NULL; + rp = list_next(&mi->mi_rnodes, rp)) { + vp = RTOV(rp); + + if (rp->r_freef == NULL || + (vn_has_cached_data(vp) && (rp->r_flags & RDIRTY)) || + rp->r_count > 0) { + mutex_exit(&mi->mi_rnodes_lock); + return (1); } - rw_exit(&rtable[index].r_lock); } + mutex_exit(&mi->mi_rnodes_lock); + return (0); } @@ -2958,47 +2970,42 @@ check_rtable(struct vfs *vfsp) void destroy_rtable(struct vfs *vfsp, cred_t *cr) { - int index; rnode_t *rp; - rnode_t *rlist; - rnode_t *r_hashf; - vnode_t *vp; + mntinfo_t *mi; - rlist = NULL; + ASSERT(vfsp != NULL); - for (index = 0; index < rtablesize; index++) { - rw_enter(&rtable[index].r_lock, RW_WRITER); - for (rp = rtable[index].r_hashf; - rp != (rnode_t *)(&rtable[index]); - rp = r_hashf) { - /* save the hash pointer before destroying */ - r_hashf = rp->r_hashf; - vp = RTOV(rp); - if (vp->v_vfsp == vfsp) { - mutex_enter(&rpfreelist_lock); - if (rp->r_freef != NULL) { - rp_rmfree(rp); - mutex_exit(&rpfreelist_lock); - rp_rmhash_locked(rp); - rp->r_hashf = rlist; - rlist = rp; - } else - mutex_exit(&rpfreelist_lock); - } - } - rw_exit(&rtable[index].r_lock); - } + mi = VFTOMI(vfsp); + + mutex_enter(&rpfreelist_lock); + mutex_enter(&mi->mi_rnodes_lock); + while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) { + /* + * If the rnode is no longer on the freelist it is not + * ours and it will be handled by some other thread, so + * skip it. + */ + if (rp->r_freef == NULL) + continue; + mutex_exit(&mi->mi_rnodes_lock); + + rp_rmfree(rp); + mutex_exit(&rpfreelist_lock); + + rp_rmhash(rp); - for (rp = rlist; rp != NULL; rp = rlist) { - rlist = rp->r_hashf; /* * This call to rp_addfree will end up destroying the * rnode, but in a safe way with the appropriate set * of checks done. */ rp_addfree(rp, cr); - } + mutex_enter(&rpfreelist_lock); + mutex_enter(&mi->mi_rnodes_lock); + } + mutex_exit(&mi->mi_rnodes_lock); + mutex_exit(&rpfreelist_lock); } /* @@ -3066,6 +3073,53 @@ rflush(struct vfs *vfsp, cred_t *cr) cnt = 0; /* + * If the vfs is known we can do fast path by iterating all rnodes that + * belongs to this vfs. This is much faster than the traditional way + * of iterating rtable (below) in a case there is a lot of rnodes that + * does not belong to our vfs. + */ + if (vfsp != NULL) { + mntinfo_t *mi = VFTOMI(vfsp); + + mutex_enter(&mi->mi_rnodes_lock); + for (rp = list_head(&mi->mi_rnodes); rp != NULL; + rp = list_next(&mi->mi_rnodes, rp)) { + vp = RTOV(rp); + /* + * Don't bother sync'ing a vp if it + * is part of virtual swap device or + * if VFS is read-only + */ + if (IS_SWAPVP(vp) || vn_is_readonly(vp)) + continue; + /* + * If the vnode has pages and is marked as either dirty + * or mmap'd, hold and add this vnode to the list of + * vnodes to flush. + */ + ASSERT(vp->v_vfsp == vfsp); + if (vn_has_cached_data(vp) && + ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) { + VN_HOLD(vp); + vplist[cnt++] = vp; + if (cnt == num) { + /* + * The vplist is full because there is + * too many rnodes. We are done for + * now. + */ + break; + } + } + } + mutex_exit(&mi->mi_rnodes_lock); + + goto done; + } + + ASSERT(vfsp == NULL); + + /* * Walk the hash queues looking for rnodes with page * lists associated with them. Make a list of these * files. @@ -3084,26 +3138,29 @@ rflush(struct vfs *vfsp, cred_t *cr) if (IS_SWAPVP(vp) || vn_is_readonly(vp)) continue; /* - * If flushing all mounted file systems or - * the vnode belongs to this vfs, has pages - * and is marked as either dirty or mmap'd, - * hold and add this vnode to the list of + * If the vnode has pages and is marked as either dirty + * or mmap'd, hold and add this vnode to the list of * vnodes to flush. */ - if ((vfsp == NULL || vp->v_vfsp == vfsp) && - vn_has_cached_data(vp) && + if (vn_has_cached_data(vp) && ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) { VN_HOLD(vp); vplist[cnt++] = vp; if (cnt == num) { rw_exit(&rtable[index].r_lock); - goto toomany; + /* + * The vplist is full because there is + * too many rnodes. We are done for + * now. + */ + goto done; } } } rw_exit(&rtable[index].r_lock); } -toomany: + +done: /* * Flush and release all of the files on the list. diff --git a/usr/src/uts/common/fs/nfs/nfs_vfsops.c b/usr/src/uts/common/fs/nfs/nfs_vfsops.c index 57b21778b4..c9cc306f95 100644 --- a/usr/src/uts/common/fs/nfs/nfs_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs_vfsops.c @@ -1142,7 +1142,7 @@ static uint_t nfs_cots_timeo = NFS_COTS_TIMEO; static int nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, - int flags, cred_t *cr, zone_t *zone) + int flags, cred_t *cr, zone_t *zone) { vnode_t *rtvp; mntinfo_t *mi; @@ -1204,6 +1204,10 @@ nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp, if (flags & NFSMNT_DIRECTIO) mi->mi_flags |= MI_DIRECTIO; + mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&mi->mi_rnodes, sizeof (rnode_t), + offsetof(rnode_t, r_mi_link)); + /* * Make a vfs struct for nfs. We do this here instead of below * because rtvp needs a vfs before we can do a getattr on it. diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h index 8f8cb7d78e..68c810f7c7 100644 --- a/usr/src/uts/common/nfs/nfs4_clnt.h +++ b/usr/src/uts/common/nfs/nfs4_clnt.h @@ -1099,6 +1099,11 @@ typedef struct mntinfo4 { * Referral related info. */ int mi_vfs_referral_loop_cnt; + /* + * List of rnode4_t structures that belongs to this mntinfo4 + */ + kmutex_t mi_rnodes_lock; /* protects the mi_rnodes list */ + list_t mi_rnodes; /* the list */ } mntinfo4_t; /* diff --git a/usr/src/uts/common/nfs/nfs_clnt.h b/usr/src/uts/common/nfs/nfs_clnt.h index ae2948ed12..391640c14c 100644 --- a/usr/src/uts/common/nfs/nfs_clnt.h +++ b/usr/src/uts/common/nfs/nfs_clnt.h @@ -428,6 +428,11 @@ typedef struct mntinfo { * before acquiring any other rnode lock. */ kmutex_t mi_remap_lock; + /* + * List of rnode_t structures that belongs to this mntinfo + */ + kmutex_t mi_rnodes_lock; /* protects the mi_rnodes list */ + list_t mi_rnodes; /* the list */ } mntinfo_t; #endif /* _KERNEL */ diff --git a/usr/src/uts/common/nfs/rnode.h b/usr/src/uts/common/nfs/rnode.h index 33091445c4..a9433b9496 100644 --- a/usr/src/uts/common/nfs/rnode.h +++ b/usr/src/uts/common/nfs/rnode.h @@ -303,6 +303,8 @@ typedef struct rnode { kthread_t *r_serial; /* id of purging thread */ list_t r_indelmap; /* list of delmap callers */ uint_t r_inmap; /* to serialize read/write and mmap */ + list_node_t r_mi_link; /* linkage into list of rnodes for */ + /* this mntinfo */ } rnode_t; #endif /* _KERNEL */ diff --git a/usr/src/uts/common/nfs/rnode4.h b/usr/src/uts/common/nfs/rnode4.h index 0e0090e24d..ca2e38c8e8 100644 --- a/usr/src/uts/common/nfs/rnode4.h +++ b/usr/src/uts/common/nfs/rnode4.h @@ -336,6 +336,8 @@ typedef struct rnode4 { nfs4_stub_type_t r_stub_type; /* e.g. mirror-mount or referral */ uint_t r_inmap; /* to serialize read/write and mmap */ + list_node_t r_mi_link; /* linkage into list of rnodes for */ + /* this mntinfo */ } rnode4_t; #define r_vnode r_svnode.sv_r_vnode @@ -393,12 +395,6 @@ extern long nrnode; #define NFS4_INITIAL_DELAY_INTERVAL 1 #define NFS4_MAX_DELAY_INTERVAL 20 -/* Used for check_rtable4 */ -#define NFSV4_RTABLE4_OK 0 -#define NFSV4_RTABLE4_NOT_FREE_LIST 1 -#define NFSV4_RTABLE4_DIRTY_PAGES 2 -#define NFSV4_RTABLE4_POS_R_COUNT 3 - extern rnode4_t *r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *); extern rnode4_t *r4find_unlocked(nfs4_sharedfh_t *, struct vfs *); extern void r4flush(struct vfs *, cred_t *); |