| field | value | timestamp |
|---|---|---|
| author | Patrick Mooney <pmooney@pfmooney.com> | 2016-05-16 17:25:56 +0000 |
| committer | Patrick Mooney <pmooney@pfmooney.com> | 2016-06-10 14:50:51 +0000 |
| commit | 0f70e4e962f4fb06a29565b595bb50659d077f04 | |
| tree | 66da07a8f8ec62f93b1a1d9af9589f041d2fcb29 /usr/src | |
| parent | 301612afc3ef38a67f1adb329b36dae1bddf9034 | |
| download | illumos-joyent-0f70e4e962f4fb06a29565b595bb50659d077f04.tar.gz | |
OS-5167 cached v_path should be kept fresh
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/fs/lookup.c | 181 |
| -rw-r--r-- | usr/src/uts/common/fs/vfs.c | 18 |
| -rw-r--r-- | usr/src/uts/common/fs/vnode.c | 316 |
| -rw-r--r-- | usr/src/uts/common/sys/vnode.h | 61 |

4 files changed, 383 insertions, 193 deletions
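
Before reading the diff below, the core of the change is easier to follow with a small illustration. The commit adds an hrtime_t v_path_stamp to each vnode and uses it to decide whether a cached v_path can be trusted or must be rebuilt. The sketch below is not the kernel code: it omits the locking, the allocation-retry dance, and the rootdir corner cases, and the names toy_vnode_t, toy_now(), and toy_update_path() are invented for this example. It only models the four freshness rules spelled out in the new "VNODE PATH THEORY" comment added to vnode.h further down.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef struct toy_vnode {
	char *v_path;		/* cached path, NULL when unknown */
	uint64_t v_path_stamp;	/* 0 means the path was never populated */
} toy_vnode_t;

/* Stand-in for gethrtime(): a monotonically increasing counter. */
static uint64_t
toy_now(void)
{
	static uint64_t t;
	return (++t);
}

/*
 * Apply the freshness rules to a child vnode, given its parent directory
 * and the name it was just looked up by.  Locking and allocation-failure
 * handling are deliberately omitted.
 */
static void
toy_update_path(toy_vnode_t *pvp, toy_vnode_t *vp, const char *name)
{
	char *buf;
	size_t len;
	int was_empty = (vp->v_path_stamp == 0);

	/* Rule 1: a parent without a valid path invalidates the child. */
	if (pvp->v_path == NULL) {
		free(vp->v_path);
		vp->v_path = NULL;
		vp->v_path_stamp = 0;
		return;
	}

	/* Rule 4: the child is already at least as fresh as the parent. */
	if (vp->v_path != NULL && vp->v_path_stamp >= pvp->v_path_stamp)
		return;

	/* Rules 2 and 3: rebuild "<parent>/<name>" for the child. */
	len = strlen(pvp->v_path) + 1 + strlen(name) + 1;
	buf = malloc(len);
	(void) snprintf(buf, len, "%s/%s", pvp->v_path, name);
	free(vp->v_path);
	vp->v_path = buf;

	if (was_empty) {
		/* Rule 2: a never-visited child inherits the parent's stamp. */
		vp->v_path_stamp = pvp->v_path_stamp;
	} else {
		/* Rule 3: a stale child is restamped with the current time. */
		vp->v_path_stamp = toy_now();
	}
}

int
main(void)
{
	char parent_path[] = "/export/home";
	toy_vnode_t parent = { parent_path, toy_now() };
	toy_vnode_t child = { NULL, 0 };

	toy_update_path(&parent, &child, "docs");
	(void) printf("%s (stamp %llu)\n", child.v_path,
	    (unsigned long long)child.v_path_stamp);
	free(child.v_path);
	return (0);
}
```

The design point worth noting is rule 3: a stale child is restamped with the current time rather than inheriting the parent's stamp, so the invalidation keeps propagating downward through deeper entries that may have been cached concurrently with a now-incorrect path.
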
diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c index 59ec5d1829..7dc83ff8d5 100644 --- a/usr/src/uts/common/fs/lookup.c +++ b/usr/src/uts/common/fs/lookup.c @@ -21,7 +21,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2016 Joyent, Inc. * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -1069,6 +1069,19 @@ vnode_valid_pn(vnode_t *vp, vnode_t *vrootp, pathname_t *pn, pathname_t *rpn, } /* + * Struct for tracking vnodes with invalidated v_path entries during a + * dirtopath reverse lookup. By keepeing adequate state, those vnode can be + * revisted to populate v_path. + */ +struct dirpath_walk { + struct dirpath_walk *dw_next; + vnode_t *dw_vnode; + vnode_t *dw_pvnode; + size_t dw_len; + char *dw_name; +}; + +/* * Given a directory, return the full, resolved path. This looks up "..", * searches for the given vnode in the parent, appends the component, etc. It * is used to implement vnodetopath() and getcwd() when the cached path fails. @@ -1077,18 +1090,14 @@ static int dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, cred_t *cr) { - pathname_t pn, rpn, emptypn; - vnode_t *cmpvp, *pvp = NULL; - vnode_t *startvp = vp; - int err = 0, vprivs; - size_t complen; - char *dbuf; - dirent64_t *dp; - char *bufloc; - size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); - refstr_t *mntpt; - char *vpath_cached; - boolean_t vpath_stale; + pathname_t pn, rpn, emptypn; + vnode_t *pvp = NULL, *startvp = vp; + int err = 0; + size_t complen; + dirent64_t *dp; + char *bufloc, *dbuf; + const size_t dlen = DIRENT64_RECLEN(MAXPATHLEN); + struct dirpath_walk *dw_chain = NULL, *dw_entry; /* Operation only allowed on directories */ ASSERT(vp->v_type == VDIR); @@ -1113,6 +1122,9 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, VN_HOLD(vp); for (;;) { + int vprivs; + hrtime_t cached_stamp; + /* * Return if we've reached the root. If the buffer is empty, * return '/'. We explicitly don't use vn_compare(), since it @@ -1137,57 +1149,13 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, } /* - * Shortcut: see if this vnode is a mountpoint. If so, - * grab the path information from the vfs_t. - */ - if (vp->v_flag & VROOT) { - - mntpt = vfs_getmntpoint(vp->v_vfsp); - if ((err = pn_set(&pn, (char *)refstr_value(mntpt))) - == 0) { - refstr_rele(mntpt); - rpn.pn_path = rpn.pn_buf; - - /* - * Ensure the mountpoint still exists. - */ - VN_HOLD(vrootp); - if (vrootp != rootdir) - VN_HOLD(vrootp); - if (lookuppnvp(&pn, &rpn, flags, NULL, - &cmpvp, vrootp, vrootp, cr) == 0) { - - if (VN_CMP(vp, cmpvp)) { - VN_RELE(cmpvp); - - complen = strlen(rpn.pn_path); - bufloc -= complen; - if (bufloc < buf) { - err = ERANGE; - goto out; - } - bcopy(rpn.pn_path, bufloc, - complen); - break; - } else { - VN_RELE(cmpvp); - } - } - } else { - refstr_rele(mntpt); - } - } - - /* * Shortcut: see if this vnode has correct v_path. If so, * we have the work done. 
*/ - vpath_cached = NULL; - vpath_stale = B_FALSE; mutex_enter(&vp->v_lock); if (vp->v_path != vn_vpath_empty && pn_set(&pn, vp->v_path) == 0) { - vpath_cached = vp->v_path; + cached_stamp = vp->v_path_stamp; mutex_exit(&vp->v_lock); rpn.pn_path = rpn.pn_buf; @@ -1203,7 +1171,11 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, bcopy(rpn.pn_path, bufloc, complen); break; } else { - vpath_stale = B_TRUE; + /* + * Immediately nuke cached v_path entries known + * to be invalid. + */ + vn_clearpath(vp, cached_stamp); } } else { mutex_exit(&vp->v_lock); @@ -1265,10 +1237,18 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, /* Prepend a slash to the current path. */ *--bufloc = '/'; - /* Clear vp->v_path if it was found to be stale. */ - if (vpath_stale == B_TRUE) { - vnode_clear_vpath(vp, vpath_cached); - } + /* + * Record the name and directory for later reconstruction and + * link it up with the others. + */ + dw_entry = kmem_alloc(sizeof (*dw_entry), KM_SLEEP); + dw_entry->dw_name = kmem_alloc(complen + 1, KM_SLEEP); + VN_HOLD(dw_entry->dw_vnode = vp); + VN_HOLD(dw_entry->dw_pvnode = pvp); + bcopy(dp->d_name, dw_entry->dw_name, complen + 1); + dw_entry->dw_len = complen; + dw_entry->dw_next = dw_chain; + dw_chain = dw_entry; /* And continue with the next component */ VN_RELE(vp); @@ -1284,6 +1264,37 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags, out: /* + * Walk over encountered directory entries which were afflicted with a + * stale or absent v_path. If the dirtopath was successful, we should + * possess the necessary information to populate all of them with a + * valid v_path. + * + * While processing this list, it is safe to call vn_setpath despite + * the fact that racing vnode actions may have altered v_path entries + * while the above loopwas still executing. Any updated entries will + * have a newer v_path_stamp value which prevents an invalid overwrite. + * + * If an error was encountered during the search, freeing the chain is + * still required. + */ + dw_entry = dw_chain; + while (dw_entry != NULL) { + struct dirpath_walk *next = dw_entry->dw_next; + + if (err == 0) { + vn_setpath(NULL, dw_entry->dw_pvnode, + dw_entry->dw_vnode, dw_entry->dw_name, + dw_entry->dw_len); + } + + VN_RELE(dw_entry->dw_vnode); + VN_RELE(dw_entry->dw_pvnode); + kmem_free(dw_entry->dw_name, dw_entry->dw_len + 1); + kmem_free(dw_entry, sizeof (*dw_entry)); + dw_entry = next; + } + + /* * If the error was ESTALE and the current directory to look in * was the root for this lookup, the root for a mounted file * system, or the starting directory for lookups, then @@ -1323,18 +1334,18 @@ static int vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr, int flags) { - pathname_t pn, rpn; - int ret, len; - vnode_t *compvp, *pvp, *realvp; - proc_t *p = curproc; - char path[MAXNAMELEN]; - int doclose = 0; + pathname_t pn; + int ret = 0; + vnode_t *realvp; + boolean_t doclose = B_FALSE; /* * If vrootp is NULL, get the root for curproc. Callers with any other * requirements should pass in a different vrootp. 
*/ if (vrootp == NULL) { + proc_t *p = curproc; + mutex_enter(&p->p_lock); if ((vrootp = PTOU(p)->u_rdir) == NULL) vrootp = rootdir; @@ -1356,18 +1367,21 @@ vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, realvp != vp) { VN_HOLD(vp); if (VOP_OPEN(&vp, FREAD, cr, NULL) == 0) - doclose = 1; + doclose = B_TRUE; else VN_RELE(vp); } - /* * Check to see if we have a valid cached path in the vnode. */ pn_alloc(&pn); mutex_enter(&vp->v_lock); if (vp->v_path != vn_vpath_empty) { + hrtime_t cached_stamp; + pathname_t rpn; + + cached_stamp = vp->v_path_stamp; (void) pn_set(&pn, vp->v_path); mutex_exit(&vp->v_lock); @@ -1379,32 +1393,33 @@ vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, /* Return the result, if we're able. */ if (buflen > rpn.pn_pathlen) { bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1); - pn_free(&pn); - pn_free(&rpn); - VN_RELE(vrootp); - if (doclose) { - (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, - NULL); - VN_RELE(vp); - } - return (0); + } else { + ret = ENAMETOOLONG; } + pn_free(&pn); + pn_free(&rpn); + goto out; } - /* - * A stale v_path will be purged by the later dirtopath lookup. - */ pn_free(&rpn); + vn_clearpath(vp, cached_stamp); } else { mutex_exit(&vp->v_lock); } pn_free(&pn); if (vp->v_type != VDIR) { + /* + * The reverse lookup tricks used by dirtopath aren't possible + * for non-directory entries. The best which can be done is + * clearing any stale v_path so later lookups can potentially + * repopulate it with a valid path. + */ ret = ENOENT; } else { ret = dirtopath(vrootp, vp, buf, buflen, flags, cr); } +out: VN_RELE(vrootp); if (doclose) { (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL); diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index 35e65f15e6..ead382ce07 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. + * Copyright 2016 Joyent, Inc. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 
*/ @@ -238,9 +238,11 @@ fsop_root(vfs_t *vfsp, vnode_t **vpp) */ if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == vn_vpath_empty) { + const char *path; + mntpt = vfs_getmntpoint(vfsp); - vn_setpath_str(*vpp, refstr_value(mntpt), - strlen(refstr_value(mntpt))); + path = refstr_value(mntpt); + vn_setpath_str(*vpp, path, strlen(path)); refstr_rele(mntpt); } @@ -356,8 +358,8 @@ fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual, } void -zfs_boot_init() { - +zfs_boot_init() +{ if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0) spa_boot_init(); } @@ -1106,7 +1108,7 @@ out: */ int domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, - struct vfs **vfspp) + struct vfs **vfspp) { struct vfssw *vswp; vfsops_t *vfsops; @@ -2789,7 +2791,7 @@ vfs_freeopttbl(mntopts_t *mp) /* ARGSUSED */ static int vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, - caller_context_t *ct) + caller_context_t *ct) { return (0); } @@ -2797,7 +2799,7 @@ vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, /* ARGSUSED */ static int vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, - caller_context_t *ct) + caller_context_t *ct) { return (0); } diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index 77b30da871..a0aba42d83 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. + * Copyright 2016, Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -67,6 +67,7 @@ #include <sys/taskq.h> #include <fs/fs_reparse.h> #include <sys/time.h> +#include <sys/sdt.h> /* Determine if this vnode is a file that is read-only */ #define ISROFILE(vp) \ @@ -1652,7 +1653,7 @@ vn_rename(char *from, char *to, enum uio_seg seg) int vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp, - char *tname, enum uio_seg seg) + char *tname, enum uio_seg seg) { int error; struct vattr vattr; @@ -2294,6 +2295,7 @@ vn_cache_constructor(void *buf, void *cdrarg, int kmflags) rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL); vp->v_femhead = NULL; /* Must be done before vn_reinit() */ vp->v_path = vn_vpath_empty; + vp->v_path_stamp = 0; vp->v_mpssdata = NULL; vp->v_vsd = NULL; vp->v_fopdata = NULL; @@ -2367,6 +2369,7 @@ vn_recycle(vnode_t *vp) kmem_free(vp->v_path, strlen(vp->v_path) + 1); vp->v_path = vn_vpath_empty; } + vp->v_path_stamp = 0; if (vp->v_fopdata != NULL) { free_fopdata(vp); @@ -2974,108 +2977,229 @@ fs_new_caller_id() } /* - * Given a starting vnode and a path, updates the path in the target vnode in - * a safe manner. If the vnode already has path information embedded, then the - * cached path is left untouched. + * The value stored in v_path is relative to rootdir, located in the global + * zone. Zones or chroot environments which reside deeper inside the VFS + * hierarchy will have a relative view of MAXPATHLEN since they are unaware of + * what lies below their perceived root. In order to keep v_path usable for + * these child environments, its allocations are allowed to exceed MAXPATHLEN. + * + * An upper bound of max_vnode_path is placed upon v_path allocations to + * prevent the system from going too wild at the behest of pathological + * behavior from the operator. 
*/ - size_t max_vnode_path = 4 * MAXPATHLEN; + void -vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, - const char *path, size_t plen) -{ - char *rpath; - vnode_t *base; - size_t rpathlen, rpathalloc; - int doslash = 1; - - if (*path == '/') { - base = rootvp; - path++; - plen--; - } else { - base = startvp; - } +vn_clearpath(vnode_t *vp, hrtime_t compare_stamp) +{ + char *buf; + mutex_enter(&vp->v_lock); /* - * We cannot grab base->v_lock while we hold vp->v_lock because of - * the potential for deadlock. + * If the snapshot of v_path_stamp passed in via compare_stamp does not + * match the present value on the vnode, it indicates that subsequent + * changes have occurred. The v_path value is not cleared in this case + * since the new value may be valid. */ - mutex_enter(&base->v_lock); - if (base->v_path == vn_vpath_empty) { - mutex_exit(&base->v_lock); + if (compare_stamp != 0 && vp->v_path_stamp != compare_stamp) { + mutex_exit(&vp->v_lock); return; } + buf = vp->v_path; + vp->v_path = vn_vpath_empty; + vp->v_path_stamp = 0; + mutex_exit(&vp->v_lock); + if (buf != vn_vpath_empty) { + kmem_free(buf, strlen(buf) + 1); + } +} - rpathlen = strlen(base->v_path); - rpathalloc = rpathlen + plen + 1; - /* Avoid adding a slash if there's already one there */ - if (base->v_path[rpathlen-1] == '/') - doslash = 0; - else - rpathalloc++; - - /* - * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held, - * so we must do this dance. If, by chance, something changes the path, - * just give up since there is no real harm. - */ - mutex_exit(&base->v_lock); +static void +vn_setpath_common(vnode_t *pvp, vnode_t *vp, const char *name, size_t len, + boolean_t is_rename) +{ + char *buf, *oldbuf; + hrtime_t pstamp; + size_t baselen, buflen = 0; + + /* Handle the vn_setpath_str case. */ + if (pvp == NULL) { + if (len + 1 > max_vnode_path) { + DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp, + vnode_t *, vp, char *, name, size_t, len + 1); + return; + } + buf = kmem_alloc(len + 1, KM_SLEEP); + bcopy(name, buf, len); + buf[len] = '\0'; - /* Paths should stay within reason */ - if (rpathalloc > max_vnode_path) + mutex_enter(&vp->v_lock); + oldbuf = vp->v_path; + vp->v_path = buf; + vp->v_path_stamp = gethrtime(); + mutex_exit(&vp->v_lock); + if (oldbuf != vn_vpath_empty) { + kmem_free(oldbuf, strlen(oldbuf) + 1); + } return; + } + + /* Take snapshot of parent dir */ + mutex_enter(&pvp->v_lock); +retrybuf: + if (pvp->v_path == vn_vpath_empty) { + /* + * Without v_path from the parent directory, generating a child + * path from the name is impossible. + */ + if (len > 0) { + pstamp = pvp->v_path_stamp; + mutex_exit(&pvp->v_lock); + vn_clearpath(vp, pstamp); + return; + } - rpath = kmem_alloc(rpathalloc, KM_SLEEP); + /* + * The only feasible case here is where a NUL lookup is being + * performed on rootdir prior to its v_path being populated. 
+ */ + ASSERT(pvp->v_path_stamp = 0); + baselen = 0; + pstamp = 0; + } else { + pstamp = pvp->v_path_stamp; + baselen = strlen(pvp->v_path); + /* ignore a trailing slash if present */ + if (pvp->v_path[baselen - 1] == '/') { + /* This should only the be case for rootdir */ + ASSERT(baselen == 1 && pvp == rootdir); + baselen--; + } + } + mutex_exit(&pvp->v_lock); - mutex_enter(&base->v_lock); - if (base->v_path == vn_vpath_empty || - strlen(base->v_path) != rpathlen) { - mutex_exit(&base->v_lock); - kmem_free(rpath, rpathalloc); + if (buflen != 0) { + /* Free the existing (mis-sized) buffer in case of retry */ + kmem_free(buf, buflen); + } + /* base, '/', name and trailing NUL */ + buflen = baselen + len + 2; + if (buflen > max_vnode_path) { + DTRACE_PROBE4(vn__setpath_too__long, vnode_t *, pvp, + vnode_t *, vp, char *, name, size_t, buflen); return; } - bcopy(base->v_path, rpath, rpathlen); - mutex_exit(&base->v_lock); + buf = kmem_alloc(buflen, KM_SLEEP); + + mutex_enter(&pvp->v_lock); + if (pvp->v_path_stamp != pstamp) { + size_t vlen; + + /* + * Since v_path_stamp changed on the parent, it is likely that + * v_path has been altered as well. If the length does not + * exactly match what was previously measured, the buffer + * allocation must be repeated for proper sizing. + */ + if (pvp->v_path == vn_vpath_empty) { + /* Give up if parent lack v_path */ + mutex_exit(&pvp->v_lock); + kmem_free(buf, buflen); + return; + } + vlen = strlen(pvp->v_path); + if (pvp->v_path[vlen - 1] == '/') { + vlen--; + } + if (vlen != baselen) { + goto retrybuf; + } + } + bcopy(pvp->v_path, buf, baselen); + mutex_exit(&pvp->v_lock); - if (doslash) - rpath[rpathlen++] = '/'; - bcopy(path, rpath + rpathlen, plen); - rpath[rpathlen + plen] = '\0'; + buf[baselen] = '/'; + baselen++; + bcopy(name, &buf[baselen], len + 1); mutex_enter(&vp->v_lock); - if (vp->v_path != vn_vpath_empty) { + if (vp->v_path_stamp == 0) { + /* never-visited vnode can inherit stamp from parent */ + ASSERT(vp->v_path == vn_vpath_empty); + vp->v_path_stamp = pstamp; + vp->v_path = buf; + mutex_exit(&vp->v_lock); + } else if (vp->v_path_stamp < pstamp || is_rename) { + /* + * Install the updated path and stamp, ensuring that the v_path + * pointer is valid at all times for dtrace. + */ + oldbuf = vp->v_path; + vp->v_path = buf; + vp->v_path_stamp = gethrtime(); mutex_exit(&vp->v_lock); - kmem_free(rpath, rpathalloc); + kmem_free(oldbuf, strlen(oldbuf) + 1); } else { - vp->v_path = rpath; + /* + * If the timestamp matches or is greater, it means another + * thread performed the update first while locks were dropped + * here to make the allocation. We defer to the newer value. + */ mutex_exit(&vp->v_lock); + kmem_free(buf, buflen); } + ASSERT(MUTEX_NOT_HELD(&vp->v_lock)); } -/* - * Sets the path to the vnode to be the given string, regardless of current - * context. The string must be a complete path from rootdir. This is only used - * by fsop_root() for setting the path based on the mountpoint. - */ void -vn_setpath_str(struct vnode *vp, const char *str, size_t len) +vn_updatepath(vnode_t *pvp, vnode_t *vp, const char *name) { - char *buf = kmem_alloc(len + 1, KM_SLEEP); + size_t len; - mutex_enter(&vp->v_lock); - if (vp->v_path != vn_vpath_empty) { - mutex_exit(&vp->v_lock); - kmem_free(buf, len + 1); + /* + * If the parent is older or empty, there's nothing further to do. 
+ */ + if (pvp->v_path == vn_vpath_empty || + pvp->v_path_stamp <= vp->v_path_stamp) { return; } - vp->v_path = buf; - bcopy(str, vp->v_path, len); - vp->v_path[len] = '\0'; + /* + * Given the lack of appropriate context, meaningful updates to v_path + * cannot be made for during lookups for the '.' or '..' entries. + */ + len = strlen(name); + if (len == 0 || (len == 1 && name[0] == '.') || + (len == 2 && name[0] == '.' && name[1] == '.')) { + return; + } - mutex_exit(&vp->v_lock); + vn_setpath_common(pvp, vp, name, len, B_FALSE); +} + +/* + * Given a starting vnode and a path, updates the path in the target vnode in + * a safe manner. If the vnode already has path information embedded, then the + * cached path is left untouched. + */ +/* ARGSUSED */ +void +vn_setpath(vnode_t *rootvp, vnode_t *pvp, vnode_t *vp, const char *name, + size_t len) +{ + vn_setpath_common(pvp, vp, name, len, B_FALSE); +} + +/* + * Sets the path to the vnode to be the given string, regardless of current + * context. The string must be a complete path from rootdir. This is only used + * by fsop_root() for setting the path based on the mountpoint. + */ +void +vn_setpath_str(vnode_t *vp, const char *str, size_t len) +{ + vn_setpath_common(NULL, vp, str, len, B_FALSE); } /* @@ -3083,17 +3207,9 @@ vn_setpath_str(struct vnode *vp, const char *str, size_t len) * target vnode is available. */ void -vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len) +vn_renamepath(vnode_t *pvp, vnode_t *vp, const char *name, size_t len) { - char *tmp; - - mutex_enter(&vp->v_lock); - tmp = vp->v_path; - vp->v_path = vn_vpath_empty; - mutex_exit(&vp->v_lock); - vn_setpath(rootdir, dvp, vp, nm, len); - if (tmp != vn_vpath_empty) - kmem_free(tmp, strlen(tmp) + 1); + vn_setpath_common(pvp, vp, name, len, B_TRUE); } /* @@ -3104,37 +3220,42 @@ void vn_copypath(struct vnode *src, struct vnode *dst) { char *buf; - int alloc; + hrtime_t stamp; + size_t buflen; mutex_enter(&src->v_lock); if (src->v_path == vn_vpath_empty) { mutex_exit(&src->v_lock); return; } - alloc = strlen(src->v_path) + 1; - - /* avoid kmem_alloc() with lock held */ + buflen = strlen(src->v_path) + 1; mutex_exit(&src->v_lock); - buf = kmem_alloc(alloc, KM_SLEEP); + + buf = kmem_alloc(buflen, KM_SLEEP); + mutex_enter(&src->v_lock); - if (src->v_path == vn_vpath_empty || strlen(src->v_path) + 1 != alloc) { + if (src->v_path == vn_vpath_empty || + strlen(src->v_path) + 1 != buflen) { mutex_exit(&src->v_lock); - kmem_free(buf, alloc); + kmem_free(buf, buflen); return; } - bcopy(src->v_path, buf, alloc); + bcopy(src->v_path, buf, buflen); + stamp = src->v_path_stamp; mutex_exit(&src->v_lock); mutex_enter(&dst->v_lock); if (dst->v_path != vn_vpath_empty) { mutex_exit(&dst->v_lock); - kmem_free(buf, alloc); + kmem_free(buf, buflen); return; } dst->v_path = buf; + dst->v_path_stamp = stamp; mutex_exit(&dst->v_lock); } + /* * XXX Private interface for segvn routines that handle vnode * large page segments. 
@@ -3565,9 +3686,7 @@ fop_lookup( } if (ret == 0 && *vpp) { VOPSTATS_UPDATE(*vpp, lookup); - if ((*vpp)->v_path == vn_vpath_empty) { - vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm)); - } + vn_updatepath(dvp, *vpp, nm); } return (ret); @@ -3607,9 +3726,7 @@ fop_create( (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp); if (ret == 0 && *vpp) { VOPSTATS_UPDATE(*vpp, create); - if ((*vpp)->v_path == vn_vpath_empty) { - vn_setpath(rootdir, dvp, *vpp, name, strlen(name)); - } + vn_updatepath(dvp, *vpp, name); } return (ret); @@ -3729,10 +3846,7 @@ fop_mkdir( (dvp, dirname, vap, vpp, cr, ct, flags, vsecp); if (ret == 0 && *vpp) { VOPSTATS_UPDATE(*vpp, mkdir); - if ((*vpp)->v_path == vn_vpath_empty) { - vn_setpath(rootdir, dvp, *vpp, dirname, - strlen(dirname)); - } + vn_updatepath(dvp, *vpp, dirname); } return (ret); diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index d12f6c4046..c779cc1ff6 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -21,7 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -221,6 +221,59 @@ struct vsd_node { * In particular, file systems should not access other fields; they may * change or even be removed. The functionality which was once provided * by these fields is available through vn_* functions. + * + * VNODE PATH THEORY: + * In each vnode, the v_path field holds a cached version of the canonical + * filesystem path which that node represents. Because vnodes lack contextual + * information about their own name or position in the VFS hierarchy, this path + * must be calculated when the vnode is instantiated by operations such as + * fop_create, fop_lookup, or fop_mkdir. During said operations, both the + * parent vnode (and its cached v_path) and future name are known, so the + * v_path of the resulting object can easily be set. + * + * The caching nature of v_path is complicated in the face of directory + * renames. Filesystem drivers are responsible for calling vn_renamepath when + * a fop_rename operation succeeds. While the v_path on the renamed vnode will + * be updated, existing children of the directory (direct, or at deeper levels) + * will now possess v_path caches which are stale. + * + * It is expensive (and for non-directories, impossible) to recalculate stale + * v_path entries during operations such as vnodetopath. The best time during + * which to correct such wrongs is the same as when v_path is first + * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate + * context is available to generate the current path. + * + * In order to quickly detect stale v_path entries (without full lookup + * verification) to trigger a v_path update, the v_path_stamp field has been + * added to vnode_t. As part of successful fop_create/fop_lookup/fop_mkdir + * operations, where the name and parent vnode are available, the following + * rules are used to determine updates to the child: + * + * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp + * on the child. Until the parent v_path is refreshed to a valid state, the + * child v_path must be considered invalid too. + * + * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the + * v_path_stamp value from its parent and its v_path is updated. + * + * 3. 
If the child v_path_stamp is less than v_path_stamp in the parent, it is + * an indication that the child v_path is stale. The v_path is updated and + * v_path_stamp in the child is set to the current hrtime(). + * + * It does _not_ inherit the parent v_path_stamp in order to propagate the + * the time of v_path invalidation through the directory structure. This + * prevents concurrent invalidations (operating with a now-incorrect v_path) + * at deeper levels in the tree from persisting. + * + * 4. If the child v_path_stamp is greater or equal to the parent, no action + * needs to be taken. + * + * Note that fop_rename operations do not follow this ruleset. They perform an + * explicit update of v_path and v_path_stamp (setting it to the current time) + * + * With these constraints in place, v_path invalidations and updates should + * proceed in a timely manner as vnodes are accessed. While there still are + * limited cases where vnodetopath operations will fail, the risk is minimized. */ struct fem_head; /* from fem.h */ @@ -247,6 +300,7 @@ typedef struct vnode { void *v_locality; /* hook for locality info */ struct fem_head *v_femhead; /* fs monitoring */ char *v_path; /* cached path */ + hrtime_t v_path_stamp; /* timestamp for cached path */ uint_t v_rdcnt; /* open for read count (VREG only) */ uint_t v_wrcnt; /* open for write count (VREG only) */ u_longlong_t v_mmap_read; /* mmap read count */ @@ -1293,6 +1347,11 @@ void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp, const char *path, size_t plen); void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len); +/* Private vnode manipulation functions */ +void vn_clearpath(vnode_t *, hrtime_t); +void vn_updatepath(vnode_t *, vnode_t *, const char *); + + /* Vnode event notification */ void vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *); |
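
A second piece worth calling out is the bookkeeping that dirtopath() in lookup.c now performs: rather than only clearing stale v_path entries as it walks toward the root, it records each affected vnode in a struct dirpath_walk chain and, once the reverse lookup has succeeded, replays the chain to repopulate every cached path. The sketch below mirrors that record-and-replay pattern in plain userland C under simplified assumptions; toy_vnode_t, toy_setpath(), record_component(), and unwind_chain() are invented stand-ins, and the real code additionally holds vnode references and relies on v_path_stamp to tolerate racing updates.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct toy_vnode {
	char *v_path;			/* cached path for this node */
} toy_vnode_t;

/* One record per vnode whose cached path must be repopulated later. */
struct dirpath_walk {
	struct dirpath_walk *dw_next;
	toy_vnode_t *dw_vnode;		/* vnode to fix up */
	toy_vnode_t *dw_pvnode;		/* its parent directory */
	size_t dw_len;			/* length of dw_name */
	char *dw_name;			/* component name under the parent */
};

/* Rebuild vp->v_path as "<parent path>/<name>". */
static void
toy_setpath(toy_vnode_t *pvp, toy_vnode_t *vp, const char *name)
{
	size_t buflen;
	char *buf;

	if (pvp->v_path == NULL)	/* parent still unknown: give up */
		return;
	buflen = strlen(pvp->v_path) + 1 + strlen(name) + 1;
	buf = malloc(buflen);
	(void) snprintf(buf, buflen, "%s/%s", pvp->v_path, name);
	free(vp->v_path);
	vp->v_path = buf;
}

/*
 * Record a component seen while walking from a vnode toward the root.
 * Prepending puts the entry nearest the root first in the chain.
 */
static struct dirpath_walk *
record_component(struct dirpath_walk *chain, toy_vnode_t *pvp,
    toy_vnode_t *vp, const char *name)
{
	struct dirpath_walk *dw = malloc(sizeof (*dw));

	dw->dw_vnode = vp;
	dw->dw_pvnode = pvp;
	dw->dw_len = strlen(name);
	dw->dw_name = malloc(dw->dw_len + 1);
	(void) memcpy(dw->dw_name, name, dw->dw_len + 1);
	dw->dw_next = chain;
	return (dw);
}

/*
 * After the reverse lookup completes: on success, repopulate every recorded
 * path, parents before children; free the chain whether or not it succeeded.
 */
static void
unwind_chain(struct dirpath_walk *chain, int err)
{
	while (chain != NULL) {
		struct dirpath_walk *next = chain->dw_next;

		if (err == 0) {
			toy_setpath(chain->dw_pvnode, chain->dw_vnode,
			    chain->dw_name);
		}
		free(chain->dw_name);
		free(chain);
		chain = next;
	}
}

int
main(void)
{
	char root_path[] = "/export";
	toy_vnode_t root = { root_path };
	toy_vnode_t a = { NULL }, b = { NULL };
	struct dirpath_walk *chain = NULL;

	/* Walk upward from "b": record (a, "b"), then (root, "a"). */
	chain = record_component(chain, &a, &b, "b");
	chain = record_component(chain, &root, &a, "a");

	unwind_chain(chain, 0);
	(void) printf("%s\n%s\n", a.v_path, b.v_path);
	free(a.v_path);
	free(b.v_path);
	return (0);
}
```

Because entries are prepended during the upward walk, the unwind visits the vnode closest to the root first, so each child's path is rebuilt from a parent whose own cached path has just been refreshed.
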