summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorPatrick Mooney <pmooney@pfmooney.com>2016-05-16 17:25:56 +0000
committerPatrick Mooney <pmooney@pfmooney.com>2016-06-10 14:50:51 +0000
commit0f70e4e962f4fb06a29565b595bb50659d077f04 (patch)
tree66da07a8f8ec62f93b1a1d9af9589f041d2fcb29 /usr/src
parent301612afc3ef38a67f1adb329b36dae1bddf9034 (diff)
downloadillumos-joyent-0f70e4e962f4fb06a29565b595bb50659d077f04.tar.gz
OS-5167 cached v_path should be kept fresh
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/common/fs/lookup.c181
-rw-r--r--usr/src/uts/common/fs/vfs.c18
-rw-r--r--usr/src/uts/common/fs/vnode.c316
-rw-r--r--usr/src/uts/common/sys/vnode.h61
4 files changed, 383 insertions, 193 deletions
diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c
index 59ec5d1829..7dc83ff8d5 100644
--- a/usr/src/uts/common/fs/lookup.c
+++ b/usr/src/uts/common/fs/lookup.c
@@ -21,7 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -1069,6 +1069,19 @@ vnode_valid_pn(vnode_t *vp, vnode_t *vrootp, pathname_t *pn, pathname_t *rpn,
}
/*
+ * Struct for tracking vnodes with invalidated v_path entries during a
+ * dirtopath reverse lookup. By keeping adequate state, those vnodes can be
+ * revisited to populate v_path.
+ */
+struct dirpath_walk {
+ struct dirpath_walk *dw_next; /* next (earlier-recorded) entry in chain */
+ vnode_t *dw_vnode; /* held vnode visited during the walk */
+ vnode_t *dw_pvnode; /* held parent (pvp) of dw_vnode */
+ size_t dw_len; /* name length; dw_name is dw_len + 1 bytes */
+ char *dw_name; /* kmem-allocated copy of the entry name */
+};
+
+/*
* Given a directory, return the full, resolved path. This looks up "..",
* searches for the given vnode in the parent, appends the component, etc. It
* is used to implement vnodetopath() and getcwd() when the cached path fails.
@@ -1077,18 +1090,14 @@ static int
dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
cred_t *cr)
{
- pathname_t pn, rpn, emptypn;
- vnode_t *cmpvp, *pvp = NULL;
- vnode_t *startvp = vp;
- int err = 0, vprivs;
- size_t complen;
- char *dbuf;
- dirent64_t *dp;
- char *bufloc;
- size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
- refstr_t *mntpt;
- char *vpath_cached;
- boolean_t vpath_stale;
+ pathname_t pn, rpn, emptypn;
+ vnode_t *pvp = NULL, *startvp = vp;
+ int err = 0;
+ size_t complen;
+ dirent64_t *dp;
+ char *bufloc, *dbuf;
+ const size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
+ struct dirpath_walk *dw_chain = NULL, *dw_entry;
/* Operation only allowed on directories */
ASSERT(vp->v_type == VDIR);
@@ -1113,6 +1122,9 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
VN_HOLD(vp);
for (;;) {
+ int vprivs;
+ hrtime_t cached_stamp;
+
/*
* Return if we've reached the root. If the buffer is empty,
* return '/'. We explicitly don't use vn_compare(), since it
@@ -1137,57 +1149,13 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
}
/*
- * Shortcut: see if this vnode is a mountpoint. If so,
- * grab the path information from the vfs_t.
- */
- if (vp->v_flag & VROOT) {
-
- mntpt = vfs_getmntpoint(vp->v_vfsp);
- if ((err = pn_set(&pn, (char *)refstr_value(mntpt)))
- == 0) {
- refstr_rele(mntpt);
- rpn.pn_path = rpn.pn_buf;
-
- /*
- * Ensure the mountpoint still exists.
- */
- VN_HOLD(vrootp);
- if (vrootp != rootdir)
- VN_HOLD(vrootp);
- if (lookuppnvp(&pn, &rpn, flags, NULL,
- &cmpvp, vrootp, vrootp, cr) == 0) {
-
- if (VN_CMP(vp, cmpvp)) {
- VN_RELE(cmpvp);
-
- complen = strlen(rpn.pn_path);
- bufloc -= complen;
- if (bufloc < buf) {
- err = ERANGE;
- goto out;
- }
- bcopy(rpn.pn_path, bufloc,
- complen);
- break;
- } else {
- VN_RELE(cmpvp);
- }
- }
- } else {
- refstr_rele(mntpt);
- }
- }
-
- /*
* Shortcut: see if this vnode has correct v_path. If so,
* we have the work done.
*/
- vpath_cached = NULL;
- vpath_stale = B_FALSE;
mutex_enter(&vp->v_lock);
if (vp->v_path != vn_vpath_empty &&
pn_set(&pn, vp->v_path) == 0) {
- vpath_cached = vp->v_path;
+ cached_stamp = vp->v_path_stamp;
mutex_exit(&vp->v_lock);
rpn.pn_path = rpn.pn_buf;
@@ -1203,7 +1171,11 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
bcopy(rpn.pn_path, bufloc, complen);
break;
} else {
- vpath_stale = B_TRUE;
+ /*
+ * Immediately nuke cached v_path entries known
+ * to be invalid.
+ */
+ vn_clearpath(vp, cached_stamp);
}
} else {
mutex_exit(&vp->v_lock);
@@ -1265,10 +1237,18 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
/* Prepend a slash to the current path. */
*--bufloc = '/';
- /* Clear vp->v_path if it was found to be stale. */
- if (vpath_stale == B_TRUE) {
- vnode_clear_vpath(vp, vpath_cached);
- }
+ /*
+ * Record the name and directory for later reconstruction and
+ * link it up with the others.
+ */
+ dw_entry = kmem_alloc(sizeof (*dw_entry), KM_SLEEP);
+ dw_entry->dw_name = kmem_alloc(complen + 1, KM_SLEEP);
+ VN_HOLD(dw_entry->dw_vnode = vp);
+ VN_HOLD(dw_entry->dw_pvnode = pvp);
+ bcopy(dp->d_name, dw_entry->dw_name, complen + 1);
+ dw_entry->dw_len = complen;
+ dw_entry->dw_next = dw_chain;
+ dw_chain = dw_entry;
/* And continue with the next component */
VN_RELE(vp);
@@ -1284,6 +1264,37 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
out:
/*
+ * Walk over encountered directory entries which were afflicted with a
+ * stale or absent v_path. If the dirtopath was successful, we should
+ * possess the necessary information to populate all of them with a
+ * valid v_path.
+ *
+ * While processing this list, it is safe to call vn_setpath despite
+ * the fact that racing vnode actions may have altered v_path entries
+ * while the above loop was still executing. Any updated entries will
+ * have a newer v_path_stamp value which prevents an invalid overwrite.
+ *
+ * If an error was encountered during the search, freeing the chain is
+ * still required.
+ */
+ dw_entry = dw_chain;
+ while (dw_entry != NULL) {
+ struct dirpath_walk *next = dw_entry->dw_next;
+
+ if (err == 0) {
+ vn_setpath(NULL, dw_entry->dw_pvnode,
+ dw_entry->dw_vnode, dw_entry->dw_name,
+ dw_entry->dw_len);
+ }
+
+ VN_RELE(dw_entry->dw_vnode);
+ VN_RELE(dw_entry->dw_pvnode);
+ kmem_free(dw_entry->dw_name, dw_entry->dw_len + 1);
+ kmem_free(dw_entry, sizeof (*dw_entry));
+ dw_entry = next;
+ }
+
+ /*
* If the error was ESTALE and the current directory to look in
* was the root for this lookup, the root for a mounted file
* system, or the starting directory for lookups, then
@@ -1323,18 +1334,18 @@ static int
vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
cred_t *cr, int flags)
{
- pathname_t pn, rpn;
- int ret, len;
- vnode_t *compvp, *pvp, *realvp;
- proc_t *p = curproc;
- char path[MAXNAMELEN];
- int doclose = 0;
+ pathname_t pn;
+ int ret = 0;
+ vnode_t *realvp;
+ boolean_t doclose = B_FALSE;
/*
* If vrootp is NULL, get the root for curproc. Callers with any other
* requirements should pass in a different vrootp.
*/
if (vrootp == NULL) {
+ proc_t *p = curproc;
+
mutex_enter(&p->p_lock);
if ((vrootp = PTOU(p)->u_rdir) == NULL)
vrootp = rootdir;
@@ -1356,18 +1367,21 @@ vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
realvp != vp) {
VN_HOLD(vp);
if (VOP_OPEN(&vp, FREAD, cr, NULL) == 0)
- doclose = 1;
+ doclose = B_TRUE;
else
VN_RELE(vp);
}
-
/*
* Check to see if we have a valid cached path in the vnode.
*/
pn_alloc(&pn);
mutex_enter(&vp->v_lock);
if (vp->v_path != vn_vpath_empty) {
+ hrtime_t cached_stamp;
+ pathname_t rpn;
+
+ cached_stamp = vp->v_path_stamp;
(void) pn_set(&pn, vp->v_path);
mutex_exit(&vp->v_lock);
@@ -1379,32 +1393,33 @@ vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
/* Return the result, if we're able. */
if (buflen > rpn.pn_pathlen) {
bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
- pn_free(&pn);
- pn_free(&rpn);
- VN_RELE(vrootp);
- if (doclose) {
- (void) VOP_CLOSE(vp, FREAD, 1, 0, cr,
- NULL);
- VN_RELE(vp);
- }
- return (0);
+ } else {
+ ret = ENAMETOOLONG;
}
+ pn_free(&pn);
+ pn_free(&rpn);
+ goto out;
}
- /*
- * A stale v_path will be purged by the later dirtopath lookup.
- */
pn_free(&rpn);
+ vn_clearpath(vp, cached_stamp);
} else {
mutex_exit(&vp->v_lock);
}
pn_free(&pn);
if (vp->v_type != VDIR) {
+ /*
+ * The reverse lookup tricks used by dirtopath aren't possible
+ * for non-directory entries. The best which can be done is
+ * clearing any stale v_path so later lookups can potentially
+ * repopulate it with a valid path.
+ */
ret = ENOENT;
} else {
ret = dirtopath(vrootp, vp, buf, buflen, flags, cr);
}
+out:
VN_RELE(vrootp);
if (doclose) {
(void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index 35e65f15e6..ead382ce07 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -238,9 +238,11 @@ fsop_root(vfs_t *vfsp, vnode_t **vpp)
*/
if (ret == 0 && vfsp->vfs_mntpt != NULL &&
(*vpp)->v_path == vn_vpath_empty) {
+ const char *path;
+
mntpt = vfs_getmntpoint(vfsp);
- vn_setpath_str(*vpp, refstr_value(mntpt),
- strlen(refstr_value(mntpt)));
+ path = refstr_value(mntpt);
+ vn_setpath_str(*vpp, path, strlen(path));
refstr_rele(mntpt);
}
@@ -356,8 +358,8 @@ fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
}
void
-zfs_boot_init() {
-
+zfs_boot_init()
+{
if (strcmp(rootfs.bo_fstype, MNTTYPE_ZFS) == 0)
spa_boot_init();
}
@@ -1106,7 +1108,7 @@ out:
*/
int
domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp,
- struct vfs **vfspp)
+ struct vfs **vfspp)
{
struct vfssw *vswp;
vfsops_t *vfsops;
@@ -2789,7 +2791,7 @@ vfs_freeopttbl(mntopts_t *mp)
/* ARGSUSED */
static int
vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
- caller_context_t *ct)
+ caller_context_t *ct)
{
return (0);
}
@@ -2797,7 +2799,7 @@ vfs_mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
/* ARGSUSED */
static int
vfs_mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
- caller_context_t *ct)
+ caller_context_t *ct)
{
return (0);
}
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 77b30da871..a0aba42d83 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -67,6 +67,7 @@
#include <sys/taskq.h>
#include <fs/fs_reparse.h>
#include <sys/time.h>
+#include <sys/sdt.h>
/* Determine if this vnode is a file that is read-only */
#define ISROFILE(vp) \
@@ -1652,7 +1653,7 @@ vn_rename(char *from, char *to, enum uio_seg seg)
int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
- char *tname, enum uio_seg seg)
+ char *tname, enum uio_seg seg)
{
int error;
struct vattr vattr;
@@ -2294,6 +2295,7 @@ vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
vp->v_femhead = NULL; /* Must be done before vn_reinit() */
vp->v_path = vn_vpath_empty;
+ vp->v_path_stamp = 0;
vp->v_mpssdata = NULL;
vp->v_vsd = NULL;
vp->v_fopdata = NULL;
@@ -2367,6 +2369,7 @@ vn_recycle(vnode_t *vp)
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
vp->v_path = vn_vpath_empty;
}
+ vp->v_path_stamp = 0;
if (vp->v_fopdata != NULL) {
free_fopdata(vp);
@@ -2974,108 +2977,229 @@ fs_new_caller_id()
}
/*
- * Given a starting vnode and a path, updates the path in the target vnode in
- * a safe manner. If the vnode already has path information embedded, then the
- * cached path is left untouched.
+ * The value stored in v_path is relative to rootdir, located in the global
+ * zone. Zones or chroot environments which reside deeper inside the VFS
+ * hierarchy will have a relative view of MAXPATHLEN since they are unaware of
+ * what lies below their perceived root. In order to keep v_path usable for
+ * these child environments, its allocations are allowed to exceed MAXPATHLEN.
+ *
+ * An upper bound of max_vnode_path is placed upon v_path allocations to
+ * prevent the system from going too wild at the behest of pathological
+ * behavior from the operator.
*/
-
size_t max_vnode_path = 4 * MAXPATHLEN;
+
void
-vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
- const char *path, size_t plen)
-{
- char *rpath;
- vnode_t *base;
- size_t rpathlen, rpathalloc;
- int doslash = 1;
-
- if (*path == '/') {
- base = rootvp;
- path++;
- plen--;
- } else {
- base = startvp;
- }
+vn_clearpath(vnode_t *vp, hrtime_t compare_stamp)
+{
+ char *buf;
mutex_enter(&vp->v_lock);
/*
- * We cannot grab base->v_lock while we hold vp->v_lock because of
- * the potential for deadlock.
+ * If the snapshot of v_path_stamp passed in via compare_stamp does not
+ * match the present value on the vnode, it indicates that subsequent
+ * changes have occurred. The v_path value is not cleared in this case
+ * since the new value may be valid.
+ *
+ * A compare_stamp of 0 bypasses this check, so v_path is cleared
+ * unconditionally.
*/
- mutex_enter(&base->v_lock);
- if (base->v_path == vn_vpath_empty) {
- mutex_exit(&base->v_lock);
+ if (compare_stamp != 0 && vp->v_path_stamp != compare_stamp) {
+ mutex_exit(&vp->v_lock);
return;
}
+ buf = vp->v_path; /* old buffer is released only after unlocking */
+ vp->v_path = vn_vpath_empty;
+ vp->v_path_stamp = 0;
+ mutex_exit(&vp->v_lock);
+ if (buf != vn_vpath_empty) {
+ kmem_free(buf, strlen(buf) + 1); /* size matches strlen + NUL */
+ }
+}
- rpathlen = strlen(base->v_path);
- rpathalloc = rpathlen + plen + 1;
- /* Avoid adding a slash if there's already one there */
- if (base->v_path[rpathlen-1] == '/')
- doslash = 0;
- else
- rpathalloc++;
-
- /*
- * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
- * so we must do this dance. If, by chance, something changes the path,
- * just give up since there is no real harm.
- */
- mutex_exit(&base->v_lock);
+/*
+ * Common implementation behind vn_setpath(), vn_setpath_str(),
+ * vn_updatepath() and vn_renamepath().
+ *
+ * pvp - parent directory vnode, or NULL when name is a complete path
+ * vp - vnode whose cached v_path is to be installed or refreshed
+ * name - entry name (or the complete path when pvp is NULL)
+ * len - number of bytes of name to use, excluding any terminator
+ * is_rename - B_TRUE replaces an existing v_path on vp regardless of how its
+ * v_path_stamp compares with that of the parent
+ *
+ * With a NULL pvp the string is installed unconditionally and stamped with
+ * the current gethrtime(). Otherwise the new path is built as
+ * <parent v_path> '/' <name>; the parent is snapshotted under its v_lock, the
+ * lock is dropped for the KM_SLEEP allocation, and the snapshot is then
+ * re-validated (retrying the allocation if the parent path length changed).
+ * Paths which would exceed max_vnode_path are dropped after firing an SDT
+ * probe. Nothing is returned; on every early exit any buffer allocated here
+ * has been freed.
+ */
+static void
+vn_setpath_common(vnode_t *pvp, vnode_t *vp, const char *name, size_t len,
+ boolean_t is_rename)
+{
+ char *buf, *oldbuf;
+ hrtime_t pstamp;
+ size_t baselen, buflen = 0;
+
+ /* Handle the vn_setpath_str case. */
+ if (pvp == NULL) {
+ if (len + 1 > max_vnode_path) {
+ DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp,
+ vnode_t *, vp, char *, name, size_t, len + 1);
+ return;
+ }
+ buf = kmem_alloc(len + 1, KM_SLEEP);
+ bcopy(name, buf, len);
+ buf[len] = '\0';
- /* Paths should stay within reason */
- if (rpathalloc > max_vnode_path)
+ mutex_enter(&vp->v_lock);
+ oldbuf = vp->v_path;
+ vp->v_path = buf;
+ vp->v_path_stamp = gethrtime();
+ mutex_exit(&vp->v_lock);
+ if (oldbuf != vn_vpath_empty) {
+ kmem_free(oldbuf, strlen(oldbuf) + 1);
+ }
return;
+ }
+
+ /* Take snapshot of parent dir */
+ mutex_enter(&pvp->v_lock);
+retrybuf:
+ if (pvp->v_path == vn_vpath_empty) {
+ /*
+ * Without v_path from the parent directory, generating a child
+ * path from the name is impossible.
+ */
+ if (len > 0) {
+ pstamp = pvp->v_path_stamp;
+ mutex_exit(&pvp->v_lock);
+ vn_clearpath(vp, pstamp);
+ return;
+ }
- rpath = kmem_alloc(rpathalloc, KM_SLEEP);
+ /*
+ * The only feasible case here is where a NUL lookup is being
+ * performed on rootdir prior to its v_path being populated.
+ *
+ * This must be a comparison: an assignment here would both
+ * modify the parent's stamp and always evaluate to zero.
+ */
+ ASSERT(pvp->v_path_stamp == 0);
+ baselen = 0;
+ pstamp = 0;
+ } else {
+ pstamp = pvp->v_path_stamp;
+ baselen = strlen(pvp->v_path);
+ /* ignore a trailing slash if present */
+ if (pvp->v_path[baselen - 1] == '/') {
+ /* This should only be the case for rootdir */
+ ASSERT(baselen == 1 && pvp == rootdir);
+ baselen--;
+ }
+ }
+ mutex_exit(&pvp->v_lock);
- mutex_enter(&base->v_lock);
- if (base->v_path == vn_vpath_empty ||
- strlen(base->v_path) != rpathlen) {
- mutex_exit(&base->v_lock);
- kmem_free(rpath, rpathalloc);
+ if (buflen != 0) {
+ /* Free the existing (mis-sized) buffer in case of retry */
+ kmem_free(buf, buflen);
+ }
+ /* base, '/', name and trailing NUL */
+ buflen = baselen + len + 2;
+ if (buflen > max_vnode_path) {
+ /* Same probe name as the pvp == NULL case above */
+ DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp,
+ vnode_t *, vp, char *, name, size_t, buflen);
return;
}
- bcopy(base->v_path, rpath, rpathlen);
- mutex_exit(&base->v_lock);
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ mutex_enter(&pvp->v_lock);
+ if (pvp->v_path_stamp != pstamp) {
+ size_t vlen;
+
+ /*
+ * Since v_path_stamp changed on the parent, it is likely that
+ * v_path has been altered as well. If the length does not
+ * exactly match what was previously measured, the buffer
+ * allocation must be repeated for proper sizing.
+ */
+ if (pvp->v_path == vn_vpath_empty) {
+ /* Give up if parent lacks v_path */
+ mutex_exit(&pvp->v_lock);
+ kmem_free(buf, buflen);
+ return;
+ }
+ vlen = strlen(pvp->v_path);
+ if (pvp->v_path[vlen - 1] == '/') {
+ vlen--;
+ }
+ if (vlen != baselen) {
+ /* retrybuf is entered with pvp->v_lock still held */
+ goto retrybuf;
+ }
+ }
+ bcopy(pvp->v_path, buf, baselen);
+ mutex_exit(&pvp->v_lock);
- if (doslash)
- rpath[rpathlen++] = '/';
- bcopy(path, rpath + rpathlen, plen);
- rpath[rpathlen + plen] = '\0';
+ buf[baselen] = '/';
+ baselen++;
+ /*
+ * Copy exactly len bytes and terminate explicitly, as the pvp == NULL
+ * case does, rather than relying on name[len] being a NUL.
+ */
+ bcopy(name, &buf[baselen], len);
+ buf[baselen + len] = '\0';
mutex_enter(&vp->v_lock);
- if (vp->v_path != vn_vpath_empty) {
+ if (vp->v_path_stamp == 0) {
+ /* never-visited vnode can inherit stamp from parent */
+ ASSERT(vp->v_path == vn_vpath_empty);
+ vp->v_path_stamp = pstamp;
+ vp->v_path = buf;
+ mutex_exit(&vp->v_lock);
+ } else if (vp->v_path_stamp < pstamp || is_rename) {
+ /*
+ * Install the updated path and stamp, ensuring that the v_path
+ * pointer is valid at all times for dtrace.
+ */
+ oldbuf = vp->v_path;
+ vp->v_path = buf;
+ vp->v_path_stamp = gethrtime();
mutex_exit(&vp->v_lock);
- kmem_free(rpath, rpathalloc);
+ /* Never hand the shared empty string to kmem_free() */
+ if (oldbuf != vn_vpath_empty) {
+ kmem_free(oldbuf, strlen(oldbuf) + 1);
+ }
} else {
- vp->v_path = rpath;
+ /*
+ * If the timestamp matches or is greater, it means another
+ * thread performed the update first while locks were dropped
+ * here to make the allocation. We defer to the newer value.
+ */
mutex_exit(&vp->v_lock);
+ kmem_free(buf, buflen);
}
+ ASSERT(MUTEX_NOT_HELD(&vp->v_lock));
}
-/*
- * Sets the path to the vnode to be the given string, regardless of current
- * context. The string must be a complete path from rootdir. This is only used
- * by fsop_root() for setting the path based on the mountpoint.
- */
void
-vn_setpath_str(struct vnode *vp, const char *str, size_t len)
+vn_updatepath(vnode_t *pvp, vnode_t *vp, const char *name)
{
- char *buf = kmem_alloc(len + 1, KM_SLEEP);
+ size_t len;
- mutex_enter(&vp->v_lock);
- if (vp->v_path != vn_vpath_empty) {
- mutex_exit(&vp->v_lock);
- kmem_free(buf, len + 1);
+ /*
+ * Refresh the cached v_path of vp from its parent pvp and entry name.
+ *
+ * If the parent is older or empty, there's nothing further to do.
+ * Note that this comparison reads both vnodes without taking v_lock;
+ * vn_setpath_common() repeats the stamp comparison under the locks.
+ */
+ if (pvp->v_path == vn_vpath_empty ||
+ pvp->v_path_stamp <= vp->v_path_stamp) {
return;
}
- vp->v_path = buf;
- bcopy(str, vp->v_path, len);
- vp->v_path[len] = '\0';
+ /*
+ * Given the lack of appropriate context, meaningful updates to v_path
+ * cannot be made during lookups of the empty name or of the '.' and
+ * '..' entries.
+ */
+ len = strlen(name);
+ if (len == 0 || (len == 1 && name[0] == '.') ||
+ (len == 2 && name[0] == '.' && name[1] == '.')) {
+ return;
+ }
- mutex_exit(&vp->v_lock);
+ vn_setpath_common(pvp, vp, name, len, B_FALSE);
+}
+
+/*
+ * Given a parent vnode (pvp) and an entry name of length len, updates the
+ * cached path in the target vnode (vp) in a safe manner by way of
+ * vn_setpath_common(). The rootvp argument is accepted but not used.
+ *
+ * Since this is not a rename, a path already cached on vp is only replaced
+ * when its v_path_stamp is older than that of the parent; otherwise the
+ * cached path is left untouched.
+ */
+/* ARGSUSED */
+void
+vn_setpath(vnode_t *rootvp, vnode_t *pvp, vnode_t *vp, const char *name,
+ size_t len)
+{
+ vn_setpath_common(pvp, vp, name, len, B_FALSE);
+}
+
+/*
+ * Sets the path to the vnode to be the given string, regardless of current
+ * context. The string must be a complete path from rootdir. This is only used
+ * by fsop_root() for setting the path based on the mountpoint.
+ *
+ * Passing a NULL parent selects the whole-string mode of vn_setpath_common():
+ * the first len bytes of str are copied, NUL-terminated and installed with a
+ * fresh v_path_stamp, replacing any path already cached on vp. A string whose
+ * length plus terminator exceeds max_vnode_path is ignored.
+ */
void
+vn_setpath_str(vnode_t *vp, const char *str, size_t len)
{
+ vn_setpath_common(NULL, vp, str, len, B_FALSE);
}
/*
@@ -3083,17 +3207,9 @@ vn_setpath_str(struct vnode *vp, const char *str, size_t len)
* target vnode is available.
*/
void
-vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
+vn_renamepath(vnode_t *pvp, vnode_t *vp, const char *name, size_t len)
{
- char *tmp;
-
- mutex_enter(&vp->v_lock);
- tmp = vp->v_path;
- vp->v_path = vn_vpath_empty;
- mutex_exit(&vp->v_lock);
- vn_setpath(rootdir, dvp, vp, nm, len);
- if (tmp != vn_vpath_empty)
- kmem_free(tmp, strlen(tmp) + 1);
+ vn_setpath_common(pvp, vp, name, len, B_TRUE); /* is_rename: replace the path even if vp's stamp is not older */
}
/*
@@ -3104,37 +3220,42 @@ void
vn_copypath(struct vnode *src, struct vnode *dst)
{
char *buf;
- int alloc;
+ hrtime_t stamp;
+ size_t buflen;
mutex_enter(&src->v_lock);
if (src->v_path == vn_vpath_empty) {
mutex_exit(&src->v_lock);
return;
}
- alloc = strlen(src->v_path) + 1;
-
- /* avoid kmem_alloc() with lock held */
+ buflen = strlen(src->v_path) + 1; /* measured now, re-checked after the allocation */
mutex_exit(&src->v_lock);
- buf = kmem_alloc(alloc, KM_SLEEP);
+
+ buf = kmem_alloc(buflen, KM_SLEEP); /* KM_SLEEP allocation made with src->v_lock dropped */
+
mutex_enter(&src->v_lock);
- if (src->v_path == vn_vpath_empty || strlen(src->v_path) + 1 != buflen) {
+ if (src->v_path == vn_vpath_empty ||
+ strlen(src->v_path) + 1 != buflen) { /* path changed while unlocked: give up */
mutex_exit(&src->v_lock);
- kmem_free(buf, alloc);
+ kmem_free(buf, buflen);
return;
}
- bcopy(src->v_path, buf, alloc);
+ bcopy(src->v_path, buf, buflen);
+ stamp = src->v_path_stamp; /* captured under the same lock hold as the path copy */
mutex_exit(&src->v_lock);
mutex_enter(&dst->v_lock);
if (dst->v_path != vn_vpath_empty) {
mutex_exit(&dst->v_lock);
- kmem_free(buf, alloc);
+ kmem_free(buf, buflen);
return;
}
dst->v_path = buf;
+ dst->v_path_stamp = stamp; /* dst takes on the source's stamp along with its path */
mutex_exit(&dst->v_lock);
}
+
/*
* XXX Private interface for segvn routines that handle vnode
* large page segments.
@@ -3565,9 +3686,7 @@ fop_lookup(
}
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, lookup);
- if ((*vpp)->v_path == vn_vpath_empty) {
- vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
- }
+ vn_updatepath(dvp, *vpp, nm);
}
return (ret);
@@ -3607,9 +3726,7 @@ fop_create(
(dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, create);
- if ((*vpp)->v_path == vn_vpath_empty) {
- vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
- }
+ vn_updatepath(dvp, *vpp, name);
}
return (ret);
@@ -3729,10 +3846,7 @@ fop_mkdir(
(dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, mkdir);
- if ((*vpp)->v_path == vn_vpath_empty) {
- vn_setpath(rootdir, dvp, *vpp, dirname,
- strlen(dirname));
- }
+ vn_updatepath(dvp, *vpp, dirname);
}
return (ret);
diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h
index d12f6c4046..c779cc1ff6 100644
--- a/usr/src/uts/common/sys/vnode.h
+++ b/usr/src/uts/common/sys/vnode.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -221,6 +221,59 @@ struct vsd_node {
* In particular, file systems should not access other fields; they may
* change or even be removed. The functionality which was once provided
* by these fields is available through vn_* functions.
+ *
+ * VNODE PATH THEORY:
+ * In each vnode, the v_path field holds a cached version of the canonical
+ * filesystem path which that node represents. Because vnodes lack contextual
+ * information about their own name or position in the VFS hierarchy, this path
+ * must be calculated when the vnode is instantiated by operations such as
+ * fop_create, fop_lookup, or fop_mkdir. During said operations, both the
+ * parent vnode (and its cached v_path) and future name are known, so the
+ * v_path of the resulting object can easily be set.
+ *
+ * The caching nature of v_path is complicated in the face of directory
+ * renames. Filesystem drivers are responsible for calling vn_renamepath when
+ * a fop_rename operation succeeds. While the v_path on the renamed vnode will
+ * be updated, existing children of the directory (direct, or at deeper levels)
+ * will now possess v_path caches which are stale.
+ *
+ * It is expensive (and for non-directories, impossible) to recalculate stale
+ * v_path entries during operations such as vnodetopath. The best time during
+ * which to correct such wrongs is the same as when v_path is first
+ * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
+ * context is available to generate the current path.
+ *
+ * In order to quickly detect stale v_path entries (without full lookup
+ * verification) to trigger a v_path update, the v_path_stamp field has been
+ * added to vnode_t. As part of successful fop_create/fop_lookup/fop_mkdir
+ * operations, where the name and parent vnode are available, the following
+ * rules are used to determine updates to the child:
+ *
+ * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
+ * on the child. Until the parent v_path is refreshed to a valid state, the
+ * child v_path must be considered invalid too.
+ *
+ * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
+ * v_path_stamp value from its parent and its v_path is updated.
+ *
+ * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
+ * an indication that the child v_path is stale. The v_path is updated and
+ * v_path_stamp in the child is set to the current gethrtime() value.
+ *
+ * It does _not_ inherit the parent v_path_stamp in order to propagate the
+ * time of v_path invalidation through the directory structure. This
+ * prevents concurrent invalidations (operating with a now-incorrect v_path)
+ * at deeper levels in the tree from persisting.
+ *
+ * 4. If the child v_path_stamp is greater or equal to the parent, no action
+ * needs to be taken.
+ *
+ * Note that fop_rename operations do not follow this ruleset. They perform an
+ * explicit update of v_path and v_path_stamp (setting it to the current time).
+ *
+ * With these constraints in place, v_path invalidations and updates should
+ * proceed in a timely manner as vnodes are accessed. While there still are
+ * limited cases where vnodetopath operations will fail, the risk is minimized.
*/
struct fem_head; /* from fem.h */
@@ -247,6 +300,7 @@ typedef struct vnode {
void *v_locality; /* hook for locality info */
struct fem_head *v_femhead; /* fs monitoring */
char *v_path; /* cached path */
+ hrtime_t v_path_stamp; /* timestamp for cached path */
uint_t v_rdcnt; /* open for read count (VREG only) */
uint_t v_wrcnt; /* open for write count (VREG only) */
u_longlong_t v_mmap_read; /* mmap read count */
@@ -1293,6 +1347,11 @@ void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
const char *path, size_t plen);
void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len);
+/* Private vnode manipulation functions */
+void vn_clearpath(vnode_t *, hrtime_t);
+void vn_updatepath(vnode_t *, vnode_t *, const char *);
+
+
/* Vnode event notification */
void vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *);