summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/fs/vnode.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/fs/vnode.c')
-rw-r--r--usr/src/uts/common/fs/vnode.c173
1 files changed, 164 insertions, 9 deletions
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 4e73f7f6e6..953ee80471 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -25,6 +25,7 @@
* Copyright 2022 Spencer Evans-Cole.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -209,6 +210,11 @@ static void (**vsd_destructor)(void *);
cr = crgetmapped(cr); \
}
+#define VOP_LATENCY_10MS 10000000
+#define VOP_LATENCY_100MS 100000000
+#define VOP_LATENCY_1S 1000000000
+#define VOP_LATENCY_10S 10000000000
+
/*
* Convert stat(2) formats to vnode types and vice versa. (Knows about
* numerical order of S_IFMT and vnode types.)
@@ -849,6 +855,36 @@ vn_rele(vnode_t *vp)
mutex_exit(&vp->v_lock);
}
+void
+vn_phantom_rele(vnode_t *vp)
+{
+ mutex_enter(&vp->v_lock);
+ VERIFY3U(vp->v_count, >=, vp->v_phantom_count);
+ vp->v_phantom_count--;
+ DTRACE_PROBE1(vn__phantom_rele, vnode_t *, vp);
+ if (vp->v_count == 1) {
+ ASSERT0(vp->v_phantom_count);
+ mutex_exit(&vp->v_lock);
+ VOP_INACTIVE(vp, CRED(), NULL);
+ return;
+ }
+ VERIFY(vp->v_count > 0);
+ VN_RELE_LOCKED(vp);
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Return the number of non-phantom holds. Things such as portfs will use
+ * phantom holds to prevent it from blocking filesystems from mounting over
+ * watched directories.
+ */
+uint_t
+vn_count(vnode_t *vp)
+{
+ ASSERT(MUTEX_HELD(&vp->v_lock));
+ return (vp->v_count - vp->v_phantom_count);
+}
+
/*
* Release a vnode referenced by the DNLC. Multiple DNLC references are treated
* as a single reference, so v_count is not decremented until the last DNLC hold
@@ -1130,7 +1166,20 @@ top:
* Do remaining checks for FNOFOLLOW and FNOLINKS.
*/
if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
- error = ELOOP;
+ /*
+ * The __FLXPATH flag is a private interface for use by the lx
+ * brand in order to emulate open(O_NOFOLLOW|O_PATH) which,
+ * when a symbolic link is encountered, returns a file
+ * descriptor which references it.
+ * See uts/common/brand/lx/syscall/lx_open.c
+ *
+ * When this flag is set, VOP_OPEN() is not called (for a
+ * symlink, most filesystems will return ENOSYS anyway)
+ * and the link's vnode is returned to be linked to the
+ * file descriptor.
+ */
+ if ((filemode & __FLXPATH) == 0)
+ error = ELOOP;
goto out;
}
if (filemode & FNOLINKS) {
@@ -2441,6 +2490,7 @@ vn_reinit(vnode_t *vp)
{
vp->v_count = 1;
vp->v_count_dnlc = 0;
+ vp->v_phantom_count = 0;
vp->v_vfsp = NULL;
vp->v_stream = NULL;
vp->v_vfsmountedhere = NULL;
@@ -2497,6 +2547,7 @@ vn_free(vnode_t *vp)
*/
ASSERT((vp->v_count == 0) || (vp->v_count == 1));
ASSERT(vp->v_count_dnlc == 0);
+ ASSERT0(vp->v_phantom_count);
VERIFY(vp->v_path != NULL);
if (vp->v_path != vn_vpath_empty) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
@@ -2587,6 +2638,7 @@ vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
+ (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
(void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
}
@@ -2601,12 +2653,13 @@ vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
}
void
-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
+vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
+ caller_context_t *ct)
{
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
- (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
+ (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
}
void
@@ -2693,6 +2746,15 @@ vnevent_truncate(vnode_t *vp, caller_context_t *ct)
(void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
}
+void
+vnevent_resize(vnode_t *vp, caller_context_t *ct)
+{
+ if (vp == NULL || vp->v_femhead == NULL) {
+ return;
+ }
+ (void) VOP_VNEVENT(vp, VE_RESIZE, NULL, NULL, ct);
+}
+
/*
* Vnode accessors.
*/
@@ -3468,14 +3530,58 @@ fop_read(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start = 0, lat;
+ ssize_t len;
+ int err;
+
+ if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+ vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, read,
- read_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
+
+ if (start != 0) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += len;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}
@@ -3487,14 +3593,63 @@ fop_write(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start = 0, lat;
+ ssize_t len;
+ int err;
+
+ /*
+ * For the purposes of VFS kstat consumers, the "waitq" calculation is
+ * repurposed as the active queue for VFS write operations. There's no
+ * actual wait queue for VFS operations.
+ */
+ if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+ vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, write,
- write_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
+
+ if (start != 0) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += len;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}