Diffstat (limited to 'usr/src/uts/common/fs/vnode.c')

 usr/src/uts/common/fs/vnode.c | 173
 1 file changed, 164 insertions(+), 9 deletions(-)
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 4e73f7f6e6..953ee80471 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -25,6 +25,7 @@
  * Copyright 2022 Spencer Evans-Cole.
  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -209,6 +210,11 @@ static void (**vsd_destructor)(void *);
 		cr = crgetmapped(cr);	\
 	}
 
+#define	VOP_LATENCY_10MS	10000000
+#define	VOP_LATENCY_100MS	100000000
+#define	VOP_LATENCY_1S		1000000000
+#define	VOP_LATENCY_10S		10000000000
+
 /*
  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
  * numerical order of S_IFMT and vnode types.)
@@ -849,6 +855,36 @@ vn_rele(vnode_t *vp)
 	mutex_exit(&vp->v_lock);
 }
 
+void
+vn_phantom_rele(vnode_t *vp)
+{
+	mutex_enter(&vp->v_lock);
+	VERIFY3U(vp->v_count, >=, vp->v_phantom_count);
+	vp->v_phantom_count--;
+	DTRACE_PROBE1(vn__phantom_rele, vnode_t *, vp);
+	if (vp->v_count == 1) {
+		ASSERT0(vp->v_phantom_count);
+		mutex_exit(&vp->v_lock);
+		VOP_INACTIVE(vp, CRED(), NULL);
+		return;
+	}
+	VERIFY(vp->v_count > 0);
+	VN_RELE_LOCKED(vp);
+	mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Return the number of non-phantom holds.  Things such as portfs take
+ * phantom holds so that watching a directory does not block filesystems
+ * from mounting over it.
+ */
+uint_t
+vn_count(vnode_t *vp)
+{
+	ASSERT(MUTEX_HELD(&vp->v_lock));
+	return (vp->v_count - vp->v_phantom_count);
+}
+
 /*
  * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
  * as a single reference, so v_count is not decremented until the last DNLC hold
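
Note on the phantom hold accounting above: a phantom hold is counted in both
v_count and v_phantom_count, so vn_count() reports only the "real" holds on a
vnode. The hold-side counterpart is not part of this hunk; the sketch below is
illustrative only and assumes it bumps both counters under v_lock, which is
what vn_phantom_rele()'s VERIFY3U() and vn_count()'s subtraction imply. A
mount-over busy check built on vn_count() then ignores watcher (e.g. portfs)
holds:

	/* Illustrative only -- not part of this change. */
	static void
	vn_phantom_hold_sketch(vnode_t *vp)
	{
		mutex_enter(&vp->v_lock);
		vp->v_count++;		/* a phantom hold is still a hold... */
		vp->v_phantom_count++;	/* ...but is tracked separately */
		mutex_exit(&vp->v_lock);
	}

	static boolean_t
	vn_mountover_busy_sketch(vnode_t *vp)
	{
		boolean_t busy;

		mutex_enter(&vp->v_lock);	/* vn_count() asserts v_lock */
		busy = (vn_count(vp) > 1);	/* holds beyond the caller's */
		mutex_exit(&vp->v_lock);
		return (busy);
	}
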
@@ -1130,7 +1166,20 @@ top:
 	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
 	 */
 	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
-		error = ELOOP;
+		/*
+		 * The __FLXPATH flag is a private interface for use by the lx
+		 * brand to emulate open(O_NOFOLLOW|O_PATH) which, when a
+		 * symbolic link is encountered, returns a file descriptor
+		 * that references the link itself.
+		 * See uts/common/brand/lx/syscall/lx_open.c.
+		 *
+		 * When this flag is set, VOP_OPEN() is not called (for a
+		 * symlink, most filesystems will return ENOSYS anyway) and
+		 * the link's vnode is returned so that it can be attached to
+		 * the file descriptor.
+		 */
+		if ((filemode & __FLXPATH) == 0)
+			error = ELOOP;
 		goto out;
 	}
 	if (filemode & FNOLINKS) {
@@ -2441,6 +2490,7 @@ vn_reinit(vnode_t *vp)
 {
 	vp->v_count = 1;
 	vp->v_count_dnlc = 0;
+	vp->v_phantom_count = 0;
 	vp->v_vfsp = NULL;
 	vp->v_stream = NULL;
 	vp->v_vfsmountedhere = NULL;
@@ -2497,6 +2547,7 @@ vn_free(vnode_t *vp)
 	 */
 	ASSERT((vp->v_count == 0) || (vp->v_count == 1));
 	ASSERT(vp->v_count_dnlc == 0);
+	ASSERT0(vp->v_phantom_count);
 	VERIFY(vp->v_path != NULL);
 	if (vp->v_path != vn_vpath_empty) {
 		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
@@ -2587,6 +2638,7 @@ vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
 	if (vp == NULL || vp->v_femhead == NULL) {
 		return;
 	}
+	(void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
 	(void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
 }
 
@@ -2601,12 +2653,13 @@ vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
 }
 
 void
-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
+vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
+    caller_context_t *ct)
 {
 	if (vp == NULL || vp->v_femhead == NULL) {
 		return;
 	}
-	(void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
+	(void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
 }
 
 void
@@ -2693,6 +2746,15 @@ vnevent_truncate(vnode_t *vp, caller_context_t *ct)
 	(void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
 }
 
+void
+vnevent_resize(vnode_t *vp, caller_context_t *ct)
+{
+	if (vp == NULL || vp->v_femhead == NULL) {
+		return;
+	}
+	(void) VOP_VNEVENT(vp, VE_RESIZE, NULL, NULL, ct);
+}
+
 /*
  * Vnode accessors.
  */
@@ -3468,14 +3530,58 @@ fop_read(
 	cred_t *cr,
 	caller_context_t *ct)
 {
-	int err;
 	ssize_t resid_start = uiop->uio_resid;
+	zone_t *zonep = curzone;
+	zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+	hrtime_t start = 0, lat;
+	ssize_t len;
+	int err;
+
+	if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+	    vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+		start = gethrtime();
+
+		mutex_enter(&zonep->zone_vfs_lock);
+		kstat_runq_enter(&zonep->zone_vfs_rwstats);
+		mutex_exit(&zonep->zone_vfs_lock);
+	}
 
 	VOPXID_MAP_CR(vp, cr);
 
 	err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
-	VOPSTATS_UPDATE_IO(vp, read,
-	    read_bytes, (resid_start - uiop->uio_resid));
+	len = resid_start - uiop->uio_resid;
+
+	VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
+
+	if (start != 0) {
+		mutex_enter(&zonep->zone_vfs_lock);
+		zonep->zone_vfs_rwstats.reads++;
+		zonep->zone_vfs_rwstats.nread += len;
+		kstat_runq_exit(&zonep->zone_vfs_rwstats);
+		mutex_exit(&zonep->zone_vfs_lock);
+
+		lat = gethrtime() - start;
+
+		if (lat >= VOP_LATENCY_10MS) {
+			if (lat < VOP_LATENCY_100MS)
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+			else if (lat < VOP_LATENCY_1S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+			} else if (lat < VOP_LATENCY_10S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+			} else {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+			}
+		}
+	}
+
 	return (err);
 }
 
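
Note on the bucket cascade in fop_read() above (repeated in fop_write()
below): the zone latency counters are cumulative, not exclusive. An operation
that takes 2s increments zv_10ms_ops, zv_100ms_ops, and zv_1s_ops, so each
kstat reads as "operations that took at least this long". A minimal sketch of
an equivalent, de-duplicated form (not part of this change):

	/* Equivalent rewrite of the cascade above; a sketch only. */
	static void
	vop_latency_bump_sketch(zone_vfs_kstat_t *zvp, hrtime_t lat)
	{
		if (lat < VOP_LATENCY_10MS)
			return;
		atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
		if (lat < VOP_LATENCY_100MS)
			return;
		atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
		if (lat < VOP_LATENCY_1S)
			return;
		atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
		if (lat < VOP_LATENCY_10S)
			return;
		atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
	}

Both fop_read() and fop_write() could call such a helper; the diff instead
repeats the cascade inline in each function.
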
@@ -3487,14 +3593,63 @@ fop_write(
 	cred_t *cr,
 	caller_context_t *ct)
 {
-	int err;
 	ssize_t resid_start = uiop->uio_resid;
+	zone_t *zonep = curzone;
+	zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+	hrtime_t start = 0, lat;
+	ssize_t len;
+	int err;
+
+	/*
+	 * For the purposes of VFS kstat consumers, the "waitq" calculation is
+	 * repurposed as the active queue for VFS write operations.  There is
+	 * no actual wait queue for VFS operations.
+	 */
+	if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+	    vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+		start = gethrtime();
+
+		mutex_enter(&zonep->zone_vfs_lock);
+		kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+		mutex_exit(&zonep->zone_vfs_lock);
+	}
 
 	VOPXID_MAP_CR(vp, cr);
 
 	err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
-	VOPSTATS_UPDATE_IO(vp, write,
-	    write_bytes, (resid_start - uiop->uio_resid));
+	len = resid_start - uiop->uio_resid;
+
+	VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
+
+	if (start != 0) {
+		mutex_enter(&zonep->zone_vfs_lock);
+		zonep->zone_vfs_rwstats.writes++;
+		zonep->zone_vfs_rwstats.nwritten += len;
+		kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+		mutex_exit(&zonep->zone_vfs_lock);
+
+		lat = gethrtime() - start;
+
+		if (lat >= VOP_LATENCY_10MS) {
+			if (lat < VOP_LATENCY_100MS)
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+			else if (lat < VOP_LATENCY_1S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+			} else if (lat < VOP_LATENCY_10S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+			} else {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+			}
+		}
+	}
+
 	return (err);
 }
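
Note on the write path above: it mirrors fop_read() exactly, except that
writes are tracked via the kstat wait-queue slot (kstat_waitq_enter/exit)
while reads use the run-queue slot (kstat_runq_enter/exit), giving the single
zone_vfs_rwstats kstat_io_t two independent in-flight gauges. The enter/exit
pairing is keyed off start != 0: gethrtime() reports time since boot and is
in practice never zero here, so start doubles as both the timestamp and the
"did we enter the queue" flag. Reduced to its skeleton (a sketch, not part of
this change):

	/* Skeleton of the guard pattern used by fop_read()/fop_write(). */
	static int
	fop_io_pattern_sketch(vnode_t *vp, zone_t *zonep)
	{
		hrtime_t start = 0;
		int err;

		if (vp->v_vfsp != NULL &&
		    (vp->v_vfsp->vfs_flag & VFS_STATS)) {
			start = gethrtime();	/* nonzero: queue entered */
			mutex_enter(&zonep->zone_vfs_lock);
			kstat_waitq_enter(&zonep->zone_vfs_rwstats);
			mutex_exit(&zonep->zone_vfs_lock);
		}

		err = 0;	/* ...the VOP call itself runs unlocked... */

		if (start != 0) {	/* exit only if we entered */
			mutex_enter(&zonep->zone_vfs_lock);
			kstat_waitq_exit(&zonep->zone_vfs_rwstats);
			mutex_exit(&zonep->zone_vfs_lock);
		}
		return (err);
	}
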