summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Haley <Tim.Haley@Sun.COM>2009-06-27 19:22:00 -0600
committerTim Haley <Tim.Haley@Sun.COM>2009-06-27 19:22:00 -0600
commitd47621a49c68c359358f6630aa45cc320762f51f (patch)
treeba8270fd3d8f4c072056432553d71da4b98b96a7
parentaa2aa9a662539940ddbc8610da5a3a874ebd7503 (diff)
downloadillumos-gate-d47621a49c68c359358f6630aa45cc320762f51f.tar.gz
6775100 stat() performance on files on zfs should be improved
6827779 rrwlock is overly protective of its counters
-rw-r--r--usr/src/uts/common/fs/zfs/rrwlock.c39
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_acl.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_znode.h2
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_acl.c111
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vnops.c99
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_znode.c6
6 files changed, 200 insertions, 58 deletions
diff --git a/usr/src/uts/common/fs/zfs/rrwlock.c b/usr/src/uts/common/fs/zfs/rrwlock.c
index 710685dbc7..4cef53f951 100644
--- a/usr/src/uts/common/fs/zfs/rrwlock.c
+++ b/usr/src/uts/common/fs/zfs/rrwlock.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/refcount.h>
#include <sys/rrwlock.h>
@@ -118,7 +116,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
rrw_node_t *prev = NULL;
if (refcount_count(&rrl->rr_linked_rcount) == 0)
- return (NULL);
+ return (B_FALSE);
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
if (rn->rn_rrl == rrl) {
@@ -159,6 +157,14 @@ static void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
+#if !defined(DEBUG) && defined(_KERNEL)
+ if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
+ rrl->rr_anon_rcount.rc_count++;
+ mutex_exit(&rrl->rr_lock);
+ return;
+ }
+ DTRACE_PROBE(zfs__rrwfastpath__rdmiss);
+#endif
ASSERT(rrl->rr_writer != curthread);
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
@@ -208,19 +214,28 @@ void
rrw_exit(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
+#if !defined(DEBUG) && defined(_KERNEL)
+ if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) {
+ rrl->rr_anon_rcount.rc_count--;
+ if (rrl->rr_anon_rcount.rc_count == 0)
+ cv_broadcast(&rrl->rr_cv);
+ mutex_exit(&rrl->rr_lock);
+ return;
+ }
+ DTRACE_PROBE(zfs__rrwfastpath__exitmiss);
+#endif
ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) ||
!refcount_is_zero(&rrl->rr_linked_rcount) ||
rrl->rr_writer != NULL);
if (rrl->rr_writer == NULL) {
- if (rrn_find_and_remove(rrl)) {
- if (refcount_remove(&rrl->rr_linked_rcount, tag) == 0)
- cv_broadcast(&rrl->rr_cv);
-
- } else {
- if (refcount_remove(&rrl->rr_anon_rcount, tag) == 0)
- cv_broadcast(&rrl->rr_cv);
- }
+ int64_t count;
+ if (rrn_find_and_remove(rrl))
+ count = refcount_remove(&rrl->rr_linked_rcount, tag);
+ else
+ count = refcount_remove(&rrl->rr_anon_rcount, tag);
+ if (count == 0)
+ cv_broadcast(&rrl->rr_cv);
} else {
ASSERT(rrl->rr_writer == curthread);
ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) &&
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h
index f5e5aa7f4a..3488962e21 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h
@@ -203,6 +203,7 @@ void zfs_oldace_byteswap(ace_t *, int);
void zfs_ace_byteswap(void *, size_t, boolean_t);
extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr);
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
+int zfs_fastaccesschk_execute(struct znode *, cred_t *);
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
index 69f4b50f5b..5db5b8d518 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
@@ -77,6 +77,7 @@ extern "C" {
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
+#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */
/*
* Is ID ephemeral?
@@ -200,6 +201,7 @@ typedef struct znode {
uint64_t z_gen; /* generation (same as zp_gen) */
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
+ zfs_acl_t *z_acl_cached; /* cached acl */
list_node_t z_link_node; /* all znodes in fs link */
/*
* These are dmu managed fields.
diff --git a/usr/src/uts/common/fs/zfs/zfs_acl.c b/usr/src/uts/common/fs/zfs/zfs_acl.c
index 531af5150c..c0ccfd47cf 100644
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c
@@ -781,6 +781,7 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
uint64_t who;
uint16_t iflags, type;
uint32_t access_mask;
+ boolean_t an_exec_denied = B_FALSE;
mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
@@ -905,8 +906,26 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
}
}
}
+ } else {
+ /*
+ * Only care if this IDENTIFIER_GROUP or
+ * USER ACE denies execute access to someone,
+ * mode is not affected
+ */
+ if ((access_mask & ACE_EXECUTE) && type == DENY)
+ an_exec_denied = B_TRUE;
}
}
+
+ if (!an_exec_denied && !(seen & (S_IXUSR | S_IXGRP | S_IXOTH)) ||
+ !(mode & (S_IXUSR | S_IXGRP | S_IXOTH)))
+ an_exec_denied = B_TRUE;
+
+ if (an_exec_denied)
+ zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED;
+ else
+ zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED;
+
return (mode);
}
@@ -960,8 +979,14 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+ if (zp->z_acl_cached) {
+ *aclpp = zp->z_acl_cached;
+ return (0);
+ }
+
if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
*aclpp = zfs_acl_node_read_internal(zp, will_modify);
+ zp->z_acl_cached = *aclpp;
return (0);
}
@@ -994,7 +1019,7 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
return (error);
}
- *aclpp = aclp;
+ zp->z_acl_cached = *aclpp = aclp;
return (0);
}
@@ -1019,6 +1044,11 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
dmu_buf_will_dirty(zp->z_dbuf, tx);
+ if (zp->z_acl_cached != aclp && zp->z_acl_cached) {
+ zfs_acl_free(zp->z_acl_cached);
+ zp->z_acl_cached = NULL;
+ }
+
zphys->zp_mode = zfs_mode_compute(zp, aclp);
/*
@@ -1606,6 +1636,7 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
if (error == 0) {
(*aclp)->z_hints = zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS;
zfs_acl_chmod(zp->z_zfsvfs, zp->z_phys->zp_uid, mode, *aclp);
+ zp->z_acl_cached = *aclp;
}
mutex_exit(&zp->z_acl_lock);
mutex_exit(&zp->z_lock);
@@ -1869,7 +1900,6 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
mutex_exit(&dzp->z_acl_lock);
acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
- zfs_acl_free(paclp);
} else {
acl_ids->z_aclp =
zfs_acl_alloc(zfs_acl_version_zp(dzp));
@@ -1998,8 +2028,6 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
mutex_exit(&zp->z_acl_lock);
- zfs_acl_free(aclp);
-
return (0);
}
@@ -2095,11 +2123,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
}
top:
- if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) {
- zfs_acl_free(aclp);
- return (error);
- }
-
mutex_enter(&zp->z_lock);
mutex_enter(&zp->z_acl_lock);
@@ -2154,7 +2177,7 @@ top:
if (fuidp)
zfs_fuid_info_free(fuidp);
- zfs_acl_free(aclp);
+ zp->z_acl_cached = aclp;
dmu_tx_commit(tx);
done:
mutex_exit(&zp->z_acl_lock);
@@ -2301,7 +2324,6 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
checkit = B_TRUE;
break;
} else {
- zfs_acl_free(aclp);
mutex_exit(&zp->z_acl_lock);
return (EIO);
}
@@ -2334,7 +2356,6 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
}
mutex_exit(&zp->z_acl_lock);
- zfs_acl_free(aclp);
/* Put the found 'denies' back on the working mode */
if (deny_mask) {
@@ -2420,6 +2441,72 @@ zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
check_privs, B_FALSE, cr));
}
+int
+zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
+{
+ boolean_t owner = B_FALSE;
+ boolean_t groupmbr = B_FALSE;
+ boolean_t is_attr;
+ uid_t fowner;
+ uid_t gowner;
+ uid_t uid = crgetuid(cr);
+ int error;
+
+ if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
+ return (EACCES);
+
+ is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) &&
+ (ZTOV(zdp)->v_type == VDIR));
+ if (is_attr)
+ goto slow;
+
+ mutex_enter(&zdp->z_acl_lock);
+
+ if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) {
+ mutex_exit(&zdp->z_acl_lock);
+ return (0);
+ }
+
+ if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 ||
+ FUID_INDEX(zdp->z_phys->zp_gid) != 0) {
+ mutex_exit(&zdp->z_acl_lock);
+ goto slow;
+ }
+
+ fowner = (uid_t)zdp->z_phys->zp_uid;
+ gowner = (uid_t)zdp->z_phys->zp_gid;
+
+ if (uid == fowner) {
+ owner = B_TRUE;
+ if (zdp->z_phys->zp_mode & S_IXUSR) {
+ mutex_exit(&zdp->z_acl_lock);
+ return (0);
+ }
+ }
+ if (groupmember(gowner, cr)) {
+ groupmbr = B_TRUE;
+ if (zdp->z_phys->zp_mode & S_IXGRP) {
+ mutex_exit(&zdp->z_acl_lock);
+ return (0);
+ }
+ }
+ if (!owner && !groupmbr) {
+ if (zdp->z_phys->zp_mode & S_IXOTH) {
+ mutex_exit(&zdp->z_acl_lock);
+ return (0);
+ }
+ }
+
+ mutex_exit(&zdp->z_acl_lock);
+
+slow:
+ DTRACE_PROBE(zfs__fastpath__execute__access__miss);
+ ZFS_ENTER(zdp->z_zfsvfs);
+ error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
+ ZFS_EXIT(zdp->z_zfsvfs);
+ return (error);
+}
+
/*
* Determine whether Access should be granted/denied, invoking least
* priv subsytem when a deny is determined.
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 8dc8b710cc..4e238756cf 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -988,6 +988,27 @@ zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
}
/*
+ * If vnode is for a device return a specfs vnode instead.
+ */
+static int
+specvp_check(vnode_t **vpp, cred_t *cr)
+{
+ int error = 0;
+
+ if (IS_DEVVP(*vpp)) {
+ struct vnode *svp;
+
+ svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+ VN_RELE(*vpp);
+ if (svp == NULL)
+ error = ENOSYS;
+ *vpp = svp;
+ }
+ return (error);
+}
+
+
+/*
* Lookup an entry in a directory, or an extended attribute directory.
* If it exists, return a held vnode reference for it.
*
@@ -1017,7 +1038,46 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
{
znode_t *zdp = VTOZ(dvp);
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
- int error;
+ int error = 0;
+
+ /* fast path */
+ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
+
+ if (dvp->v_type != VDIR) {
+ return (ENOTDIR);
+ } else if (zdp->z_dbuf == NULL) {
+ return (EIO);
+ }
+
+ if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
+ error = zfs_fastaccesschk_execute(zdp, cr);
+ if (!error) {
+ *vpp = dvp;
+ VN_HOLD(*vpp);
+ return (0);
+ }
+ return (error);
+ } else {
+ vnode_t *tvp = dnlc_lookup(dvp, nm);
+
+ if (tvp) {
+ error = zfs_fastaccesschk_execute(zdp, cr);
+ if (error) {
+ VN_RELE(tvp);
+ return (error);
+ }
+ if (tvp == DNLC_NO_VNODE) {
+ VN_RELE(tvp);
+ return (ENOENT);
+ } else {
+ *vpp = tvp;
+ return (specvp_check(vpp, cr));
+ }
+ }
+ }
+ }
+
+ DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zdp);
@@ -1082,21 +1142,8 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
}
error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
- if (error == 0) {
- /*
- * Convert device special files
- */
- if (IS_DEVVP(*vpp)) {
- vnode_t *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL)
- error = ENOSYS;
- else
- *vpp = svp;
- }
- }
+ if (error == 0)
+ error = specvp_check(vpp, cr);
ZFS_EXIT(zfsvfs);
return (error);
@@ -1332,19 +1379,7 @@ out:
VN_RELE(ZTOV(zp));
} else {
*vpp = ZTOV(zp);
- /*
- * If vnode is for a device return a specfs vnode instead.
- */
- if (IS_DEVVP(*vpp)) {
- struct vnode *svp;
-
- svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
- VN_RELE(*vpp);
- if (svp == NULL) {
- error = ENOSYS;
- }
- *vpp = svp;
- }
+ error = specvp_check(vpp, cr);
}
ZFS_EXIT(zfsvfs);
@@ -2456,6 +2491,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
top:
attrzp = NULL;
+ /* Can this be moved to before the top label? */
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
ZFS_EXIT(zfsvfs);
return (EROFS);
@@ -2856,11 +2892,6 @@ out:
if (attrzp)
VN_RELE(ZTOV(attrzp));
- if (aclp) {
- zfs_acl_free(aclp);
- aclp = NULL;
- }
-
if (fuidp) {
zfs_fuid_info_free(fuidp);
fuidp = NULL;
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c
index 8ced951743..3ba1e89c86 100644
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c
@@ -133,6 +133,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_dbuf = NULL;
zp->z_dirlocks = NULL;
+ zp->z_acl_cached = NULL;
return (0);
}
@@ -1081,6 +1082,11 @@ zfs_znode_free(znode_t *zp)
list_remove(&zfsvfs->z_all_znodes, zp);
mutex_exit(&zfsvfs->z_znodes_lock);
+ if (zp->z_acl_cached) {
+ zfs_acl_free(zp->z_acl_cached);
+ zp->z_acl_cached = NULL;
+ }
+
kmem_cache_free(znode_cache, zp);
VFS_RELE(zfsvfs->z_vfs);