diff options
author | jwahlig <none@none> | 2007-12-05 05:02:08 -0800 |
---|---|---|
committer | jwahlig <none@none> | 2007-12-05 05:02:08 -0800 |
commit | cfae96c24c7523c74c9efb583764b812b6b309c5 (patch) | |
tree | 654e8eb9b8a8b8961ce0805776722efa9f05309f /usr/src | |
parent | 1c2187e7a735b31a46941879f0bd124e0aa325a3 (diff) | |
download | illumos-gate-cfae96c24c7523c74c9efb583764b812b6b309c5.tar.gz |
PSARC 2007/632 Caller context flags
6627507 Remove cross calls from v2 and v3 server
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs3_srv.c | 168 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c | 391 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_srv.c | 26 | ||||
-rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_srv.c | 231 | ||||
-rw-r--r-- | usr/src/uts/common/nfs/nfs4.h | 28 | ||||
-rw-r--r-- | usr/src/uts/common/sys/vnode.h | 9 |
6 files changed, 528 insertions, 325 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index 0660a85e59..bf48665d6d 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -84,6 +84,8 @@ static int vattr_to_wcc_attr(struct vattr *, wcc_attr *); static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *); static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *); +u_longlong_t nfs3_srv_caller_id; + /* ARGSUSED */ void rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, @@ -141,6 +143,7 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, int flag; int in_crit = 0; struct flock64 bf; + caller_context_t ct; bvap = NULL; avap = NULL; @@ -181,10 +184,6 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, * allow the client to retrasmit its request. */ if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) { - if (rfs4_check_delegated(FWRITE, vp, TRUE)) { - resp->status = NFS3ERR_JUKEBOX; - goto out1; - } if (nbl_need_check(vp)) { nbl_start_crit(vp, RW_READER); in_crit = 1; @@ -233,6 +232,11 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, (exi->exi_export.ex_flags & EX_NOSUID)) ava.va_mode &= ~(VSUID | VSGID); + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs3_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; + /* * We need to specially handle size changes because it is * possible for the client to create a file with modes @@ -278,12 +282,18 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, bf.l_sysid = 0; bf.l_pid = 0; error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, - (offset_t)ava.va_size, cr, NULL); + (offset_t)ava.va_size, cr, &ct); } } if (!error && ava.va_mask) - error = VOP_SETATTR(vp, &ava, flag, cr, NULL); + error = VOP_SETATTR(vp, &ava, flag, cr, &ct); + + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + resp->status = NFS3ERR_JUKEBOX; + goto out1; + } #ifdef DEBUG if (rfs3_do_post_op_attr) { @@ -299,7 +309,7 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, /* * Force modified metadata out to stable storage. */ - (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); + (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); if (error) goto out; @@ -401,7 +411,7 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, if (PUBLIC_FH3(&args->what.dir)) { publicfh_flag = TRUE; error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp, - &exi, &sec); + &exi, &sec); if (error && exi != NULL) exi_rele(exi); /* See comment below Re: publicfh_flag */ /* @@ -439,7 +449,7 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, } } else { error = VOP_LOOKUP(dvp, args->what.name, &vp, - NULL, 0, NULL, cr, NULL, NULL, NULL); + NULL, 0, NULL, cr, NULL, NULL, NULL); } if (is_system_labeled() && error == 0) { @@ -855,6 +865,7 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, int alloc_err = 0; int in_crit = 0; int need_rwunlock = 0; + caller_context_t ct; vap = NULL; @@ -879,15 +890,10 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, } } - /* - * Check to see if the v4 side of the server has delegated - * this file. If so, then we return JUKEBOX to allow the - * client to retrasmit its request. - */ - if (rfs4_check_delegated(FREAD, vp, FALSE)) { - resp->status = NFS3ERR_JUKEBOX; - goto out1; - } + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs3_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; /* * Enter the critical region before calling VOP_RWLOCK @@ -903,11 +909,18 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, } } - (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); + error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); + + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + resp->status = NFS3ERR_JUKEBOX; + goto out1; + } + need_rwunlock = 1; va.va_mask = AT_ALL; - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); /* * If we can't get the attributes, then we can't do the @@ -929,11 +942,11 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, } if (crgetuid(cr) != va.va_uid) { - error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); + error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); if (error) { if (curthread->t_flag & T_WOULDBLOCK) goto out; - error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); + error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); if (error) goto out; } @@ -946,7 +959,7 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, offset = args->offset; if (offset >= va.va_size) { - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); VN_RELE(vp); @@ -961,7 +974,7 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, } if (args->count == 0) { - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); VN_RELE(vp); @@ -1002,15 +1015,20 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, uio.uio_loffset = args->offset; uio.uio_resid = args->count; - error = VOP_READ(vp, &uio, 0, cr, NULL); + error = VOP_READ(vp, &uio, 0, cr, &ct); if (error) { freeb(mp); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + resp->status = NFS3ERR_JUKEBOX; + goto out1; + } goto out; } va.va_mask = AT_ALL; - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); #ifdef DEBUG if (rfs3_do_post_op_attr) { @@ -1027,7 +1045,7 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, vap = &va; #endif - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); #if 0 /* notyet */ /* @@ -1069,7 +1087,7 @@ out: out1: if (vp != NULL) { if (need_rwunlock) - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); VN_RELE(vp); @@ -1123,6 +1141,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, cred_t *savecred; int in_crit = 0; int rwlock_ret = -1; + caller_context_t ct; vp = nfs3_fhtovp(&args->file, exi); if (vp == NULL) { @@ -1145,15 +1164,10 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, } } - /* - * Check to see if the v4 side of the server has delegated - * this file. If so, then we return JUKEBOX to allow the - * client to retrasmit its request. - */ - if (rfs4_check_delegated(FWRITE, vp, FALSE)) { - resp->status = NFS3ERR_JUKEBOX; - goto out1; - } + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs3_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; /* * We have to enter the critical region before calling VOP_RWLOCK @@ -1169,10 +1183,18 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, } } - rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); + rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); + + /* check if a monitor detected a delegation conflict */ + if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + resp->status = NFS3ERR_JUKEBOX; + rwlock_ret = -1; + goto out1; + } + bva.va_mask = AT_ALL; - error = VOP_GETATTR(vp, &bva, 0, cr, NULL); + error = VOP_GETATTR(vp, &bva, 0, cr, &ct); /* * If we can't get the attributes, then we can't do the @@ -1204,7 +1226,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, } if (crgetuid(cr) != bva.va_uid && - (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL))) + (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) goto out; if (MANDLOCK(vp, bva.va_mode)) { @@ -1213,7 +1235,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, } if (args->count == 0) { - VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); VN_RELE(vp); resp->status = NFS3_OK; vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); @@ -1278,14 +1300,20 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, */ savecred = curthread->t_cred; curthread->t_cred = cr; - error = VOP_WRITE(vp, &uio, ioflag, cr, NULL); + error = VOP_WRITE(vp, &uio, ioflag, cr, &ct); curthread->t_cred = savecred; if (iovp != iov) kmem_free(iovp, sizeof (*iovp) * iovcnt); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + resp->status = NFS3ERR_JUKEBOX; + goto out1; + } + ava.va_mask = AT_ALL; - avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava; + avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava; #ifdef DEBUG if (!rfs3_do_post_op_attr) @@ -1295,7 +1323,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, if (error) goto out; - VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); if (in_crit) nbl_end_crit(vp); VN_RELE(vp); @@ -1309,8 +1337,8 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, */ if (rwlock_ret != V_WRITELOCK_TRUE) { if (bvap == NULL || avap == NULL || - bvap->va_seq == 0 || avap->va_seq == 0 || - avap->va_seq != (bvap->va_seq + 1)) { + bvap->va_seq == 0 || avap->va_seq == 0 || + avap->va_seq != (bvap->va_seq + 1)) { bvap = NULL; } } @@ -1331,7 +1359,7 @@ out: out1: if (vp != NULL) { if (rwlock_ret != -1) - VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); if (in_crit) nbl_end_crit(vp); VN_RELE(vp); @@ -1455,7 +1483,7 @@ rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi, * Does file already exist? */ error = VOP_LOOKUP(dvp, args->where.name, &tvp, - NULL, 0, NULL, cr, NULL, NULL, NULL); + NULL, 0, NULL, cr, NULL, NULL, NULL); /* * Check to see if the file has been delegated @@ -1483,7 +1511,7 @@ rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi, tva.va_mask = AT_SIZE; error = VOP_GETATTR(tvp, &tva, 0, cr, - NULL); + NULL); /* * Can't check for conflicts, so return * error. @@ -1492,12 +1520,12 @@ rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi, goto out; offset = tva.va_size < va.va_size ? - tva.va_size : va.va_size; + tva.va_size : va.va_size; len = tva.va_size < va.va_size ? - va.va_size - tva.va_size : - tva.va_size - va.va_size; + va.va_size - tva.va_size : + tva.va_size - va.va_size; if (nbl_conflict(tvp, NBL_WRITE, - offset, len, 0, NULL)) { + offset, len, 0, NULL)) { error = EACCES; goto out; } @@ -1964,7 +1992,7 @@ rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi, goto out; error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr, - NULL, NULL, NULL); + NULL, NULL, NULL); /* * Force modified data and metadata out to stable storage. @@ -2310,7 +2338,7 @@ rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi, * reservation and V4 delegations */ error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0, - NULL, cr, NULL, NULL, NULL); + NULL, cr, NULL, NULL, NULL); if (error != 0) goto out; @@ -2624,7 +2652,7 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, * reservation or V4 delegations. */ error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0, - NULL, cr, NULL, NULL, NULL); + NULL, cr, NULL, NULL, NULL); if (error != 0) goto out; @@ -2656,14 +2684,14 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, if (!nbl_need_check(srcvp)) { error = VOP_RENAME(fvp, args->from.name, tvp, - args->to.name, cr, NULL, 0); + args->to.name, cr, NULL, 0); } else { nbl_start_crit(srcvp, RW_READER); if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) { error = EACCES; } else { error = VOP_RENAME(fvp, args->from.name, tvp, - args->to.name, cr, NULL, 0); + args->to.name, cr, NULL, 0); } nbl_end_crit(srcvp); } @@ -2676,7 +2704,7 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, srcvp->v_path = NULL; mutex_exit(&srcvp->v_lock); vn_setpath(rootdir, tvp, srcvp, args->to.name, - strlen(args->to.name)); + strlen(args->to.name)); if (tmp != NULL) kmem_free(tmp, strlen(tmp) + 1); } @@ -3083,7 +3111,7 @@ rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi, if (count != uio.uio_resid) { namlen = strlen(((struct dirent64 *)data)->d_name); bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT + - roundup(namlen, BYTES_PER_XDR_UNIT); + roundup(namlen, BYTES_PER_XDR_UNIT); } /* * We need to check to see if the number of bytes left @@ -3355,8 +3383,8 @@ getmoredents: * entry exists and attributes and filehandle are also valid */ for (size = prev_len - uio.uio_resid; - size > 0; - size -= dp->d_reclen, dp = nextdp(dp)) { + size > 0; + size -= dp->d_reclen, dp = nextdp(dp)) { if (dp->d_ino == 0) { nents++; @@ -3444,7 +3472,7 @@ good: infop[i].namelen = namlen[i]; error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr, - NULL, NULL, NULL); + NULL, NULL, NULL); if (error) { infop[i].attr.attributes = FALSE; infop[i].fh.handle_follows = FALSE; @@ -3456,7 +3484,7 @@ good: if (rfs3_do_post_op_attr) { nva.va_mask = AT_ALL; nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? - NULL : &nva; + NULL : &nva; } else nvap = NULL; #else @@ -3541,7 +3569,7 @@ rfs3_readdirplus_free(READDIRPLUS3res *resp) if (resp->status == NFS3_OK) { kmem_free(resp->resok.reply.entries, resp->resok.count); kmem_free(resp->resok.infop, - resp->resok.size * sizeof (struct entryplus3_info)); + resp->resok.size * sizeof (struct entryplus3_info)); } } @@ -3970,7 +3998,7 @@ sattr3_to_vattr(sattr3 *sap, struct vattr *vap) * unless sysadmin set nfs_allow_preepoch_time. */ NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, - sap->atime.atime.seconds); + sap->atime.atime.seconds); vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds; vap->va_mask |= AT_ATIME; } else if (sap->atime.set_it == SET_TO_SERVER_TIME) { @@ -3988,7 +4016,7 @@ sattr3_to_vattr(sattr3 *sap, struct vattr *vap) * unless sysadmin set nfs_allow_preepoch_time. */ NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, - sap->mtime.mtime.seconds); + sap->mtime.mtime.seconds); vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds; vap->va_mask |= AT_MTIME; } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) { @@ -4044,8 +4072,8 @@ vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap) /* Return error if time or size overflow */ if (! (NFS_TIME_T_OK(vap->va_mtime.tv_sec) && - NFS_TIME_T_OK(vap->va_ctime.tv_sec) && - NFS3_SIZE_OK(vap->va_size))) { + NFS_TIME_T_OK(vap->va_ctime.tv_sec) && + NFS3_SIZE_OK(vap->va_size))) { return (EOVERFLOW); } wccap->size = (size3)vap->va_size; @@ -4129,6 +4157,8 @@ rfs3_srvrinit(void) if (verfp->id == 0) verfp->id = (uint_t)now.tv_nsec; + nfs3_srv_caller_id = fs_new_caller_id(); + } void diff --git a/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c b/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c index b285807b3f..d4fe209026 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_deleg_ops.c @@ -45,19 +45,71 @@ extern u_longlong_t nfs4_srv_caller_id; /* * This file contains the code for the monitors which are placed on the vnodes * of files that are granted delegations by the nfsV4 server. These monitors - * will detect local access that conflict with the delegations and recall the + * will detect local access, as well as access from other servers + * (NFS and CIFS), that conflict with the delegations and recall the * delegation from the client before letting the offending operation continue. + * + * If the caller does not want to block while waiting for the delegation to + * be returned, then it should set CC_DONTBLOCK in the flags of caller context. + * This does not work for vnevnents; remove and rename, they always block. + */ + +/* + * This is the function to recall a delegation. It will check if the caller + * wishes to block or not while waiting for the delegation to be returned. + * If the caller context flag has CC_DONTBLOCK set, then it will return + * an error and set CC_WOULDBLOCK instead of waiting for the delegation. */ +int +recall_all_delegations( + rfs4_file_t *fp, + bool_t trunc, + caller_context_t *ct) +{ + clock_t rc; + + rfs4_recall_deleg(fp, trunc, NULL); + + /* optimization that may not stay */ + delay(NFS4_DELEGATION_CONFLICT_DELAY); + + /* if it has been returned, we're done. */ + rfs4_dbe_lock(fp->dbe); + if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { + rfs4_dbe_unlock(fp->dbe); + return (0); + } + + if (ct->cc_flags & CC_DONTBLOCK) { + rfs4_dbe_unlock(fp->dbe); + ct->cc_flags |= CC_WOULDBLOCK; + return (NFS4ERR_DELAY); + } + + while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { + rc = rfs4_dbe_twait(fp->dbe, + lbolt + SEC_TO_TICK(rfs4_lease_time)); + if (rc == -1) { /* timed out */ + rfs4_dbe_unlock(fp->dbe); + rfs4_recall_deleg(fp, trunc, NULL); + rfs4_dbe_lock(fp->dbe); + } + } + rfs4_dbe_unlock(fp->dbe); + + return (0); +} + /* monitor for open on read delegated file */ int -deleg_rdopen( +deleg_rd_open( femarg_t *arg, int mode, cred_t *cr, caller_context_t *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; /* @@ -70,18 +122,9 @@ deleg_rdopen( if ((ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) && (mode & (FWRITE|FTRUNC))) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_open(arg, mode, cr, ct)); @@ -89,13 +132,13 @@ deleg_rdopen( /* monitor for open on write delegated file */ int -deleg_wropen( +deleg_wr_open( femarg_t *arg, int mode, cred_t *cr, caller_context_t *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; /* @@ -107,192 +150,186 @@ deleg_wropen( */ if (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_open(arg, mode, cr, ct)); } /* - * this is only a write delegation op and should only be hit - * by the owner of the delegation. if not, then someone is - * doing a read without doing an open first. shouldn't happen. + * This is op is for write delegations only and should only be hit + * by the owner of the delegation. If not, then someone is + * doing a read without doing an open first. Like from nfs2/3. */ int -deleg_read( +deleg_wr_read( femarg_t *arg, uio_t *uiop, int ioflag, cred_t *cr, struct caller_context *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; - /* use caller context to compare caller to delegation owner */ + /* Use caller context to compare caller to delegation owner */ if (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_read(arg, uiop, ioflag, cr, ct)); } /* - * this should only be hit by the owner of the delegation. if not, then - * someone is doing a write without doing an open first. shouldn't happen. + * If someone is doing a write on a read delegated file, it is a conflict. + * conflicts should be caught at open, but NFSv2&3 don't use OPEN. */ int -deleg_write( +deleg_rd_write( femarg_t *arg, uio_t *uiop, int ioflag, cred_t *cr, struct caller_context *ct) { - clock_t rc; + int rc; + rfs4_file_t *fp; + + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); + + return (vnext_write(arg, uiop, ioflag, cr, ct)); +} + +/* + * The owner of the delegation can write the file, but nobody else can. + * Conflicts should be caught at open, but NFSv2&3 don't use OPEN. + */ +int +deleg_wr_write( + femarg_t *arg, + uio_t *uiop, + int ioflag, + cred_t *cr, + struct caller_context *ct) +{ + int rc; rfs4_file_t *fp; - /* use caller context to compare caller to delegation owner */ + /* Use caller context to compare caller to delegation owner */ if (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_write(arg, uiop, ioflag, cr, ct)); } +/* Doing a setattr on a read delegated file is a conflict. */ +int +deleg_rd_setattr( + femarg_t *arg, + vattr_t *vap, + int flags, + cred_t *cr, + caller_context_t *ct) +{ + int rc; + bool_t trunc = FALSE; + rfs4_file_t *fp; + + if ((vap->va_mask & AT_SIZE) && (vap->va_size == 0)) + trunc = TRUE; + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; + rc = recall_all_delegations(fp, trunc, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); + + return (vnext_setattr(arg, vap, flags, cr, ct)); +} + +/* Only the owner of the write delegation can do a setattr */ int -deleg_setattr( +deleg_wr_setattr( femarg_t *arg, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { - clock_t rc; + int rc; + bool_t trunc = FALSE; rfs4_file_t *fp; /* - * use caller context to compare caller to delegation owner + * Use caller context to compare caller to delegation owner */ if (ct == NULL || (ct->cc_caller_id != nfs4_srv_caller_id)) { + if ((vap->va_mask & AT_SIZE) && (vap->va_size == 0)) + trunc = TRUE; + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, trunc, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_setattr(arg, vap, flags, cr, ct)); } - - int deleg_rd_rwlock( femarg_t *arg, int write_lock, caller_context_t *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; /* - * if this is a write lock, then use caller context to compare - * caller to delegation owner + * If this is a write lock, then we got us a conflict. */ - if (write_lock && - (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id)) { + if (write_lock) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_rwlock(arg, write_lock, ct)); } +/* Only the owner of the write delegation should be doing this. */ int deleg_wr_rwlock( femarg_t *arg, int write_lock, caller_context_t *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; - /* use caller context to compare caller to delegation owner */ + /* Use caller context to compare caller to delegation owner */ if (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_rwlock(arg, write_lock, ct)); } int -deleg_space( +deleg_rd_space( femarg_t *arg, int cmd, flock64_t *bfp, @@ -301,62 +338,85 @@ deleg_space( cred_t *cr, caller_context_t *ct) { - clock_t rc; + int rc; + rfs4_file_t *fp; + + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); + + return (vnext_space(arg, cmd, bfp, flag, offset, cr, ct)); +} + +int +deleg_wr_space( + femarg_t *arg, + int cmd, + flock64_t *bfp, + int flag, + offset_t offset, + cred_t *cr, + caller_context_t *ct) +{ + int rc; rfs4_file_t *fp; - /* use caller context to compare caller to delegation owner */ + /* Use caller context to compare caller to delegation owner */ if (ct == NULL || ct->cc_caller_id != nfs4_srv_caller_id) { fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); } return (vnext_space(arg, cmd, bfp, flag, offset, cr, ct)); } int -deleg_setsecattr( +deleg_rd_setsecattr( femarg_t *arg, vsecattr_t *vsap, int flag, cred_t *cr, caller_context_t *ct) { - clock_t rc; + int rc; rfs4_file_t *fp; fp = (rfs4_file_t *)arg->fa_fnode->fn_available; - /* changing security attribute triggers recall */ - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { - rc = rfs4_dbe_twait(fp->dbe, - lbolt + SEC_TO_TICK(rfs4_lease_time)); - if (rc == -1) { /* timed out */ - rfs4_dbe_unlock(fp->dbe); - rfs4_recall_deleg(fp, FALSE, NULL); - rfs4_dbe_lock(fp->dbe); - } - } - rfs4_dbe_unlock(fp->dbe); + /* Changing security attribute triggers recall */ + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); + + return (vnext_setsecattr(arg, vsap, flag, cr, ct)); +} + +int +deleg_wr_setsecattr( + femarg_t *arg, + vsecattr_t *vsap, + int flag, + cred_t *cr, + caller_context_t *ct) +{ + int rc; + rfs4_file_t *fp; + + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; + + /* Changing security attribute triggers recall */ + rc = recall_all_delegations(fp, FALSE, ct); + if (rc == NFS4ERR_DELAY) + return (EAGAIN); return (vnext_setsecattr(arg, vsap, flag, cr, ct)); } -/* ARGSUSED */ int -deleg_vnevent( +deleg_rd_vnevent( femarg_t *arg, vnevent_t vnevent, vnode_t *dvp, @@ -376,6 +436,7 @@ deleg_vnevent( case VE_RENAME_SRC: fp = (rfs4_file_t *)arg->fa_fnode->fn_available; rfs4_recall_deleg(fp, trunc, NULL); + rfs4_dbe_lock(fp->dbe); while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { rc = rfs4_dbe_twait(fp->dbe, @@ -387,6 +448,48 @@ deleg_vnevent( } } rfs4_dbe_unlock(fp->dbe); + + break; + + default: + break; + } + return (vnext_vnevent(arg, vnevent, dvp, name, ct)); +} + +int +deleg_wr_vnevent( + femarg_t *arg, + vnevent_t vnevent, + vnode_t *dvp, + char *name, + caller_context_t *ct) +{ + clock_t rc; + rfs4_file_t *fp; + bool_t trunc = FALSE; + + switch (vnevent) { + case VE_REMOVE: + case VE_RENAME_DEST: + trunc = TRUE; + /*FALLTHROUGH*/ + + case VE_RENAME_SRC: + fp = (rfs4_file_t *)arg->fa_fnode->fn_available; + rfs4_recall_deleg(fp, trunc, NULL); + rfs4_dbe_lock(fp->dbe); + while (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { + rc = rfs4_dbe_twait(fp->dbe, + lbolt + SEC_TO_TICK(rfs4_lease_time)); + if (rc == -1) { /* timed out */ + rfs4_dbe_unlock(fp->dbe); + rfs4_recall_deleg(fp, trunc, NULL); + rfs4_dbe_lock(fp->dbe); + } + } + rfs4_dbe_unlock(fp->dbe); + break; default: diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index c830396935..8022b17bed 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -471,24 +471,24 @@ extern size_t strlcpy(char *dst, const char *src, size_t dstsize); #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = { - VOPNAME_OPEN, { .femop_open = deleg_rdopen }, - VOPNAME_WRITE, { .femop_write = deleg_write }, - VOPNAME_SETATTR, { .femop_setattr = deleg_setattr }, + VOPNAME_OPEN, { .femop_open = deleg_rd_open }, + VOPNAME_WRITE, { .femop_write = deleg_rd_write }, + VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr }, VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock }, - VOPNAME_SPACE, { .femop_space = deleg_space }, - VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_setsecattr }, - VOPNAME_VNEVENT, { .femop_vnevent = deleg_vnevent }, + VOPNAME_SPACE, { .femop_space = deleg_rd_space }, + VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr }, + VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent }, NULL, NULL }; static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { - VOPNAME_OPEN, { .femop_open = deleg_wropen }, - VOPNAME_READ, { .femop_read = deleg_read }, - VOPNAME_WRITE, { .femop_write = deleg_write }, - VOPNAME_SETATTR, { .femop_setattr = deleg_setattr }, + VOPNAME_OPEN, { .femop_open = deleg_wr_open }, + VOPNAME_READ, { .femop_read = deleg_wr_read }, + VOPNAME_WRITE, { .femop_write = deleg_wr_write }, + VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr }, VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock }, - VOPNAME_SPACE, { .femop_space = deleg_space }, - VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_setsecattr }, - VOPNAME_VNEVENT, { .femop_vnevent = deleg_vnevent }, + VOPNAME_SPACE, { .femop_space = deleg_wr_space }, + VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr }, + VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent }, NULL, NULL }; diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index d2969930be..45f57c2255 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -87,6 +87,8 @@ static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, #define IFBLK 0060000 /* block special */ #define IFSOCK 0140000 /* socket */ +u_longlong_t nfs2_srv_caller_id; + /* * Get file attributes. * Returns the current attributes of the file with the given fhandle. @@ -152,6 +154,7 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, struct vattr va; struct vattr bva; struct flock64 bf; + caller_context_t ct; TRACE_0(TR_FAC_NFS, TR_RFS_SETATTR_START, "rfs_setattr_start:"); @@ -211,6 +214,11 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, (exi->exi_export.ex_flags & EX_NOSUID)) va.va_mode &= ~(VSUID | VSGID); + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs2_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; + /* * We need to specially handle size changes because it is * possible for the client to create a file with modes @@ -227,21 +235,8 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, * Also the client should not be allowed to change the * size of the file if there is a conflicting non-blocking * mandatory lock in the region of change. - * - * Also(2), check to see if the v4 side of the server has - * delegated this file. If so, then we set T_WOULDBLOCK - * so that the dispatch function dosn't send a reply, forcing - * the client to retrasmit its request. */ if (vp->v_type == VREG && va.va_mask & AT_SIZE) { - /* If delegated, mark as wouldblock so response is dropped */ - if (rfs4_check_delegated(FWRITE, vp, TRUE)) { - VN_RELE(vp); - curthread->t_flag |= T_WOULDBLOCK; - TRACE_1(TR_FAC_NFS, TR_RFS_SETATTR_END, - "rfs_setattr_end:(%S)", "delegated"); - return; - } if (nbl_need_check(vp)) { nbl_start_crit(vp, RW_READER); in_crit = 1; @@ -249,7 +244,7 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, bva.va_mask = AT_UID | AT_SIZE; TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &bva, 0, cr, NULL); + error = VOP_GETATTR(vp, &bva, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (error) { if (in_crit) @@ -290,7 +285,7 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, TRACE_0(TR_FAC_NFS, TR_VOP_SPACE_START, "vop_space_start:"); error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, - (offset_t)va.va_size, cr, NULL); + (offset_t)va.va_size, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_SPACE_END, "vop_space_end:"); } if (in_crit) @@ -303,10 +298,24 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, */ if (!error && va.va_mask) { TRACE_0(TR_FAC_NFS, TR_VOP_SETATTR_START, "vop_setattr_start:"); - error = VOP_SETATTR(vp, &va, flag, cr, NULL); + error = VOP_SETATTR(vp, &va, flag, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_SETATTR_END, "vop_setattr_end:"); } + /* + * check if the monitor on either vop_space or vop_setattr detected + * a delegation conflict and if so, mark the thread flag as + * wouldblock so that the response is dropped and the client will + * try again. + */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + VN_RELE(vp); + curthread->t_flag |= T_WOULDBLOCK; + TRACE_1(TR_FAC_NFS, TR_RFS_SETATTR_END, + "rfs_setattr_end:(%S)", "delegated"); + return; + } + if (!error) { va.va_mask = AT_ALL; /* get everything */ TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); @@ -320,10 +329,12 @@ rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, } } + ct.cc_flags = 0; + /* * Force modified metadata out to stable storage. */ - (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); + (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); VN_RELE(vp); @@ -633,6 +644,7 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, mblk_t *mp; int alloc_err = 0; int in_crit = 0; + caller_context_t ct; TRACE_0(TR_FAC_NFS, TR_RFS_READ_START, "rfs_read_start:"); @@ -654,19 +666,10 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, return; } - /* - * Check to see if the v4 side of the server has delegated - * this file. If so, then we mark thread as wouldblock so - * the response is dropped. - */ - if (rfs4_check_delegated(FREAD, vp, FALSE)) { - VN_RELE(vp); - curthread->t_flag |= T_WOULDBLOCK; - rr->rr_data = NULL; - TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, - "rfs_read_end:(%S)", "delegated"); - return; - } + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs2_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; /* * Enter the critical region before calling VOP_RWLOCK @@ -688,18 +691,29 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, } TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_START, "vop_rwlock_start:"); - (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); + error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_END, "vop_rwlock_end:"); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + VN_RELE(vp); + /* mark as wouldblock so response is dropped */ + curthread->t_flag |= T_WOULDBLOCK; + TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, + "rfs_read_end:(%S)", "delegated"); + rr->rr_data = NULL; + return; + } + va.va_mask = AT_ALL; TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (error) { TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); @@ -718,7 +732,7 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, */ if (crgetuid(cr) != va.va_uid) { TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_START, "vop_access_start:"); - error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); + error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_END, "vop_access_end:"); if (error) { /* @@ -727,14 +741,14 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, */ TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_START, "vop_access_start:"); - error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); + error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_END, "vop_access_end:"); } if (error) { TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, @@ -751,7 +765,7 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, if (MANDLOCK(vp, va.va_mode)) { TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); @@ -801,20 +815,29 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, uio.uio_resid = ra->ra_count; TRACE_0(TR_FAC_NFS, TR_VOP_READ_START, "vop_read_start:"); - error = VOP_READ(vp, &uio, 0, cr, NULL); + error = VOP_READ(vp, &uio, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_READ_END, "vop_read_end:"); if (error) { freeb(mp); + + /* + * check if a monitor detected a delegation conflict and + * mark as wouldblock so response is dropped + */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) + curthread->t_flag |= T_WOULDBLOCK; + else + rr->rr_status = puterrno(error); + TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); VN_RELE(vp); rr->rr_data = NULL; - rr->rr_status = puterrno(error); TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, "rfs_read_end:(%S)", "read error"); return; @@ -826,13 +849,13 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, */ va.va_mask = AT_ALL; TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (error) { freeb(mp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, @@ -851,7 +874,7 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, done: TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); if (in_crit) nbl_end_crit(vp); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); @@ -930,6 +953,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, int iovcnt; cred_t *savecred; int in_crit = 0; + caller_context_t ct; TRACE_1(TR_FAC_NFS, TR_RFS_WRITE_START, "rfs_write_start:(%S)", "sync"); @@ -957,22 +981,14 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, return; } - /* - * Check to see if the v4 side of the server has delegated - * this file. If so, then we mark thread as wouldblock so - * the response is dropped. - */ - if (rfs4_check_delegated(FWRITE, vp, FALSE)) { - VN_RELE(vp); - curthread->t_flag |= T_WOULDBLOCK; - TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, - "rfs_write_end:(%S)", "delegated"); - return; - } + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs2_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; va.va_mask = AT_UID|AT_MODE; TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (error) { @@ -990,7 +1006,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, * is always allowed to write it. */ TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_START, "vop_access_start:"); - error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); + error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_END, "vop_access_end:"); if (error) { VN_RELE(vp); @@ -1029,9 +1045,19 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, } TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_START, "vop_rwlock_start:"); - (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); + error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_END, "vop_rwlock_end:"); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + VN_RELE(vp); + /* mark as wouldblock so response is dropped */ + curthread->t_flag |= T_WOULDBLOCK; + TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, + "rfs_write_end:(%S)", "delegated"); + return; + } + if (wa->wa_data) { iov[0].iov_base = wa->wa_data; iov[0].iov_len = wa->wa_count; @@ -1062,7 +1088,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, */ savecred = curthread->t_cred; curthread->t_cred = cr; - error = VOP_WRITE(vp, &uio, FSYNC, cr, NULL); + error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); curthread->t_cred = savecred; TRACE_0(TR_FAC_NFS, TR_VOP_WRITE_END, "vop_write_end:"); } else { @@ -1108,7 +1134,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, */ savecred = curthread->t_cred; curthread->t_cred = cr; - error = VOP_WRITE(vp, &uio, FSYNC, cr, NULL); + error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); curthread->t_cred = savecred; TRACE_0(TR_FAC_NFS, TR_VOP_WRITE_END, "vop_write_end:"); @@ -1117,7 +1143,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, } TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); if (!error) { @@ -1127,7 +1153,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, */ va.va_mask = AT_ALL; /* now we want everything */ TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, cr, NULL); + error = VOP_GETATTR(vp, &va, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); /* check for overflows */ if (!error) { @@ -1141,7 +1167,12 @@ out: nbl_end_crit(vp); VN_RELE(vp); - ns->ns_status = puterrno(error); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) + /* mark as wouldblock so response is dropped */ + curthread->t_flag |= T_WOULDBLOCK; + else + ns->ns_status = puterrno(error); TRACE_1(TR_FAC_NFS, TR_RFS_WRITE_END, "rfs_write_end:(%S)", "sync"); } @@ -1208,6 +1239,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, ushort_t t_flag; cred_t *savecred; int in_crit = 0; + caller_context_t ct; if (!rfs_write_async) { rfs_write_sync(wa, ns, exi, req, cr); @@ -1355,14 +1387,38 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, in_crit = 1; } + ct.cc_sysid = 0; + ct.cc_pid = 0; + ct.cc_caller_id = nfs2_srv_caller_id; + ct.cc_flags = CC_DONTBLOCK; + /* * Lock the file for writing. This operation provides * the delay which allows clusters to grow. */ TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_START, "vop_wrlock_start:"); - (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); + error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_RWLOCK_END, "vop_wrlock_end"); + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + VN_RELE(vp); + /* mark as wouldblock so response is dropped */ + curthread->t_flag |= T_WOULDBLOCK; + mutex_enter(&rfs_async_write_lock); + for (rp = nlp->list; rp != NULL; rp = rp->list) { + if (rp->ns->ns_status == RFSWRITE_INITVAL) { + rp->ns->ns_status = puterrno(error); + rp->thread->t_flag |= T_WOULDBLOCK; + } + } + cv_broadcast(&nlp->cv); + mutex_exit(&rfs_async_write_lock); + TRACE_1(TR_FAC_NFS, TR_RFS_WRITE_END, + "rfs_write_end:(%S)", "delegated"); + return; + } + /* * Disconnect this cluster from the list of clusters. * The cluster that is being dealt with must be fixed @@ -1415,7 +1471,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, va.va_mask = AT_UID|AT_MODE; TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, rp->cr, NULL); + error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (!error) { if (crgetuid(rp->cr) != va.va_uid) { @@ -1427,7 +1483,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_START, "vop_access_start:"); - error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, NULL); + error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_ACCESS_END, "vop_access_end:"); } @@ -1564,29 +1620,21 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, "vop_write_start:(%S)", "async"); /* - * Check to see if the v4 side of the server has - * delegated this file. If so, then we mark thread - * as wouldblock so the response is dropped. + * We're changing creds because VM may fault + * and we need the cred of the current + * thread to be used if quota * checking is + * enabled. */ - if (rfs4_check_delegated(FWRITE, vp, FALSE)) { + savecred = curthread->t_cred; + curthread->t_cred = cr; + error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); + curthread->t_cred = savecred; + TRACE_0(TR_FAC_NFS, TR_VOP_WRITE_END, "vop_write_end:"); + + /* check if a monitor detected a delegation conflict */ + if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) + /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; - error = EACCES; /* just to have an error */ - TRACE_1(TR_FAC_NFS, TR_RFS_READ_END, - "rfs_write_end:(%S)", "delegated"); - } else { - /* - * We're changing creds because VM may fault - * and we need the cred of the current - * thread to be used if quota * checking is - * enabled. - */ - savecred = curthread->t_cred; - curthread->t_cred = cr; - error = VOP_WRITE(vp, &uio, 0, rp->cr, NULL); - curthread->t_cred = savecred; - TRACE_0(TR_FAC_NFS, TR_VOP_WRITE_END, - "vop_write_end:"); - } if (niovp != iov) kmem_free(niovp, sizeof (*niovp) * iovcnt); @@ -1600,7 +1648,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, va.va_mask = AT_ALL; /* now we want everything */ TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_START, "vop_getattr_start:"); - error = VOP_GETATTR(vp, &va, 0, rp->cr, NULL); + error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_GETATTR_END, "vop_getattr_end:"); if (!error) @@ -1631,18 +1679,18 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ if (data_written) { TRACE_0(TR_FAC_NFS, TR_VOP_PUTPAGE_START, "vop_putpage_start:"); - error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, NULL); + error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_PUTPAGE_END, "vop_putpage_end:"); if (!error) { TRACE_0(TR_FAC_NFS, TR_VOP_FSYNC_START, "vop_fsync_start:"); - error = VOP_FSYNC(vp, FNODSYNC, cr, NULL); + error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_FSYNC_END, "vop_fsync_end:"); } } TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_START, "vop_rwunlock_start:"); - VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); TRACE_0(TR_FAC_NFS, TR_VOP_RWUNLOCK_END, "vop_rwunlock_end:"); if (in_crit) @@ -3063,6 +3111,7 @@ void rfs_srvrinit(void) { mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); + nfs2_srv_caller_id = fs_new_caller_id(); } void diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index b9a76a2d43..c2c4ff0510 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -909,19 +909,31 @@ extern void rfs4_clear_dont_grant(rfs4_file_t *); /* * nfs4 monitored operations. */ -extern int deleg_rdopen(femarg_t *, int, cred_t *, caller_context_t *); -extern int deleg_wropen(femarg_t *, int, cred_t *, caller_context_t *); +extern int deleg_rd_open(femarg_t *, int, cred_t *, caller_context_t *); +extern int deleg_wr_open(femarg_t *, int, cred_t *, caller_context_t *); +extern int deleg_wr_read(femarg_t *, uio_t *, int, cred_t *, + caller_context_t *); +extern int deleg_rd_write(femarg_t *, uio_t *, int, cred_t *, + caller_context_t *); +extern int deleg_wr_write(femarg_t *, uio_t *, int, cred_t *, + caller_context_t *); +extern int deleg_rd_setattr(femarg_t *, vattr_t *, int, cred_t *, + caller_context_t *); +extern int deleg_wr_setattr(femarg_t *, vattr_t *, int, cred_t *, + caller_context_t *); extern int deleg_rd_rwlock(femarg_t *, int, caller_context_t *); extern int deleg_wr_rwlock(femarg_t *, int, caller_context_t *); -extern int deleg_read(femarg_t *, uio_t *, int, cred_t *, caller_context_t *); -extern int deleg_write(femarg_t *, uio_t *, int, cred_t *, caller_context_t *); -extern int deleg_setattr(femarg_t *, vattr_t *, int, cred_t *, +extern int deleg_rd_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *, + caller_context_t *); +extern int deleg_wr_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *, + caller_context_t *); +extern int deleg_rd_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *, caller_context_t *); -extern int deleg_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *, +extern int deleg_wr_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *, caller_context_t *); -extern int deleg_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *, +extern int deleg_rd_vnevent(femarg_t *, vnevent_t, vnode_t *, char *, caller_context_t *); -extern int deleg_vnevent(femarg_t *, vnevent_t, vnode_t *, char *, +extern int deleg_wr_vnevent(femarg_t *, vnevent_t, vnode_t *, char *, caller_context_t *); extern void rfs4_mon_hold(void *); diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index 3195fb4ebd..82d267700e 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -756,9 +756,18 @@ typedef struct caller_context { pid_t cc_pid; /* Process ID of the caller */ int cc_sysid; /* System ID, used for remote calls */ u_longlong_t cc_caller_id; /* Identifier for (set of) caller(s) */ + ulong_t cc_flags; } caller_context_t; /* + * Flags for caller context. The caller sets CC_DONTBLOCK if it does not + * want to block inside of a FEM monitor. The monitor will set CC_WOULDBLOCK + * and return EAGAIN if the operation would have blocked. + */ +#define CC_WOULDBLOCK 0x01 +#define CC_DONTBLOCK 0x02 + +/* * Structure tags for function prototypes, defined elsewhere. */ struct pathname; |