diff options
author | Robert Thurlow <Robert.Thurlow@Sun.COM> | 2009-12-09 17:27:22 -0600 |
---|---|---|
committer | Robert Thurlow <Robert.Thurlow@Sun.COM> | 2009-12-09 17:27:22 -0600 |
commit | 2f172c55ef76964744bc62b4500ece87f3089b4d (patch) | |
tree | 68a197e4eb4d77acf9993e2e3d75c3f3b06f6a86 /usr/src/uts | |
parent | 1dbbbf767041f5cea7771826e2efc21c03bbffda (diff) | |
download | illumos-joyent-2f172c55ef76964744bc62b4500ece87f3089b4d.tar.gz |
6232737 Client should support NFS4ERR_MOVED and fs_locations
6232743 Server should support NFS4ERR_MOVED and fs_locations
6891289 client panic mutex_vector_tryenter with some stress testing
Diffstat (limited to 'usr/src/uts')
34 files changed, 2608 insertions, 367 deletions
diff --git a/usr/src/uts/common/fs/fs_subr.c b/usr/src/uts/common/fs/fs_subr.c index a465a97043..b1fec8b4ba 100644 --- a/usr/src/uts/common/fs/fs_subr.c +++ b/usr/src/uts/common/fs/fs_subr.c @@ -938,7 +938,7 @@ int reparse_kderef(const char *svc_type, const char *svc_data, char *buf, size_t *bufsize) { - int err, retries, need_free; + int err, retries, need_free, retried_doorhd; size_t dlen, res_len; char *darg; door_arg_t door_args; @@ -975,6 +975,7 @@ reparse_kderef(const char *svc_type, const char *svc_data, char *buf, door_args.rsize = *bufsize; /* do the door_call */ + retried_doorhd = 0; retries = 0; door_ki_hold(rp_door); while ((err = door_ki_upcall_limited(rp_door, &door_args, @@ -987,10 +988,23 @@ reparse_kderef(const char *svc_type, const char *svc_data, char *buf, } else if (err == EBADF) { /* door server goes away... */ reparse_door_reset_handle(); + + if (retried_doorhd == 0) { + door_ki_rele(rp_door); + retried_doorhd++; + rp_door = reparse_door_get_handle(); + if (rp_door != NULL) { + door_ki_hold(rp_door); + continue; + } + } } break; } - door_ki_rele(rp_door); + + if (rp_door) + door_ki_rele(rp_door); + if (need_free) kmem_free(darg, dlen); /* done with args buffer */ diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index e060023197..71ebdb2d74 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -112,6 +112,10 @@ rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, error = rfs4_delegated_getattr(vp, &va, 0, cr); if (!error) { + /* Lie about the object type for a referral */ + if (vn_is_nfs_reparse(vp, cr)) + va.va_type = VLNK; + /* overflow error if time or size is out of range */ error = vattr_to_fattr3(&va, &resp->resok.obj_attributes); if (error) @@ -792,6 +796,7 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, char *data; struct sockaddr *ca; char *name = NULL; + int is_referral = 0; vap = NULL; @@ -817,7 
+822,11 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, vap = &va; #endif - if (vp->v_type != VLNK) { + /* We lied about the object type for a referral */ + if (vn_is_nfs_reparse(vp, cr)) + is_referral = 1; + + if (vp->v_type != VLNK && !is_referral) { resp->status = NFS3ERR_INVAL; goto out1; } @@ -845,16 +854,39 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); - iov.iov_base = data; - iov.iov_len = MAXPATHLEN; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_extflg = UIO_COPY_CACHED; - uio.uio_loffset = 0; - uio.uio_resid = MAXPATHLEN; + if (is_referral) { + char *s; + size_t strsz; + + /* Get an artificial symlink based on a referral */ + s = build_symlink(vp, cr, &strsz); + global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++; + DTRACE_PROBE2(nfs3serv__func__referral__reflink, + vnode_t *, vp, char *, s); + if (s == NULL) + error = EINVAL; + else { + error = 0; + (void) strlcpy(data, s, MAXPATHLEN + 1); + kmem_free(s, strsz); + } - error = VOP_READLINK(vp, &uio, cr, NULL); + } else { + + iov.iov_base = data; + iov.iov_len = MAXPATHLEN; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_resid = MAXPATHLEN; + + error = VOP_READLINK(vp, &uio, cr, NULL); + + if (!error) + *(data + MAXPATHLEN - uio.uio_resid) = '\0'; + } #ifdef DEBUG if (rfs3_do_post_op_attr) { @@ -866,6 +898,9 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, va.va_mask = AT_ALL; vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? 
NULL : &va; #endif + /* Lie about object type again just to be consistent */ + if (is_referral && vap != NULL) + vap->va_type = VLNK; #if 0 /* notyet */ /* @@ -884,8 +919,6 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, goto out; } - *(data + MAXPATHLEN - uio.uio_resid) = '\0'; - ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1); @@ -3863,6 +3896,10 @@ good: nva.va_mask = AT_ALL; nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva; #endif + /* Lie about the object type for a referral */ + if (vn_is_nfs_reparse(nvp, cr)) + nvap->va_type = VLNK; + vattr_to_post_op_attr(nvap, &infop[i].attr); #ifdef DEBUG diff --git a/usr/src/uts/common/fs/nfs/nfs4_attr.c b/usr/src/uts/common/fs/nfs/nfs4_attr.c index 1740b64d30..c32a9526ec 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_attr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_attr.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. - * All rights reserved. Use is subject to license terms. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/time.h> #include <sys/systm.h> @@ -107,7 +104,7 @@ nfs4_ver_fattr4_attr(vattr_t *vap, struct nfs4_ntov_map *ntovp, case AT_ATIME: if ((ntovp->nval != FATTR4_TIME_ACCESS) || (*errorp = nfs4_time_vton(&vap->va_ctime, - &nap->time_access))) { + &nap->time_access))) { /* * either asked for FATTR4_TIME_ACCESS_SET - * not used for setattr @@ -119,7 +116,7 @@ nfs4_ver_fattr4_attr(vattr_t *vap, struct nfs4_ntov_map *ntovp, case AT_MTIME: if ((ntovp->nval != FATTR4_TIME_MODIFY) || (*errorp = nfs4_time_vton(&vap->va_mtime, - &nap->time_modify))) { + &nap->time_modify))) { /* * either asked for FATTR4_TIME_MODIFY_SET - * not used for setattr @@ -130,7 +127,7 @@ nfs4_ver_fattr4_attr(vattr_t *vap, struct nfs4_ntov_map *ntovp, break; case AT_CTIME: if (*errorp = nfs4_time_vton(&vap->va_ctime, - &nap->time_metadata)) { + &nap->time_metadata)) { /* * system time invalid for otw transfers */ @@ -196,7 +193,7 @@ nfs4_set_fattr4_attr(vattr_t *vap, vsecattr_t *vsap, case AT_ATIME: if ((ntovp->nval != FATTR4_TIME_ACCESS_SET) || (*errorp = timestruc_to_settime4(&vap->va_atime, - &nap->time_access_set, flags))) { + &nap->time_access_set, flags))) { /* FATTR4_TIME_ACCESS - not used for verify */ retval = FALSE; } @@ -204,7 +201,7 @@ nfs4_set_fattr4_attr(vattr_t *vap, vsecattr_t *vsap, case AT_MTIME: if ((ntovp->nval != FATTR4_TIME_MODIFY_SET) || (*errorp = timestruc_to_settime4(&vap->va_mtime, - &nap->time_modify_set, flags))) { + &nap->time_modify_set, flags))) { /* FATTR4_TIME_MODIFY - not used for verify */ retval = FALSE; } @@ -260,7 +257,7 @@ vattr_to_fattr4(vattr_t *vap, vsecattr_t *vsap, fattr4 *fattrp, int flags, fattrp->attrlist4_len = 0; fattrp->attrlist4 = NULL; na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size, - KM_SLEEP); + KM_SLEEP); if (op == OP_SETATTR || op == OP_CREATE || op == OP_OPEN) { /* @@ -341,8 +338,8 @@ vattr_to_fattr4(vattr_t *vap, vsecattr_t *vsap, fattr4 *fattrp, int flags, xdr_size += 
nfs4_ntov_map[i].xdr_size; if ((nfs4_ntov_map[i].nval == FATTR4_TIME_ACCESS_SET || nfs4_ntov_map[i].nval == FATTR4_TIME_MODIFY_SET) && - attrfunc == nfs4_set_fattr4_attr && - !(flags & ATTR_UTIME)) { + attrfunc == nfs4_set_fattr4_attr && + !(flags & ATTR_UTIME)) { xdr_size -= 3 * BYTES_PER_XDR_UNIT; } } else { @@ -351,19 +348,19 @@ vattr_to_fattr4(vattr_t *vap, vsecattr_t *vsap, fattr4 *fattrp, int flags, * are AT_UID, AT_GID and FATTR4_ACL_MASK */ ASSERT(nfs4_ntov_map[i].vbit == AT_UID || - nfs4_ntov_map[i].vbit == AT_GID || - nfs4_ntov_map[i].fbit == FATTR4_ACL_MASK); + nfs4_ntov_map[i].vbit == AT_GID || + nfs4_ntov_map[i].fbit == FATTR4_ACL_MASK); if (nfs4_ntov_map[i].vbit == AT_UID) { uid_attr = attrcnt; xdr_size += BYTES_PER_XDR_UNIT; /* length */ xdr_size += - RNDUP(na[attrcnt].owner.utf8string_len); + RNDUP(na[attrcnt].owner.utf8string_len); } else if (nfs4_ntov_map[i].vbit == AT_GID) { gid_attr = attrcnt; xdr_size += BYTES_PER_XDR_UNIT; /* length */ xdr_size += RNDUP( - na[attrcnt].owner_group.utf8string_len); + na[attrcnt].owner_group.utf8string_len); } else if (nfs4_ntov_map[i].fbit == FATTR4_ACL_MASK) { nfsace4 *tmpacl = (nfsace4 *)vsap->vsa_aclentp; @@ -411,7 +408,7 @@ vattr_to_fattr4(vattr_t *vap, vsecattr_t *vsap, fattr4 *fattrp, int flags, for (i = 0; i < attrcnt; i++) { if ((*nfs4_ntov_map[amap[i]].xfunc)(&xdr, &na[i]) == FALSE) { cmn_err(CE_WARN, "vattr_to_fattr4: xdr encode of " - "attribute failed\n"); + "attribute failed\n"); error = EINVAL; break; } @@ -422,11 +419,11 @@ done: */ if (uid_attr != -1 && na[uid_attr].owner.utf8string_val != NULL) { kmem_free(na[uid_attr].owner.utf8string_val, - na[uid_attr].owner.utf8string_len); + na[uid_attr].owner.utf8string_len); } if (gid_attr != -1 && na[gid_attr].owner_group.utf8string_val != NULL) { kmem_free(na[gid_attr].owner_group.utf8string_val, - na[gid_attr].owner_group.utf8string_len); + na[gid_attr].owner_group.utf8string_len); } /* xdrmem_destroy(&xdrs); */ /* NO-OP */ @@ -770,7 +767,7 @@ struct 
nfs4_ntov_map nfs4_ntov_map[] = { FATTR4_TIME_MODIFY_SET, 4 * BYTES_PER_XDR_UNIT, xdr_settime4, NULL, "fattr4_time_modify_set" }, - { FATTR4_MOUNTED_ON_FILEID_MASK, 0, FALSE, FALSE, + { FATTR4_MOUNTED_ON_FILEID_MASK, AT_NODEID, FALSE, FALSE, FATTR4_MOUNTED_ON_FILEID, 2 * BYTES_PER_XDR_UNIT, xdr_u_longlong_t, NULL, "fattr4_mounted_on_fileid" }, diff --git a/usr/src/uts/common/fs/nfs/nfs4_callback.c b/usr/src/uts/common/fs/nfs/nfs4_callback.c index 057eb3c608..90c570ba24 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_callback.c +++ b/usr/src/uts/common/fs/nfs/nfs4_callback.c @@ -1493,7 +1493,8 @@ nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr, (void) nfs4_start_recovery(&e, mi, vp, NULL, &rp->r_deleg_stateid, lost_rqst.lr_op == OP_DELEGRETURN ? - &lost_rqst : NULL, OP_DELEGRETURN, NULL); + &lost_rqst : NULL, OP_DELEGRETURN, NULL, + NULL, NULL); nfs4_end_op(mi, vp, NULL, &recov_state, needrecov); break; } @@ -1517,7 +1518,8 @@ nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr, (void) nfs4_start_recovery(&e, mi, vp, NULL, &rp->r_deleg_stateid, lost_rqst.lr_op == OP_DELEGRETURN ? - &lost_rqst : NULL, OP_DELEGRETURN, NULL); + &lost_rqst : NULL, OP_DELEGRETURN, NULL, + NULL, NULL); } else { nfs4delegreturn_cleanup_impl(rp, NULL, ncg); done = TRUE; @@ -1956,7 +1958,7 @@ retry: * thread will take it from here. 
*/ (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL, - NULL, OP_OPEN, NULL); + NULL, OP_OPEN, NULL, NULL, NULL); open_stream_rele(osp, rp); *recovp = TRUE; break; diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c index b60b211df1..abbdaebee7 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client.c @@ -837,7 +837,7 @@ recov_retry: if (nfs4_needs_recovery(&e, FALSE, vp->v_vfsp)) { if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, OP_GETATTR, NULL) == FALSE) { + NULL, OP_GETATTR, NULL, NULL, NULL) == FALSE) { nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 1); goto recov_retry; @@ -1027,7 +1027,7 @@ recov_retry: "nfs4_attr_otw: initiating recovery\n")); abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, OP_GETATTR, NULL); + NULL, OP_GETATTR, NULL, NULL, NULL); nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, needrecov); if (!e.error) { @@ -3492,7 +3492,7 @@ recov_retry: "nfs4renew: initiating recovery\n")); if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, - OP_RENEW, NULL) == FALSE) { + OP_RENEW, NULL, NULL, NULL) == FALSE) { nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); VFS_RELE(mi->mi_vfsp); if (!e.error) diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c index 5336ddf0c8..b8e85ae198 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c @@ -303,6 +303,17 @@ set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi, if (rp2 != NULL && rp2->r_svnode.sv_name != NULL) ep->re_char2 = fn_path(rp2->r_svnode.sv_name); break; + case RE_REFERRAL: + /* server we're being referred to */ + if (server1 != NULL) { + len = strlen(server1); + ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); + bcopy(server1, ep->re_char1, len); + ep->re_char1[len] = '\0'; + } else { + ep->re_char1 = NULL; + } + break; default: 
break; } @@ -391,6 +402,8 @@ successful_comm(nfs4_debug_msg_t *msgp) case RE_SIGLOST: case RE_SIGLOST_NO_DUMP: case RE_LOST_STATE_BAD_OP: + case RE_REFERRAL: + /* placeholder */ return (0); default: return (0); @@ -1039,6 +1052,18 @@ queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump) ep->re_char1, (void *)ep->re_rp1, ep->re_char2, (void *)ep->re_rp2); break; + case RE_REFERRAL: + if (ep->re_char1) + zcmn_err(zoneid, CE_NOTE, + "![NFS4][Server: %s][Mntpt: %s]" + "being referred from %s to %s", msg->msg_srv, + msg->msg_mntpt, msg->msg_srv, ep->re_char1); + else + zcmn_err(zoneid, CE_NOTE, + "![NFS4][Server: %s][Mntpt: %s]" + "NFS4: being referred from %s to unknown server", + msg->msg_srv, msg->msg_mntpt, msg->msg_srv); + break; default: zcmn_err(zoneid, CE_WARN, "!queue_print_event: illegal event %d", ep->re_type); @@ -1186,6 +1211,7 @@ id_to_dump_solo_event(nfs4_event_type_t id) case RE_UNEXPECTED_ERRNO: case RE_UNEXPECTED_STATUS: case RE_LOST_STATE_BAD_OP: + case RE_REFERRAL: return (1); default: return (0); diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_secinfo.c b/usr/src/uts/common/fs/nfs/nfs4_client_secinfo.c index 3546a88ea9..ed2c2c167a 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client_secinfo.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client_secinfo.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * NFS Version 4 client side SECINFO code. 
*/ @@ -749,7 +747,7 @@ retry: "nfs4secinfo_otw: recovery in a recovery thread\n")); abort = nfs4_start_recovery(&e, mi, NULL, - NULL, NULL, NULL, OP_SECINFO, NULL); + NULL, NULL, NULL, OP_SECINFO, NULL, NULL, NULL); if (!e.error) { e.error = geterrno4(res.status); (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); diff --git a/usr/src/uts/common/fs/nfs/nfs4_common.c b/usr/src/uts/common/fs/nfs/nfs4_common.c index 4966d48946..fbd2670acc 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_common.c +++ b/usr/src/uts/common/fs/nfs/nfs4_common.c @@ -19,13 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - - /* * This is the loadable module wrapper. */ @@ -164,28 +161,28 @@ nfs4_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) mi->mi_acregmin = SEC2HR(ACMINMAX); else mi->mi_acregmin = SEC2HR(MIN(STRUCT_FGET(args, - acregmin), ACMINMAX)); + acregmin), ACMINMAX)); } if (flags & NFSMNT_ACREGMAX) { if (STRUCT_FGET(args, acregmax) < 0) mi->mi_acregmax = SEC2HR(ACMAXMAX); else mi->mi_acregmax = SEC2HR(MIN(STRUCT_FGET(args, - acregmax), ACMAXMAX)); + acregmax), ACMAXMAX)); } if (flags & NFSMNT_ACDIRMIN) { if (STRUCT_FGET(args, acdirmin) < 0) mi->mi_acdirmin = SEC2HR(ACMINMAX); else mi->mi_acdirmin = SEC2HR(MIN(STRUCT_FGET(args, - acdirmin), ACMINMAX)); + acdirmin), ACMINMAX)); } if (flags & NFSMNT_ACDIRMAX) { if (STRUCT_FGET(args, acdirmax) < 0) mi->mi_acdirmax = SEC2HR(ACMAXMAX); else mi->mi_acdirmax = SEC2HR(MIN(STRUCT_FGET(args, - acdirmax), ACMAXMAX)); + acdirmax), ACMAXMAX)); } return (0); @@ -437,6 +434,8 @@ nfs4_recov_action_to_str(nfs4_recov_t what) return ("NR_LOST_LOCK"); case NR_LOST_STATE_RQST: return ("NR_LOST_STATE_RQST"); + case NR_MOVED: + return ("NR_MOVED"); default: (void) snprintf(buf, 40, "Unknown, code %d", (int)what); return (buf); diff --git 
a/usr/src/uts/common/fs/nfs/nfs4_idmap.c b/usr/src/uts/common/fs/nfs/nfs4_idmap.c index 14e1708e53..a5f05a5d34 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_idmap.c +++ b/usr/src/uts/common/fs/nfs/nfs4_idmap.c @@ -19,11 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* * There are well defined policies for mapping uid and gid values to and @@ -134,7 +133,7 @@ /* * Truly global modular globals */ -static zone_key_t nfsidmap_zone_key; +zone_key_t nfsidmap_zone_key; static list_t nfsidmap_globals_list; static kmutex_t nfsidmap_globals_lock; static kmem_cache_t *nfsidmap_cache; diff --git a/usr/src/uts/common/fs/nfs/nfs4_recovery.c b/usr/src/uts/common/fs/nfs/nfs4_recovery.c index 9d2ef14175..ca32816d19 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_recovery.c +++ b/usr/src/uts/common/fs/nfs/nfs4_recovery.c @@ -39,6 +39,11 @@ #include <sys/disp.h> #include <sys/list.h> #include <sys/sdt.h> +#include <sys/mount.h> +#include <sys/door.h> +#include <nfs/nfssys.h> +#include <nfs/nfsid_map.h> +#include <nfs/nfs4_idmap_impl.h> extern r4hashq_t *rtable4; @@ -69,6 +74,8 @@ typedef struct { nfs4_error_t rc_orig_errors; /* original errors causing recovery */ int rc_error; nfs4_bseqid_entry_t *rc_bseqid_rqst; + vnode_t *rc_moved_vp; + char *rc_moved_nm; } recov_info_t; /* @@ -135,6 +142,8 @@ int nfs4_srvmnt_fail_cnt = 0; int nfs4_srvmnt_debug = 0; #endif +extern zone_key_t nfs4clnt_zone_key; + /* forward references, in alphabetic order */ static void close_after_open_resend(vnode_t *, cred_t *, uint32_t, nfs4_error_t *); @@ -169,7 +178,7 @@ static void resend_one_op(nfs4_lost_rqst_t *, nfs4_error_t *, mntinfo4_t *, nfs4_server_t *); static void save_bseqid_rqst(nfs4_bseqid_entry_t *, recov_info_t *); static void start_recovery(recov_info_t *, mntinfo4_t *, vnode_t *, vnode_t *, - nfs4_server_t *); + 
nfs4_server_t *, vnode_t *, char *); static void start_recovery_action(nfs4_recov_t, bool_t, mntinfo4_t *, vnode_t *, vnode_t *); static int wait_for_recovery(mntinfo4_t *, nfs4_op_hint_t); @@ -330,7 +339,7 @@ enqueue_bseqid_rqst(recov_info_t *recovp, mntinfo4_t *mi) bool_t nfs4_start_recovery(nfs4_error_t *ep, mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, stateid4 *sid, nfs4_lost_rqst_t *lost_rqstp, nfs_opnum4 op, - nfs4_bseqid_entry_t *bsep) + nfs4_bseqid_entry_t *bsep, vnode_t *moved_vp, char *moved_nm) { recov_info_t *recovp; nfs4_server_t *sp; @@ -371,7 +380,7 @@ nfs4_start_recovery(nfs4_error_t *ep, mntinfo4_t *mi, vnode_t *vp1, errs_to_action(recovp, sp, mi, sid, lost_rqstp, gone, op, bsep); if (sp != NULL) mutex_exit(&sp->s_lock); - start_recovery(recovp, mi, vp1, vp2, sp); + start_recovery(recovp, mi, vp1, vp2, sp, moved_vp, moved_nm); if (sp != NULL) nfs4_server_rele(sp); return (FALSE); @@ -397,12 +406,13 @@ start_recovery_action(nfs4_recov_t what, bool_t reboot, mntinfo4_t *mi, recovp->rc_action = what; recovp->rc_srv_reboot = reboot; recovp->rc_error = EIO; - start_recovery(recovp, mi, vp1, vp2, NULL); + start_recovery(recovp, mi, vp1, vp2, NULL, NULL, NULL); } static void start_recovery(recov_info_t *recovp, mntinfo4_t *mi, - vnode_t *vp1, vnode_t *vp2, nfs4_server_t *sp) + vnode_t *vp1, vnode_t *vp2, nfs4_server_t *sp, + vnode_t *moved_vp, char *moved_nm) { NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, "start_recovery: mi %p, what %s", (void*)mi, @@ -563,7 +573,6 @@ again: case NR_LOST_LOCK: nfs4_enqueue_lost_rqst(recovp, mi); break; - default: nfs4_queue_event(RE_UNEXPECTED_ACTION, mi, NULL, recovp->rc_action, NULL, NULL, 0, NULL, 0, TAG_NONE, @@ -607,6 +616,8 @@ again: ASSERT(VTOMI4(vp2) == mi); VN_HOLD(recovp->rc_vp2); } + recovp->rc_moved_vp = moved_vp; + recovp->rc_moved_nm = moved_nm; (void) zthread_create(NULL, 0, nfs4_recov_thread, recovp, 0, minclsyspri); @@ -1937,7 +1948,7 @@ recov_filehandle(nfs4_recov_t action, mntinfo4_t *mi, vnode_t *vp) 
needrecov = FALSE; if (needrecov) { (void) nfs4_start_recovery(&e, mi, vp, - NULL, NULL, NULL, OP_LOOKUP, NULL); + NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, NULL); } else if (e.error != EINTR && !NFS4_FRC_UNMT_ERR(e.error, mi->mi_vfsp) && (e.error != 0 || e.stat != NFS4_OK)) { @@ -2012,7 +2023,7 @@ recov_stale(mntinfo4_t *mi, vnode_t *vp) needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); if (needrecov && (e.error != 0 || e.stat != NFS4ERR_STALE)) { (void) nfs4_start_recovery(&e, mi, vp, - NULL, NULL, NULL, OP_GETATTR, NULL); + NULL, NULL, NULL, OP_GETATTR, NULL, NULL, NULL); NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, "recov_stale: error=%d, stat=%d seen on rp %s", e.error, e.stat, rnode4info(rp))); @@ -2062,7 +2073,7 @@ recov_stale(mntinfo4_t *mi, vnode_t *vp) if (needrecov) { (void) nfs4_start_recovery(&e, mi, rootvp, NULL, NULL, NULL, - OP_GETATTR, NULL); + OP_GETATTR, NULL, NULL, NULL); NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, "recov_stale: error=%d, stat=%d seen " "on rp %s", e.error, e.stat, @@ -2491,7 +2502,7 @@ recov_openfiles(recov_info_t *recovp, nfs4_server_t *sp) nfs4_remap_root(mi, &e, 0); if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) { (void) nfs4_start_recovery(&e, mi, NULL, - NULL, NULL, NULL, OP_LOOKUP, NULL); + NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, NULL); } } @@ -2561,7 +2572,7 @@ recov_openfiles(recov_info_t *recovp, nfs4_server_t *sp) mi->mi_vfsp)) { (void) nfs4_start_recovery(&e, mi, rep->re_vp, NULL, NULL, NULL, - OP_OPEN, NULL); + OP_OPEN, NULL, NULL, NULL); break; } } @@ -2575,7 +2586,7 @@ recov_openfiles(recov_info_t *recovp, nfs4_server_t *sp) if (nfs4_needs_recovery(&e, TRUE, mi->mi_vfsp)) (void) nfs4_start_recovery(&e, mi, rep->re_vp, NULL, NULL, NULL, OP_LOCK, - NULL); + NULL, NULL, NULL); if (e.error != 0 || e.stat != NFS4_OK) break; } @@ -2664,7 +2675,7 @@ nfs4_resend_lost_rqsts(recov_info_t *recovp, nfs4_server_t *sp) } else { (void) nfs4_start_recovery(&n4e, mi, lrp->lr_dvp, lrp->lr_vp, NULL, NULL, - lrp->lr_op, 
NULL); + lrp->lr_op, NULL, NULL, NULL); } return; } @@ -3122,10 +3133,10 @@ errs_to_action(recov_info_t *recovp, case NFS4ERR_LEASE_MOVED: action = xxx; break; +#endif case NFS4ERR_MOVED: - action = xxx; + action = NR_MOVED; break; -#endif case NFS4ERR_BADHANDLE: action = NR_BADHANDLE; break; diff --git a/usr/src/uts/common/fs/nfs/nfs4_rnode.c b/usr/src/uts/common/fs/nfs/nfs4_rnode.c index 14cb143e95..48e9eafbb0 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_rnode.c +++ b/usr/src/uts/common/fs/nfs/nfs4_rnode.c @@ -381,11 +381,12 @@ r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode, * creation time and it never changes for life * of the rnode. * + * This stub will be for a mirror-mount, rather than + * a referral (the latter also sets R4SRVSTUB). + * * The stub type is also set during RO failover, * nfs4_remap_file(). * - * This stub will be for a mirror-mount. - * * We don't bother with taking r_state_lock to * set the stub type because this is a new rnode * and we're holding the hash bucket r_lock RW_WRITER. @@ -1769,6 +1770,52 @@ r4mkopenlist(mntinfo4_t *mi) } /* + * Given a filesystem id, check to see if any rnodes + * within this fsid reside in the rnode cache, other + * than one we know about. + * + * Return 1 if an rnode is found, 0 otherwise + */ +int +r4find_by_fsid(mntinfo4_t *mi, fattr4_fsid *moved_fsid) +{ + rnode4_t *rp; + vnode_t *vp; + vfs_t *vfsp = mi->mi_vfsp; + fattr4_fsid *fsid; + int index, found = 0; + + for (index = 0; index < rtable4size; index++) { + rw_enter(&rtable4[index].r_lock, RW_READER); + for (rp = rtable4[index].r_hashf; + rp != (rnode4_t *)(&rtable4[index]); + rp = rp->r_hashf) { + + vp = RTOV4(rp); + if (vp->v_vfsp != vfsp) + continue; + + /* + * XXX there might be a case where a + * replicated fs may have the same fsid + * across two different servers. 
This + * check isn't good enough in that case + */ + fsid = &rp->r_srv_fsid; + if (FATTR4_FSID_EQ(moved_fsid, fsid)) { + found = 1; + break; + } + } + rw_exit(&rtable4[index].r_lock); + + if (found) + break; + } + return (found); +} + +/* * Release the list of open instance references. */ @@ -1898,6 +1945,14 @@ r4_stub_mirrormount(rnode4_t *rp) } void +r4_stub_referral(rnode4_t *rp) +{ + DTRACE_PROBE1(nfs4clnt__func__referral__moved, + vnode_t *, RTOV4(rp)); + r4_stub_set(rp, NFS4_STUB_REFERRAL); +} + +void r4_stub_none(rnode4_t *rp) { r4_stub_set(rp, NFS4_STUB_NONE); diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 842c30d53e..c7163712c0 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -56,6 +56,8 @@ #include <sys/ddi.h> #include <sys/zone.h> +#include <fs/fs_reparse.h> + #include <rpc/types.h> #include <rpc/auth.h> #include <rpc/rpcsec_gss.h> @@ -82,7 +84,6 @@ static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; #define RFS4_LOCK_DELAY 10 /* Milliseconds */ static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; extern struct svc_ops rdma_svc_ops; -/* End of Tunables */ static int rdma_setup_read_data4(READ4args *, READ4res *); @@ -243,7 +244,7 @@ static void rfs4_op_secinfo_free(nfs_resop4 *); static nfsstat4 check_open_access(uint32_t, struct compound_state *, struct svc_req *); nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); -void rfs4_ss_clid(rfs4_client_t *, struct svc_req *); +void rfs4_ss_clid(rfs4_client_t *); /* * translation table for attrs @@ -465,6 +466,8 @@ void rfs4_ss_chkclid(rfs4_client_t *); extern size_t strlcpy(char *dst, const char *src, size_t dstsize); +extern void rfs4_free_fs_locations4(fs_locations4 *); + #ifdef nextdp #undef nextdp #endif @@ -1609,6 +1612,7 @@ rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, resp->attrset = 0; sarg.sbp = &sb; + sarg.is_referral = B_FALSE; nfs4_ntov_table_init(&ntov); status = 
do_rfs4_set_attrs(&resp->attrset, @@ -2314,13 +2318,26 @@ rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, sarg.sbp = &sb; sarg.cs = cs; + sarg.is_referral = B_FALSE; status = bitmap4_to_attrmask(args->attr_request, &sarg); if (status == NFS4_OK) { + status = bitmap4_get_sysattrs(&sarg); - if (status == NFS4_OK) + if (status == NFS4_OK) { + + /* Is this a referral? */ + if (vn_is_nfs_reparse(cs->vp, cs->cr)) { + /* Older V4 Solaris client sees a link */ + if (client_is_downrev(req)) + sarg.vap->va_type = VLNK; + else + sarg.is_referral = B_TRUE; + } + status = do_rfs4_op_getattr(args->attr_request, &resp->obj_attributes, &sarg); + } } *cs->statusp = resp->status = status; out: @@ -2354,6 +2371,25 @@ rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } + /* check for reparse point at the share point */ + if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) { + /* it's all bad */ + cs->exi->exi_moved = 1; + *cs->statusp = resp->status = NFS4ERR_MOVED; + DTRACE_PROBE2(nfs4serv__func__referral__shared__moved, + vnode_t *, cs->vp, char *, "rfs4_op_getfh"); + return; + } + + /* check for reparse point at vp */ + if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) { + /* it's not all bad */ + *cs->statusp = resp->status = NFS4ERR_MOVED; + DTRACE_PROBE2(nfs4serv__func__referral__moved, + vnode_t *, cs->vp, char *, "rfs4_op_getfh"); + return; + } + resp->object.nfs_fh4_val = kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); nfs_fh4_copy(&cs->fh, &resp->object); @@ -3688,6 +3724,7 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, char *data; struct sockaddr *ca; char *name = NULL; + int is_referral; DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); @@ -3703,14 +3740,25 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - if (vp->v_type == VDIR) { - *cs->statusp = resp->status = NFS4ERR_ISDIR; - goto out; - } + 
/* Is it a referral? */ + if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) { + + is_referral = 1; + + } else { + + is_referral = 0; + + if (vp->v_type == VDIR) { + *cs->statusp = resp->status = NFS4ERR_ISDIR; + goto out; + } + + if (vp->v_type != VLNK) { + *cs->statusp = resp->status = NFS4ERR_INVAL; + goto out; + } - if (vp->v_type != VLNK) { - *cs->statusp = resp->status = NFS4ERR_INVAL; - goto out; } va.va_mask = AT_MODE; @@ -3727,16 +3775,39 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); - iov.iov_base = data; - iov.iov_len = MAXPATHLEN; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_extflg = UIO_COPY_CACHED; - uio.uio_loffset = 0; - uio.uio_resid = MAXPATHLEN; + if (is_referral) { + char *s; + size_t strsz; + + /* Get an artificial symlink based on a referral */ + s = build_symlink(vp, cs->cr, &strsz); + global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++; + DTRACE_PROBE2(nfs4serv__func__referral__reflink, + vnode_t *, vp, char *, s); + if (s == NULL) + error = EINVAL; + else { + error = 0; + (void) strlcpy(data, s, MAXPATHLEN + 1); + kmem_free(s, strsz); + } + + } else { + + iov.iov_base = data; + iov.iov_len = MAXPATHLEN; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_resid = MAXPATHLEN; + + error = VOP_READLINK(vp, &uio, cs->cr, NULL); - error = VOP_READLINK(vp, &uio, cs->cr, NULL); + if (!error) + *(data + MAXPATHLEN - uio.uio_resid) = '\0'; + } if (error) { kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); @@ -3744,8 +3815,6 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } - *(data + MAXPATHLEN - uio.uio_resid) = '\0'; - ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND, MAXPATHLEN + 1); @@ -5053,6 +5122,7 @@ 
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, *resp = 0; sarg.sbp = &sb; + sarg.is_referral = B_FALSE; nfs4_ntov_table_init(&ntov); status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov, NFS4ATTR_SETIT); @@ -5347,6 +5417,7 @@ rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, } sarg.sbp = &sb; + sarg.is_referral = B_FALSE; nfs4_ntov_table_init(&ntov); resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, &sarg, &ntov, NFS4ATTR_VERIT); @@ -5408,6 +5479,7 @@ rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, return; } sarg.sbp = &sb; + sarg.is_referral = B_FALSE; nfs4_ntov_table_init(&ntov); resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, &sarg, &ntov, NFS4ATTR_VERIT); @@ -6212,6 +6284,7 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, char *name = NULL; sarg.sbp = &sb; + sarg.is_referral = B_FALSE; dvp = cs->vp; @@ -7806,7 +7879,8 @@ rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop, SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid; SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid; rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed; - bool_t create = TRUE; + rfs4_clntip_t *ci; + bool_t create; char *addr, *netid; int len; @@ -7816,6 +7890,27 @@ retry: newcp = cp_confirmed = cp_unconfirmed = NULL; /* + * Save the caller's IP address + */ + args->client.cl_addr = + (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; + + /* + * Record if it is a Solaris client that cannot handle referrals. 
+ */ + if (strstr(args->client.id_val, "Solaris") && + !strstr(args->client.id_val, "+referrals")) { + /* Add a "yes, it's downrev" record */ + create = TRUE; + ci = rfs4_find_clntip(args->client.cl_addr, &create); + ASSERT(ci != NULL); + rfs4_dbe_rele(ci->ri_dbe); + } else { + /* Remove any previous record */ + rfs4_invalidate_clntip(args->client.cl_addr); + } + + /* * In search of an EXISTING client matching the incoming * request to establish a new client identifier at the server */ @@ -8063,7 +8158,7 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, * Record clientid in stable storage. * Must be done after server instance has been assigned. */ - rfs4_ss_clid(cp, req); + rfs4_ss_clid(cp); rfs4_dbe_unlock(cp->rc_dbe); @@ -9363,3 +9458,283 @@ rdma_setup_read_data4(READ4args *args, READ4res *rok) rok->wlist = wcl; return (TRUE); } + +/* tunable to disable server referrals */ +int rfs4_no_referrals = 0; + +/* + * Find an NFS record in reparse point data. + * Returns 0 for success and <0 or an errno value on failure. 
+ */ +int +vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap) +{ + int err; + char *stype, *val; + nvlist_t *nvl; + nvpair_t *curr; + + if ((nvl = reparse_init()) == NULL) + return (-1); + + if ((err = reparse_vnode_parse(vp, nvl)) != 0) { + reparse_free(nvl); + return (err); + } + + curr = NULL; + while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) { + if ((stype = nvpair_name(curr)) == NULL) { + reparse_free(nvl); + return (-2); + } + if (strncasecmp(stype, "NFS", 3) == 0) + break; + } + + if ((curr == NULL) || + (nvpair_value_string(curr, &val))) { + reparse_free(nvl); + return (-3); + } + *nvlp = nvl; + *svcp = stype; + *datap = val; + return (0); +} + +int +vn_is_nfs_reparse(vnode_t *vp, cred_t *cr) +{ + nvlist_t *nvl; + char *s, *d; + + if (rfs4_no_referrals != 0) + return (B_FALSE); + + if (vn_is_reparse(vp, cr, NULL) == B_FALSE) + return (B_FALSE); + + if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0) + return (B_FALSE); + + reparse_free(nvl); + + return (B_TRUE); +} + +/* + * There is a user-level copy of this routine in ref_subr.c. + * Changes should be kept in sync. + */ +static int +nfs4_create_components(char *path, component4 *comp4) +{ + int slen, plen, ncomp; + char *ori_path, *nxtc, buf[MAXNAMELEN]; + + if (path == NULL) + return (0); + + plen = strlen(path) + 1; /* include the terminator */ + ori_path = path; + ncomp = 0; + + /* count number of components in the path */ + for (nxtc = path; nxtc < ori_path + plen; nxtc++) { + if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') { + if ((slen = nxtc - path) == 0) { + path = nxtc + 1; + continue; + } + + if (comp4 != NULL) { + bcopy(path, buf, slen); + buf[slen] = '\0'; + str_to_utf8(buf, &comp4[ncomp]); + } + + ncomp++; /* 1 valid component */ + path = nxtc + 1; + } + if (*nxtc == '\0' || *nxtc == '\n') + break; + } + + return (ncomp); +} + +/* + * There is a user-level copy of this routine in ref_subr.c. + * Changes should be kept in sync. 
+ */ +static int +make_pathname4(char *path, pathname4 *pathname) +{ + int ncomp; + component4 *comp4; + + if (pathname == NULL) + return (0); + + if (path == NULL) { + pathname->pathname4_val = NULL; + pathname->pathname4_len = 0; + return (0); + } + + /* count number of components to alloc buffer */ + if ((ncomp = nfs4_create_components(path, NULL)) == 0) { + pathname->pathname4_val = NULL; + pathname->pathname4_len = 0; + return (0); + } + comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP); + + /* copy components into allocated buffer */ + ncomp = nfs4_create_components(path, comp4); + + pathname->pathname4_val = comp4; + pathname->pathname4_len = ncomp; + + return (ncomp); +} + +#define xdr_fs_locations4 xdr_fattr4_fs_locations + +fs_locations4 * +fetch_referral(vnode_t *vp, cred_t *cr) +{ + nvlist_t *nvl; + char *stype, *sdata; + fs_locations4 *result; + char buf[1024]; + size_t bufsize; + XDR xdr; + int err; + + /* + * Check attrs to ensure it's a reparse point + */ + if (vn_is_reparse(vp, cr, NULL) == B_FALSE) + return (NULL); + + /* + * Look for an NFS record and get the type and data + */ + if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0) + return (NULL); + + /* + * With the type and data, upcall to get the referral + */ + bufsize = sizeof (buf); + bzero(buf, sizeof (buf)); + err = reparse_kderef((const char *)stype, (const char *)sdata, + buf, &bufsize); + reparse_free(nvl); + + DTRACE_PROBE4(nfs4serv__func__referral__upcall, + char *, stype, char *, sdata, char *, buf, int, err); + if (err) { + cmn_err(CE_NOTE, + "reparsed daemon not running: unable to get referral (%d)", + err); + return (NULL); + } + + /* + * We get an XDR'ed record back from the kderef call + */ + xdrmem_create(&xdr, buf, bufsize, XDR_DECODE); + result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP); + err = xdr_fs_locations4(&xdr, result); + XDR_DESTROY(&xdr); + if (err != TRUE) { + DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail, + int, err); + return (NULL); + 
} + + /* + * Look at path to recover fs_root, ignoring the leading '/' + */ + (void) make_pathname4(vp->v_path, &result->fs_root); + + return (result); +} + +char * +build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz) +{ + fs_locations4 *fsl; + fs_location4 *fs; + char *server, *path, *symbuf; + static char *prefix = "/net/"; + int i, size, npaths; + uint_t len; + + /* Get the referral */ + if ((fsl = fetch_referral(vp, cr)) == NULL) + return (NULL); + + /* Deal with only the first location and first server */ + fs = &fsl->locations_val[0]; + server = utf8_to_str(&fs->server_val[0], &len, NULL); + if (server == NULL) { + rfs4_free_fs_locations4(fsl); + kmem_free(fsl, sizeof (fs_locations4)); + return (NULL); + } + + /* Figure out size for "/net/" + host + /path/path/path + NULL */ + size = strlen(prefix) + len; + for (i = 0; i < fs->rootpath.pathname4_len; i++) + size += fs->rootpath.pathname4_val[i].utf8string_len + 1; + + /* Allocate the symlink buffer and fill it */ + symbuf = kmem_zalloc(size, KM_SLEEP); + (void) strcat(symbuf, prefix); + (void) strcat(symbuf, server); + kmem_free(server, len); + + npaths = 0; + for (i = 0; i < fs->rootpath.pathname4_len; i++) { + path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL); + if (path == NULL) + continue; + (void) strcat(symbuf, "/"); + (void) strcat(symbuf, path); + npaths++; + kmem_free(path, len); + } + + rfs4_free_fs_locations4(fsl); + kmem_free(fsl, sizeof (fs_locations4)); + + if (strsz != NULL) + *strsz = size; + return (symbuf); +} + +/* + * Check to see if we have a downrev Solaris client, so that we + * can send it a symlink instead of a referral. 
+ */ +int +client_is_downrev(struct svc_req *req) +{ + struct sockaddr *ca; + rfs4_clntip_t *ci; + bool_t create = FALSE; + int is_downrev; + + ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; + ASSERT(ca); + ci = rfs4_find_clntip(ca, &create); + if (ci == NULL) + return (0); + is_downrev = ci->ri_no_referrals; + rfs4_dbe_rele(ci->ri_dbe); + return (is_downrev); +} diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c index 729718b5b9..c1f64d69bc 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c @@ -29,6 +29,9 @@ #include <nfs/export.h> #include <nfs/nfs4.h> #include <sys/ddi.h> +#include <sys/door.h> +#include <sys/sdt.h> +#include <nfs/nfssys.h> void rfs4_init_compound_state(struct compound_state *); @@ -141,6 +144,7 @@ rfs4_attr_init() sarg.flag = 0; sarg.rdattr_error = NFS4_OK; sarg.rdattr_error_req = FALSE; + sarg.is_referral = B_FALSE; rfs4_ntov_init(); @@ -634,7 +638,10 @@ rfs4_fattr4_fsid(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, error = EINVAL; break; /* this attr is supported */ case NFS4ATTR_GETIT: - if (sarg->cs->exi->exi_volatile_dev) { + if (sarg->is_referral) { + na->fsid.major = 1; + na->fsid.minor = 0; + } else if (sarg->cs->exi->exi_volatile_dev) { pmaj[0] = sarg->cs->exi->exi_fsid.val[0]; pmaj[1] = sarg->cs->exi->exi_fsid.val[1]; na->fsid.minor = 0; @@ -647,7 +654,11 @@ rfs4_fattr4_fsid(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, error = EINVAL; break; case NFS4ATTR_VERIT: - if (sarg->cs->exi->exi_volatile_dev) { + if (sarg->is_referral) { + if (na->fsid.major != 1 || + na->fsid.minor != 0) + error = -1; + } else if (sarg->cs->exi->exi_volatile_dev) { if (pmaj[0] != sarg->cs->exi->exi_fsid.val[0] || pmaj[1] != sarg->cs->exi->exi_fsid.val[1] || na->fsid.minor != 0) @@ -1495,12 +1506,109 @@ rfs4_fattr4_files_total(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, return (error); } +static void 
+rfs4_free_pathname4(pathname4 *pn4) +{ + int i, len; + utf8string *utf8s; + + if (pn4 == NULL || (len = pn4->pathname4_len) == 0 || + (utf8s = pn4->pathname4_val) == NULL) + return; + + for (i = 0; i < len; i++, utf8s++) { + if (utf8s->utf8string_val == NULL || + utf8s->utf8string_len == 0) + continue; + + kmem_free(utf8s->utf8string_val, utf8s->utf8string_len); + utf8s->utf8string_val = NULL; + } + + kmem_free(pn4->pathname4_val, + sizeof (utf8string) * pn4->pathname4_len); + pn4->pathname4_val = 0; +} + +static void +rfs4_free_fs_location4(fs_location4 *fsl4) +{ + if (fsl4 == NULL) + return; + + rfs4_free_pathname4((pathname4 *)&fsl4->server_len); + rfs4_free_pathname4(&fsl4->rootpath); +} + +void +rfs4_free_fs_locations4(fs_locations4 *fsls4) +{ + int i, len; + fs_location4 *fsl4; + + if (fsls4 == NULL) + return; + + /* free fs_root */ + rfs4_free_pathname4(&fsls4->fs_root); + + if ((len = fsls4->locations_len) == 0 || + (fsl4 = fsls4->locations_val) == NULL) + return; + + /* free fs_location4 */ + for (i = 0; i < len; i++) { + rfs4_free_fs_location4(fsl4); + fsl4++; + } + + kmem_free(fsls4->locations_val, sizeof (fs_location4) * len); + fsls4->locations_val = NULL; +} + /* ARGSUSED */ static int rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, union nfs4_attr_u *na) { - return (ENOTSUP); + int error = 0; + fs_locations4 *fsl; + + if (RFS4_MANDATTR_ONLY) + return (ENOTSUP); + + switch (cmd) { + case NFS4ATTR_SUPPORTED: + if (sarg->op == NFS4ATTR_SETIT || sarg->op == NFS4ATTR_VERIT) + error = EINVAL; + break; /* this attr is supported */ + + case NFS4ATTR_GETIT: + fsl = fetch_referral(sarg->cs->vp, sarg->cs->cr); + if (fsl == NULL) + error = EINVAL; + else { + na->fs_locations = *fsl; + kmem_free(fsl, sizeof (fs_locations4)); + } + global_svstat_ptr[4][NFS_REFERRALS].value.ui64++; + break; + + case NFS4ATTR_FREEIT: + if (sarg->op == NFS4ATTR_SETIT || sarg->op == NFS4ATTR_VERIT) + error = EINVAL; + 
rfs4_free_fs_locations4(&na->fs_locations); + break; + + case NFS4ATTR_SETIT: + case NFS4ATTR_VERIT: + /* + * read-only attr + */ + error = EINVAL; + break; + } + return (error); } /* ARGSUSED */ diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c index debdacc1cd..3069a98835 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c @@ -97,7 +97,6 @@ static nfs_ftype4 vt_to_nf4[] = { 0, NF4REG, NF4DIR, NF4BLK, NF4CHR, NF4LNK, NF4FIFO, 0, 0, NF4SOCK, 0 }; - int nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp, struct exportinfo **exi, struct svc_req *req, @@ -117,8 +116,20 @@ nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp, NULL, NULL, NULL)) return (error); + /* + * If the directory is a referral point, don't return the + * attrs, instead set rdattr_error to MOVED. + */ + if (vn_is_nfs_reparse(vp, cs->cr) && !client_is_downrev(req)) { + VN_RELE(vp); + DTRACE_PROBE2(nfs4serv__func__referral__moved, + vnode_t *, vp, char *, "nfs4_readdir_getvp"); + return (NFS4ERR_MOVED); + } + /* Is this object mounted upon? 
*/ ismntpt = vn_ismntpt(vp); + /* * Nothing more to do if object is not a mount point or * a possible LOFS shadow of an LOFS mount (which won't @@ -141,6 +152,13 @@ nfs4_readdir_getvp(vnode_t *dvp, char *d_name, vnode_t **vpp, VN_RELE(pre_tvp); return (error); } + if (vn_is_nfs_reparse(vp, cs->cr)) { + VN_RELE(vp); + VN_RELE(pre_tvp); + DTRACE_PROBE2(nfs4serv__func__referral__moved, + vnode_t *, vp, char *, "nfs4_readdir_getvp"); + return (NFS4ERR_MOVED); + } } bzero(&fid, sizeof (fid)); @@ -818,9 +836,18 @@ reencode_attrs: va.va_mask = AT_ALL; rddirattr_error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL); - if (rddirattr_error) + if (rddirattr_error) { ae = ar & (FATTR4_RDATTR_ERROR_MASK | FATTR4_MOUNTED_ON_FILEID_MASK); + } else { + /* + * We may lie about the object + * type for a referral + */ + if (vn_is_nfs_reparse(vp, cs->cr) && + client_is_downrev(req)) + va.va_type = VLNK; + } } } diff --git a/usr/src/uts/common/fs/nfs/nfs4_state.c b/usr/src/uts/common/fs/nfs/nfs4_state.c index ef0f2c800b..0659e8c253 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_state.c +++ b/usr/src/uts/common/fs/nfs/nfs4_state.c @@ -288,6 +288,8 @@ rfs4_table_t *rfs4_client_tab; static rfs4_index_t *rfs4_clientid_idx; static rfs4_index_t *rfs4_nfsclnt_idx; +static rfs4_table_t *rfs4_clntip_tab; +static rfs4_index_t *rfs4_clntip_idx; static rfs4_table_t *rfs4_openowner_tab; static rfs4_index_t *rfs4_openowner_idx; static rfs4_table_t *rfs4_state_tab; @@ -330,6 +332,7 @@ static rfs4_index_t *rfs4_deleg_state_idx; static time_t rfs4_client_cache_time = 0; +static time_t rfs4_clntip_cache_time = 0; static time_t rfs4_openowner_cache_time = 0; static time_t rfs4_state_cache_time = 0; static time_t rfs4_lo_state_cache_time = 0; @@ -348,6 +351,12 @@ static void *clientid_mkkey(rfs4_entry_t); static uint32_t nfsclnt_hash(void *); static bool_t nfsclnt_compare(rfs4_entry_t, void *); static void *nfsclnt_mkkey(rfs4_entry_t); +static bool_t rfs4_clntip_expiry(rfs4_entry_t); +static void 
rfs4_clntip_destroy(rfs4_entry_t); +static bool_t rfs4_clntip_create(rfs4_entry_t, void *); +static uint32_t clntip_hash(void *); +static bool_t clntip_compare(rfs4_entry_t, void *); +static void *clntip_mkkey(rfs4_entry_t); static bool_t rfs4_openowner_create(rfs4_entry_t, void *); static void rfs4_openowner_destroy(rfs4_entry_t); static bool_t rfs4_openowner_expiry(rfs4_entry_t); @@ -883,7 +892,7 @@ rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip) * the server-generated short-hand clientid. */ void -rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req) +rfs4_ss_clid(rfs4_client_t *cp) { const char *kinet_ntop6(uchar_t *, char *, size_t); char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN]; @@ -896,19 +905,12 @@ rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req) buf[0] = 0; - - ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; - if (ca == NULL) { - return; - } + ca = (struct sockaddr *)&cp->rc_addr; /* * Convert the caller's IP address to a dotted string */ if (ca->sa_family == AF_INET) { - - bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->rc_addr, - sizeof (struct sockaddr_in)); b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr; (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF); @@ -916,8 +918,6 @@ rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req) struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)ca; - bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->rc_addr, - sizeof (struct sockaddr_in6)); (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, buf, INET6_ADDRSTRLEN); } @@ -1257,6 +1257,22 @@ rfs4_state_init() clientid_compare, clientid_mkkey, FALSE); + rfs4_clntip_cache_time = 86400 * 365; /* about a year */ + rfs4_clntip_tab = rfs4_table_create(rfs4_server_state, + "ClntIP", + rfs4_clntip_cache_time, + 1, + rfs4_clntip_create, + rfs4_clntip_destroy, + rfs4_clntip_expiry, + sizeof (rfs4_clntip_t), + TABSIZE, + MAXTABSZ, 100); + rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab, + "client_ip", 
clntip_hash, + clntip_compare, clntip_mkkey, + TRUE); + rfs4_openowner_cache_time *= rfs4_lease_time; rfs4_openowner_tab = rfs4_table_create(rfs4_server_state, "OpenOwner", @@ -1649,6 +1665,7 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) { rfs4_client_t *cp = (rfs4_client_t *)u_entry; nfs_client_id4 *client = (nfs_client_id4 *)arg; + struct sockaddr *ca; cid *cidp; scid_confirm_verf *scvp; @@ -1667,6 +1684,14 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len); cp->rc_nfs_client.verifier = client->verifier; + /* Copy client's IP address */ + ca = client->cl_addr; + if (ca->sa_family == AF_INET) + bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in)); + else if (ca->sa_family == AF_INET6) + bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6)); + cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr; + /* Init the value for the SETCLIENTID_CONFIRM verifier */ scvp = (scid_confirm_verf *)&cp->rc_confirm_verf; scvp->cv_impl.c_id = cidp->impl_id.c_id; @@ -1781,6 +1806,119 @@ rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed) } } +static uint32_t +clntip_hash(void *key) +{ + struct sockaddr *addr = key; + int i, len = 0; + uint32_t hash = 0; + + if (addr->sa_family == AF_INET) + len = sizeof (struct sockaddr_in); + else if (addr->sa_family == AF_INET6) + len = sizeof (struct sockaddr_in6); + + for (i = 0; i < len; i++) { + hash <<= 1; + hash += (uint_t)(((char *)addr)[i]); + } + return (hash); +} + +static bool_t +clntip_compare(rfs4_entry_t entry, void *key) +{ + rfs4_clntip_t *cp = (rfs4_clntip_t *)entry; + struct sockaddr *addr = key; + int len = 0; + + if (addr->sa_family == AF_INET) + len = sizeof (struct sockaddr_in); + else if (addr->sa_family == AF_INET6) + len = sizeof (struct sockaddr_in6); + else + return (0); + + return (bcmp(&cp->ri_addr, addr, len) == 0); +} + +static void * +clntip_mkkey(rfs4_entry_t entry) +{ + rfs4_clntip_t *cp = (rfs4_clntip_t *)entry; + + 
return (&cp->ri_addr); +} + +static bool_t +rfs4_clntip_expiry(rfs4_entry_t u_entry) +{ + rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry; + + if (rfs4_dbe_is_invalid(cp->ri_dbe)) + return (TRUE); + return (FALSE); +} + +/* ARGSUSED */ +static void +rfs4_clntip_destroy(rfs4_entry_t u_entry) +{ +} + +static bool_t +rfs4_clntip_create(rfs4_entry_t u_entry, void *arg) +{ + rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry; + struct sockaddr *ca = (struct sockaddr *)arg; + + /* Copy client's IP address */ + if (ca->sa_family == AF_INET) + bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in)); + else if (ca->sa_family == AF_INET6) + bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6)); + else + return (FALSE); + cp->ri_no_referrals = 1; + + return (TRUE); +} + +rfs4_clntip_t * +rfs4_find_clntip(struct sockaddr *addr, bool_t *create) +{ + rfs4_clntip_t *cp; + + rw_enter(&rfs4_findclient_lock, RW_READER); + + cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr, + create, addr, RFS4_DBS_VALID); + + rw_exit(&rfs4_findclient_lock); + + return (cp); +} + +void +rfs4_invalidate_clntip(struct sockaddr *addr) +{ + rfs4_clntip_t *cp; + bool_t create = FALSE; + + rw_enter(&rfs4_findclient_lock, RW_READER); + + cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr, + &create, NULL, RFS4_DBS_VALID); + if (cp == NULL) { + rw_exit(&rfs4_findclient_lock); + return; + } + rfs4_dbe_invalidate(cp->ri_dbe); + rfs4_dbe_rele(cp->ri_dbe); + + rw_exit(&rfs4_findclient_lock); +} + bool_t rfs4_lease_expired(rfs4_client_t *cp) { diff --git a/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c index 37bc502b0b..40d9236765 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c @@ -65,6 +65,7 @@ #include <sys/list.h> #include <sys/stat.h> #include <sys/mntent.h> +#include <sys/priv.h> #include <rpc/types.h> #include <rpc/auth.h> @@ -78,6 +79,8 @@ #include <nfs/nfs4_kprot.h> #include <nfs/rnode4.h> #include 
<nfs/nfs4_clnt.h> +#include <nfs/nfsid_map.h> +#include <nfs/nfs4_idmap_impl.h> #include <vm/hat.h> #include <vm/as.h> @@ -97,6 +100,9 @@ #include <sys/priv_names.h> +extern zone_key_t nfs4clnt_zone_key; +extern zone_key_t nfsidmap_zone_key; + /* * The automatic unmounter thread stuff! */ @@ -202,13 +208,16 @@ extern int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *); static int nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **); static int nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **, cred_t *, vnode_t **); -static domount_args_t *nfs4_trigger_domount_args_create(vnode_t *); +static domount_args_t *nfs4_trigger_domount_args_create(vnode_t *, cred_t *); static void nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp); -static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *); +static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *, + cred_t *); static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *); static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *, servinfo4_t *); +static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *, + cred_t *); static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *, ephemeral_servinfo_t *); static void nfs4_trigger_nargs_destroy(struct nfs_args *); @@ -216,10 +225,11 @@ static char *nfs4_trigger_create_mntopts(vfs_t *); static void nfs4_trigger_destroy_mntopts(char *); static int nfs4_trigger_add_mntopt(char *, char *, vfs_t *); static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int); +static enum clnt_stat nfs4_ping_server_common(struct knetconfig *, + struct netbuf *, int); extern int umount2_engine(vfs_t *, int, cred_t *, int); - vnodeops_t *nfs4_trigger_vnodeops; /* @@ -372,12 +382,46 @@ nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) return (VOP_OPEN(vpp, flag, cr, ct)); } +void +nfs4_fake_attrs(vnode_t *vp, struct vattr *vap) +{ + 
uint_t mask; + timespec_t now; + + /* + * Set some attributes here for referrals. + */ + mask = vap->va_mask; + bzero(vap, sizeof (struct vattr)); + vap->va_mask = mask; + vap->va_uid = 0; + vap->va_gid = 0; + vap->va_nlink = 1; + vap->va_size = 1; + gethrestime(&now); + vap->va_atime = now; + vap->va_mtime = now; + vap->va_ctime = now; + vap->va_type = VDIR; + vap->va_mode = 0555; + vap->va_fsid = vp->v_vfsp->vfs_dev; + vap->va_rdev = 0; + vap->va_blksize = MAXBSIZE; + vap->va_nblocks = 1; + vap->va_seq = 0; +} + /* * For the majority of cases, nfs4_trigger_getattr() will not trigger * a mount. However, if ATTR_TRIGGER is set, we are being informed * that we need to force the mount before we attempt to determine * the attributes. The intent is an atomic operation for security * testing. + * + * If we're not triggering a mount, we can still inquire about the + * actual attributes from the server in the mirror mount case, + * and will return manufactured attributes for a referral (see + * the 'create' branch of find_referral_stubvp()). */ static int nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, @@ -394,8 +438,15 @@ nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, error = VOP_GETATTR(newvp, vap, flags, cr, ct); VN_RELE(newvp); - } else { + + } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) { + error = nfs4_getattr(vp, vap, flags, cr, ct); + + } else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) { + + nfs4_fake_attrs(vp, vap); + error = 0; } return (error); @@ -446,17 +497,19 @@ nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, ASSERT(RP_ISSTUB(drp)); - /* for now, we only support mirror-mounts */ - ASSERT(RP_ISSTUB_MIRRORMOUNT(drp)); - /* * It's not legal to lookup ".." for an fs root, so we mustn't pass * that up. Instead, pass onto the regular op, regardless of whether * we've triggered a mount. 
*/ if (strcmp(nm, "..") == 0) - return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, - ct, deflags, rpnp)); + if (RP_ISSTUB_MIRRORMOUNT(drp)) { + return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, + ct, deflags, rpnp)); + } else if (RP_ISSTUB_REFERRAL(drp)) { + /* Return the parent vnode */ + return (vtodv(dvp, vpp, cr, TRUE)); + } error = nfs4_trigger_mount(dvp, cr, &newdvp); if (error) @@ -672,7 +725,7 @@ nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp, } /* - * Mount upon a trigger vnode; for mirror-mounts, etc. + * Mount upon a trigger vnode; for mirror-mounts, referrals, etc. * * The mount may have already occurred, via another thread. If not, * assemble the location information - which may require fetching - and @@ -706,9 +759,6 @@ nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp) ASSERT(RP_ISSTUB(rp)); - /* for now, we only support mirror-mounts */ - ASSERT(RP_ISSTUB_MIRRORMOUNT(rp)); - *newvpp = NULL; /* @@ -782,7 +832,7 @@ nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp) must_unlock = TRUE; - dma = nfs4_trigger_domount_args_create(vp); + dma = nfs4_trigger_domount_args_create(vp, cr); if (dma == NULL) { error = EINVAL; goto done; @@ -801,10 +851,15 @@ nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp) } crset_zone_privall(mcred); + if (is_system_labeled()) + (void) setpflags(NET_MAC_AWARE, 1, mcred); error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp); nfs4_trigger_domount_args_destroy(dma, vp); + DTRACE_PROBE2(nfs4clnt__func__referral__mount, + vnode_t *, vp, int, error); + crfree(mcred); done: @@ -812,9 +867,20 @@ done: if (must_unlock) { mutex_enter(&net->net_cnt_lock); net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING; + + /* + * REFCNT: If we are the root of the tree, then we need + * to keep a reference because we malloced the tree and + * this is where we tied it to our mntinfo. 
+ * + * If we are not the root of the tree, then our tie to + * the mntinfo occurred elsewhere and we need to + * decrement the reference to the tree. + */ if (is_building) net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING; - nfs4_ephemeral_tree_decr(net); + else + nfs4_ephemeral_tree_decr(net); mutex_exit(&net->net_cnt_lock); mutex_exit(&net->net_tree_lock); @@ -830,7 +896,7 @@ done: * Collect together both the generic & mount-type specific args. */ static domount_args_t * -nfs4_trigger_domount_args_create(vnode_t *vp) +nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr) { int nointr; char *hostlist; @@ -848,7 +914,7 @@ nfs4_trigger_domount_args_create(vnode_t *vp) /* check if the current server is responding */ status = nfs4_trigger_ping_server(svp, nointr); if (status == RPC_SUCCESS) { - esi_first = nfs4_trigger_esi_create(vp, svp); + esi_first = nfs4_trigger_esi_create(vp, svp, cr); if (esi_first == NULL) { kmem_free(hostlist, MAXPATHLEN); return (NULL); @@ -924,7 +990,7 @@ nfs4_trigger_domount_args_create(vnode_t *vp) if (status != RPC_SUCCESS) continue; - esi = nfs4_trigger_esi_create(vp, svp); + esi = nfs4_trigger_esi_create(vp, svp, cr); if (esi == NULL) continue; @@ -1006,7 +1072,7 @@ nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp) * types of ephemeral mount, the way we gather its contents differs. 
*/ static ephemeral_servinfo_t * -nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp) +nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr) { ephemeral_servinfo_t *esi; rnode4_t *rp = VTOR4(vp); @@ -1016,12 +1082,10 @@ nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp) /* Call the ephemeral type-specific routine */ if (RP_ISSTUB_MIRRORMOUNT(rp)) esi = nfs4_trigger_esi_create_mirrormount(vp, svp); + else if (RP_ISSTUB_REFERRAL(rp)) + esi = nfs4_trigger_esi_create_referral(vp, cr); else esi = NULL; - - /* for now, we only support mirror-mounts */ - ASSERT(esi != NULL); - return (esi); } @@ -1032,9 +1096,6 @@ nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp) ASSERT(RP_ISSTUB(rp)); - /* for now, we only support mirror-mounts */ - ASSERT(RP_ISSTUB_MIRRORMOUNT(rp)); - /* Currently, no need for an ephemeral type-specific routine */ /* @@ -1051,6 +1112,10 @@ nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp) * in which case it should be moved to nfs4_trigger_esi_create(), or a * common function called. 
+ */ + +/* + * Mirror mounts case - should have all data available + */ static ephemeral_servinfo_t * nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp) { @@ -1149,9 +1214,12 @@ nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp) stubpath += 1; /* for nfs_args->fh */ - esi->esi_path_len = strlen(svp->sv_path) + strlen(stubpath) + 1; + esi->esi_path_len = strlen(stubpath) + 1; + if (strcmp(svp->sv_path, "/") != 0) + esi->esi_path_len += strlen(svp->sv_path); esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP); - (void) strcat(esi->esi_path, svp->sv_path); + if (strcmp(svp->sv_path, "/") != 0) + (void) strcat(esi->esi_path, svp->sv_path); (void) strcat(esi->esi_path, stubpath); stubpath -= 1; @@ -1164,6 +1232,592 @@ nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp) } /* + * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to + * get network information required to do the mount call. + */ +int +nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp) +{ + door_arg_t door_args; + door_handle_t dh; + XDR xdr; + refd_door_args_t *xdr_argsp; + refd_door_res_t *orig_resp; + k_sigset_t smask; + int xdr_len = 0; + int res_len = 16; /* length of an IP address */ + int orig_reslen = res_len; + int error = 0; + struct nfsidmap_globals *nig; + + if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) + return (ECONNREFUSED); + + nig = zone_getspecific(nfsidmap_zone_key, nfs_zone()); + ASSERT(nig != NULL); + + mutex_enter(&nig->nfsidmap_daemon_lock); + dh = nig->nfsidmap_daemon_dh; + if (dh == NULL) { + mutex_exit(&nig->nfsidmap_daemon_lock); + cmn_err(CE_NOTE, + "nfs4_callmapid: nfsmapid daemon not " \ + "running unable to resolve host name\n"); + return (EINVAL); + } + door_ki_hold(dh); + mutex_exit(&nig->nfsidmap_daemon_lock); + + xdr_len = xdr_sizeof(&(xdr_utf8string), server); + + xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP); + xdr_argsp->xdr_len = xdr_len; + xdr_argsp->cmd 
= NFSMAPID_SRV_NETINFO; + + xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg, + xdr_len, XDR_ENCODE); + + if (!xdr_utf8string(&xdr, server)) { + kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp)); + door_ki_rele(dh); + return (1); + } + + if (orig_reslen) + orig_resp = kmem_alloc(orig_reslen, KM_SLEEP); + + door_args.data_ptr = (char *)xdr_argsp; + door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len; + door_args.desc_ptr = NULL; + door_args.desc_num = 0; + door_args.rbuf = orig_resp ? (char *)orig_resp : NULL; + door_args.rsize = res_len; + + sigintr(&smask, 1); + error = door_ki_upcall(dh, &door_args); + sigunintr(&smask); + + door_ki_rele(dh); + + kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp)); + if (error) { + kmem_free(orig_resp, orig_reslen); + /* + * There is no door to connect to. The referral daemon + * must not be running yet. + */ + cmn_err(CE_WARN, + "nfsmapid not running cannot resolve host name"); + goto out; + } + + /* + * If the results buffer passed back is not the same as the + * one that was sent, free the old buffer and use the new one. 
+ */ + if (orig_resp && orig_reslen) { + refd_door_res_t *door_resp; + + door_resp = (refd_door_res_t *)door_args.rbuf; + if ((void *)door_args.rbuf != orig_resp) + kmem_free(orig_resp, orig_reslen); + if (door_resp->res_status == 0) { + xdrmem_create(&xdr, (char *)&door_resp->xdr_res, + door_resp->xdr_len, XDR_DECODE); + bzero(resp, sizeof (struct nfs_fsl_info)); + if (!xdr_nfs_fsl_info(&xdr, resp)) { + DTRACE_PROBE2( + nfs4clnt__debug__referral__upcall__xdrfail, + struct nfs_fsl_info *, resp, + char *, "nfs4_callmapid"); + error = EINVAL; + } + } else { + DTRACE_PROBE2( + nfs4clnt__debug__referral__upcall__badstatus, + int, door_resp->res_status, + char *, "nfs4_callmapid"); + error = door_resp->res_status; + } + kmem_free(door_args.rbuf, door_args.rsize); + } +out: + DTRACE_PROBE2(nfs4clnt__func__referral__upcall, + char *, server, int, error); + return (error); +} + +/* + * Fetches the fs_locations attribute. Typically called + * from a Replication/Migration/Referrals/Mirror-mount context + * + * Fills in the attributes in garp. The caller is assumed + * to have allocated memory for garp. + * + * lock: if set, take the s_recovlock and mi_recovlock locks here + * before doing the rfs4call(). Otherwise the caller must + * already hold them. 
+ * + * Returns + * 1 for success + * 0 for failure + */ +int +nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm, + cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock) +{ + COMPOUND4args_clnt args; + COMPOUND4res_clnt res; + nfs_argop4 *argop; + int argoplist_size = 3 * sizeof (nfs_argop4); + nfs4_server_t *sp = NULL; + int doqueue = 1; + nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; + int retval = 1; + struct nfs4_clnt *nfscl; + + if (lock == TRUE) + (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); + else + ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || + nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); + + sp = find_nfs4_server(mi); + if (lock == TRUE) + nfs_rw_exit(&mi->mi_recovlock); + + if (sp != NULL) + mutex_exit(&sp->s_lock); + + if (lock == TRUE) { + if (sp != NULL) + (void) nfs_rw_enter_sig(&sp->s_recovlock, + RW_WRITER, 0); + (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); + } else { + if (sp != NULL) { + ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) || + nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER)); + } + } + + /* + * Do we want to do the setup for recovery here? + * + * We know that the server responded to a null ping a very + * short time ago, and we know that we intend to do a + * single stateless operation - we want to fetch attributes, + * so we know we can't encounter errors about state. If + * something goes wrong with the GETATTR, like not being + * able to get a response from the server or getting any + * kind of FH error, we should fail the mount. + * + * We may want to revisit this at a later time. + */ + argop = kmem_alloc(argoplist_size, KM_SLEEP); + + args.ctag = TAG_GETATTR_FSLOCATION; + /* PUTFH LOOKUP GETATTR */ + args.array_len = 3; + args.array = argop; + + /* 0. putfh file */ + argop[0].argop = OP_CPUTFH; + argop[0].nfs_argop4_u.opcputfh.sfh = sfh; + + /* 1.
lookup name, can't be dotdot */ + argop[1].argop = OP_CLOOKUP; + argop[1].nfs_argop4_u.opclookup.cname = nm; + + /* 2. file attrs */ + argop[2].argop = OP_GETATTR; + argop[2].nfs_argop4_u.opgetattr.attr_request = + FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK | + FATTR4_MOUNTED_ON_FILEID_MASK; + argop[2].nfs_argop4_u.opgetattr.mi = mi; + + rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); + + if (lock == TRUE) { + nfs_rw_exit(&mi->mi_recovlock); + if (sp != NULL) + nfs_rw_exit(&sp->s_recovlock); + } + + nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone()); + nfscl->nfscl_stat.referrals.value.ui64++; + DTRACE_PROBE3(nfs4clnt__func__referral__fsloc, + nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e); + + if (e.error != 0) { + if (sp != NULL) + nfs4_server_rele(sp); + kmem_free(argop, argoplist_size); + return (0); + } + + /* + * Check for all possible error conditions. + * For valid replies without an ops array or for illegal + * replies, return a failure. + */ + if (res.status != NFS4_OK || res.array_len < 3 || + res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) { + retval = 0; + goto exit; + } + + /* + * There isn't much value in putting the attributes + * in the attr cache since fs_locations4 aren't + * encountered very frequently, so just make them + * available to the caller. + */ + *garp = res.array[2].nfs_resop4_u.opgetattr.ga_res; + + DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc, + nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations"); + + /* No fs_locations? 
-- return a failure */ + if (garp->n4g_ext_res == NULL || + garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) { + retval = 0; + goto exit; + } + + if (!garp->n4g_fsid_valid) + retval = 0; + +exit: + if (retval == 0) { + /* the call was ok but failed validating the call results */ + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); + } else { + ASSERT(callres != NULL); + *callres = res; + } + + if (sp != NULL) + nfs4_server_rele(sp); + kmem_free(argop, argoplist_size); + return (retval); +} + +/* tunable to disable referral mounts */ +int nfs4_no_referrals = 0; + +/* + * Returns NULL if the vnode cannot be created or found. + */ +vnode_t * +find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr) +{ + nfs_fh4 *stub_fh, *dfh; + nfs4_sharedfh_t *sfhp; + char *newfhval; + vnode_t *vp = NULL; + fattr4_mounted_on_fileid mnt_on_fileid; + nfs4_ga_res_t garp; + mntinfo4_t *mi; + COMPOUND4res_clnt callres; + hrtime_t t; + + if (nfs4_no_referrals) + return (NULL); + + /* + * Get the mounted_on_fileid, unique on that server::fsid + */ + mi = VTOMI4(dvp); + if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr, + &garp, &callres, FALSE) == 0) + return (NULL); + mnt_on_fileid = garp.n4g_mon_fid; + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + + /* + * Build a fake filehandle from the dir FH and the mounted_on_fileid + */ + dfh = &VTOR4(dvp)->r_fh->sfh_fh; + stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP); + stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len + + sizeof (fattr4_mounted_on_fileid), KM_SLEEP); + newfhval = stub_fh->nfs_fh4_val; + + /* copy directory's file handle */ + bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len); + stub_fh->nfs_fh4_len = dfh->nfs_fh4_len; + newfhval = newfhval + dfh->nfs_fh4_len; + + /* Add mounted_on_fileid. 
Use bcopy to avoid alignment problem */ + bcopy((char *)&mnt_on_fileid, newfhval, + sizeof (fattr4_mounted_on_fileid)); + stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid); + + sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL); + kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len + + sizeof (fattr4_mounted_on_fileid)); + kmem_free(stub_fh, sizeof (nfs_fh4)); + if (sfhp == NULL) + return (NULL); + + t = gethrtime(); + garp.n4g_va.va_type = VDIR; + vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t, + cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp)); + + if (vp != NULL) + vp->v_type = VDIR; + + sfh4_rele(&sfhp); + return (vp); +} + +int +nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr) +{ + vnode_t *nvp; + rnode4_t *rp; + + if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL) + return (EINVAL); + + rp = VTOR4(nvp); + mutex_enter(&rp->r_statelock); + r4_stub_referral(rp); + mutex_exit(&rp->r_statelock); + dnlc_enter(dvp, nm, nvp); + + if (*vpp != NULL) + VN_RELE(*vpp); /* no longer need this vnode */ + + *vpp = nvp; + + return (0); +} + +/* + * Fetch the location information and resolve the new server. + * Caller needs to free up the XDR data which is returned. + * Input: mount info, shared filehandle, nodename + * Return: Index to the result or Error(-1) + * Output: FsLocations Info, Resolved Server Info. + */ +int +nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, + char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res, + struct nfs_fsl_info *fsloc) +{ + fs_location4 *fsp; + struct nfs_fsl_info nfsfsloc; + int ret, i, error; + nfs4_ga_res_t garp; + COMPOUND4res_clnt callres; + struct knetconfig *knc; + + ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE); + if (ret == 0) + return (-1); + + /* + * As a lame attempt at figuring out if we're + * handling a migration event or a referral, + * look for rnodes with this fsid in the rnode + * cache.
+ * + * If we can find one or more such rnodes, it + * means we're handling a migration event and + * we want to bail out in that case. + */ + if (r4find_by_fsid(mi, &garp.n4g_fsid)) { + DTRACE_PROBE3(nfs4clnt__debug__referral__migration, + mntinfo4_t *, mi, nfs4_ga_res_t *, &garp, + char *, "nfs4_process_referral"); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + return (-1); + } + + /* + * Find the first responsive server to mount. When we find + * one, fsp will point to it. + */ + for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) { + + fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i]; + if (fsp->server_len == 0 || fsp->server_val == NULL) + continue; + + error = nfs4_callmapid(fsp->server_val, &nfsfsloc); + if (error != 0) + continue; + + error = nfs4_ping_server_common(nfsfsloc.knconf, + nfsfsloc.addr, !(mi->mi_flags & MI4_INT)); + if (error == RPC_SUCCESS) + break; + + DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr, + sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf, + char *, "nfs4_process_referral"); + + (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); + } + knc = nfsfsloc.knconf; + if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) || + (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) { + DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc, + nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral"); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + return (-1); + } + + /* Send the results back */ + *fsloc = nfsfsloc; + *grp = garp; + *res = callres; + return (i); +} + +/* + * Referrals case - need to fetch referral data and then upcall to + * user-level to get complete mount data. 
+ */ +static ephemeral_servinfo_t * +nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr) +{ + struct knetconfig *sikncp, *svkncp; + struct netbuf *bufp; + ephemeral_servinfo_t *esi; + vnode_t *dvp; + rnode4_t *drp; + fs_location4 *fsp; + struct nfs_fsl_info nfsfsloc; + nfs4_ga_res_t garp; + char *p; + char fn[MAXNAMELEN]; + int i, index = -1; + mntinfo4_t *mi; + COMPOUND4res_clnt callres; + + /* + * If we're passed in a stub vnode that + * isn't a "referral" stub, bail out + * and return a failure + */ + if (!RP_ISSTUB_REFERRAL(VTOR4(vp))) + return (NULL); + + if (vtodv(vp, &dvp, CRED(), TRUE) != 0) + return (NULL); + + drp = VTOR4(dvp); + if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) { + VN_RELE(dvp); + return (NULL); + } + + if (vtoname(vp, fn, MAXNAMELEN) != 0) { + nfs_rw_exit(&drp->r_rwlock); + VN_RELE(dvp); + return (NULL); + } + + mi = VTOMI4(dvp); + index = nfs4_process_referral(mi, drp->r_fh, fn, cr, + &garp, &callres, &nfsfsloc); + nfs_rw_exit(&drp->r_rwlock); + VN_RELE(dvp); + if (index < 0) + return (NULL); + + fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index]; + esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP); + + /* initially set to be our type of ephemeral mount; may be added to */ + esi->esi_mount_flags = NFSMNT_REFERRAL; + + esi->esi_hostname = + kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP); + bcopy(fsp->server_val->utf8string_val, esi->esi_hostname, + fsp->server_val->utf8string_len); + esi->esi_hostname[fsp->server_val->utf8string_len] = '\0'; + + bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP); + bufp->len = nfsfsloc.addr->len; + bufp->maxlen = nfsfsloc.addr->maxlen; + bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP); + bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len); + esi->esi_addr = bufp; + + esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP); + sikncp = esi->esi_knconf; + + DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc, + struct nfs_fsl_info *, &nfsfsloc, + char *, 
"nfs4_trigger_esi_create_referral"); + + svkncp = nfsfsloc.knconf; + sikncp->knc_semantics = svkncp->knc_semantics; + sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + (void) strlcat((char *)sikncp->knc_protofmly, + (char *)svkncp->knc_protofmly, KNC_STRSIZE); + sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + (void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto, + KNC_STRSIZE); + sikncp->knc_rdev = svkncp->knc_rdev; + + DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf, + struct knetconfig *, sikncp, + char *, "nfs4_trigger_esi_create_referral"); + + esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP); + bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len); + esi->esi_syncaddr = NULL; + + esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + esi->esi_path_len = MAXPATHLEN; + *p++ = '/'; + for (i = 0; i < fsp->rootpath.pathname4_len; i++) { + component4 *comp; + + comp = &fsp->rootpath.pathname4_val[i]; + /* If no space, null the string and bail */ + if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN) + goto err; + bcopy(comp->utf8string_val, p, comp->utf8string_len); + p += comp->utf8string_len; + *p++ = '/'; + } + if (fsp->rootpath.pathname4_len != 0) + *(p - 1) = '\0'; + else + *p = '\0'; + p = esi->esi_path; + esi->esi_path = strdup(p); + esi->esi_path_len = strlen(p) + 1; + kmem_free(p, MAXPATHLEN); + + /* Allocated in nfs4_process_referral() */ + (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + + return (esi); +err: + kmem_free(esi->esi_path, esi->esi_path_len); + kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1); + kmem_free(esi->esi_addr->buf, esi->esi_addr->len); + kmem_free(esi->esi_addr, sizeof (struct netbuf)); + kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE); + kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE); + kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf)); + 
kmem_free(esi->esi_netname, nfsfsloc.netnm_len); + kmem_free(esi, sizeof (ephemeral_servinfo_t)); + (void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + return (NULL); +} + +/* * Assemble the args, and call the generic VFS mount function to * finally perform the ephemeral mount. */ @@ -1357,6 +2011,9 @@ nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp, nargs->acdirmin = HR2SEC(mi->mi_acdirmin); nargs->acdirmax = HR2SEC(mi->mi_acdirmax); + /* add any specific flags for this type of ephemeral mount */ + nargs->flags |= esi->esi_mount_flags; + if (mi->mi_flags & MI4_NOCTO) nargs->flags |= NFSMNT_NOCTO; if (mi->mi_flags & MI4_GRPID) @@ -1367,19 +2024,28 @@ nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp, nargs->flags |= NFSMNT_NOPRINT; if (mi->mi_flags & MI4_DIRECTIO) nargs->flags |= NFSMNT_DIRECTIO; - if (mi->mi_flags & MI4_PUBLIC) + if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT) nargs->flags |= NFSMNT_PUBLIC; - mutex_exit(&mi->mi_lock); + /* Do some referral-specific option tweaking */ + if (nargs->flags & NFSMNT_REFERRAL) { + nargs->flags &= ~NFSMNT_DORDMA; + nargs->flags |= NFSMNT_TRYRDMA; + } - /* add any specific flags for this type of ephemeral mount */ - nargs->flags |= esi->esi_mount_flags; + mutex_exit(&mi->mi_lock); /* * Security data & negotiation policy. * - * We need to preserve the parent mount's preference for security - * negotiation, translating SV4_TRYSECDEFAULT -> NFSMNT_SECDEFAULT. + * For mirror mounts, we need to preserve the parent mount's + * preference for security negotiation, translating SV4_TRYSECDEFAULT + * to NFSMNT_SECDEFAULT if present. + * + * For referrals, we always want security negotiation and will + * set NFSMNT_SECDEFAULT and we will not copy current secdata. + * The reason is that we can't negotiate down from a parent's + * Kerberos flavor to AUTH_SYS. 
* * If SV4_TRYSECDEFAULT is not set, that indicates that a specific * security flavour was requested, with data in sv_secdata, and that @@ -1395,8 +2061,16 @@ nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp, * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless, * we will set NFSMNT_SECDEFAULT, to enable negotiation. */ - if (svp->sv_flags & SV4_TRYSECDEFAULT) { - /* enable negotiation for ephemeral mount */ + if (nargs->flags & NFSMNT_REFERRAL) { + /* enable negotiation for referral mount */ + nargs->flags |= NFSMNT_SECDEFAULT; + secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP); + secdata->secmod = secdata->rpcflavor = AUTH_SYS; + secdata->data = NULL; + } + + else if (svp->sv_flags & SV4_TRYSECDEFAULT) { + /* enable negotiation for mirror mount */ nargs->flags |= NFSMNT_SECDEFAULT; /* @@ -1499,6 +2173,11 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp) return (EBUSY); } + /* + * We've just tied the mntinfo to the tree, so + * now we bump the refcnt and hold it there until + * this mntinfo is removed from the tree. 
+ */ nfs4_ephemeral_tree_hold(net); /* @@ -1515,7 +2194,6 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp) */ eph->ne_mount_to = ntg->ntg_mount_to; - mi->mi_flags |= MI4_EPHEMERAL; mi->mi_ephemeral = eph; /* @@ -1542,6 +2220,7 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp) mi->mi_flags &= ~MI4_EPHEMERAL; mi->mi_ephemeral = NULL; kmem_free(eph, sizeof (*eph)); + nfs4_ephemeral_tree_rele(net); rc = EBUSY; } else { if (prior->ne_child == NULL) { @@ -1581,8 +2260,6 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp) eph->ne_prior = NULL; } - nfs4_ephemeral_tree_rele(net); - mutex_exit(&mi->mi_lock); mutex_exit(&mi_parent->mi_lock); @@ -1758,15 +2435,13 @@ nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph, */ void nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock, - bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet) + nfs4_ephemeral_tree_t **pnet) { nfs4_ephemeral_tree_t *net = *pnet; if (*pmust_unlock) { mutex_enter(&net->net_cnt_lock); net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING; - if (*pmust_rele) - nfs4_ephemeral_tree_decr(net); mutex_exit(&net->net_cnt_lock); mutex_exit(&net->net_tree_lock); @@ -1783,7 +2458,7 @@ nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock, */ void nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock, - bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet) + nfs4_ephemeral_tree_t **pnet) { /* * Now we need to get rid of the ephemeral data if it exists. 
@@ -1798,6 +2473,7 @@ nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock, if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED)) nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral); + nfs4_ephemeral_tree_rele(*pnet); ASSERT(mi->mi_ephemeral != NULL); kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral)); @@ -1805,15 +2481,19 @@ nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock, } mutex_exit(&mi->mi_lock); - nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele, pnet); + nfs4_ephemeral_umount_unlock(pmust_unlock, pnet); } /* * Unmount an ephemeral node. + * + * Note that if this code fails, then it must unlock. + * + * If it succeeds, then the caller must be prepared to do so. */ int nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, - bool_t *pmust_unlock, bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet) + bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet) { int error = 0; nfs4_ephemeral_t *eph; @@ -1826,7 +2506,7 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, * Make sure to set the default state for cleaning * up the tree in the caller (and on the way out). */ - *pmust_unlock = *pmust_rele = FALSE; + *pmust_unlock = FALSE; /* * The active vnodes on this file system may be ephemeral @@ -1865,16 +2545,18 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED; is_derooting = (eph == NULL); + mutex_enter(&net->net_cnt_lock); + /* * If this is not recursion, then we need to - * grab a ref count. + * check to see if a harvester thread has + * already grabbed the lock. * - * But wait, we also do not want to do that - * if a harvester thread has already grabbed - * the lock. + * After we exit this branch, we may not + * blindly return, we need to jump to + * is_busy! 
*/ if (!is_recursed) { - mutex_enter(&net->net_cnt_lock); if (net->net_status & NFS4_EPHEMERAL_TREE_LOCKED) { /* @@ -1902,13 +2584,10 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, } was_locked = TRUE; - } else { - nfs4_ephemeral_tree_incr(net); - *pmust_rele = TRUE; } - - mutex_exit(&net->net_cnt_lock); } + + mutex_exit(&net->net_cnt_lock); mutex_exit(&mi->mi_lock); /* @@ -1936,9 +2615,7 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, if (net->net_status & (NFS4_EPHEMERAL_TREE_DEROOTING | NFS4_EPHEMERAL_TREE_INVALID)) { - nfs4_ephemeral_tree_decr(net); mutex_exit(&net->net_cnt_lock); - *pmust_rele = FALSE; goto is_busy; } mutex_exit(&net->net_cnt_lock); @@ -1974,10 +2651,8 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, if (net->net_status & NFS4_EPHEMERAL_TREE_INVALID || (!is_derooting && eph == NULL)) { - nfs4_ephemeral_tree_decr(net); mutex_exit(&net->net_cnt_lock); mutex_exit(&net->net_tree_lock); - *pmust_rele = FALSE; goto is_busy; } mutex_exit(&net->net_cnt_lock); @@ -2048,8 +2723,14 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, mutex_enter(&net->net_cnt_lock); net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING; net->net_status |= NFS4_EPHEMERAL_TREE_INVALID; - if (was_locked == FALSE) - nfs4_ephemeral_tree_decr(net); + DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting, + uint_t, net->net_refcnt); + + /* + * We will not finalize this node, so safe to + * release it. + */ + nfs4_ephemeral_tree_decr(net); mutex_exit(&net->net_cnt_lock); if (was_locked == FALSE) @@ -2057,8 +2738,8 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, /* * We have just blown away any notation of this - * tree being locked. We can't let the caller - * try to clean things up. + * tree being locked or having a refcnt. + * We can't let the caller try to clean things up. 
*/ *pmust_unlock = FALSE; @@ -2077,8 +2758,7 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr, is_busy: - nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele, - pnet); + nfs4_ephemeral_umount_unlock(pmust_unlock, pnet); return (error); } @@ -2314,19 +2994,18 @@ check_done: /* * At this point we are done processing this tree. * - * If the tree is invalid and we are the only reference + * If the tree is invalid and we were the only reference * to it, then we push it on the local linked list * to remove it at the end. We avoid that action now * to keep the tree processing going along at a fair clip. * - * Else, even if we are the only reference, we drop - * our hold on the current tree and allow it to be - * reused as needed. + * Else, even if we were the only reference, we + * allow it to be reused as needed. */ mutex_enter(&net->net_cnt_lock); - if (net->net_refcnt == 1 && + nfs4_ephemeral_tree_decr(net); + if (net->net_refcnt == 0 && net->net_status & NFS4_EPHEMERAL_TREE_INVALID) { - nfs4_ephemeral_tree_decr(net); net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED; mutex_exit(&net->net_cnt_lock); mutex_exit(&net->net_tree_lock); @@ -2341,7 +3020,6 @@ check_done: continue; } - nfs4_ephemeral_tree_decr(net); net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED; mutex_exit(&net->net_cnt_lock); mutex_exit(&net->net_tree_lock); @@ -2620,9 +3298,9 @@ nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp) } static enum clnt_stat -nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) +nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr) { - int retries, error; + int retries; uint_t max_msgsize; enum clnt_stat status; CLIENT *cl; @@ -2634,9 +3312,8 @@ nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) timeout.tv_sec = 2; timeout.tv_usec = 0; - error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, NFS_PROGRAM, - NFS_V4, max_msgsize, retries, CRED(), &cl); - if (error) + if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4, + 
max_msgsize, retries, CRED(), &cl) != 0) return (RPC_FAILED); if (nointr) @@ -2651,3 +3328,9 @@ nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) return (status); } + +static enum clnt_stat +nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) +{ + return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr)); +} diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c index 27261bf583..b94d41899d 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c @@ -59,6 +59,8 @@ static const struct clstat4 clstat4_tmpl = { { "calls", KSTAT_DATA_UINT64 }, { "badcalls", KSTAT_DATA_UINT64 }, + { "referrals", KSTAT_DATA_UINT64 }, + { "referlinks", KSTAT_DATA_UINT64 }, { "clgets", KSTAT_DATA_UINT64 }, { "cltoomany", KSTAT_DATA_UINT64 }, #ifdef DEBUG @@ -90,7 +92,7 @@ struct clstat4_debug clstat4_debug = { */ static list_t nfs4_clnt_list; static kmutex_t nfs4_clnt_list_lock; -static zone_key_t nfs4clnt_zone_key; +zone_key_t nfs4clnt_zone_key; static struct kmem_cache *chtab4_cache; @@ -1943,6 +1945,9 @@ again: * crossed an underlying server fs boundary. * * This stub will be for a mirror-mount. + * A referral would look like a boundary crossing + * as well, but would not be the same type of object, + * so we would expect to mark the object dead. * * See comment in r4_do_attrcache() for more details. 
*/ @@ -2109,7 +2114,8 @@ recov_retry: bool_t abort; abort = nfs4_start_recovery(&e, mi, - rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL); + rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, + NULL); if (abort) { nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE); @@ -2169,7 +2175,7 @@ recov_retry: abort = nfs4_start_recovery(&e, mi, rootvp, NULL, NULL, NULL, - OP_LOOKUP, NULL); + OP_LOOKUP, NULL, NULL, NULL); if (abort) { nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, @@ -2757,6 +2763,7 @@ clinit4_zone(zoneid_t zoneid) mutex_enter(&nfs4_clnt_list_lock); list_insert_head(&nfs4_clnt_list, nfscl); mutex_exit(&nfs4_clnt_list_lock); + return (nfscl); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c index 71bb6f7f2f..2ebd9d6fdd 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c @@ -76,6 +76,8 @@ #include <nfs/nfs4_clnt.h> #include <sys/fs/autofs.h> +#include <sys/sdt.h> + /* * Arguments passed to thread to free data structures from forced unmount. 
@@ -160,6 +162,14 @@ static void remove_mi(nfs4_server_t *, mntinfo4_t *); extern void nfs4_ephemeral_init(void); extern void nfs4_ephemeral_fini(void); +/* referral related routines */ +static servinfo4_t *copy_svp(servinfo4_t *); +static void free_knconf_contents(struct knetconfig *k); +static char *extract_referral_point(const char *, int); +static void setup_newsvpath(servinfo4_t *, int); +static void update_servinfo4(servinfo4_t *, fs_location4 *, + struct nfs_fsl_info *, char *, int); + /* * Initialize the vfs structure */ @@ -1270,7 +1280,7 @@ recov_retry: "getlinktext_otw: initiating recovery\n")); if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, - OP_READLINK, NULL) == FALSE) { + OP_READLINK, NULL, NULL, NULL) == FALSE) { nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, @@ -1457,6 +1467,198 @@ out: } /* + * This routine updates servinfo4 structure with the new referred server + * info. + * nfsfsloc has the location related information + * fsp has the hostname and pathname info. + * new path = pathname from referral + part of orig pathname(based on nth). 
+ */ +static void +update_servinfo4(servinfo4_t *svp, fs_location4 *fsp, + struct nfs_fsl_info *nfsfsloc, char *orig_path, int nth) +{ + struct knetconfig *knconf, *svknconf; + struct netbuf *saddr; + sec_data_t *secdata; + utf8string *host; + int i = 0, num_slashes = 0; + char *p, *spath, *op, *new_path; + + /* Update knconf */ + knconf = svp->sv_knconf; + free_knconf_contents(knconf); + bzero(knconf, sizeof (struct knetconfig)); + svknconf = nfsfsloc->knconf; + knconf->knc_semantics = svknconf->knc_semantics; + knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + knconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + knconf->knc_rdev = svknconf->knc_rdev; + bcopy(svknconf->knc_protofmly, knconf->knc_protofmly, KNC_STRSIZE); + bcopy(svknconf->knc_proto, knconf->knc_proto, KNC_STRSIZE); + + /* Update server address */ + saddr = &svp->sv_addr; + if (saddr->buf != NULL) + kmem_free(saddr->buf, saddr->maxlen); + saddr->buf = kmem_alloc(nfsfsloc->addr->maxlen, KM_SLEEP); + saddr->len = nfsfsloc->addr->len; + saddr->maxlen = nfsfsloc->addr->maxlen; + bcopy(nfsfsloc->addr->buf, saddr->buf, nfsfsloc->addr->len); + + /* Update server name */ + host = fsp->server_val; + kmem_free(svp->sv_hostname, svp->sv_hostnamelen); + svp->sv_hostname = kmem_zalloc(host->utf8string_len + 1, KM_SLEEP); + bcopy(host->utf8string_val, svp->sv_hostname, host->utf8string_len); + svp->sv_hostname[host->utf8string_len] = '\0'; + svp->sv_hostnamelen = host->utf8string_len + 1; + + /* + * Update server path. + * We need to setup proper path here. + * For ex., If we got a path name serv1:/rp/aaa/bbb + * where aaa is a referral and points to serv2:/rpool/aa + * we need to set the path to serv2:/rpool/aa/bbb + * The first part of this below code generates /rpool/aa + * and the second part appends /bbb to the server path. 
+ */ + spath = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + *p++ = '/'; + for (i = 0; i < fsp->rootpath.pathname4_len; i++) { + component4 *comp; + + comp = &fsp->rootpath.pathname4_val[i]; + /* If no space, null the string and bail */ + if ((p - spath) + comp->utf8string_len + 1 > MAXPATHLEN) { + p = spath + MAXPATHLEN - 1; + spath[0] = '\0'; + break; + } + bcopy(comp->utf8string_val, p, comp->utf8string_len); + p += comp->utf8string_len; + *p++ = '/'; + } + if (fsp->rootpath.pathname4_len != 0) + *(p - 1) = '\0'; + else + *p = '\0'; + p = spath; + + new_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) strlcpy(new_path, p, MAXPATHLEN); + kmem_free(p, MAXPATHLEN); + i = strlen(new_path); + + for (op = orig_path; *op; op++) { + if (*op == '/') + num_slashes++; + if (num_slashes == nth + 2) { + while (*op != '\0') { + new_path[i] = *op; + i++; + op++; + } + break; + } + } + new_path[i] = '\0'; + + kmem_free(svp->sv_path, svp->sv_pathlen); + svp->sv_pathlen = strlen(new_path) + 1; + svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); + bcopy(new_path, svp->sv_path, svp->sv_pathlen); + kmem_free(new_path, MAXPATHLEN); + + /* + * All the security data is specific to old server. + * Clean it up except secdata which deals with mount options. + * We need to inherit that data. Copy secdata into our new servinfo4. + */ + if (svp->sv_dhsec) { + sec_clnt_freeinfo(svp->sv_dhsec); + svp->sv_dhsec = NULL; + } + if (svp->sv_save_secinfo && + svp->sv_save_secinfo != svp->sv_secinfo) { + secinfo_free(svp->sv_save_secinfo); + svp->sv_save_secinfo = NULL; + } + if (svp->sv_secinfo) { + secinfo_free(svp->sv_secinfo); + svp->sv_secinfo = NULL; + } + svp->sv_currsec = NULL; + + secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP); + *secdata = *svp->sv_secdata; + secdata->data = NULL; + if (svp->sv_secdata) { + sec_clnt_freeinfo(svp->sv_secdata); + svp->sv_secdata = NULL; + } + svp->sv_secdata = secdata; +} + +/* + * Resolve a referral. 
The referral is in the n+1th component of + * svp->sv_path and has a parent nfs4 file handle "fh". + * Upon return, the sv_path will point to the new path that has referral + * component resolved to its referred path and part of original path. + * Hostname and other address information is also updated. + */ +int +resolve_referral(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, int nth, + nfs_fh4 *fh) +{ + nfs4_sharedfh_t *sfh; + struct nfs_fsl_info nfsfsloc; + nfs4_ga_res_t garp; + COMPOUND4res_clnt callres; + fs_location4 *fsp; + char *nm, *orig_path; + int orig_pathlen = 0, ret = -1, index; + + if (svp->sv_pathlen <= 0) + return (ret); + + (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); + orig_pathlen = svp->sv_pathlen; + orig_path = kmem_alloc(orig_pathlen, KM_SLEEP); + bcopy(svp->sv_path, orig_path, orig_pathlen); + nm = extract_referral_point(svp->sv_path, nth); + setup_newsvpath(svp, nth); + nfs_rw_exit(&svp->sv_lock); + + sfh = sfh4_get(fh, mi); + index = nfs4_process_referral(mi, sfh, nm, cr, + &garp, &callres, &nfsfsloc); + sfh4_rele(&sfh); + kmem_free(nm, MAXPATHLEN); + if (index < 0) { + kmem_free(orig_path, orig_pathlen); + return (index); + } + + fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index]; + (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); + update_servinfo4(svp, fsp, &nfsfsloc, orig_path, nth); + nfs_rw_exit(&svp->sv_lock); + + mutex_enter(&mi->mi_lock); + mi->mi_vfs_referral_loop_cnt++; + mutex_exit(&mi->mi_lock); + + ret = 0; +bad: + /* Free up XDR memory allocated in nfs4_process_referral() */ + xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc); + xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres); + kmem_free(orig_path, orig_pathlen); + + return (ret); +} + +/* * Get the root filehandle for the given filesystem and server, and update * svp. * @@ -1466,7 +1668,6 @@ out: * * Errors are returned by the nfs4_error_t parameter. 
*/ - static void nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, int flags, cred_t *cr, nfs4_error_t *ep) @@ -1498,7 +1699,14 @@ nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp, recov_state.rs_flags = 0; recov_state.rs_num_retry_despite_err = 0; + recov_retry: + if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) { + DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *, + mi, servinfo4_t *, svp, char *, "nfs4getfh_otw"); + nfs4_error_init(ep, EINVAL); + return; + } nfs4_error_zinit(ep); if (!recovery) { @@ -1599,7 +1807,7 @@ recov_retry: (CE_NOTE, "nfs4getfh_otw: initiating recovery\n")); abort = nfs4_start_recovery(ep, mi, NULL, - NULL, NULL, NULL, OP_GETFH, NULL); + NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL); if (!ep->error) { ep->error = geterrno4(res.status); (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -1628,7 +1836,8 @@ recov_retry: is_link_err: /* for non-recovery errors */ - if (res.status && res.status != NFS4ERR_SYMLINK) { + if (res.status && res.status != NFS4ERR_SYMLINK && + res.status != NFS4ERR_MOVED) { if (!recovery) { nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); @@ -1643,10 +1852,18 @@ is_link_err: * If any intermediate component in the path is a symbolic link, * resolve the symlink, then try mount again using the new path. */ - if (res.status == NFS4ERR_SYMLINK) { + if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) { int where; /* + * Need to call nfs4_end_op before resolve_sympath to avoid + * potential nfs4_start_op deadlock. + */ + if (!recovery) + nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, + needrecov); + + /* * This must be from OP_LOOKUP failure. The (cfh) for this * OP_LOOKUP is a symlink node. Found out where the * OP_GETFH is for the (cfh) that is a symlink node. 
@@ -1661,21 +1878,24 @@ is_link_err: where = res.array_len - 2; ASSERT(where > 0); - resop = &res.array[where - 1]; - ASSERT(resop->resop == OP_GETFH); - tmpfhp = &resop->nfs_resop4_u.opgetfh.object; - nthcomp = res.array_len/3 - 1; + if (res.status == NFS4ERR_SYMLINK) { - /* - * Need to call nfs4_end_op before resolve_sympath to avoid - * potential nfs4_start_op deadlock. - */ - if (!recovery) - nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, - needrecov); + resop = &res.array[where - 1]; + ASSERT(resop->resop == OP_GETFH); + tmpfhp = &resop->nfs_resop4_u.opgetfh.object; + nthcomp = res.array_len/3 - 1; + ep->error = resolve_sympath(mi, svp, nthcomp, + tmpfhp, cr, flags); - ep->error = resolve_sympath(mi, svp, nthcomp, tmpfhp, cr, - flags); + } else if (res.status == NFS4ERR_MOVED) { + + resop = &res.array[where - 2]; + ASSERT(resop->resop == OP_GETFH); + tmpfhp = &resop->nfs_resop4_u.opgetfh.object; + nthcomp = res.array_len/3 - 1; + ep->error = resolve_referral(mi, svp, cr, nthcomp, + tmpfhp); + } nfs4args_lookup_free(argop, num_argops); kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); @@ -1809,7 +2029,6 @@ is_link_err: garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK; nfs_rw_exit(&svp->sv_lock); - nfs4args_lookup_free(argop, num_argops); kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4)); (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -1817,6 +2036,128 @@ is_link_err: nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov); } +/* + * Save a copy of Servinfo4_t structure. + * We might need when there is a failure in getting file handle + * in case of a referral to replace servinfo4 struct and try again. 
+ */ +static struct servinfo4 * +copy_svp(servinfo4_t *nsvp) +{ + servinfo4_t *svp = NULL; + struct knetconfig *sknconf, *tknconf; + struct netbuf *saddr, *taddr; + + svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); + nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL); + svp->sv_flags = nsvp->sv_flags; + svp->sv_fsid = nsvp->sv_fsid; + svp->sv_hostnamelen = nsvp->sv_hostnamelen; + svp->sv_pathlen = nsvp->sv_pathlen; + svp->sv_supp_attrs = nsvp->sv_supp_attrs; + + svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP); + svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP); + bcopy(nsvp->sv_hostname, svp->sv_hostname, svp->sv_hostnamelen); + bcopy(nsvp->sv_path, svp->sv_path, svp->sv_pathlen); + + saddr = &nsvp->sv_addr; + taddr = &svp->sv_addr; + taddr->maxlen = saddr->maxlen; + taddr->len = saddr->len; + if (saddr->len > 0) { + taddr->buf = kmem_zalloc(saddr->maxlen, KM_SLEEP); + bcopy(saddr->buf, taddr->buf, saddr->len); + } + + svp->sv_knconf = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP); + sknconf = nsvp->sv_knconf; + tknconf = svp->sv_knconf; + tknconf->knc_semantics = sknconf->knc_semantics; + tknconf->knc_rdev = sknconf->knc_rdev; + if (sknconf->knc_proto != NULL) { + tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto, + KNC_STRSIZE); + } + if (sknconf->knc_protofmly != NULL) { + tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + bcopy(sknconf->knc_protofmly, (char *)tknconf->knc_protofmly, + KNC_STRSIZE); + } + + if (nsvp->sv_origknconf != NULL) { + svp->sv_origknconf = kmem_zalloc(sizeof (struct knetconfig), + KM_SLEEP); + sknconf = nsvp->sv_origknconf; + tknconf = svp->sv_origknconf; + tknconf->knc_semantics = sknconf->knc_semantics; + tknconf->knc_rdev = sknconf->knc_rdev; + if (sknconf->knc_proto != NULL) { + tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); + bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto, + KNC_STRSIZE); + } + if (sknconf->knc_protofmly != 
NULL) { + tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, + KM_SLEEP); + bcopy(sknconf->knc_protofmly, + (char *)tknconf->knc_protofmly, KNC_STRSIZE); + } + } + + svp->sv_secdata = copy_sec_data(nsvp->sv_secdata); + svp->sv_dhsec = copy_sec_data(svp->sv_dhsec); + /* + * Rest of the security information is not copied as they are built + * with the information available from secdata and dhsec. + */ + svp->sv_next = NULL; + + return (svp); +} + +servinfo4_t * +restore_svp(mntinfo4_t *mi, servinfo4_t *svp, servinfo4_t *origsvp) +{ + servinfo4_t *srvnext, *tmpsrv; + + if (strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) { + /* + * Since the hostname changed, we must be dealing + * with a referral, and the lookup failed. We will + * restore the whole servinfo4_t to what it was before. + */ + srvnext = svp->sv_next; + svp->sv_next = NULL; + tmpsrv = copy_svp(origsvp); + sv4_free(svp); + svp = tmpsrv; + svp->sv_next = srvnext; + mutex_enter(&mi->mi_lock); + mi->mi_servers = svp; + mi->mi_curr_serv = svp; + mutex_exit(&mi->mi_lock); + + } else if (origsvp->sv_pathlen != svp->sv_pathlen) { + + /* + * For symlink case: restore original path because + * it might have contained symlinks that were + * expanded by nfsgetfh_otw before the failure occurred. 
+ */ + nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); + kmem_free(svp->sv_path, svp->sv_pathlen); + svp->sv_path = + kmem_alloc(origsvp->sv_pathlen, KM_SLEEP); + svp->sv_pathlen = origsvp->sv_pathlen; + bcopy(origsvp->sv_path, svp->sv_path, + origsvp->sv_pathlen); + nfs_rw_exit(&svp->sv_lock); + } + return (svp); +} + static ushort_t nfs4_max_threads = 8; /* max number of active async threads */ static uint_t nfs4_bsize = 32 * 1024; /* client `block' size */ static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */ @@ -1830,12 +2171,11 @@ static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO; void nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags) { - struct servinfo4 *svp; + struct servinfo4 *svp, *origsvp; vtype_t vtype; nfs_fh4 rootfh; int getfh_flags; - char *orig_sv_path; - int orig_sv_pathlen, num_retry; + int num_retry; mutex_enter(&mi->mi_lock); @@ -1861,9 +2201,7 @@ remap_retry: * to re-lookup everything and recover. */ (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); - orig_sv_pathlen = svp->sv_pathlen; - orig_sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); - bcopy(svp->sv_path, orig_sv_path, orig_sv_pathlen); + origsvp = copy_svp(svp); nfs_rw_exit(&svp->sv_lock); num_retry = nfs4_max_mount_retry; @@ -1882,24 +2220,13 @@ remap_retry: /* * For some reason, the mount compound failed. Before - * retrying, we need to restore the original sv_path - * because it might have contained symlinks that were - * expanded by nfsgetfh_otw before the failure occurred. - * replace current sv_path with orig sv_path -- just in case - * it changed due to embedded symlinks. + * retrying, we need to restore original conditions. 
*/ - (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); - if (orig_sv_pathlen != svp->sv_pathlen) { - kmem_free(svp->sv_path, svp->sv_pathlen); - svp->sv_path = kmem_alloc(orig_sv_pathlen, KM_SLEEP); - svp->sv_pathlen = orig_sv_pathlen; - } - bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); - nfs_rw_exit(&svp->sv_lock); + svp = restore_svp(mi, svp, origsvp); } while (num_retry-- > 0); - kmem_free(orig_sv_path, orig_sv_pathlen); + sv4_free(origsvp); if (ep->error != 0 || ep->stat != 0) { return; @@ -1940,7 +2267,7 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, dev_t nfs_dev; int error = 0; rnode4_t *rp; - int i; + int i, len; struct vattr va; vtype_t vtype = VNON; vtype_t tmp_vtype = VNON; @@ -1951,9 +2278,10 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, struct nfs_stats *nfsstatsp; nfs4_fname_t *mfname; nfs4_error_t e; - char *orig_sv_path; - int orig_sv_pathlen, num_retry, removed; + int num_retry, removed; cred_t *lcr = NULL, *tcr = cr; + struct servinfo4 *origsvp; + char *resource; nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone()); ASSERT(nfsstatsp != NULL); @@ -1980,6 +2308,8 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, mi->mi_flags |= MI4_PUBLIC; if (flags & NFSMNT_MIRRORMOUNT) mi->mi_flags |= MI4_MIRRORMOUNT; + if (flags & NFSMNT_REFERRAL) + mi->mi_flags |= MI4_REFERRAL; mi->mi_retrans = NFS_RETRIES; if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || svp->sv_knconf->knc_semantics == NC_TPI_COTS) @@ -2100,9 +2430,7 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, * Save server path we're attempting to mount. 
*/ (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); - orig_sv_pathlen = svp_head->sv_pathlen; - orig_sv_path = kmem_alloc(svp_head->sv_pathlen, KM_SLEEP); - bcopy(svp_head->sv_path, orig_sv_path, svp_head->sv_pathlen); + origsvp = copy_svp(svp); nfs_rw_exit(&svp->sv_lock); /* @@ -2162,21 +2490,13 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, break; /* - * replace current sv_path with orig sv_path -- just in - * case it changed due to embedded symlinks. + * For some reason, the mount compound failed. Before + * retrying, we need to restore original conditions. */ - (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); - if (orig_sv_pathlen != svp->sv_pathlen) { - kmem_free(svp->sv_path, svp->sv_pathlen); - svp->sv_path = kmem_alloc(orig_sv_pathlen, - KM_SLEEP); - svp->sv_pathlen = orig_sv_pathlen; - } - bcopy(orig_sv_path, svp->sv_path, orig_sv_pathlen); - nfs_rw_exit(&svp->sv_lock); + svp = restore_svp(mi, svp, origsvp); + svp_head = svp; } while (num_retry-- > 0); - error = e.error ? 
e.error : geterrno4(e.stat); if (error) { nfs_cmn_err(error, CE_WARN, @@ -2215,8 +2535,6 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, firstsvp = svp; } - kmem_free(orig_sv_path, orig_sv_pathlen); - if (firstsvp == NULL) { if (error == 0) error = ENOENT; @@ -2286,6 +2604,19 @@ nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head, mi->mi_flags &= ~MI4_MOUNTING; mutex_exit(&mi->mi_lock); + /* Update VFS with new server and path info */ + if ((strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) || + (strcmp(svp->sv_path, origsvp->sv_path) != 0)) { + len = svp->sv_hostnamelen + svp->sv_pathlen; + resource = kmem_zalloc(len, KM_SLEEP); + (void) strcat(resource, svp->sv_hostname); + (void) strcat(resource, ":"); + (void) strcat(resource, svp->sv_path); + vfs_setresource(vfsp, resource); + kmem_free(resource, len); + } + + sv4_free(origsvp); *rtvpp = rtvp; if (lcr != NULL) crfree(lcr); @@ -2321,6 +2652,9 @@ bad: */ MI4_RELE(mi); + if (origsvp != NULL) + sv4_free(origsvp); + *rtvpp = NULL; return (error); } @@ -2336,7 +2670,6 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) int removed; bool_t must_unlock; - bool_t must_rele; nfs4_ephemeral_tree_t *eph_tree; @@ -2388,7 +2721,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) * again when needed. */ if (nfs4_ephemeral_umount(mi, flag, cr, - &must_unlock, &must_rele, &eph_tree)) { + &must_unlock, &eph_tree)) { ASSERT(must_unlock == FALSE); mutex_enter(&mi->mi_async_lock); mi->mi_max_threads = omax; @@ -2402,8 +2735,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) * then the file system is busy and can't be unmounted. */ if (check_rtable4(vfsp)) { - nfs4_ephemeral_umount_unlock(&must_unlock, &must_rele, - &eph_tree); + nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree); mutex_enter(&mi->mi_async_lock); mi->mi_max_threads = omax; @@ -2416,8 +2748,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr) * The unmount can't fail from now on, so record any * ephemeral changes. 
*/ - nfs4_ephemeral_umount_activate(mi, &must_unlock, - &must_rele, &eph_tree); + nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree); /* * There are no active files that could require over-the-wire @@ -2982,7 +3313,7 @@ recov_retry: */ if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) { (void) nfs4_start_recovery(n4ep, mi, NULL, - NULL, NULL, NULL, OP_SETCLIENTID, NULL); + NULL, NULL, NULL, OP_SETCLIENTID, NULL, NULL, NULL); /* * Don't retry here, just return and let * recovery take over. @@ -3534,7 +3865,12 @@ new_nfs4_server(struct servinfo4 *svp, cred_t *cr) } un_curtime; verifier4 un_verifier; } nfs4clientid_verifier; - char id_val[] = "Solaris: %s, NFSv4 kernel client"; + /* + * We change this ID string carefully and with the Solaris + * NFS server behaviour in mind. "+referrals" indicates + * a client that can handle an NFSv4 referral. + */ + char id_val[] = "Solaris: %s, NFSv4 kernel client +referrals"; int len; np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP); @@ -3946,7 +4282,6 @@ nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr) int removed; bool_t must_unlock; - bool_t must_rele; nfs4_ephemeral_tree_t *eph_tree; /* @@ -4020,9 +4355,9 @@ nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr) * directory tree, we are okay. 
*/ if (!nfs4_ephemeral_umount(mi, flag, cr, - &must_unlock, &must_rele, &eph_tree)) + &must_unlock, &eph_tree)) nfs4_ephemeral_umount_activate(mi, &must_unlock, - &must_rele, &eph_tree); + &eph_tree); /* * The original purge of the dnlc via 'dounmount' @@ -4058,3 +4393,81 @@ nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr) if (removed) zone_rele(mi->mi_zone); } + +/* Referral related sub-routines */ + +/* Freeup knetconfig */ +static void +free_knconf_contents(struct knetconfig *k) +{ + if (k == NULL) + return; + if (k->knc_protofmly) + kmem_free(k->knc_protofmly, KNC_STRSIZE); + if (k->knc_proto) + kmem_free(k->knc_proto, KNC_STRSIZE); +} + +/* + * This updates newpath variable with exact name component from the + * path which gave us a NFS4ERR_MOVED error. + * If the path is /rp/aaa/bbb and nth value is 1, aaa is returned. + */ +static char * +extract_referral_point(const char *svp, int nth) +{ + int num_slashes = 0; + const char *p; + char *newpath = NULL; + int i = 0; + + newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + for (p = svp; *p; p++) { + if (*p == '/') + num_slashes++; + if (num_slashes == nth + 1) { + p++; + while (*p != '/') { + if (*p == '\0') + break; + newpath[i] = *p; + i++; + p++; + } + newpath[i++] = '\0'; + break; + } + } + return (newpath); +} + +/* + * This sets up a new path in sv_path to do a lookup of the referral point. + * If the path is /rp/aaa/bbb and the referral point is aaa, + * this updates /rp/aaa. This path will be used to get referral + * location. 
+ */ +static void +setup_newsvpath(servinfo4_t *svp, int nth) +{ + int num_slashes = 0, pathlen, i = 0; + char *newpath, *p; + + newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + for (p = svp->sv_path; *p; p++) { + newpath[i] = *p; + if (*p == '/') + num_slashes++; + if (num_slashes == nth + 1) { + newpath[i] = '\0'; + pathlen = strlen(newpath) + 1; + kmem_free(svp->sv_path, svp->sv_pathlen); + svp->sv_path = kmem_alloc(pathlen, KM_SLEEP); + svp->sv_pathlen = pathlen; + bcopy(newpath, svp->sv_path, pathlen); + break; + } + i++; + } + kmem_free(newpath, MAXPATHLEN); +} diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c index 5e79c7138f..49d2359d78 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c @@ -90,6 +90,7 @@ #include <sys/ddi.h> #include <sys/int_fmtio.h> +#include <sys/fs/autofs.h> typedef struct { nfs4_ga_res_t *di_garp; @@ -164,7 +165,6 @@ static void nfs4_register_lock_locally(vnode_t *, struct flock64 *, int, static int nfs4_lockrelease(vnode_t *, int, offset_t, cred_t *); static int nfs4_block_and_wait(clock_t *, rnode4_t *); static cred_t *state_to_cred(nfs4_open_stream_t *); -static int vtoname(vnode_t *, char *, ssize_t); static void denied_to_flk(LOCK4denied *, flock64_t *, LOCKT4args *); static pid_t lo_to_pid(lock_owner4 *); static void nfs4_reinstitute_local_lock_state(vnode_t *, flock64_t *, @@ -1183,7 +1183,7 @@ recov_retry: abort = nfs4_start_recovery(&e, VTOMI4(dvp), dvp, vpi, NULL, lost_rqst.lr_op == OP_OPEN ? - &lost_rqst : NULL, OP_OPEN, bsep); + &lost_rqst : NULL, OP_OPEN, bsep, NULL, NULL); if (bsep) kmem_free(bsep, sizeof (*bsep)); @@ -1897,7 +1897,7 @@ top: abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, NULL, lost_rqst.lr_op == OP_OPEN ? 
- &lost_rqst : NULL, OP_OPEN, NULL); + &lost_rqst : NULL, OP_OPEN, NULL, NULL, NULL); nfs4args_copen_free(open_args); goto bailout; } @@ -1936,7 +1936,7 @@ top: abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, NULL, lost_rqst.lr_op == OP_OPEN ? &lost_rqst : - NULL, OP_OPEN, bsep); + NULL, OP_OPEN, bsep, NULL, NULL); nfs4args_copen_free(open_args); (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -1995,7 +1995,7 @@ top: case NFS4ERR_FHEXPIRED: /* recover filehandle and retry */ abort = nfs4_start_recovery(ep, - mi, vp, NULL, NULL, NULL, OP_OPEN, NULL); + mi, vp, NULL, NULL, NULL, OP_OPEN, NULL, NULL, NULL); nfs4args_copen_free(open_args); (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); nfs4_end_open_seqid_sync(oop); @@ -2511,7 +2511,7 @@ nfs4close_otw(rnode4_t *rp, cred_t *cred_otw, nfs4_open_owner_t *oop, abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, NULL, (close_type != CLOSE_RESEND && lost_rqst.lr_op == OP_CLOSE) ? &lost_rqst : NULL, - OP_CLOSE, bsep); + OP_CLOSE, bsep, NULL, NULL); /* drop open seq sync, and let the calling function regrab it */ nfs4_end_open_seqid_sync(oop); @@ -3280,7 +3280,7 @@ recov_retry: abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, &wargs->stateid, - NULL, OP_WRITE, NULL); + NULL, OP_WRITE, NULL, NULL, NULL); if (!e.error) { e.error = geterrno4(res.status); (void) xdr_free(xdr_COMPOUND4res_clnt, @@ -3540,7 +3540,7 @@ recov_retry: "nfs4read: initiating recovery\n")); abort = nfs4_start_recovery(&e, mi, vp, NULL, &rargs->stateid, - NULL, OP_READ, NULL); + NULL, OP_READ, NULL, NULL, NULL); nfs4_end_fop(mi, vp, NULL, OH_READ, &recov_state, needrecov); /* @@ -3987,7 +3987,7 @@ recov_retry: abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, NULL, - OP_SETATTR, NULL); + OP_SETATTR, NULL, NULL, NULL); nfs4_end_op(VTOMI4(vp), vp, NULL, &recov_state, needrecov); /* @@ -4379,7 +4379,7 @@ recov_retry: "nfs4_access: initiating recovery\n")); if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, 
OP_ACCESS, NULL) == FALSE) { + NULL, OP_ACCESS, NULL, NULL, NULL) == FALSE) { nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_ACCESS, &recov_state, needrecov); if (!e.error) @@ -4536,7 +4536,7 @@ recov_retry: "nfs4_readlink: initiating recovery\n")); if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, OP_READLINK, NULL) == FALSE) { + NULL, OP_READLINK, NULL, NULL, NULL) == FALSE) { if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -4897,7 +4897,7 @@ recov_retry_remove: if (nfs4_needs_recovery(&e, FALSE, unldvp->v_vfsp)) { if (nfs4_start_recovery(&e, VTOMI4(unldvp), unldvp, NULL, - NULL, NULL, OP_REMOVE, NULL) == FALSE) { + NULL, NULL, OP_REMOVE, NULL, NULL, NULL) == FALSE) { if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -5316,6 +5316,17 @@ recov_retry: rfs4call(VTOMI4(dvp), &args, &res, cr, &doqueue, 0, &e); + if (!isdotdot && res.status == NFS4ERR_MOVED) { + e.error = nfs4_setup_referral(dvp, nm, vpp, cr); + if (e.error != 0 && *vpp != NULL) + VN_RELE(*vpp); + nfs4_end_fop(mi, dvp, NULL, OH_LOOKUP, + &recov_state, FALSE); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); + kmem_free(argop, argoplist_size); + return (e.error); + } + if (nfs4_needs_recovery(&e, FALSE, dvp->v_vfsp)) { /* * For WRONGSEC of a non-dotdot case, send secinfo directly @@ -5343,7 +5354,7 @@ recov_retry: } if (nfs4_start_recovery(&e, mi, dvp, NULL, NULL, NULL, - OP_LOOKUP, NULL) == FALSE) { + OP_LOOKUP, NULL, NULL, NULL) == FALSE) { nfs4_end_fop(mi, dvp, NULL, OH_LOOKUP, &recov_state, TRUE); @@ -5743,6 +5754,17 @@ recov_retry: rfs4call(VTOMI4(dvp), &args, &res, cr, &doqueue, 0, &e); + if (!isdotdot && res.status == NFS4ERR_MOVED) { + e.error = nfs4_setup_referral(dvp, nm, vpp, cr); + if (e.error != 0 && *vpp != NULL) + VN_RELE(*vpp); + nfs4_end_fop(mi, dvp, NULL, OH_LOOKUP, + &recov_state, FALSE); + (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); + kmem_free(argop, argoplist_size); + return (e.error); + } + if 
(nfs4_needs_recovery(&e, FALSE, dvp->v_vfsp)) { /* * For WRONGSEC of a non-dotdot case, send secinfo directly @@ -5768,7 +5790,7 @@ recov_retry: } if (nfs4_start_recovery(&e, mi, dvp, NULL, NULL, NULL, - OP_LOOKUP, NULL) == FALSE) { + OP_LOOKUP, NULL, NULL, NULL) == FALSE) { nfs4_end_fop(mi, dvp, NULL, OH_LOOKUP, &recov_state, TRUE); @@ -6374,7 +6396,7 @@ recov_retry: abort = nfs4_start_recovery(&e, VTOMI4(dvp), dvp, NULL, NULL, NULL, - OP_OPENATTR, NULL); + OP_OPENATTR, NULL, NULL, NULL); nfs4_end_op(VTOMI4(dvp), dvp, NULL, &recov_state, needrecov); if (!e.error) { e.error = geterrno4(res.status); @@ -6986,7 +7008,7 @@ recov_retry: if (needrecov) { if (nfs4_start_recovery(&e, mi, dvp, NULL, NULL, NULL, - OP_CREATE, NULL) == FALSE) { + OP_CREATE, NULL, NULL, NULL) == FALSE) { nfs4_end_op(mi, dvp, NULL, &recov_state, needrecov); need_end_op = FALSE; @@ -7345,7 +7367,7 @@ recov_retry: if (needrecov) { if (nfs4_start_recovery(&e, VTOMI4(dvp), dvp, - NULL, NULL, NULL, OP_REMOVE, NULL) == FALSE) { + NULL, NULL, NULL, OP_REMOVE, NULL, NULL, NULL) == FALSE) { if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -7519,7 +7541,7 @@ recov_retry: bool_t abort; abort = nfs4_start_recovery(&e, VTOMI4(svp), svp, tdvp, - NULL, NULL, OP_LINK, NULL); + NULL, NULL, OP_LINK, NULL, NULL, NULL); if (abort == FALSE) { nfs4_end_op(VTOMI4(svp), svp, tdvp, &recov_state, needrecov); @@ -8187,7 +8209,7 @@ recov_retry: if (needrecov) { if (nfs4_start_recovery(&e, mi, odvp, ndvp, NULL, NULL, - OP_RENAME, NULL) == FALSE) { + OP_RENAME, NULL, NULL, NULL) == FALSE) { nfs4_end_op(mi, odvp, ndvp, &recov_state, needrecov); if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, @@ -8444,7 +8466,7 @@ recov_retry: bool_t abort; abort = nfs4_start_recovery(&e, mi, odvp, ndvp, NULL, NULL, - OP_RENAME, NULL); + OP_RENAME, NULL, NULL, NULL); if (abort == FALSE) { nfs4_end_fop(mi, odvp, ndvp, OH_VFH_RENAME, &recov_state, needrecov); @@ -8722,7 +8744,7 @@ recov_retry: if (needrecov) { if 
(nfs4_start_recovery(&e, VTOMI4(dvp), dvp, NULL, NULL, - NULL, OP_REMOVE, NULL) == FALSE) { + NULL, OP_REMOVE, NULL, NULL, NULL) == FALSE) { if (!e.error) (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); @@ -9319,7 +9341,7 @@ recov_retry: "nfs4readdir: initiating recovery.\n")); abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, OP_READDIR, NULL); + NULL, OP_READDIR, NULL, NULL, NULL); if (abort == FALSE) { nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_READDIR, &recov_state, needrecov); @@ -11756,7 +11778,7 @@ recov_retry: if (needrecov) { if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, - NULL, OP_COMMIT, NULL) == FALSE) { + NULL, OP_COMMIT, NULL, NULL, NULL) == FALSE) { nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_COMMIT, &recov_state, needrecov); if (!e.error) @@ -12318,6 +12340,12 @@ nfs4_getsecattr(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr, if (error) /* EINVAL */ return (error); + /* + * If this is a referral stub, don't try to go OTW for an ACL + */ + if (RP_ISSTUB_REFERRAL(VTOR4(vp))) + return (fs_fab_acl(vp, vsecattr, flag, cr, ct)); + if (mi->mi_flags & MI4_ACL) { /* * Check if the data is cached and the cache is valid. If it @@ -12752,8 +12780,8 @@ recov_retry_confirm: vp, 0, args.ctag, open_confirm_args->seqid); - abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, - NULL, NULL, NULL, OP_OPEN_CONFIRM, bsep); + abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, + NULL, NULL, OP_OPEN_CONFIRM, bsep, NULL, NULL); if (bsep) { kmem_free(bsep, sizeof (*bsep)); if (num_bseqid_retryp && @@ -13686,7 +13714,7 @@ nfs4frlock_recovery(int needrecov, nfs4_error_t *ep, abort = nfs4_start_recovery(ep, VTOMI4(vp), vp, NULL, NULL, (lost_rqstp && (lost_rqstp->lr_op == OP_LOCK || lost_rqstp->lr_op == OP_LOCKU)) ? lost_rqstp : - NULL, op, bsep); + NULL, op, bsep, NULL, NULL); if (bsep) kmem_free(bsep, sizeof (*bsep)); @@ -14215,7 +14243,7 @@ recov_retry: VTOMI4(vp), vp, NULL, NULL, (lost_rqst.lr_op == OP_LOCK || lost_rqst.lr_op == OP_LOCKU) ? 
- &lost_rqst : NULL, OP_LOCKU, NULL); + &lost_rqst : NULL, OP_LOCKU, NULL, NULL, NULL); lock_owner_rele(lop); lop = NULL; } @@ -15303,7 +15331,7 @@ recov_retry: have_sync_lock = 0; (void) nfs4_start_recovery(ep, mi, vp, NULL, NULL, lost_rqst.lr_op == OP_CLOSE ? - &lost_rqst : NULL, OP_CLOSE, NULL); + &lost_rqst : NULL, OP_CLOSE, NULL, NULL, NULL); close_failed = 1; force_close = 0; goto close_cleanup; @@ -15387,7 +15415,7 @@ recov_retry: abort = nfs4_start_recovery(ep, mi, vp, NULL, NULL, new_lost_rqst.lr_op == OP_OPEN_DOWNGRADE ? &new_lost_rqst : NULL, OP_OPEN_DOWNGRADE, - bsep); + bsep, NULL, NULL); if (odg_cred_otw) crfree(odg_cred_otw); if (bsep) @@ -15976,5 +16004,5 @@ push_reinstate(vnode_t *vp, int cmd, flock64_t *flk, cred_t *cr, (void) nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, (req.lr_op == OP_LOCK || req.lr_op == OP_LOCKU) ? &req : NULL, flk->l_type == F_UNLCK ? OP_LOCKU : OP_LOCK, - NULL); + NULL, NULL, NULL); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_xdr.c b/usr/src/uts/common/fs/nfs/nfs4_xdr.c index 1b3d1462a6..e2e14cff8a 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_xdr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_xdr.c @@ -42,7 +42,16 @@ #include <nfs/nfs4.h> #include <nfs/nfs4_clnt.h> #include <sys/sdt.h> +#include <sys/mkdev.h> #include <rpc/rpc_rdma.h> +#include <rpc/xdr.h> + +#define xdr_dev_t xdr_u_int + +extern bool_t xdr_netbuf(XDR *, struct netbuf *); +extern bool_t xdr_vector(XDR *, char *, const uint_t, const uint_t, + const xdrproc_t); +bool_t xdr_knetconfig(XDR *, struct knetconfig *); bool_t xdr_bitmap4(XDR *xdrs, bitmap4 *objp) @@ -146,6 +155,143 @@ xdr_utf8string(XDR *xdrs, utf8string *objp) } /* + * used by NFSv4 referrals to get info needed for NFSv4 referral mount. 
+ */ +bool_t +xdr_nfs_fsl_info(XDR *xdrs, struct nfs_fsl_info *objp) +{ + + if (!xdr_u_int(xdrs, &objp->netbuf_len)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->netnm_len)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->knconf_len)) + return (FALSE); + +#if defined(_LP64) + /* + * The object can come from a 32-bit binary; nfsmapid. + * To be safe we double the size of the knetconfig to + * allow some buffering for decoding. + */ + if (xdrs->x_op == XDR_DECODE) + objp->knconf_len += sizeof (struct knetconfig); +#endif + + if (!xdr_string(xdrs, &objp->netname, ~0)) + return (FALSE); + if (!xdr_pointer(xdrs, (char **)&objp->addr, objp->netbuf_len, + (xdrproc_t)xdr_netbuf)) + return (FALSE); + if (!xdr_pointer(xdrs, (char **)&objp->knconf, + objp->knconf_len, (xdrproc_t)xdr_knetconfig)) + return (FALSE); + return (TRUE); +} + +bool_t +xdr_knetconfig(XDR *xdrs, struct knetconfig *objp) +{ + rpc_inline_t *buf; + u_longlong_t dev64; +#if !defined(_LP64) + uint32_t major, minor; +#endif + int i; + + if (!xdr_u_int(xdrs, &objp->knc_semantics)) + return (FALSE); + if (xdrs->x_op == XDR_DECODE) { + objp->knc_protofmly = (((char *)objp) + + sizeof (struct knetconfig)); + objp->knc_proto = objp->knc_protofmly + KNC_STRSIZE; + } + if (!xdr_opaque(xdrs, objp->knc_protofmly, KNC_STRSIZE)) + return (FALSE); + if (!xdr_opaque(xdrs, objp->knc_proto, KNC_STRSIZE)) + return (FALSE); + + /* + * For interoperability between 32-bit daemon and 64-bit kernel, + * we always treat dev_t as 64-bit number and do the expanding + * or compression of dev_t as needed. + * We have to hand craft the conversion since there is no available + * function in ddi.c. Besides ddi.c is available only in the kernel + * and we want to keep both user and kernel of xdr_knetconfig() the + * same for consistency. 
+ */ + if (xdrs->x_op == XDR_ENCODE) { +#if defined(_LP64) + dev64 = objp->knc_rdev; +#else + major = (objp->knc_rdev >> NBITSMINOR32) & MAXMAJ32; + minor = objp->knc_rdev & MAXMIN32; + dev64 = (((unsigned long long)major) << NBITSMINOR64) | minor; +#endif + if (!xdr_u_longlong_t(xdrs, &dev64)) + return (FALSE); + } + if (xdrs->x_op == XDR_DECODE) { +#if defined(_LP64) + if (!xdr_u_longlong_t(xdrs, (u_longlong_t *)&objp->knc_rdev)) + return (FALSE); +#else + if (!xdr_u_longlong_t(xdrs, &dev64)) + return (FALSE); + + major = (dev64 >> NBITSMINOR64) & L_MAXMAJ32; + minor = dev64 & L_MAXMIN32; + objp->knc_rdev = (major << L_BITSMINOR32) | minor; +#endif + } + + if (xdrs->x_op == XDR_ENCODE) { + buf = XDR_INLINE(xdrs, (8) * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_vector(xdrs, (char *)objp->knc_unused, 8, + sizeof (uint_t), (xdrproc_t)xdr_u_int)) + return (FALSE); + } else { + uint_t *genp; + + for (i = 0, genp = objp->knc_unused; + i < 8; i++) { +#if defined(_LP64) || defined(_KERNEL) + IXDR_PUT_U_INT32(buf, *genp++); +#else + IXDR_PUT_U_LONG(buf, *genp++); +#endif + } + } + return (TRUE); + } else if (xdrs->x_op == XDR_DECODE) { + buf = XDR_INLINE(xdrs, (8) * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_vector(xdrs, (char *)objp->knc_unused, 8, + sizeof (uint_t), (xdrproc_t)xdr_u_int)) + return (FALSE); + } else { + uint_t *genp; + + for (i = 0, genp = objp->knc_unused; + i < 8; i++) { +#if defined(_LP64) || defined(_KERNEL) + *genp++ = IXDR_GET_U_INT32(buf); +#else + *genp++ = IXDR_GET_U_LONG(buf); +#endif + } + } + return (TRUE); + } + + if (!xdr_vector(xdrs, (char *)objp->knc_unused, 8, + sizeof (uint_t), (xdrproc_t)xdr_u_int)) + return (FALSE); + return (TRUE); +} + +/* * XDR_INLINE decode a filehandle. 
*/ bool_t @@ -492,8 +638,12 @@ xdr_nfs_fh4(XDR *xdrs, nfs_fh4 *objp) static bool_t xdr_fs_location4(XDR *xdrs, fs_location4 *objp) { + if (xdrs->x_op == XDR_DECODE) { + objp->server_val = NULL; + objp->rootpath.pathname4_val = NULL; + } if (!xdr_array(xdrs, (char **)&objp->server_val, - (uint_t *)&objp->server_len, NFS4_FS_LOCATIONS_LIMIT, + (uint_t *)&objp->server_len, NFS4_MAX_UTF8STRING, sizeof (utf8string), (xdrproc_t)xdr_utf8string)) return (FALSE); return (xdr_array(xdrs, (char **)&objp->rootpath.pathname4_val, @@ -560,6 +710,11 @@ xdr_fattr4_acl(XDR *xdrs, fattr4_acl *objp) bool_t xdr_fattr4_fs_locations(XDR *xdrs, fattr4_fs_locations *objp) { + if (xdrs->x_op == XDR_DECODE) { + objp->fs_root.pathname4_len = 0; + objp->fs_root.pathname4_val = NULL; + objp->locations_val = NULL; + } if (!xdr_array(xdrs, (char **)&objp->fs_root.pathname4_val, (uint_t *)&objp->fs_root.pathname4_len, NFS4_MAX_PATHNAME4, @@ -930,7 +1085,9 @@ xdr_ga_fattr_res(XDR *xdrs, struct nfs4_ga_res *garp, bitmap4 resbmap, FATTR4_HOMOGENEOUS_MASK)) { if (resbmap & FATTR4_FS_LOCATIONS_MASK) { - ASSERT(0); + if (!xdr_fattr4_fs_locations(xdrs, + &gesp->n4g_fslocations)) + return (FALSE); } if (resbmap & FATTR4_HIDDEN_MASK) { ASSERT(0); @@ -2253,7 +2410,9 @@ noentries: static bool_t xdr_ga_res(XDR *xdrs, GETATTR4res *objp, GETATTR4args *aobjp) { +#ifdef INLINE uint32_t *ptr; +#endif bitmap4 resbmap; uint32_t attrlen; @@ -2307,11 +2466,13 @@ xdr_ga_res(XDR *xdrs, GETATTR4res *objp, GETATTR4args *aobjp) } /* Check to see if the attrs can be inlined and go for it if so */ +#ifdef INLINE if (!(resbmap & FATTR4_ACL_MASK) && (ptr = (uint32_t *)XDR_INLINE(xdrs, attrlen)) != NULL) return (xdr_ga_fattr_res_inline(ptr, &objp->ga_res, resbmap, aobjp->attr_request, aobjp->mi, NULL)); else +#endif return (xdr_ga_fattr_res(xdrs, &objp->ga_res, resbmap, aobjp->attr_request, aobjp->mi, NULL)); } @@ -4300,6 +4461,7 @@ xdr_nfs_resop4_free(XDR *xdrs, nfs_resop4 **arrayp, int len, int decode_len) { int i; 
nfs_resop4 *array = *arrayp; + nfs4_ga_res_t *gr; /* * Optimized XDR_FREE only results array @@ -4319,10 +4481,15 @@ xdr_nfs_resop4_free(XDR *xdrs, nfs_resop4 **arrayp, int len, int decode_len) case OP_GETATTR: if (array[i].nfs_resop4_u.opgetattr.status != NFS4_OK) continue; - if (array[i].nfs_resop4_u.opgetattr.ga_res.n4g_ext_res) - kmem_free(array[i].nfs_resop4_u.opgetattr. - ga_res.n4g_ext_res, + + gr = &array[i].nfs_resop4_u.opgetattr.ga_res; + if (gr->n4g_ext_res) { + if (gr->n4g_resbmap & FATTR4_FS_LOCATIONS_MASK) + (void) xdr_fattr4_fs_locations(xdrs, + &gr->n4g_ext_res->n4g_fslocations); + kmem_free(gr->n4g_ext_res, sizeof (struct nfs4_ga_ext_res)); + } continue; case OP_GETFH: if (array[i].nfs_resop4_u.opgetfh.status != NFS4_OK) diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index 4fb64c62b1..4d98017403 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -117,6 +117,10 @@ rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, /* check for overflows */ if (!error) { + /* Lie about the object type for a referral */ + if (vn_is_nfs_reparse(vp, cr)) + va.va_type = VLNK; + acl_perm(vp, exi, &va, cr); error = vattr_to_nattr(&va, &ns->ns_attr); } @@ -489,6 +493,7 @@ rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, struct vattr va; struct sockaddr *ca; char *name = NULL; + int is_referral = 0; vp = nfs_fhtovp(fhp, exi); if (vp == NULL) { @@ -515,11 +520,15 @@ rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, return; } + /* We lied about the object type for a referral */ + if (vn_is_nfs_reparse(vp, cr)) + is_referral = 1; + /* * XNFS and RFC1094 require us to return ENXIO if argument * is not a link. BUGID 1138002. 
*/ - if (vp->v_type != VLNK) { + if (vp->v_type != VLNK && !is_referral) { VN_RELE(vp); rl->rl_data = NULL; rl->rl_status = NFSERR_NXIO; @@ -531,27 +540,52 @@ rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, */ rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); - /* - * Set up io vector to read sym link data - */ - iov.iov_base = rl->rl_data; - iov.iov_len = NFS_MAXPATHLEN; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_extflg = UIO_COPY_CACHED; - uio.uio_loffset = (offset_t)0; - uio.uio_resid = NFS_MAXPATHLEN; + if (is_referral) { + char *s; + size_t strsz; + + /* Get an artificial symlink based on a referral */ + s = build_symlink(vp, cr, &strsz); + global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; + DTRACE_PROBE2(nfs2serv__func__referral__reflink, + vnode_t *, vp, char *, s); + if (s == NULL) + error = EINVAL; + else { + error = 0; + (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); + rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); + kmem_free(s, strsz); + } - /* - * Do the readlink. - */ - error = VOP_READLINK(vp, &uio, cr, NULL); + } else { - VN_RELE(vp); + /* + * Set up io vector to read sym link data + */ + iov.iov_base = rl->rl_data; + iov.iov_len = NFS_MAXPATHLEN; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = (offset_t)0; + uio.uio_resid = NFS_MAXPATHLEN; + + /* + * Do the readlink. 
+ */ + error = VOP_READLINK(vp, &uio, cr, NULL); + + rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); - rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); - rl->rl_data[rl->rl_count] = '\0'; + if (!error) + rl->rl_data[rl->rl_count] = '\0'; + + } + + + VN_RELE(vp); ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; name = nfscmd_convname(ca, exi, rl->rl_data, diff --git a/usr/src/uts/common/fs/nfs/nfs_stats.c b/usr/src/uts/common/fs/nfs/nfs_stats.c index b125a06d6c..baaf47a82a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_stats.c +++ b/usr/src/uts/common/fs/nfs/nfs_stats.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/kstat.h> #include <sys/zone.h> @@ -87,6 +85,8 @@ nfsstat_zone_fini_common(zoneid_t zoneid, const char *module, int vers, static const kstat_named_t svstat_tmpl[] = { { "calls", KSTAT_DATA_UINT64 }, { "badcalls", KSTAT_DATA_UINT64 }, + { "referrals", KSTAT_DATA_UINT64 }, + { "referlinks", KSTAT_DATA_UINT64 }, }; /* Points to the global zone server kstat data for all nfs versions */ @@ -108,7 +108,7 @@ nfsstat_zone_init_server(zoneid_t zoneid, kstat_named_t *svstatp[]) for (vers = NFS_VERSION; vers <= NFS_V4; vers++) { svstatp[vers] = nfsstat_zone_init_common(zoneid, "nfs", vers, - "nfs_server", svstat_tmpl, sizeof (svstat_tmpl)); + "nfs_server", svstat_tmpl, sizeof (svstat_tmpl)); if (zoneid == GLOBAL_ZONEID) global_svstat_ptr[vers] = svstatp[vers]; } diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index a42259df93..2f7aa751ad 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -4441,3 +4441,32 @@ fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr) return (0); } + +/* + * Function to check whether a symlink is a reparse point. 
+ * Return B_TRUE if it is a reparse point, else return B_FALSE + */ +boolean_t +vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct) +{ + xvattr_t xvattr; + xoptattr_t *xoap; + + if ((vp->v_type != VLNK) || + !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR))) + return (B_FALSE); + + xva_init(&xvattr); + xoap = xva_getxoptattr(&xvattr); + ASSERT(xoap); + XVA_SET_REQ(&xvattr, XAT_REPARSE); + + if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct)) + return (B_FALSE); + + if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) || + (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE)))) + return (B_FALSE); + + return (xoap->xoa_reparse ? B_TRUE : B_FALSE); +} diff --git a/usr/src/uts/common/nfs/export.h b/usr/src/uts/common/nfs/export.h index b66c00c389..21d6e9e4f6 100644 --- a/usr/src/uts/common/nfs/export.h +++ b/usr/src/uts/common/nfs/export.h @@ -295,6 +295,7 @@ struct charset_cache { /* Forward declarations */ struct exportinfo; struct exp_visible; +struct svc_req; /* * Treenodes are used to build tree representing every node which is part @@ -409,6 +410,7 @@ struct exportinfo { struct exp_visible *exi_visible; struct charset_cache *exi_charset; unsigned exi_volatile_dev:1; + unsigned exi_moved:1; #ifdef VOLATILE_FH_TEST uint32_t exi_volatile_id; struct ex_vol_rename *exi_vol_rename; @@ -512,6 +514,9 @@ extern void export_link(struct exportinfo *); extern int export_unlink(fsid_t *, fid_t *, vnode_t *, struct exportinfo **); extern vnode_t *untraverse(vnode_t *); +extern int vn_is_nfs_reparse(vnode_t *, cred_t *); +extern int client_is_downrev(struct svc_req *); +extern char *build_symlink(vnode_t *, cred_t *, size_t *); /* * Functions that handle the NFSv4 server namespace diff --git a/usr/src/uts/common/nfs/mount.h b/usr/src/uts/common/nfs/mount.h index 4b5c3adb9c..c2785ce73f 100644 --- a/usr/src/uts/common/nfs/mount.h +++ b/usr/src/uts/common/nfs/mount.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,8 +31,6 @@ #ifndef _NFS_MOUNT_H #define _NFS_MOUNT_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -157,11 +155,9 @@ struct nfs_args32 { #define NFSMNT_TRYRDMA 0x8000000 /* Try RDMA mount,no proto advised */ #define NFSMNT_DORDMA 0x10000000 /* Do an RDMA mount, regardless */ #define NFSMNT_MIRRORMOUNT 0x20000000 /* Is a mirrormount */ +#define NFSMNT_REFERRAL 0x40000000 /* Is a referral */ -/* - * This will have to change when we do referrals. - */ -#define NFSMNT_EPHEMERAL NFSMNT_MIRRORMOUNT +#define NFSMNT_EPHEMERAL (NFSMNT_MIRRORMOUNT | NFSMNT_REFERRAL) #ifdef __cplusplus } diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h index a3aa522944..77e5a397c2 100644 --- a/usr/src/uts/common/nfs/nfs.h +++ b/usr/src/uts/common/nfs/nfs.h @@ -884,7 +884,8 @@ extern void rfs_srvrfini(void); #define NATIVEPATH 0x02 /* Native path, i.e., via mount protocol */ #define SECURITY_QUERY 0x04 /* Security query */ -enum nfs_svccounts {NFS_CALLS, NFS_BADCALLS}; /* index for svstat_ptr */ +/* index for svstat_ptr */ +enum nfs_svccounts {NFS_CALLS, NFS_BADCALLS, NFS_REFERRALS, NFS_REFERLINKS}; /* function defs for NFS kernel */ extern int nfs_waitfor_purge_complete(vnode_t *); diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index eccc885a2d..c72c238f7e 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -460,6 +460,7 @@ nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths; * ss_remove - indicates that the rfs4_client_destroy function should * clean up stable storage file. * forced_expire - set if the sysadmin has used clear_locks for this client. + * no_referrals - set if the client is Solaris and pre-dates referrals * deleg_revoked - how many delegations have been revoked for this client? 
* * cp_confirmed - this refers to a confirmed client struct that has @@ -499,6 +500,17 @@ typedef struct rfs4_client { } rfs4_client_t; /* + * ClntIP struct - holds the diagnosis about whether the client + * cannot support referrals. Set to true for old Solaris clients. + */ + +typedef struct rfs4_clntip { + rfs4_dbe_t *ri_dbe; + struct sockaddr_storage ri_addr; + unsigned ri_no_referrals:1; +} rfs4_clntip_t; + +/* * The openowner contains the client supplied open_owner4 as well as * the matching sequence id and is used to track the client's usage of * the open_owner4. Note that a reply is saved here as well for @@ -775,6 +787,7 @@ extern void rfs4_copy_reply(nfs_resop4 *, nfs_resop4 *); extern rfs4_client_t *rfs4_findclient(nfs_client_id4 *, bool_t *, rfs4_client_t *); extern rfs4_client_t *rfs4_findclient_by_id(clientid4, bool_t); +extern rfs4_client_t *rfs4_findclient_by_addr(struct sockaddr *); extern void rfs4_client_rele(rfs4_client_t *); extern void rfs4_client_close(rfs4_client_t *); extern void rfs4_client_state_remove(rfs4_client_t *); @@ -783,6 +796,10 @@ extern void rfs4_update_lease(rfs4_client_t *); extern bool_t rfs4_lease_expired(rfs4_client_t *); extern nfsstat4 rfs4_check_clientid(clientid4 *, int); +/* rfs4_clntip_t handling */ +extern rfs4_clntip_t *rfs4_find_clntip(struct sockaddr *, bool_t *); +extern void rfs4_invalidate_clntip(struct sockaddr *); + /* rfs4_openowner_t handling */ extern rfs4_openowner_t *rfs4_findopenowner(open_owner4 *, bool_t *, seqid4); extern void rfs4_update_open_sequence(rfs4_openowner_t *); @@ -1095,6 +1112,7 @@ struct nfs4_svgetit_arg { /* rdattr_error */ nfsstat4 rdattr_error; /* used for per-entry status */ /* (if rdattr_err) */ + bool_t is_referral; /* because sometimes we tell lies */ bool_t mntdfid_set; fattr4_mounted_on_fileid mounted_on_fileid; @@ -1302,6 +1320,11 @@ extern void vs_acet_destroy(vsecattr_t *); extern void vs_ace4_destroy(vsecattr_t *); extern void vs_aent_destroy(vsecattr_t *); +extern int 
vn_find_nfs_record(vnode_t *, nvlist_t **, char **, char **); +extern int vn_is_nfs_reparse(vnode_t *, cred_t *); +extern fs_locations4 *fetch_referral(vnode_t *, cred_t *); +extern char *build_symlink(vnode_t *, cred_t *, size_t *); + extern int stateid4_cmp(stateid4 *, stateid4 *); extern vtype_t nf4_to_vt[]; diff --git a/usr/src/uts/common/nfs/nfs4_attr.h b/usr/src/uts/common/nfs/nfs4_attr.h index aa28ff9d40..44acf555b4 100644 --- a/usr/src/uts/common/nfs/nfs4_attr.h +++ b/usr/src/uts/common/nfs/nfs4_attr.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _NFS4_ATTR_H #define _NFS4_ATTR_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -436,6 +433,7 @@ typedef struct nfs4_ga_ext_res { * ACL4_SUPPORT_ALARM_ACL */ fattr4_aclsupport n4g_aclsupport; + fattr4_fs_locations n4g_fslocations; } nfs4_ga_ext_res_t; extern bitmap4 rfs4_supported_attrs; diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h index e0d9852c78..8796b96e0b 100644 --- a/usr/src/uts/common/nfs/nfs4_clnt.h +++ b/usr/src/uts/common/nfs/nfs4_clnt.h @@ -46,6 +46,7 @@ #include <sys/avl.h> #include <sys/list.h> #include <rpc/auth.h> +#include <sys/door.h> #ifdef __cplusplus extern "C" { @@ -161,6 +162,8 @@ typedef struct nfs4_delmap_args { struct clstat4 { kstat_named_t calls; /* client requests */ kstat_named_t badcalls; /* rpc failures */ + kstat_named_t referrals; /* referrals */ + kstat_named_t referlinks; /* referrals as symlinks */ kstat_named_t clgets; /* client handle gets */ kstat_named_t cltoomany; /* client handle cache misses */ #ifdef DEBUG @@ -302,6 +305,7 @@ typedef enum nfs4_tag_type { TAG_FSINFO, TAG_GET_SYMLINK, TAG_GETATTR, + TAG_GETATTR_FSLOCATION, TAG_INACTIVE, TAG_LINK, TAG_LOCK, @@ -374,6 +378,8 @@ typedef enum nfs4_tag_type { {0x67657420, 0x736c6e6b, 0x20747874}}, \ {TAG_GETATTR, "getattr", \ {0x67657461, 0x74747220, 0x20202020}}, \ + {TAG_GETATTR_FSLOCATION, "getattr fslocation", \ + {0x67657461, 0x74747220, 0x66736c6f}}, \ {TAG_INACTIVE, "inactive", \ {0x696e6163, 0x74697665, 0x20202020}}, \ {TAG_LINK, "link", \ @@ -553,7 +559,8 @@ typedef struct nfs4_open_owner { /* * Static server information. 
- * These fields are read-only once they are initialized: + * These fields are read-only once they are initialized; sv_lock + * should be held as writer if they are changed during mount: * sv_addr * sv_dhsec * sv_hostname @@ -699,7 +706,8 @@ typedef enum { NR_DELAY, NR_LOST_LOCK, NR_LOST_STATE_RQST, - NR_STALE + NR_STALE, + NR_MOVED } nfs4_recov_t; /* @@ -709,6 +717,8 @@ typedef enum { #define NFS4_MSG_MAX 100 extern int nfs4_msg_max; +#define NFS4_REFERRAL_LOOP_MAX 20 + typedef enum { RE_BAD_SEQID, RE_BADHANDLE, @@ -729,7 +739,8 @@ typedef enum { RE_UNEXPECTED_ERRNO, RE_UNEXPECTED_STATUS, RE_WRONGSEC, - RE_LOST_STATE_BAD_OP + RE_LOST_STATE_BAD_OP, + RE_REFERRAL } nfs4_event_type_t; typedef enum { @@ -1033,6 +1044,10 @@ typedef struct mntinfo4 { uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */ struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */ + /* + * Referral related info. + */ + int mi_vfs_referral_loop_cnt; } mntinfo4_t; /* @@ -1085,7 +1100,7 @@ typedef struct mntinfo4 { #define MI4_ACL 0x2000 /* MI4_MIRRORMOUNT is also defined in nfsstat.c */ #define MI4_MIRRORMOUNT 0x4000 -/* 0x8000 is available */ +#define MI4_REFERRAL 0x8000 /* 0x10000 is available */ #define MI4_NOPRINT 0x20000 #define MI4_DIRECTIO 0x40000 @@ -1103,11 +1118,7 @@ typedef struct mntinfo4 { #define MI4_ASYNC_MGR_STOP 0x40000000 #define MI4_TIMEDOUT 0x80000000 -/* - * Note that when we add referrals, then MI4_EPHEMERAL - * will be MI4_MIRRORMOUNT | MI4_REFERRAL. 
- */ -#define MI4_EPHEMERAL MI4_MIRRORMOUNT +#define MI4_EPHEMERAL (MI4_MIRRORMOUNT | MI4_REFERRAL) #define INTR4(vp) (VTOMI4(vp)->mi_flags & MI4_INT) @@ -1128,6 +1139,7 @@ typedef struct mntinfo4 { #define MI4R_SRV_REBOOT 0x20 /* server has rebooted */ #define MI4R_LOST_STATE 0x40 #define MI4R_BAD_SEQID 0x80 +#define MI4R_MOVED 0x100 #define MI4_HOLD(mi) { \ mi_hold(mi); \ @@ -1492,6 +1504,7 @@ extern void nfs4_write_error(vnode_t *, int, cred_t *); extern void nfs4_lockcompletion(vnode_t *, int); extern bool_t nfs4_map_lost_lock_conflict(vnode_t *); extern int vtodv(vnode_t *, vnode_t **, cred_t *, bool_t); +extern int vtoname(vnode_t *, char *, ssize_t); extern void nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *, bool_t, bool_t *, nfs4_open_owner_t *, bool_t, nfs4_error_t *, int *); @@ -1502,6 +1515,9 @@ extern void nfs4_free_args(struct nfs_args *); extern void mi_hold(mntinfo4_t *); extern void mi_rele(mntinfo4_t *); +extern vnode_t *find_referral_stubvp(vnode_t *, char *, cred_t *); +extern int nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *); + extern sec_data_t *copy_sec_data(sec_data_t *); extern gss_clntdata_t *copy_sec_data_gss(gss_clntdata_t *); @@ -1969,7 +1985,8 @@ extern int nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *); extern int nfs4_recov_marks_dead(nfsstat4); extern bool_t nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *, vnode_t *, vnode_t *, stateid4 *, - nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *); + nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *, + vnode_t *, char *); extern int nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *, nfs4_recov_state_t *); extern void nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *, @@ -1991,14 +2008,18 @@ extern char *nfs4_recov_action_to_str(nfs4_recov_t); * of whether or not the code in _unlock is to be ran. 
*/ extern void nfs4_ephemeral_umount_activate(mntinfo4_t *, - bool_t *, bool_t *, nfs4_ephemeral_tree_t **); + bool_t *, nfs4_ephemeral_tree_t **); extern int nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *, - bool_t *, bool_t *, nfs4_ephemeral_tree_t **); -extern void nfs4_ephemeral_umount_unlock(bool_t *, bool_t *, + bool_t *, nfs4_ephemeral_tree_t **); +extern void nfs4_ephemeral_umount_unlock(bool_t *, nfs4_ephemeral_tree_t **); extern int nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp); +extern int nfs4_callmapid(utf8string *, struct nfs_fsl_info *); +extern int nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *, + char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t); + extern int wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t, nfs4_recov_state_t *); extern void nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *); @@ -2138,6 +2159,10 @@ extern char *fn_path(nfs4_fname_t *); extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *); extern nfs4_fname_t *fn_parent(nfs4_fname_t *); +/* Referral Support */ +extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *, + cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *); + #endif /* diff --git a/usr/src/uts/common/nfs/nfs4_kprot.h b/usr/src/uts/common/nfs/nfs4_kprot.h index 3e35d29f25..be30bed10c 100644 --- a/usr/src/uts/common/nfs/nfs4_kprot.h +++ b/usr/src/uts/common/nfs/nfs4_kprot.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -227,6 +227,15 @@ struct fs_locations4 { }; typedef struct fs_locations4 fs_locations4; +struct nfs_fsl_info { + uint_t netbuf_len; + uint_t netnm_len; + uint_t knconf_len; + char *netname; + struct netbuf *addr; + struct knetconfig *knconf; +}; + /* * ACL support */ @@ -590,6 +599,7 @@ struct nfs_client_id4 { verifier4 verifier; uint_t id_len; char *id_val; + struct sockaddr *cl_addr; }; typedef struct nfs_client_id4 nfs_client_id4; @@ -1636,6 +1646,12 @@ extern bool_t xdr_CB_COMPOUND4args_clnt(XDR *, CB_COMPOUND4args *); extern bool_t xdr_CB_COMPOUND4args_srv(XDR *, CB_COMPOUND4args *); extern bool_t xdr_CB_COMPOUND4res(XDR *, CB_COMPOUND4res *); +/* + * xdr for referrrals upcall + */ +extern bool_t xdr_knetconfig(XDR *, struct knetconfig *); +extern bool_t xdr_nfs_fsl_info(XDR *, struct nfs_fsl_info *); + #ifdef __cplusplus } diff --git a/usr/src/uts/common/nfs/nfsid_map.h b/usr/src/uts/common/nfs/nfsid_map.h index c617695245..197970d4d2 100644 --- a/usr/src/uts/common/nfs/nfsid_map.h +++ b/usr/src/uts/common/nfs/nfsid_map.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _NFSID_MAP_H #define _NFSID_MAP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifndef _KERNEL #include <stddef.h> #endif @@ -59,6 +56,7 @@ extern "C" { #define NFSMAPID_UID_STR 2 #define NFSMAPID_STR_GID 3 #define NFSMAPID_GID_STR 4 +#define NFSMAPID_SRV_NETINFO 5 /* * We are passing in arguments in a variable length struct @@ -145,6 +143,35 @@ typedef struct mapid_res mapid_res_t; #define MAPID_RES_LEN(str_length) \ ((offsetof(mapid_res_t, str[0]) + 1 + (str_length) + 7) & ~ 7) +/* + * Support for referral name resolution by the NFS client + */ +typedef struct refd_door_args { + int cmd; /* NFS4_FS_LOCATIONS/NFS4_SRV_NETINFO */ + int xdr_len; /* Length of xdr Buffer */ + char xdr_arg[1]; /* Buffer holding xdr encoded data */ +} refd_door_args_t; + +typedef struct refd_door_res { + int res_status; + int xdr_len; + char xdr_res[1]; +} refd_door_res_t; + +#ifdef _SYSCALL32 +typedef struct refd_door_args32 { + int32_t cmd; + int32_t xdr_len; + char xdr_arg[1]; +} refd_door_args32_t; + +typedef struct refd_door_res32 { + int32_t res_status; + int32_t xdr_len; + char xdr_res[1]; +} refd_door_res32_t; +#endif + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/nfs/rnode4.h b/usr/src/uts/common/nfs/rnode4.h index a8d6c5ac71..61ea044ca5 100644 --- a/usr/src/uts/common/nfs/rnode4.h +++ b/usr/src/uts/common/nfs/rnode4.h @@ -41,7 +41,8 @@ extern "C" { typedef enum nfs4_stub_type { NFS4_STUB_NONE, - NFS4_STUB_MIRRORMOUNT + NFS4_STUB_MIRRORMOUNT, + NFS4_STUB_REFERRAL } nfs4_stub_type_t; typedef enum nfs4_access_type { @@ -333,7 +334,7 @@ typedef struct rnode4 { /* sv_fsid (servinfo4_t) to see why */ /* stub type was set */ nfs4_stub_type_t r_stub_type; - /* e.g. mirror-mount */ + /* e.g. 
mirror-mount or referral */ uint_t r_inmap; /* to serialize read/write and mmap */ } rnode4_t; @@ -371,6 +372,7 @@ typedef struct rnode4 { #define RP_ISSTUB(rp) (((rp)->r_stub_type != NFS4_STUB_NONE)) #define RP_ISSTUB_MIRRORMOUNT(rp) ((rp)->r_stub_type == NFS4_STUB_MIRRORMOUNT) +#define RP_ISSTUB_REFERRAL(rp) ((rp)->r_stub_type == NFS4_STUB_REFERRAL) /* * Open file instances. @@ -415,6 +417,7 @@ extern vnode_t *makenfs4node_by_fh(nfs4_sharedfh_t *, nfs4_sharedfh_t *, extern nfs4_opinst_t *r4mkopenlist(struct mntinfo4 *); extern void r4releopenlist(nfs4_opinst_t *); +extern int r4find_by_fsid(mntinfo4_t *, fattr4_fsid *); /* Access cache calls */ extern nfs4_access_type_t nfs4_access_check(rnode4_t *, uint32_t, cred_t *); @@ -499,6 +502,7 @@ extern rddir4_cache *rddir4_cache_lookup(rnode4_t *, offset_t, int); extern void rddir4_cache_rele(rnode4_t *, rddir4_cache *); extern void r4_stub_mirrormount(rnode4_t *); +extern void r4_stub_referral(rnode4_t *); extern void r4_stub_none(rnode4_t *); #ifdef DEBUG diff --git a/usr/src/uts/common/sys/mkdev.h b/usr/src/uts/common/sys/mkdev.h index 8969362360..0b0f7deb64 100644 --- a/usr/src/uts/common/sys/mkdev.h +++ b/usr/src/uts/common/sys/mkdev.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1997 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -30,8 +29,6 @@ #ifndef _SYS_MKDEV_H #define _SYS_MKDEV_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #ifdef __cplusplus @@ -54,10 +51,11 @@ extern "C" { #define MAXMAJ32 0x3ffful /* SVR4 max major value */ #define MAXMIN32 0x3fffful /* SVR4 max minor value */ -#ifdef _LP64 - #define NBITSMAJOR64 32 /* # of major device bits in 64-bit Solaris */ #define NBITSMINOR64 32 /* # of minor device bits in 64-bit Solaris */ + +#ifdef _LP64 + #define MAXMAJ64 0xfffffffful /* max major value */ #define MAXMIN64 0xfffffffful /* max minor value */ diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index cdf4fe9b68..97504aabf3 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -1239,6 +1239,7 @@ vnode_t *specvp(struct vnode *vp, dev_t dev, vtype_t type, struct cred *cr); vnode_t *makespecvp(dev_t dev, vtype_t type); vn_vfslocks_entry_t *vn_vfslocks_getlock(void *); void vn_vfslocks_rele(vn_vfslocks_entry_t *); +boolean_t vn_is_reparse(vnode_t *, cred_t *, caller_context_t *); void vn_copypath(struct vnode *src, struct vnode *dst); void vn_setpath_str(struct vnode *vp, const char *str, size_t len); |