diff options
| author | Dai Ngo <dai.ngo@sun.com> | 2009-05-20 11:23:22 -0700 |
|---|---|---|
| committer | Dai Ngo <dai.ngo@sun.com> | 2009-05-20 11:23:22 -0700 |
| commit | e280ed373e7a3152deb093649e841edea237022f (patch) | |
| tree | 8b74c3ad2425d82b624ffb0f5a476ed5b232c797 /usr/src | |
| parent | c1d90a7f5657ab4d599b769861a5495af7a253b0 (diff) | |
| download | illumos-joyent-e280ed373e7a3152deb093649e841edea237022f.tar.gz | |
6831781 "[NFS4] NFS server not responding - still trying" messages during heavy TCP traffic
Diffstat (limited to 'usr/src')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_client_debug.c | 12 |
| -rw-r--r-- | usr/src/uts/common/fs/nfs/nfs4_subr.c | 31 |
| -rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_subr.c | 36 |
| -rw-r--r-- | usr/src/uts/common/nfs/nfs4_clnt.h | 3 |
| -rw-r--r-- | usr/src/uts/common/rpc/clnt_cots.c | 51 |
5 files changed, 99 insertions, 34 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c index 0d57e91049..5336ddf0c8 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c @@ -353,6 +353,8 @@ set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4, else fp->rf_char1 = NULL; break; + case RF_SENDQ_FULL: + break; default: zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id); break; @@ -404,6 +406,7 @@ successful_comm(nfs4_debug_msg_t *msgp) return (1); case RF_SRV_NOT_RESPOND: case RF_SRVS_NOT_RESPOND: + case RF_SENDQ_FULL: return (0); default: return (0); @@ -547,6 +550,8 @@ mntinfo4_t *mi) break; case RF_DELMAP_CB_ERR: break; + case RF_SENDQ_FULL: + break; default: zcmn_err(getzoneid(), CE_NOTE, "get facts: illegal fact %d", cur_fp->rf_type); @@ -1126,6 +1131,12 @@ queue_print_fact(nfs4_debug_msg_t *msg, int dump) nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1, (void *)fp->rf_rp1); break; + case RF_SENDQ_FULL: + zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" + "send queue to NFS server %s is full; still trying\n", + msg->msg_srv, msg->msg_mntpt, msg->msg_srv); + break; + default: zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d", fp->rf_type); @@ -1193,6 +1204,7 @@ id_to_dump_solo_fact(nfs4_fact_type_t id) case RF_SRV_OK: case RF_SRVS_NOT_RESPOND: case RF_SRVS_OK: + case RF_SENDQ_FULL: return (1); default: return (0); diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c index 52a482f642..27261bf583 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -28,8 +28,6 @@ * All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> @@ -1213,7 +1211,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, CLIENT *client; struct chtab *ch; cred_t *cr = icr; - struct rpc_err rpcerr; + struct rpc_err rpcerr, rpcerr_tmp; enum clnt_stat status; int error; struct timeval wait; @@ -1440,20 +1438,35 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, break; timeo = backoff(timeo); + CLNT_GETERR(client, &rpcerr_tmp); + mutex_enter(&mi->mi_lock); if (!(mi->mi_flags & MI4_PRINTED)) { mi->mi_flags |= MI4_PRINTED; mutex_exit(&mi->mi_lock); - nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 0, 0, 0, - FALSE, NULL, 0, NULL); + if ((status == RPC_CANTSEND) && + (rpcerr_tmp.re_errno == ENOBUFS)) + nfs4_queue_fact(RF_SENDQ_FULL, mi, 0, + 0, 0, FALSE, NULL, 0, NULL); + else + nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, + 0, 0, 0, FALSE, NULL, 0, NULL); } else mutex_exit(&mi->mi_lock); if (*doqueue && nfs_has_ctty()) { *doqueue = 0; - if (!(mi->mi_flags & MI4_NOPRINT)) - nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, - 0, 0, 0, FALSE, NULL, 0, NULL); + if (!(mi->mi_flags & MI4_NOPRINT)) { + if ((status == RPC_CANTSEND) && + (rpcerr_tmp.re_errno == ENOBUFS)) + nfs4_queue_fact(RF_SENDQ_FULL, + mi, 0, 0, 0, FALSE, NULL, + 0, NULL); + else + nfs4_queue_fact( + RF_SRV_NOT_RESPOND, mi, 0, + 0, 0, FALSE, NULL, 0, NULL); + } } } } while (tryagain); diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c index 25397dd6e4..6477e27f25 100644 --- a/usr/src/uts/common/fs/nfs/nfs_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs_subr.c @@ -290,6 +290,13 @@ extern ts_label_t *getflabel_cipso(vfs_t *); */ #define IS_RECOVERABLE_ERROR(error) !((error == EINTR) || (error == EIO)) +#ifdef DEBUG +#define SRV_QFULL_MSG "send queue to NFS%d server %s is full; still trying\n" +#define SRV_NOTRESP_MSG "NFS%d server %s not 
responding still trying\n" +#else +#define SRV_QFULL_MSG "send queue to NFS server %s is full still trying\n" +#define SRV_NOTRESP_MSG "NFS server %s not responding still trying\n" +#endif /* * Common handle get program for NFS, NFS ACL, and NFS AUTH client. */ @@ -928,7 +935,7 @@ rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, struct chtab *ch; cred_t *cr = icr; enum clnt_stat status; - struct rpc_err rpcerr; + struct rpc_err rpcerr, rpcerr_tmp; struct timeval wait; int timeo; /* in units of hz */ int my_rsize, my_wsize; @@ -938,6 +945,7 @@ rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, servinfo_t *svp; struct nfs_clnt *nfscl; zoneid_t zoneid = getzoneid(); + char *msg; #ifdef DEBUG char *bufp; #endif @@ -1219,18 +1227,23 @@ failoverretry: tryagain = TRUE; timeo = backoff(timeo); + + CLNT_GETERR(client, &rpcerr_tmp); + if ((status == RPC_CANTSEND) && + (rpcerr_tmp.re_errno == ENOBUFS)) + msg = SRV_QFULL_MSG; + else + msg = SRV_NOTRESP_MSG; + mutex_enter(&mi->mi_lock); if (!(mi->mi_flags & MI_PRINTED)) { mi->mi_flags |= MI_PRINTED; mutex_exit(&mi->mi_lock); #ifdef DEBUG - zprintf(zoneid, - "NFS%d server %s not responding still trying\n", - mi->mi_vers, svp->sv_hostname); -#else - zprintf(zoneid, - "NFS server %s not responding still trying\n", + zprintf(zoneid, msg, mi->mi_vers, svp->sv_hostname); +#else + zprintf(zoneid, msg, svp->sv_hostname); #endif } else mutex_exit(&mi->mi_lock); @@ -1238,13 +1251,10 @@ failoverretry: *douprintf = 0; if (!(mi->mi_flags & MI_NOPRINT)) #ifdef DEBUG - uprintf( - "NFS%d server %s not responding still trying\n", - mi->mi_vers, svp->sv_hostname); -#else - uprintf( - "NFS server %s not responding still trying\n", + uprintf(msg, mi->mi_vers, svp->sv_hostname); +#else + uprintf(msg, svp->sv_hostname); #endif } diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h index 8edf43d5fd..e7e4a66fae 100644 --- a/usr/src/uts/common/nfs/nfs4_clnt.h +++ 
b/usr/src/uts/common/nfs/nfs4_clnt.h @@ -745,7 +745,8 @@ typedef enum { RF_SRV_OK, RF_SRVS_NOT_RESPOND, RF_SRVS_OK, - RF_DELMAP_CB_ERR + RF_DELMAP_CB_ERR, + RF_SENDQ_FULL } nfs4_fact_type_t; typedef enum { diff --git a/usr/src/uts/common/rpc/clnt_cots.c b/usr/src/uts/common/rpc/clnt_cots.c index 58c946b15a..85874c885a 100644 --- a/usr/src/uts/common/rpc/clnt_cots.c +++ b/usr/src/uts/common/rpc/clnt_cots.c @@ -763,7 +763,7 @@ clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, struct netbuf *retryaddr; struct cm_xprt *cm_entry = NULL; queue_t *wq; - int len; + int len, waitsecs, max_waitsecs; int mpsize; int refreshes = REFRESHES; int interrupted; @@ -778,7 +778,6 @@ clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec); RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec); - /* * Bug ID 1240234: * Look out for zero length timeouts. We don't want to @@ -1081,23 +1080,53 @@ call_again: tidu_size); wq = cm_entry->x_wq; + waitsecs = 0; + +dispatch_again: status = clnt_dispatch_send(wq, mp, call, p->cku_xid, (p->cku_flags & CKU_ONQUEUE)); - if (status == RPC_CANTSEND) { - p->cku_err.re_status = status; - p->cku_err.re_errno = EIO; - DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend); - + if ((status == RPC_CANTSEND) && (call->call_reason == ENOBUFS)) { /* - * Allow for processing of the QFULL queue. + * QFULL condition, allow some time for queue to drain + * and try again. Give up after waiting for all timeout + * specified for the call, or zone is going away. */ - delay_first = TRUE; - ticks = clnt_cots_min_tout * drv_usectohz(1000000); + max_waitsecs = wait.tv_sec ? 
wait.tv_sec : clnt_cots_min_tout; + if ((waitsecs++ < max_waitsecs) && + !(zone_status_get(curproc->p_zone) >= + ZONE_IS_SHUTTING_DOWN)) { + + /* wait 1 sec for queue to drain */ + if (clnt_delay(drv_usectohz(1000000), + h->cl_nosignal) == EINTR) { + p->cku_err.re_errno = EINTR; + p->cku_err.re_status = RPC_INTR; + + goto cots_done; + } + + /* and try again */ + goto dispatch_again; + } + p->cku_err.re_status = status; + p->cku_err.re_errno = call->call_reason; + DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend); goto cots_done; } + if (waitsecs) { + /* adjust timeout to account for time wait to send */ + wait.tv_sec -= waitsecs; + if (wait.tv_sec < 0) { + /* pick up reply on next retry */ + wait.tv_sec = 0; + } + DTRACE_PROBE2(clnt_cots__sendwait, CLIENT *, h, + int, waitsecs); + } + RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n", (uint_t)p->cku_xid); p->cku_flags = (CKU_ONQUEUE|CKU_SENT); @@ -2888,7 +2917,7 @@ clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid, if (!canput(q)) { e->call_status = RPC_CANTSEND; - e->call_reason = EIO; + e->call_reason = ENOBUFS; return (RPC_CANTSEND); } |
