summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
author Dai Ngo <dai.ngo@sun.com> 2009-05-20 11:23:22 -0700
committer Dai Ngo <dai.ngo@sun.com> 2009-05-20 11:23:22 -0700
commit e280ed373e7a3152deb093649e841edea237022f (patch)
tree 8b74c3ad2425d82b624ffb0f5a476ed5b232c797 /usr/src
parent c1d90a7f5657ab4d599b769861a5495af7a253b0 (diff)
download illumos-joyent-e280ed373e7a3152deb093649e841edea237022f.tar.gz
6831781 "[NFS4] NFS server not responding - still trying" messages during heavy TCP traffic
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs4_client_debug.c 12
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs4_subr.c 31
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs_subr.c 36
-rw-r--r-- usr/src/uts/common/nfs/nfs4_clnt.h 3
-rw-r--r-- usr/src/uts/common/rpc/clnt_cots.c 51
5 files changed, 99 insertions, 34 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
index 0d57e91049..5336ddf0c8 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
@@ -353,6 +353,8 @@ set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4,
else
fp->rf_char1 = NULL;
break;
+ case RF_SENDQ_FULL:
+ break;
default:
zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id);
break;
@@ -404,6 +406,7 @@ successful_comm(nfs4_debug_msg_t *msgp)
return (1);
case RF_SRV_NOT_RESPOND:
case RF_SRVS_NOT_RESPOND:
+ case RF_SENDQ_FULL:
return (0);
default:
return (0);
@@ -547,6 +550,8 @@ mntinfo4_t *mi)
break;
case RF_DELMAP_CB_ERR:
break;
+ case RF_SENDQ_FULL:
+ break;
default:
zcmn_err(getzoneid(), CE_NOTE,
"get facts: illegal fact %d", cur_fp->rf_type);
@@ -1126,6 +1131,12 @@ queue_print_fact(nfs4_debug_msg_t *msg, int dump)
nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1,
(void *)fp->rf_rp1);
break;
+ case RF_SENDQ_FULL:
+ zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
+ "send queue to NFS server %s is full; still trying\n",
+ msg->msg_srv, msg->msg_mntpt, msg->msg_srv);
+ break;
+
default:
zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d",
fp->rf_type);
@@ -1193,6 +1204,7 @@ id_to_dump_solo_fact(nfs4_fact_type_t id)
case RF_SRV_OK:
case RF_SRVS_NOT_RESPOND:
case RF_SRVS_OK:
+ case RF_SENDQ_FULL:
return (1);
default:
return (0);
diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c
index 52a482f642..27261bf583 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -28,8 +28,6 @@
* All Rights Reserved
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -1213,7 +1211,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
CLIENT *client;
struct chtab *ch;
cred_t *cr = icr;
- struct rpc_err rpcerr;
+ struct rpc_err rpcerr, rpcerr_tmp;
enum clnt_stat status;
int error;
struct timeval wait;
@@ -1440,20 +1438,35 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
break;
timeo = backoff(timeo);
+ CLNT_GETERR(client, &rpcerr_tmp);
+
mutex_enter(&mi->mi_lock);
if (!(mi->mi_flags & MI4_PRINTED)) {
mi->mi_flags |= MI4_PRINTED;
mutex_exit(&mi->mi_lock);
- nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, 0, 0, 0,
- FALSE, NULL, 0, NULL);
+ if ((status == RPC_CANTSEND) &&
+ (rpcerr_tmp.re_errno == ENOBUFS))
+ nfs4_queue_fact(RF_SENDQ_FULL, mi, 0,
+ 0, 0, FALSE, NULL, 0, NULL);
+ else
+ nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
+ 0, 0, 0, FALSE, NULL, 0, NULL);
} else
mutex_exit(&mi->mi_lock);
if (*doqueue && nfs_has_ctty()) {
*doqueue = 0;
- if (!(mi->mi_flags & MI4_NOPRINT))
- nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
- 0, 0, 0, FALSE, NULL, 0, NULL);
+ if (!(mi->mi_flags & MI4_NOPRINT)) {
+ if ((status == RPC_CANTSEND) &&
+ (rpcerr_tmp.re_errno == ENOBUFS))
+ nfs4_queue_fact(RF_SENDQ_FULL,
+ mi, 0, 0, 0, FALSE, NULL,
+ 0, NULL);
+ else
+ nfs4_queue_fact(
+ RF_SRV_NOT_RESPOND, mi, 0,
+ 0, 0, FALSE, NULL, 0, NULL);
+ }
}
}
} while (tryagain);
diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c
index 25397dd6e4..6477e27f25 100644
--- a/usr/src/uts/common/fs/nfs/nfs_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs_subr.c
@@ -290,6 +290,13 @@ extern ts_label_t *getflabel_cipso(vfs_t *);
*/
#define IS_RECOVERABLE_ERROR(error) !((error == EINTR) || (error == EIO))
+#ifdef DEBUG
+#define SRV_QFULL_MSG "send queue to NFS%d server %s is full; still trying\n"
+#define SRV_NOTRESP_MSG "NFS%d server %s not responding still trying\n"
+#else
+#define SRV_QFULL_MSG "send queue to NFS server %s is full still trying\n"
+#define SRV_NOTRESP_MSG "NFS server %s not responding still trying\n"
+#endif
/*
* Common handle get program for NFS, NFS ACL, and NFS AUTH client.
*/
@@ -928,7 +935,7 @@ rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
struct chtab *ch;
cred_t *cr = icr;
enum clnt_stat status;
- struct rpc_err rpcerr;
+ struct rpc_err rpcerr, rpcerr_tmp;
struct timeval wait;
int timeo; /* in units of hz */
int my_rsize, my_wsize;
@@ -938,6 +945,7 @@ rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
servinfo_t *svp;
struct nfs_clnt *nfscl;
zoneid_t zoneid = getzoneid();
+ char *msg;
#ifdef DEBUG
char *bufp;
#endif
@@ -1219,18 +1227,23 @@ failoverretry:
tryagain = TRUE;
timeo = backoff(timeo);
+
+ CLNT_GETERR(client, &rpcerr_tmp);
+ if ((status == RPC_CANTSEND) &&
+ (rpcerr_tmp.re_errno == ENOBUFS))
+ msg = SRV_QFULL_MSG;
+ else
+ msg = SRV_NOTRESP_MSG;
+
mutex_enter(&mi->mi_lock);
if (!(mi->mi_flags & MI_PRINTED)) {
mi->mi_flags |= MI_PRINTED;
mutex_exit(&mi->mi_lock);
#ifdef DEBUG
- zprintf(zoneid,
- "NFS%d server %s not responding still trying\n",
- mi->mi_vers, svp->sv_hostname);
-#else
- zprintf(zoneid,
- "NFS server %s not responding still trying\n",
+ zprintf(zoneid, msg, mi->mi_vers,
svp->sv_hostname);
+#else
+ zprintf(zoneid, msg, svp->sv_hostname);
#endif
} else
mutex_exit(&mi->mi_lock);
@@ -1238,13 +1251,10 @@ failoverretry:
*douprintf = 0;
if (!(mi->mi_flags & MI_NOPRINT))
#ifdef DEBUG
- uprintf(
- "NFS%d server %s not responding still trying\n",
- mi->mi_vers, svp->sv_hostname);
-#else
- uprintf(
- "NFS server %s not responding still trying\n",
+ uprintf(msg, mi->mi_vers,
svp->sv_hostname);
+#else
+ uprintf(msg, svp->sv_hostname);
#endif
}
diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h
index 8edf43d5fd..e7e4a66fae 100644
--- a/usr/src/uts/common/nfs/nfs4_clnt.h
+++ b/usr/src/uts/common/nfs/nfs4_clnt.h
@@ -745,7 +745,8 @@ typedef enum {
RF_SRV_OK,
RF_SRVS_NOT_RESPOND,
RF_SRVS_OK,
- RF_DELMAP_CB_ERR
+ RF_DELMAP_CB_ERR,
+ RF_SENDQ_FULL
} nfs4_fact_type_t;
typedef enum {
diff --git a/usr/src/uts/common/rpc/clnt_cots.c b/usr/src/uts/common/rpc/clnt_cots.c
index 58c946b15a..85874c885a 100644
--- a/usr/src/uts/common/rpc/clnt_cots.c
+++ b/usr/src/uts/common/rpc/clnt_cots.c
@@ -763,7 +763,7 @@ clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
struct netbuf *retryaddr;
struct cm_xprt *cm_entry = NULL;
queue_t *wq;
- int len;
+ int len, waitsecs, max_waitsecs;
int mpsize;
int refreshes = REFRESHES;
int interrupted;
@@ -778,7 +778,6 @@ clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec);
RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec);
-
/*
* Bug ID 1240234:
* Look out for zero length timeouts. We don't want to
@@ -1081,23 +1080,53 @@ call_again:
tidu_size);
wq = cm_entry->x_wq;
+ waitsecs = 0;
+
+dispatch_again:
status = clnt_dispatch_send(wq, mp, call, p->cku_xid,
(p->cku_flags & CKU_ONQUEUE));
- if (status == RPC_CANTSEND) {
- p->cku_err.re_status = status;
- p->cku_err.re_errno = EIO;
- DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend);
-
+ if ((status == RPC_CANTSEND) && (call->call_reason == ENOBUFS)) {
/*
- * Allow for processing of the QFULL queue.
+ * QFULL condition, allow some time for queue to drain
+ * and try again. Give up after waiting for all timeout
+ * specified for the call, or zone is going away.
*/
- delay_first = TRUE;
- ticks = clnt_cots_min_tout * drv_usectohz(1000000);
+ max_waitsecs = wait.tv_sec ? wait.tv_sec : clnt_cots_min_tout;
+ if ((waitsecs++ < max_waitsecs) &&
+ !(zone_status_get(curproc->p_zone) >=
+ ZONE_IS_SHUTTING_DOWN)) {
+
+ /* wait 1 sec for queue to drain */
+ if (clnt_delay(drv_usectohz(1000000),
+ h->cl_nosignal) == EINTR) {
+ p->cku_err.re_errno = EINTR;
+ p->cku_err.re_status = RPC_INTR;
+
+ goto cots_done;
+ }
+
+ /* and try again */
+ goto dispatch_again;
+ }
+ p->cku_err.re_status = status;
+ p->cku_err.re_errno = call->call_reason;
+ DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend);
goto cots_done;
}
+ if (waitsecs) {
+ /* adjust timeout to account for time wait to send */
+ wait.tv_sec -= waitsecs;
+ if (wait.tv_sec < 0) {
+ /* pick up reply on next retry */
+ wait.tv_sec = 0;
+ }
+ DTRACE_PROBE2(clnt_cots__sendwait, CLIENT *, h,
+ int, waitsecs);
+ }
+
RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
(uint_t)p->cku_xid);
p->cku_flags = (CKU_ONQUEUE|CKU_SENT);
@@ -2888,7 +2917,7 @@ clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
if (!canput(q)) {
e->call_status = RPC_CANTSEND;
- e->call_reason = EIO;
+ e->call_reason = ENOBUFS;
return (RPC_CANTSEND);
}