author     meem <none@none>    2006-08-20 10:20:53 -0700
committer  meem <none@none>    2006-08-20 10:20:53 -0700
commit     a2036d4dec789d6fcf6f71ae43cc38d40c4786cc (patch)
tree       78cee26f1c36d603dbc2292d88c42209af3a2042 /usr/src
parent     70ab954a5d6c4d36858fd6e7e3dd4498d06d2c40 (diff)
6440123 TCP Fusion loopback connections may hang due to flow control logic error
6458410 read() may spuriously return EAGAIN while unfusing a TCP connection
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/inet/tcp.h               4
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp.c          25
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_fusion.c   69
-rw-r--r--  usr/src/uts/common/inet/tcp_impl.h         32
4 files changed, 88 insertions, 42 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index 7ed4f35564..ebf0084e18 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -525,6 +525,7 @@ typedef struct tcp_s {
* manipulated with squeue protection or with tcp_fuse_lock held.
*/
kmutex_t tcp_fuse_lock;
+ kcondvar_t tcp_fuse_plugcv;
uint_t tcp_fuse_rcv_unread_cnt; /* # of outstanding pkts */
uint32_t
tcp_fused : 1, /* loopback tcp in fusion mode */
@@ -533,7 +534,8 @@ typedef struct tcp_s {
tcp_direct_sockfs : 1, /* direct calls to sockfs */
tcp_fuse_syncstr_stopped : 1, /* synchronous streams stopped */
- tcp_fuse_to_bit_31 : 27;
+ tcp_fuse_syncstr_plugged : 1, /* synchronous streams plugged */
+ tcp_fuse_to_bit_31 : 26;
/*
* This variable is accessed without any lock protection
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index c66229327e..60c30521be 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -406,6 +406,7 @@ tcp_stat_t tcp_statistics = {
{ "tcp_fusion_unqualified", KSTAT_DATA_UINT64 },
{ "tcp_fusion_rrw_busy", KSTAT_DATA_UINT64 },
{ "tcp_fusion_rrw_msgcnt", KSTAT_DATA_UINT64 },
+ { "tcp_fusion_rrw_plugged", KSTAT_DATA_UINT64 },
{ "tcp_in_ack_unsent_drop", KSTAT_DATA_UINT64 },
{ "tcp_sock_fallback", KSTAT_DATA_UINT64 },
};
@@ -7904,6 +7905,7 @@ tcp_reinit_values(tcp)
tcp->tcp_fused_sigurg = B_FALSE;
tcp->tcp_direct_sockfs = B_FALSE;
tcp->tcp_fuse_syncstr_stopped = B_FALSE;
+ tcp->tcp_fuse_syncstr_plugged = B_FALSE;
tcp->tcp_loopback_peer = NULL;
tcp->tcp_fuse_rcv_hiwater = 0;
tcp->tcp_fuse_rcv_unread_hiwater = 0;
@@ -7996,6 +7998,7 @@ tcp_init_values(tcp_t *tcp)
tcp->tcp_fused_sigurg = B_FALSE;
tcp->tcp_direct_sockfs = B_FALSE;
tcp->tcp_fuse_syncstr_stopped = B_FALSE;
+ tcp->tcp_fuse_syncstr_plugged = B_FALSE;
tcp->tcp_loopback_peer = NULL;
tcp->tcp_fuse_rcv_hiwater = 0;
tcp->tcp_fuse_rcv_unread_hiwater = 0;
@@ -15531,18 +15534,16 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
/*
* Normally we would not get backenabled in synchronous
- * streams mode, but in case this happens, we need to stop
- * synchronous streams temporarily to prevent a race with
- * tcp_fuse_rrw() or tcp_fuse_rinfop(). It is safe to access
- * tcp_rcv_list here because those entry points will return
- * right away when synchronous streams is stopped.
+ * streams mode, but in case this happens, we need to plug
+ * synchronous streams during our drain to prevent a race
+ * with tcp_fuse_rrw() or tcp_fuse_rinfop().
*/
- TCP_FUSE_SYNCSTR_STOP(tcp);
+ TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
if (tcp->tcp_rcv_list != NULL)
(void) tcp_rcv_drain(tcp->tcp_rq, tcp);
tcp_clrqfull(peer_tcp);
- TCP_FUSE_SYNCSTR_RESUME(tcp);
+ TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp);
TCP_STAT(tcp_fusion_backenabled);
return;
}
@@ -22260,17 +22261,15 @@ tcp_push_timer(void *arg)
ASSERT(tcp->tcp_listener == NULL);
/*
- * We need to stop synchronous streams temporarily to prevent a race
- * with tcp_fuse_rrw() or tcp_fusion rinfop(). It is safe to access
- * tcp_rcv_list here because those entry points will return right
- * away when synchronous streams is stopped.
+ * We need to plug synchronous streams during our drain to prevent
+ * a race with tcp_fuse_rrw() or tcp_fuse_rinfop().
*/
- TCP_FUSE_SYNCSTR_STOP(tcp);
+ TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
tcp->tcp_push_tid = 0;
if ((tcp->tcp_rcv_list != NULL) &&
(tcp_rcv_drain(tcp->tcp_rq, tcp) == TH_ACK_NEEDED))
tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
- TCP_FUSE_SYNCSTR_RESUME(tcp);
+ TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp);
}
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index fda7bff9d2..3f9bf559cc 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -72,16 +72,18 @@
* used for this purpose. When there is urgent data, the sender needs
* to push the data up the receiver's streams read queue. In order to
* avoid holding the tcp_fuse_lock across putnext(), the sender sets
- * the peer tcp's tcp_fuse_syncstr_stopped bit and releases tcp_fuse_lock
- * (see macro TCP_FUSE_SYNCSTR_STOP()). If tcp_fuse_rrw() enters after
- * this point, it will see that synchronous streams is temporarily
- * stopped and it will immediately return EBUSY without accessing the
- * tcp_rcv_list or other fields protected by the tcp_fuse_lock. This
- * will result in strget() calling getq_noenab() to dequeue data from
- * the stream head instead. After the sender has finished pushing up
- * all urgent data, it will clear the tcp_fuse_syncstr_stopped bit using
- * TCP_FUSE_SYNCSTR_RESUME and the receiver may then resume using
- * tcp_fuse_rrw() to retrieve data from tcp_rcv_list.
+ * the peer tcp's tcp_fuse_syncstr_plugged bit and releases tcp_fuse_lock
+ * (see macro TCP_FUSE_SYNCSTR_PLUG_DRAIN()). If tcp_fuse_rrw() enters
+ * after this point, it will see that synchronous streams is plugged and
+ * will wait on tcp_fuse_plugcv. After the sender has finished pushing up
+ * all urgent data, it will clear the tcp_fuse_syncstr_plugged bit using
+ * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(). This will cause any threads waiting
+ * on tcp_fuse_plugcv to return EBUSY, and in turn cause strget() to call
+ * getq_noenab() to dequeue data from the stream head instead. Once the
+ * data on the stream head has been consumed, tcp_fuse_rrw() may again
+ * be used to process tcp_rcv_list. However, if TCP_FUSE_SYNCSTR_STOP()
+ * has been called, all future calls to tcp_fuse_rrw() will return EBUSY,
+ * effectively disabling synchronous streams.
*
* The following note applies only to the synchronous streams mode.
*
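To make the protocol described above concrete, here is a minimal userland sketch of the plug/wait/unplug scheme, using POSIX threads rather than the kernel's kmutex_t/kcondvar_t primitives. Every name in it (fuse_sync_t, fuse_rrw(), fuse_plug_drain(), fuse_unplug_drain()) is invented for illustration and is not part of the illumos sources; the real logic lives in tcp_fuse_rrw() and the TCP_FUSE_SYNCSTR_*() macros shown in this patch.

	/*
	 * Illustrative userland analogue of the plugged-drain protocol; not
	 * kernel code.  'plugged' stands in for tcp_fuse_syncstr_plugged,
	 * 'stopped' for tcp_fuse_syncstr_stopped, 'plugcv' for tcp_fuse_plugcv
	 * and 'lock' for tcp_fuse_lock.
	 */
	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>

	typedef struct fuse_sync {
		pthread_mutex_t	lock;
		pthread_cond_t	plugcv;
		bool		plugged;	/* drain in progress by another thread */
		bool		stopped;	/* synchronous streams disabled */
	} fuse_sync_t;

	void
	fuse_sync_init(fuse_sync_t *fs)
	{
		(void) pthread_mutex_init(&fs->lock, NULL);
		(void) pthread_cond_init(&fs->plugcv, NULL);
		fs->plugged = false;
		fs->stopped = false;
	}

	/*
	 * Reader side, analogous to the start of tcp_fuse_rrw(): wait out any
	 * in-progress drain, then tell the caller (strget() in the real code)
	 * to fall back to the stream head by returning EBUSY.
	 */
	int
	fuse_rrw(fuse_sync_t *fs)
	{
		(void) pthread_mutex_lock(&fs->lock);

		if (fs->plugged) {
			do {
				(void) pthread_cond_wait(&fs->plugcv, &fs->lock);
			} while (fs->plugged);
			(void) pthread_mutex_unlock(&fs->lock);
			return (EBUSY);	/* drain in order from the stream head */
		}

		if (fs->stopped) {
			(void) pthread_mutex_unlock(&fs->lock);
			return (EBUSY);	/* synchronous streams disabled */
		}

		/* ... dequeue directly from the receive list here ... */
		(void) pthread_mutex_unlock(&fs->lock);
		return (0);
	}

	/* Sender side: plug before pushing data up the stream head ... */
	void
	fuse_plug_drain(fuse_sync_t *fs)
	{
		(void) pthread_mutex_lock(&fs->lock);
		fs->plugged = true;
		(void) pthread_mutex_unlock(&fs->lock);
	}

	/* ... and unplug afterwards, waking any waiting readers. */
	void
	fuse_unplug_drain(fuse_sync_t *fs)
	{
		(void) pthread_mutex_lock(&fs->lock);
		fs->plugged = false;
		(void) pthread_cond_broadcast(&fs->plugcv);
		(void) pthread_mutex_unlock(&fs->lock);
	}

A sender that needs to push data up the stream head would bracket the push with fuse_plug_drain()/fuse_unplug_drain(), mirroring the TCP_FUSE_SYNCSTR_PLUG_DRAIN()/TCP_FUSE_SYNCSTR_UNPLUG_DRAIN() pairs added to tcp.c above.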
@@ -496,7 +498,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
* below, synchronous streams will remain stopped until
* someone drains the tcp_rcv_list.
*/
- TCP_FUSE_SYNCSTR_STOP(peer_tcp);
+ TCP_FUSE_SYNCSTR_PLUG_DRAIN(peer_tcp);
tcp_fuse_output_urg(tcp, mp);
}
@@ -563,6 +565,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
} else if (flow_stopped &&
TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) {
tcp_clrqfull(tcp);
+ flow_stopped = B_FALSE;
}
loopback_packets++;
@@ -613,7 +616,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
* to the presence of urgent data, re-enable it.
*/
if (urgent)
- TCP_FUSE_SYNCSTR_RESUME(peer_tcp);
+ TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(peer_tcp);
}
}
return (B_TRUE);
@@ -733,10 +736,28 @@ tcp_fuse_rrw(queue_t *q, struiod_t *dp)
mblk_t *mp;
mutex_enter(&tcp->tcp_fuse_lock);
+
+ /*
+ * If tcp_fuse_syncstr_plugged is set, then another thread is moving
+ * the underlying data to the stream head. We need to wait until it's
+ * done, then return EBUSY so that strget() will dequeue data from the
+ * stream head to ensure data is drained in-order.
+ */
+ if (tcp->tcp_fuse_syncstr_plugged) {
+ do {
+ cv_wait(&tcp->tcp_fuse_plugcv, &tcp->tcp_fuse_lock);
+ } while (tcp->tcp_fuse_syncstr_plugged);
+
+ mutex_exit(&tcp->tcp_fuse_lock);
+ TCP_STAT(tcp_fusion_rrw_plugged);
+ TCP_STAT(tcp_fusion_rrw_busy);
+ return (EBUSY);
+ }
+
/*
* If someone had turned off tcp_direct_sockfs or if synchronous
- * streams is temporarily disabled, we return EBUSY. This causes
- * strget() to dequeue data from the stream head instead.
+ * streams is stopped, we return EBUSY. This causes strget() to
+ * dequeue data from the stream head instead.
*/
if (!tcp->tcp_direct_sockfs || tcp->tcp_fuse_syncstr_stopped) {
mutex_exit(&tcp->tcp_fuse_lock);
@@ -817,7 +837,7 @@ tcp_fuse_rinfop(queue_t *q, infod_t *dp)
* currently not accessible.
*/
if (!tcp->tcp_direct_sockfs || tcp->tcp_fuse_syncstr_stopped ||
- (mp = tcp->tcp_rcv_list) == NULL)
+ tcp->tcp_fuse_syncstr_plugged || (mp = tcp->tcp_rcv_list) == NULL)
goto done;
if (cmd & INFOD_COUNT) {
@@ -984,11 +1004,11 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing)
ASSERT(peer_tcp != NULL);
/*
- * We need to prevent tcp_fuse_rrw() from entering before
- * we can disable synchronous streams.
+ * Force any tcp_fuse_rrw() calls to block until we've moved the data
+ * onto the stream head.
*/
- TCP_FUSE_SYNCSTR_STOP(tcp);
- TCP_FUSE_SYNCSTR_STOP(peer_tcp);
+ TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
+ TCP_FUSE_SYNCSTR_PLUG_DRAIN(peer_tcp);
/*
* Drain any pending data; the detached check is needed because
@@ -1010,6 +1030,17 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing)
(unfusing ? &peer_tcp->tcp_fused_sigurg_mp : NULL));
}
+ /*
+ * Make all current and future tcp_fuse_rrw() calls fail with EBUSY.
+ * To ensure threads don't sneak past the checks in tcp_fuse_rrw(),
+ * a given stream must be stopped prior to being unplugged (but the
+ * ordering of operations between the streams is unimportant).
+ */
+ TCP_FUSE_SYNCSTR_STOP(tcp);
+ TCP_FUSE_SYNCSTR_STOP(peer_tcp);
+ TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp);
+ TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(peer_tcp);
+
/* Lift up any flow-control conditions */
if (tcp->tcp_flow_stopped) {
tcp_clrqfull(tcp);
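The ordering requirement in the comment above (stop before unplug) keeps a reader from entering tcp_fuse_rrw() in the window between the unplug and the stop and pulling from a receive list that is being torn down. Continuing the hedged userland sketch given earlier in this patch (fuse_sync_t, fuse_plug_drain() and fuse_unplug_drain() are invented names, not illumos code), the disable path for one endpoint looks roughly like this:

	/*
	 * Sketch only: disabling the direct path for one endpoint, reusing
	 * the fuse_sync_t/fuse_plug_drain()/fuse_unplug_drain() definitions
	 * from the earlier sketch (plus <pthread.h> and <stdbool.h>).
	 */
	void
	fuse_disable(fuse_sync_t *fs)
	{
		fuse_plug_drain(fs);	/* new direct readers now wait on the condvar */

		/* ... move anything left on the receive list up to the stream head ... */

		(void) pthread_mutex_lock(&fs->lock);
		fs->stopped = true;	/* analogue of TCP_FUSE_SYNCSTR_STOP() */
		(void) pthread_mutex_unlock(&fs->lock);

		fuse_unplug_drain(fs);	/* wake waiters; they return EBUSY */
	}

Waiters woken by the unplug return EBUSY unconditionally, and any reader arriving after the unplug sees the stopped flag, so no thread can reach the receive list once the drain has completed.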
diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h
index 93c08cb144..1348d2f6cd 100644
--- a/usr/src/uts/common/inet/tcp_impl.h
+++ b/usr/src/uts/common/inet/tcp_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -73,7 +72,7 @@ extern "C" {
/*
* This stops synchronous streams for a fused tcp endpoint
- * and prevents tcp_rrw() from pulling data from it.
+ * and prevents tcp_fuse_rrw() from pulling data from it.
*/
#define TCP_FUSE_SYNCSTR_STOP(tcp) { \
if ((tcp)->tcp_direct_sockfs) { \
@@ -84,13 +83,27 @@ extern "C" {
}
/*
- * This resumes synchronous streams for this fused tcp endpoint
- * and allows tcp_rrw() to pull data from it again.
+ * This causes all calls to tcp_fuse_rrw() to block until
+ * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN() is called.
*/
-#define TCP_FUSE_SYNCSTR_RESUME(tcp) { \
+#define TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp) { \
if ((tcp)->tcp_direct_sockfs) { \
mutex_enter(&(tcp)->tcp_fuse_lock); \
- (tcp)->tcp_fuse_syncstr_stopped = B_FALSE; \
+ ASSERT(!(tcp)->tcp_fuse_syncstr_plugged); \
+ (tcp)->tcp_fuse_syncstr_plugged = B_TRUE; \
+ mutex_exit(&(tcp)->tcp_fuse_lock); \
+ } \
+}
+
+/*
+ * This unplugs the draining of data through tcp_fuse_rrw(); see
+ * the comments in tcp_fuse_rrw() for how we preserve ordering.
+ */
+#define TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp) { \
+ if ((tcp)->tcp_direct_sockfs) { \
+ mutex_enter(&(tcp)->tcp_fuse_lock); \
+ (tcp)->tcp_fuse_syncstr_plugged = B_FALSE; \
+ (void) cv_broadcast(&(tcp)->tcp_fuse_plugcv); \
mutex_exit(&(tcp)->tcp_fuse_lock); \
} \
}
@@ -291,6 +304,7 @@ typedef struct tcp_stat {
kstat_named_t tcp_fusion_unqualified;
kstat_named_t tcp_fusion_rrw_busy;
kstat_named_t tcp_fusion_rrw_msgcnt;
+ kstat_named_t tcp_fusion_rrw_plugged;
kstat_named_t tcp_in_ack_unsent_drop;
kstat_named_t tcp_sock_fallback;
} tcp_stat_t;