diff options
author | meem <none@none> | 2006-08-20 10:20:53 -0700 |
---|---|---|
committer | meem <none@none> | 2006-08-20 10:20:53 -0700 |
commit | a2036d4dec789d6fcf6f71ae43cc38d40c4786cc (patch) | |
tree | 78cee26f1c36d603dbc2292d88c42209af3a2042 /usr/src | |
parent | 70ab954a5d6c4d36858fd6e7e3dd4498d06d2c40 (diff) | |
download | illumos-joyent-a2036d4dec789d6fcf6f71ae43cc38d40c4786cc.tar.gz |
6440123 TCP Fusion loopback connections may hang due to flow control logic error
6458410 read() may spuriously return EAGAIN while unfusing a TCP connection
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/inet/tcp.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 25 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_fusion.c | 69 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp_impl.h | 32 |
4 files changed, 88 insertions, 42 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 7ed4f35564..ebf0084e18 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -525,6 +525,7 @@ typedef struct tcp_s { * manipulated with squeue protection or with tcp_fuse_lock held. */ kmutex_t tcp_fuse_lock; + kcondvar_t tcp_fuse_plugcv; uint_t tcp_fuse_rcv_unread_cnt; /* # of outstanding pkts */ uint32_t tcp_fused : 1, /* loopback tcp in fusion mode */ @@ -533,7 +534,8 @@ typedef struct tcp_s { tcp_direct_sockfs : 1, /* direct calls to sockfs */ tcp_fuse_syncstr_stopped : 1, /* synchronous streams stopped */ - tcp_fuse_to_bit_31 : 27; + tcp_fuse_syncstr_plugged : 1, /* synchronous streams plugged */ + tcp_fuse_to_bit_31 : 26; /* * This variable is accessed without any lock protection diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index c66229327e..60c30521be 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -406,6 +406,7 @@ tcp_stat_t tcp_statistics = { { "tcp_fusion_unqualified", KSTAT_DATA_UINT64 }, { "tcp_fusion_rrw_busy", KSTAT_DATA_UINT64 }, { "tcp_fusion_rrw_msgcnt", KSTAT_DATA_UINT64 }, + { "tcp_fusion_rrw_plugged", KSTAT_DATA_UINT64 }, { "tcp_in_ack_unsent_drop", KSTAT_DATA_UINT64 }, { "tcp_sock_fallback", KSTAT_DATA_UINT64 }, }; @@ -7904,6 +7905,7 @@ tcp_reinit_values(tcp) tcp->tcp_fused_sigurg = B_FALSE; tcp->tcp_direct_sockfs = B_FALSE; tcp->tcp_fuse_syncstr_stopped = B_FALSE; + tcp->tcp_fuse_syncstr_plugged = B_FALSE; tcp->tcp_loopback_peer = NULL; tcp->tcp_fuse_rcv_hiwater = 0; tcp->tcp_fuse_rcv_unread_hiwater = 0; @@ -7996,6 +7998,7 @@ tcp_init_values(tcp_t *tcp) tcp->tcp_fused_sigurg = B_FALSE; tcp->tcp_direct_sockfs = B_FALSE; tcp->tcp_fuse_syncstr_stopped = B_FALSE; + tcp->tcp_fuse_syncstr_plugged = B_FALSE; tcp->tcp_loopback_peer = NULL; tcp->tcp_fuse_rcv_hiwater = 0; tcp->tcp_fuse_rcv_unread_hiwater = 0; @@ -15531,18 +15534,16 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) /* * Normally we would not get backenabled in synchronous - * streams mode, but in case this happens, we need to stop - * synchronous streams temporarily to prevent a race with - * tcp_fuse_rrw() or tcp_fuse_rinfop(). It is safe to access - * tcp_rcv_list here because those entry points will return - * right away when synchronous streams is stopped. + * streams mode, but in case this happens, we need to plug + * synchronous streams during our drain to prevent a race + * with tcp_fuse_rrw() or tcp_fuse_rinfop(). */ - TCP_FUSE_SYNCSTR_STOP(tcp); + TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp); if (tcp->tcp_rcv_list != NULL) (void) tcp_rcv_drain(tcp->tcp_rq, tcp); tcp_clrqfull(peer_tcp); - TCP_FUSE_SYNCSTR_RESUME(tcp); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp); TCP_STAT(tcp_fusion_backenabled); return; } @@ -22260,17 +22261,15 @@ tcp_push_timer(void *arg) ASSERT(tcp->tcp_listener == NULL); /* - * We need to stop synchronous streams temporarily to prevent a race - * with tcp_fuse_rrw() or tcp_fusion rinfop(). It is safe to access - * tcp_rcv_list here because those entry points will return right - * away when synchronous streams is stopped. + * We need to plug synchronous streams during our drain to prevent + * a race with tcp_fuse_rrw() or tcp_fusion_rinfop(). */ - TCP_FUSE_SYNCSTR_STOP(tcp); + TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp); tcp->tcp_push_tid = 0; if ((tcp->tcp_rcv_list != NULL) && (tcp_rcv_drain(tcp->tcp_rq, tcp) == TH_ACK_NEEDED)) tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); - TCP_FUSE_SYNCSTR_RESUME(tcp); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp); } /* diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c index fda7bff9d2..3f9bf559cc 100644 --- a/usr/src/uts/common/inet/tcp/tcp_fusion.c +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c @@ -72,16 +72,18 @@ * used for this purpose. When there is urgent data, the sender needs * to push the data up the receiver's streams read queue. In order to * avoid holding the tcp_fuse_lock across putnext(), the sender sets - * the peer tcp's tcp_fuse_syncstr_stopped bit and releases tcp_fuse_lock - * (see macro TCP_FUSE_SYNCSTR_STOP()). If tcp_fuse_rrw() enters after - * this point, it will see that synchronous streams is temporarily - * stopped and it will immediately return EBUSY without accessing the - * tcp_rcv_list or other fields protected by the tcp_fuse_lock. This - * will result in strget() calling getq_noenab() to dequeue data from - * the stream head instead. After the sender has finished pushing up - * all urgent data, it will clear the tcp_fuse_syncstr_stopped bit using - * TCP_FUSE_SYNCSTR_RESUME and the receiver may then resume using - * tcp_fuse_rrw() to retrieve data from tcp_rcv_list. + * the peer tcp's tcp_fuse_syncstr_plugged bit and releases tcp_fuse_lock + * (see macro TCP_FUSE_SYNCSTR_PLUG_DRAIN()). If tcp_fuse_rrw() enters + * after this point, it will see that synchronous streams is plugged and + * will wait on tcp_fuse_plugcv. After the sender has finished pushing up + * all urgent data, it will clear the tcp_fuse_syncstr_plugged bit using + * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(). This will cause any threads waiting + * on tcp_fuse_plugcv to return EBUSY, and in turn cause strget() to call + * getq_noenab() to dequeue data from the stream head instead. Once the + * data on the stream head has been consumed, tcp_fuse_rrw() may again + * be used to process tcp_rcv_list. However, if TCP_FUSE_SYNCSTR_STOP() + * has been called, all future calls to tcp_fuse_rrw() will return EBUSY, + * effectively disabling synchronous streams. * * The following note applies only to the synchronous streams mode. * @@ -496,7 +498,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) * below, synchronous streams will remain stopped until * someone drains the tcp_rcv_list. */ - TCP_FUSE_SYNCSTR_STOP(peer_tcp); + TCP_FUSE_SYNCSTR_PLUG_DRAIN(peer_tcp); tcp_fuse_output_urg(tcp, mp); } @@ -563,6 +565,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) } else if (flow_stopped && TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) { tcp_clrqfull(tcp); + flow_stopped = B_FALSE; } loopback_packets++; @@ -613,7 +616,7 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size) * to the presence of urgent data, re-enable it. */ if (urgent) - TCP_FUSE_SYNCSTR_RESUME(peer_tcp); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(peer_tcp); } } return (B_TRUE); @@ -733,10 +736,27 @@ tcp_fuse_rrw(queue_t *q, struiod_t *dp) mblk_t *mp; mutex_enter(&tcp->tcp_fuse_lock); + + /* + * If tcp_fuse_syncstr_plugged is set, then another thread is moving + * the underlying data to the stream head. We need to wait until it's + * done, then return EBUSY so that strget() will dequeue data from the + * stream head to ensure data is drained in-order. + */ + if (tcp->tcp_fuse_syncstr_plugged) { + do { + cv_wait(&tcp->tcp_fuse_plugcv, &tcp->tcp_fuse_lock); + } while (tcp->tcp_fuse_syncstr_plugged); + + TCP_STAT(tcp_fusion_rrw_plugged); + TCP_STAT(tcp_fusion_rrw_busy); + return (EBUSY); + } + /* * If someone had turned off tcp_direct_sockfs or if synchronous - * streams is temporarily disabled, we return EBUSY. This causes - * strget() to dequeue data from the stream head instead. + * streams is stopped, we return EBUSY. This causes strget() to + * dequeue data from the stream head instead. */ if (!tcp->tcp_direct_sockfs || tcp->tcp_fuse_syncstr_stopped) { mutex_exit(&tcp->tcp_fuse_lock); @@ -817,7 +837,7 @@ tcp_fuse_rinfop(queue_t *q, infod_t *dp) * currently not accessible. */ if (!tcp->tcp_direct_sockfs || tcp->tcp_fuse_syncstr_stopped || - (mp = tcp->tcp_rcv_list) == NULL) + tcp->tcp_fuse_syncstr_plugged || (mp = tcp->tcp_rcv_list) == NULL) goto done; if (cmd & INFOD_COUNT) { @@ -984,11 +1004,11 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing) ASSERT(peer_tcp != NULL); /* - * We need to prevent tcp_fuse_rrw() from entering before - * we can disable synchronous streams. + * Force any tcp_fuse_rrw() calls to block until we've moved the data + * onto the stream head. */ - TCP_FUSE_SYNCSTR_STOP(tcp); - TCP_FUSE_SYNCSTR_STOP(peer_tcp); + TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp); + TCP_FUSE_SYNCSTR_PLUG_DRAIN(peer_tcp); /* * Drain any pending data; the detached check is needed because @@ -1010,6 +1030,17 @@ tcp_fuse_disable_pair(tcp_t *tcp, boolean_t unfusing) (unfusing ? &peer_tcp->tcp_fused_sigurg_mp : NULL)); } + /* + * Make all current and future tcp_fuse_rrw() calls fail with EBUSY. + * To ensure threads don't sneak past the checks in tcp_fuse_rrw(), + * a given stream must be stopped prior to being unplugged (but the + * ordering of operations between the streams is unimportant). + */ + TCP_FUSE_SYNCSTR_STOP(tcp); + TCP_FUSE_SYNCSTR_STOP(peer_tcp); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(peer_tcp); + /* Lift up any flow-control conditions */ if (tcp->tcp_flow_stopped) { tcp_clrqfull(tcp); diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h index 93c08cb144..1348d2f6cd 100644 --- a/usr/src/uts/common/inet/tcp_impl.h +++ b/usr/src/uts/common/inet/tcp_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -73,7 +72,7 @@ extern "C" { /* * This stops synchronous streams for a fused tcp endpoint - * and prevents tcp_rrw() from pulling data from it. + * and prevents tcp_fuse_rrw() from pulling data from it. */ #define TCP_FUSE_SYNCSTR_STOP(tcp) { \ if ((tcp)->tcp_direct_sockfs) { \ @@ -84,13 +83,27 @@ extern "C" { } /* - * This resumes synchronous streams for this fused tcp endpoint - * and allows tcp_rrw() to pull data from it again. + * This causes all calls to tcp_fuse_rrw() to block until + * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN() is called. */ -#define TCP_FUSE_SYNCSTR_RESUME(tcp) { \ +#define TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp) { \ if ((tcp)->tcp_direct_sockfs) { \ mutex_enter(&(tcp)->tcp_fuse_lock); \ - (tcp)->tcp_fuse_syncstr_stopped = B_FALSE; \ + ASSERT(!(tcp)->tcp_fuse_syncstr_plugged); \ + (tcp)->tcp_fuse_syncstr_plugged = B_TRUE; \ + mutex_exit(&(tcp)->tcp_fuse_lock); \ + } \ +} + +/* + * This unplugs the draining of data through tcp_fuse_rrw(); see + * the comments in tcp_fuse_rrw() for how we preserve ordering. + */ +#define TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp) { \ + if ((tcp)->tcp_direct_sockfs) { \ + mutex_enter(&(tcp)->tcp_fuse_lock); \ + (tcp)->tcp_fuse_syncstr_plugged = B_FALSE; \ + (void) cv_broadcast(&(tcp)->tcp_fuse_plugcv); \ mutex_exit(&(tcp)->tcp_fuse_lock); \ } \ } @@ -291,6 +304,7 @@ typedef struct tcp_stat { kstat_named_t tcp_fusion_unqualified; kstat_named_t tcp_fusion_rrw_busy; kstat_named_t tcp_fusion_rrw_msgcnt; + kstat_named_t tcp_fusion_rrw_plugged; kstat_named_t tcp_in_ack_unsent_drop; kstat_named_t tcp_sock_fallback; } tcp_stat_t; |