summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet
diff options
context:
space:
mode:
author: Philip Kirk <Phil.Kirk@Sun.COM> 2009-06-15 05:57:03 -0400
committer: Philip Kirk <Phil.Kirk@Sun.COM> 2009-06-15 05:57:03 -0400
commit: 410734d49da2a410ec87758533a8f01c75aa0bd8 (patch)
tree: c7d458af0dc4054b456e3dd858a9df02e35218c8 /usr/src/uts/common/inet
parent: 4bc40f6908f439ff427ce56daad14366e8a63f7c (diff)
download: illumos-joyent-410734d49da2a410ec87758533a8f01c75aa0bd8.tar.gz
6485039 TCP needs to perform shrunk window functionality robustly.
Diffstat (limited to 'usr/src/uts/common/inet')
-rw-r--r-- usr/src/uts/common/inet/tcp.h | 8
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c | 148
-rw-r--r-- usr/src/uts/common/inet/tcp_sack.h | 13
3 files changed, 118 insertions, 51 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index 1d10a8cbfa..74ffce34f4 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -292,7 +292,8 @@ typedef struct tcp_s {
tcp_tconnind_started : 1, /* conn_ind message is being sent */
tcp_lso :1, /* Lower layer is capable of LSO */
tcp_refuse :1, /* Connection needs refusing */
- tcp_pad_to_bit_31 : 16;
+ tcp_is_wnd_shrnk : 1, /* Window has shrunk */
+ tcp_pad_to_bit_31 : 15;
uint32_t tcp_if_mtu; /* Outgoing interface MTU. */
@@ -602,6 +603,11 @@ typedef struct tcp_s {
boolean_t tcp_flow_stopped;
/*
+ * Sender's next sequence number at the time the window was shrunk.
+ */
+ uint32_t tcp_snxt_shrunk;
+
+ /*
* The socket generation number is bumped when an outgoing connection
* attempts is made, and it sent up to the socket when the
* connection was successfully established, or an error occured. The
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 5f0babbc98..c84de5bf29 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -774,6 +774,7 @@ static void tcp_iss_key_init(uint8_t *phrase, int len, tcp_stack_t *);
static int tcp_1948_phrase_set(queue_t *q, mblk_t *mp, char *value,
caddr_t cp, cred_t *cr);
static void tcp_process_shrunk_swnd(tcp_t *tcp, uint32_t shrunk_cnt);
+static void tcp_update_xmit_tail(tcp_t *tcp, uint32_t snxt);
static mblk_t *tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start);
static void tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp);
static void tcp_reinit(tcp_t *tcp);
@@ -4255,7 +4256,8 @@ tcp_free(tcp_t *tcp)
if (tcp->tcp_sack_info != NULL) {
if (tcp->tcp_notsack_list != NULL) {
- TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list);
+ TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list,
+ tcp);
}
bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
}
@@ -7715,10 +7717,12 @@ tcp_reinit_values(tcp)
tcp->tcp_cwr = B_FALSE;
tcp->tcp_ecn_echo_on = B_FALSE;
+ tcp->tcp_is_wnd_shrnk = B_FALSE;
if (tcp->tcp_sack_info != NULL) {
if (tcp->tcp_notsack_list != NULL) {
- TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list);
+ TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list,
+ tcp);
}
kmem_cache_free(tcp_sack_info_cache, tcp->tcp_sack_info);
tcp->tcp_sack_info = NULL;
@@ -11819,6 +11823,11 @@ tcp_set_rto(tcp_t *tcp, clock_t rtt)
/*
* tcp_get_seg_mp() is called to get the pointer to a segment in the
+ * send queue which starts at the given sequence number. If the given
+ * sequence number is equal to last valid sequence number (tcp_snxt), the
+ * returned mblk is the last valid mblk, and off is set to the length of
+ * that mblk.
+ *
* send queue which starts at the given seq. no.
*
* Parameters:
@@ -11838,14 +11847,14 @@ tcp_get_seg_mp(tcp_t *tcp, uint32_t seq, int32_t *off)
mblk_t *mp;
/* Defensive coding. Make sure we don't send incorrect data. */
- if (SEQ_LT(seq, tcp->tcp_suna) || SEQ_GEQ(seq, tcp->tcp_snxt))
+ if (SEQ_LT(seq, tcp->tcp_suna) || SEQ_GT(seq, tcp->tcp_snxt))
return (NULL);
cnt = seq - tcp->tcp_suna;
mp = tcp->tcp_xmit_head;
while (cnt > 0 && mp != NULL) {
cnt -= mp->b_wptr - mp->b_rptr;
- if (cnt < 0) {
+ if (cnt <= 0) {
cnt += mp->b_wptr - mp->b_rptr;
break;
}
@@ -14294,34 +14303,63 @@ process_ack:
* state is handled above, so we can always just drop the segment and
* send an ACK here.
*
+ * In the case where the peer shrinks the window, we see the new window
+ * update, but all the data sent previously is queued up by the peer.
+ * To account for this, in tcp_process_shrunk_swnd(), the sequence
+ * number, which was already sent, and within window, is recorded.
+ * tcp_snxt is then updated.
+ *
+ * If the window has previously shrunk, and an ACK for data not yet
+ * sent, according to tcp_snxt, is received, it may still be valid. If
+ * the ACK is for data within the window at the time the window was
+ * shrunk, then the ACK is acceptable. In this case tcp_snxt is set to
+ * the sequence number ACK'ed.
+ *
+ * If the ACK covers all the data sent at the time the window was
+ * shrunk, we can now set tcp_is_wnd_shrnk to B_FALSE.
+ *
* Should we send ACKs in response to ACK only segments?
*/
+
if (SEQ_GT(seg_ack, tcp->tcp_snxt)) {
- BUMP_MIB(&tcps->tcps_mib, tcpInAckUnsent);
- /* drop the received segment */
- freemsg(mp);
+ if ((tcp->tcp_is_wnd_shrnk) &&
+ (SEQ_LEQ(seg_ack, tcp->tcp_snxt_shrunk))) {
+ uint32_t data_acked_ahead_snxt;
- /*
- * Send back an ACK. If tcp_drop_ack_unsent_cnt is
- * greater than 0, check if the number of such
- * bogus ACks is greater than that count. If yes,
- * don't send back any ACK. This prevents TCP from
- * getting into an ACK storm if somehow an attacker
- * successfully spoofs an acceptable segment to our
- * peer.
- */
- if (tcp_drop_ack_unsent_cnt > 0 &&
- ++tcp->tcp_in_ack_unsent > tcp_drop_ack_unsent_cnt) {
- TCP_STAT(tcps, tcp_in_ack_unsent_drop);
+ data_acked_ahead_snxt = seg_ack - tcp->tcp_snxt;
+ tcp_update_xmit_tail(tcp, seg_ack);
+ tcp->tcp_unsent -= data_acked_ahead_snxt;
+ } else {
+ BUMP_MIB(&tcps->tcps_mib, tcpInAckUnsent);
+ /* drop the received segment */
+ freemsg(mp);
+
+ /*
+ * Send back an ACK. If tcp_drop_ack_unsent_cnt is
+ * greater than 0, check if the number of such
+ * bogus ACks is greater than that count. If yes,
+ * don't send back any ACK. This prevents TCP from
+ * getting into an ACK storm if somehow an attacker
+ * successfully spoofs an acceptable segment to our
+ * peer.
+ */
+ if (tcp_drop_ack_unsent_cnt > 0 &&
+ ++tcp->tcp_in_ack_unsent >
+ tcp_drop_ack_unsent_cnt) {
+ TCP_STAT(tcps, tcp_in_ack_unsent_drop);
+ return;
+ }
+ mp = tcp_ack_mp(tcp);
+ if (mp != NULL) {
+ BUMP_LOCAL(tcp->tcp_obsegs);
+ BUMP_MIB(&tcps->tcps_mib, tcpOutAck);
+ tcp_send_data(tcp, tcp->tcp_wq, mp);
+ }
return;
}
- mp = tcp_ack_mp(tcp);
- if (mp != NULL) {
- BUMP_LOCAL(tcp->tcp_obsegs);
- BUMP_MIB(&tcps->tcps_mib, tcpOutAck);
- tcp_send_data(tcp, tcp->tcp_wq, mp);
- }
- return;
+ } else if (tcp->tcp_is_wnd_shrnk && SEQ_GEQ(seg_ack,
+ tcp->tcp_snxt_shrunk)) {
+ tcp->tcp_is_wnd_shrnk = B_FALSE;
}
/*
@@ -14361,7 +14399,8 @@ process_ack:
*/
if (tcp->tcp_snd_sack_ok &&
tcp->tcp_notsack_list != NULL) {
- TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list);
+ TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list,
+ tcp);
}
} else {
if (tcp->tcp_snd_sack_ok &&
@@ -15166,6 +15205,26 @@ done:
}
/*
+ * This routine adjusts next-to-send sequence number variables, in the
+ * case where the receiver has shrunk its window.
+ */
+static void
+tcp_update_xmit_tail(tcp_t *tcp, uint32_t snxt)
+{
+ mblk_t *xmit_tail;
+ int32_t offset;
+
+ tcp->tcp_snxt = snxt;
+
+ /* Get the mblk, and the offset in it, as per the shrunk window */
+ xmit_tail = tcp_get_seg_mp(tcp, snxt, &offset);
+ ASSERT(xmit_tail != NULL);
+ tcp->tcp_xmit_tail = xmit_tail;
+ tcp->tcp_xmit_tail_unsent = xmit_tail->b_wptr -
+ xmit_tail->b_rptr - offset;
+}
+
+/*
* This function does PAWS protection check. Returns B_TRUE if the
* segment passes the PAWS test, else returns B_FALSE.
*/
@@ -16547,11 +16606,8 @@ tcp_timer(void *arg)
/*
* Remove all rexmit SACK blk to start from fresh.
*/
- if (tcp->tcp_snd_sack_ok && tcp->tcp_notsack_list != NULL) {
- TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list);
- tcp->tcp_num_notsack_blk = 0;
- tcp->tcp_cnt_notsack_list = 0;
- }
+ if (tcp->tcp_snd_sack_ok && tcp->tcp_notsack_list != NULL)
+ TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
if (mp == NULL) {
return;
}
@@ -18638,26 +18694,32 @@ static void
tcp_process_shrunk_swnd(tcp_t *tcp, uint32_t shrunk_count)
{
uint32_t snxt = tcp->tcp_snxt;
- mblk_t *xmit_tail;
- int32_t offset;
ASSERT(shrunk_count > 0);
+ if (!tcp->tcp_is_wnd_shrnk) {
+ tcp->tcp_snxt_shrunk = snxt;
+ tcp->tcp_is_wnd_shrnk = B_TRUE;
+ } else if (SEQ_GT(snxt, tcp->tcp_snxt_shrunk)) {
+ tcp->tcp_snxt_shrunk = snxt;
+ }
+
/* Pretend we didn't send the data outside the window */
snxt -= shrunk_count;
- /* Get the mblk and the offset in it per the shrunk window */
- xmit_tail = tcp_get_seg_mp(tcp, snxt, &offset);
-
- ASSERT(xmit_tail != NULL);
-
/* Reset all the values per the now shrunk window */
- tcp->tcp_snxt = snxt;
- tcp->tcp_xmit_tail = xmit_tail;
- tcp->tcp_xmit_tail_unsent = xmit_tail->b_wptr - xmit_tail->b_rptr -
- offset;
+ tcp_update_xmit_tail(tcp, snxt);
tcp->tcp_unsent += shrunk_count;
+ /*
+ * If the SACK option is set, delete the entire list of
+ * notsack'ed blocks.
+ */
+ if (tcp->tcp_sack_info != NULL) {
+ if (tcp->tcp_notsack_list != NULL)
+ TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
+ }
+
if (tcp->tcp_suna == tcp->tcp_snxt && tcp->tcp_swnd == 0)
/*
* Make sure the timer is running so that we will probe a zero
diff --git a/usr/src/uts/common/inet/tcp_sack.h b/usr/src/uts/common/inet/tcp_sack.h
index 9bfbc48b3b..7bd9939f67 100644
--- a/usr/src/uts/common/inet/tcp_sack.h
+++ b/usr/src/uts/common/inet/tcp_sack.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _INET_TCP_SACK_H
#define _INET_TCP_SACK_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -88,7 +85,7 @@ extern void tcp_notsack_update(notsack_blk_t **, tcp_seq, tcp_seq,
* Param:
* notsack_blk_t *head: pointer to the head of the list of notsack'ed blks.
*/
-#define TCP_NOTSACK_REMOVE_ALL(head) \
+#define TCP_NOTSACK_REMOVE_ALL(head, tcp) \
{ \
notsack_blk_t *prev, *tmp; \
tmp = (head); \
@@ -98,6 +95,8 @@ extern void tcp_notsack_update(notsack_blk_t **, tcp_seq, tcp_seq,
kmem_free(prev, sizeof (notsack_blk_t)); \
} while (tmp != NULL); \
(head) = NULL; \
+ (tcp)->tcp_cnt_notsack_list = 0; \
+ (tcp)->tcp_num_notsack_blk = 0; \
}