diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/inet/tcp.h | 8 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 148 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp_sack.h | 13 |
3 files changed, 118 insertions, 51 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 1d10a8cbfa..74ffce34f4 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -292,7 +292,8 @@ typedef struct tcp_s { tcp_tconnind_started : 1, /* conn_ind message is being sent */ tcp_lso :1, /* Lower layer is capable of LSO */ tcp_refuse :1, /* Connection needs refusing */ - tcp_pad_to_bit_31 : 16; + tcp_is_wnd_shrnk : 1, /* Window has shrunk */ + tcp_pad_to_bit_31 : 15; uint32_t tcp_if_mtu; /* Outgoing interface MTU. */ @@ -602,6 +603,11 @@ typedef struct tcp_s { boolean_t tcp_flow_stopped; /* + * Sender's next sequence number at the time the window was shrunk. + */ + uint32_t tcp_snxt_shrunk; + + /* * The socket generation number is bumped when an outgoing connection * attempts is made, and it sent up to the socket when the * connection was successfully established, or an error occured. The diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 5f0babbc98..c84de5bf29 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -774,6 +774,7 @@ static void tcp_iss_key_init(uint8_t *phrase, int len, tcp_stack_t *); static int tcp_1948_phrase_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); static void tcp_process_shrunk_swnd(tcp_t *tcp, uint32_t shrunk_cnt); +static void tcp_update_xmit_tail(tcp_t *tcp, uint32_t snxt); static mblk_t *tcp_reass(tcp_t *tcp, mblk_t *mp, uint32_t start); static void tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp); static void tcp_reinit(tcp_t *tcp); @@ -4255,7 +4256,8 @@ tcp_free(tcp_t *tcp) if (tcp->tcp_sack_info != NULL) { if (tcp->tcp_notsack_list != NULL) { - TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list); + TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, + tcp); } bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); } @@ -7715,10 +7717,12 @@ tcp_reinit_values(tcp) tcp->tcp_cwr = B_FALSE; tcp->tcp_ecn_echo_on = B_FALSE; + tcp->tcp_is_wnd_shrnk = 
B_FALSE; if (tcp->tcp_sack_info != NULL) { if (tcp->tcp_notsack_list != NULL) { - TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list); + TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, + tcp); } kmem_cache_free(tcp_sack_info_cache, tcp->tcp_sack_info); tcp->tcp_sack_info = NULL; @@ -11819,6 +11823,11 @@ tcp_set_rto(tcp_t *tcp, clock_t rtt) /* * tcp_get_seg_mp() is called to get the pointer to a segment in the + * send queue which starts at the given sequence number. If the given + * sequence number is equal to last valid sequence number (tcp_snxt), the + * returned mblk is the last valid mblk, and off is set to the length of + * that mblk. + * * send queue which starts at the given seq. no. * * Parameters: @@ -11838,14 +11847,14 @@ tcp_get_seg_mp(tcp_t *tcp, uint32_t seq, int32_t *off) mblk_t *mp; /* Defensive coding. Make sure we don't send incorrect data. */ - if (SEQ_LT(seq, tcp->tcp_suna) || SEQ_GEQ(seq, tcp->tcp_snxt)) + if (SEQ_LT(seq, tcp->tcp_suna) || SEQ_GT(seq, tcp->tcp_snxt)) return (NULL); cnt = seq - tcp->tcp_suna; mp = tcp->tcp_xmit_head; while (cnt > 0 && mp != NULL) { cnt -= mp->b_wptr - mp->b_rptr; - if (cnt < 0) { + if (cnt <= 0) { cnt += mp->b_wptr - mp->b_rptr; break; } @@ -14294,34 +14303,63 @@ process_ack: * state is handled above, so we can always just drop the segment and * send an ACK here. * + * In the case where the peer shrinks the window, we see the new window + * update, but all the data sent previously is queued up by the peer. + * To account for this, in tcp_process_shrunk_swnd(), the sequence + * number, which was already sent, and within window, is recorded. + * tcp_snxt is then updated. + * + * If the window has previously shrunk, and an ACK for data not yet + * sent, according to tcp_snxt, is received, it may still be valid. If + * the ACK is for data within the window at the time the window was + * shrunk, then the ACK is acceptable. In this case tcp_snxt is set to + * the sequence number ACK'ed. 
+ * + * If the ACK covers all the data sent at the time the window was + * shrunk, we can now set tcp_is_wnd_shrnk to B_FALSE. + * * Should we send ACKs in response to ACK only segments? */ + if (SEQ_GT(seg_ack, tcp->tcp_snxt)) { - BUMP_MIB(&tcps->tcps_mib, tcpInAckUnsent); - /* drop the received segment */ - freemsg(mp); + if ((tcp->tcp_is_wnd_shrnk) && + (SEQ_LEQ(seg_ack, tcp->tcp_snxt_shrunk))) { + uint32_t data_acked_ahead_snxt; - /* - * Send back an ACK. If tcp_drop_ack_unsent_cnt is - * greater than 0, check if the number of such - * bogus ACks is greater than that count. If yes, - * don't send back any ACK. This prevents TCP from - * getting into an ACK storm if somehow an attacker - * successfully spoofs an acceptable segment to our - * peer. - */ - if (tcp_drop_ack_unsent_cnt > 0 && - ++tcp->tcp_in_ack_unsent > tcp_drop_ack_unsent_cnt) { - TCP_STAT(tcps, tcp_in_ack_unsent_drop); + data_acked_ahead_snxt = seg_ack - tcp->tcp_snxt; + tcp_update_xmit_tail(tcp, seg_ack); + tcp->tcp_unsent -= data_acked_ahead_snxt; + } else { + BUMP_MIB(&tcps->tcps_mib, tcpInAckUnsent); + /* drop the received segment */ + freemsg(mp); + + /* + * Send back an ACK. If tcp_drop_ack_unsent_cnt is + * greater than 0, check if the number of such + * bogus ACks is greater than that count. If yes, + * don't send back any ACK. This prevents TCP from + * getting into an ACK storm if somehow an attacker + * successfully spoofs an acceptable segment to our + * peer. 
+ */ + if (tcp_drop_ack_unsent_cnt > 0 && + ++tcp->tcp_in_ack_unsent > + tcp_drop_ack_unsent_cnt) { + TCP_STAT(tcps, tcp_in_ack_unsent_drop); + return; + } + mp = tcp_ack_mp(tcp); + if (mp != NULL) { + BUMP_LOCAL(tcp->tcp_obsegs); + BUMP_MIB(&tcps->tcps_mib, tcpOutAck); + tcp_send_data(tcp, tcp->tcp_wq, mp); + } return; } - mp = tcp_ack_mp(tcp); - if (mp != NULL) { - BUMP_LOCAL(tcp->tcp_obsegs); - BUMP_MIB(&tcps->tcps_mib, tcpOutAck); - tcp_send_data(tcp, tcp->tcp_wq, mp); - } - return; + } else if (tcp->tcp_is_wnd_shrnk && SEQ_GEQ(seg_ack, + tcp->tcp_snxt_shrunk)) { + tcp->tcp_is_wnd_shrnk = B_FALSE; } /* @@ -14361,7 +14399,8 @@ process_ack: */ if (tcp->tcp_snd_sack_ok && tcp->tcp_notsack_list != NULL) { - TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list); + TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, + tcp); } } else { if (tcp->tcp_snd_sack_ok && @@ -15166,6 +15205,26 @@ done: } /* + * This routine adjusts next-to-send sequence number variables, in the + * case where the receiver has shrunk its window. + */ +static void +tcp_update_xmit_tail(tcp_t *tcp, uint32_t snxt) +{ + mblk_t *xmit_tail; + int32_t offset; + + tcp->tcp_snxt = snxt; + + /* Get the mblk, and the offset in it, as per the shrunk window */ + xmit_tail = tcp_get_seg_mp(tcp, snxt, &offset); + ASSERT(xmit_tail != NULL); + tcp->tcp_xmit_tail = xmit_tail; + tcp->tcp_xmit_tail_unsent = xmit_tail->b_wptr - + xmit_tail->b_rptr - offset; +} + +/* * This function does PAWS protection check. Returns B_TRUE if the * segment passes the PAWS test, else returns B_FALSE. */ @@ -16547,11 +16606,8 @@ tcp_timer(void *arg) /* * Remove all rexmit SACK blk to start from fresh. 
*/ - if (tcp->tcp_snd_sack_ok && tcp->tcp_notsack_list != NULL) { - TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list); - tcp->tcp_num_notsack_blk = 0; - tcp->tcp_cnt_notsack_list = 0; - } + if (tcp->tcp_snd_sack_ok && tcp->tcp_notsack_list != NULL) + TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp); if (mp == NULL) { return; } @@ -18638,26 +18694,32 @@ static void tcp_process_shrunk_swnd(tcp_t *tcp, uint32_t shrunk_count) { uint32_t snxt = tcp->tcp_snxt; - mblk_t *xmit_tail; - int32_t offset; ASSERT(shrunk_count > 0); + if (!tcp->tcp_is_wnd_shrnk) { + tcp->tcp_snxt_shrunk = snxt; + tcp->tcp_is_wnd_shrnk = B_TRUE; + } else if (SEQ_GT(snxt, tcp->tcp_snxt_shrunk)) { + tcp->tcp_snxt_shrunk = snxt; + } + /* Pretend we didn't send the data outside the window */ snxt -= shrunk_count; - /* Get the mblk and the offset in it per the shrunk window */ - xmit_tail = tcp_get_seg_mp(tcp, snxt, &offset); - - ASSERT(xmit_tail != NULL); - /* Reset all the values per the now shrunk window */ - tcp->tcp_snxt = snxt; - tcp->tcp_xmit_tail = xmit_tail; - tcp->tcp_xmit_tail_unsent = xmit_tail->b_wptr - xmit_tail->b_rptr - - offset; + tcp_update_xmit_tail(tcp, snxt); tcp->tcp_unsent += shrunk_count; + /* + * If the SACK option is set, delete the entire list of + * notsack'ed blocks. + */ + if (tcp->tcp_sack_info != NULL) { + if (tcp->tcp_notsack_list != NULL) + TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp); + } + if (tcp->tcp_suna == tcp->tcp_snxt && tcp->tcp_swnd == 0) /* * Make sure the timer is running so that we will probe a zero diff --git a/usr/src/uts/common/inet/tcp_sack.h b/usr/src/uts/common/inet/tcp_sack.h index 9bfbc48b3b..7bd9939f67 100644 --- a/usr/src/uts/common/inet/tcp_sack.h +++ b/usr/src/uts/common/inet/tcp_sack.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _INET_TCP_SACK_H #define _INET_TCP_SACK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -88,7 +85,7 @@ extern void tcp_notsack_update(notsack_blk_t **, tcp_seq, tcp_seq, * Param: * notsack_blk_t *head: pointer to the head of the list of notsack'ed blks. */ -#define TCP_NOTSACK_REMOVE_ALL(head) \ +#define TCP_NOTSACK_REMOVE_ALL(head, tcp) \ { \ notsack_blk_t *prev, *tmp; \ tmp = (head); \ @@ -98,6 +95,8 @@ extern void tcp_notsack_update(notsack_blk_t **, tcp_seq, tcp_seq, kmem_free(prev, sizeof (notsack_blk_t)); \ } while (tmp != NULL); \ (head) = NULL; \ + (tcp)->tcp_cnt_notsack_list = 0; \ + (tcp)->tcp_num_notsack_blk = 0; \ } |