diff options
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_input.c      | 173
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp_time_wait.c  |  25
-rw-r--r--  usr/src/uts/common/inet/tcp_impl.h           |  12
3 files changed, 100 insertions, 110 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c
index 45337d83d9..cf8e0c6bd4 100644
--- a/usr/src/uts/common/inet/tcp/tcp_input.c
+++ b/usr/src/uts/common/inet/tcp/tcp_input.c
@@ -51,7 +51,7 @@
 #include <inet/ipsec_impl.h>
 
 /*
- * RFC1323-recommended phrasing of TSTAMP option, for easier parsing
+ * RFC7323-recommended phrasing of TSTAMP option, for easier parsing
  */
 
 #ifdef _BIG_ENDIAN
@@ -63,15 +63,6 @@
 #endif
 
 /*
- * Flags returned from tcp_parse_options.
- */
-#define TCP_OPT_MSS_PRESENT 1
-#define TCP_OPT_WSCALE_PRESENT 2
-#define TCP_OPT_TSTAMP_PRESENT 4
-#define TCP_OPT_SACK_OK_PRESENT 8
-#define TCP_OPT_SACK_PRESENT 16
-
-/*
  * PAWS needs a timer for 24 days. This is the number of ticks in 24 days
  */
 #define PAWS_TIMEOUT ((clock_t)(24*24*60*60*hz))
@@ -171,7 +162,6 @@ static void tcp_icmp_error_ipv6(tcp_t *, mblk_t *, ip_recv_attr_t *);
 static mblk_t *tcp_input_add_ancillary(tcp_t *, mblk_t *, ip_pkt_t *,
 		    ip_recv_attr_t *);
 static void tcp_input_listener(void *, mblk_t *, void *, ip_recv_attr_t *);
-static int tcp_parse_options(tcpha_t *, tcp_opt_t *);
 static void tcp_process_options(tcp_t *, tcpha_t *);
 static mblk_t *tcp_reass(tcp_t *, mblk_t *, uint32_t);
 static void tcp_reass_elim_overlap(tcp_t *, mblk_t *);
@@ -237,7 +227,7 @@ tcp_mss_set(tcp_t *tcp, uint32_t mss)
  * Extract option values from a tcp header. We put any found values into the
  * tcpopt struct and return a bitmask saying which options were found.
  */
-static int
+int
 tcp_parse_options(tcpha_t *tcpha, tcp_opt_t *tcpopt)
 {
 	uchar_t *endp;
@@ -251,6 +241,19 @@ tcp_parse_options(tcpha_t *tcpha, tcp_opt_t *tcpopt)
 
 	endp = up + TCP_HDR_LENGTH(tcpha);
 	up += TCP_MIN_HEADER_LENGTH;
+	/*
+	 * If timestamp option is aligned as recommended in RFC 7323 Appendix
+	 * A, and is the only option, return quickly.
+	 */
+	if (TCP_HDR_LENGTH(tcpha) == (uint32_t)TCP_MIN_HEADER_LENGTH +
+	    TCPOPT_REAL_TS_LEN &&
+	    OK_32PTR(up) &&
+	    *(uint32_t *)up == TCPOPT_NOP_NOP_TSTAMP) {
+		tcpopt->tcp_opt_ts_val = ABE32_TO_U32((up+4));
+		tcpopt->tcp_opt_ts_ecr = ABE32_TO_U32((up+8));
+
+		return (TCP_OPT_TSTAMP_PRESENT);
+	}
 	while (up < endp) {
 		len = endp - up;
 		switch (*up) {
@@ -686,82 +689,27 @@ tcp_reass_elim_overlap(tcp_t *tcp, mblk_t *mp)
 }
 
 /*
- * This function does PAWS protection check. Returns B_TRUE if the
- * segment passes the PAWS test, else returns B_FALSE.
+ * This function does PAWS protection check, per RFC 7323 section 5. Requires
+ * that timestamp options are already processed into tcpoptp. Returns B_TRUE if
+ * the segment passes the PAWS test, else returns B_FALSE.
  */
 boolean_t
-tcp_paws_check(tcp_t *tcp, tcpha_t *tcpha, tcp_opt_t *tcpoptp)
+tcp_paws_check(tcp_t *tcp, const tcp_opt_t *tcpoptp)
 {
-	uint8_t flags;
-	int options;
-	uint8_t *up;
-	conn_t *connp = tcp->tcp_connp;
-
-	flags = (unsigned int)tcpha->tha_flags & 0xFF;
-	/*
-	 * If timestamp option is aligned nicely, get values inline,
-	 * otherwise call general routine to parse. Only do that
-	 * if timestamp is the only option.
-	 */
-	if (TCP_HDR_LENGTH(tcpha) == (uint32_t)TCP_MIN_HEADER_LENGTH +
-	    TCPOPT_REAL_TS_LEN &&
-	    OK_32PTR((up = ((uint8_t *)tcpha) +
-	    TCP_MIN_HEADER_LENGTH)) &&
-	    *(uint32_t *)up == TCPOPT_NOP_NOP_TSTAMP) {
-		tcpoptp->tcp_opt_ts_val = ABE32_TO_U32((up+4));
-		tcpoptp->tcp_opt_ts_ecr = ABE32_TO_U32((up+8));
-
-		options = TCP_OPT_TSTAMP_PRESENT;
-	} else {
-		if (tcp->tcp_snd_sack_ok) {
-			tcpoptp->tcp = tcp;
+	if (TSTMP_LT(tcpoptp->tcp_opt_ts_val,
+	    tcp->tcp_ts_recent)) {
+		if (LBOLT_FASTPATH64 <
+		    (tcp->tcp_last_rcv_lbolt + PAWS_TIMEOUT)) {
+			/* This segment is not acceptable. */
+			return (B_FALSE);
 		} else {
-			tcpoptp->tcp = NULL;
-		}
-		options = tcp_parse_options(tcpha, tcpoptp);
-	}
-
-	if (options & TCP_OPT_TSTAMP_PRESENT) {
-		/*
-		 * Do PAWS per RFC 1323 section 4.2. Accept RST
-		 * regardless of the timestamp, page 18 RFC 1323.bis.
-		 */
-		if ((flags & TH_RST) == 0 &&
-		    TSTMP_LT(tcpoptp->tcp_opt_ts_val,
-		    tcp->tcp_ts_recent)) {
-			if (LBOLT_FASTPATH64 <
-			    (tcp->tcp_last_rcv_lbolt + PAWS_TIMEOUT)) {
-				/* This segment is not acceptable. */
-				return (B_FALSE);
-			} else {
-				/*
-				 * Connection has been idle for
-				 * too long. Reset the timestamp
-				 * and assume the segment is valid.
-				 */
-				tcp->tcp_ts_recent =
-				    tcpoptp->tcp_opt_ts_val;
-			}
+			/*
+			 * Connection has been idle for
+			 * too long. Reset the timestamp
+			 */
+			tcp->tcp_ts_recent =
+			    tcpoptp->tcp_opt_ts_val;
 		}
-	} else {
-		/*
-		 * If we don't get a timestamp on every packet, we
-		 * figure we can't really trust 'em, so we stop sending
-		 * and parsing them.
-		 */
-		tcp->tcp_snd_ts_ok = B_FALSE;
-
-		connp->conn_ht_iphc_len -= TCPOPT_REAL_TS_LEN;
-		connp->conn_ht_ulp_len -= TCPOPT_REAL_TS_LEN;
-		tcp->tcp_tcpha->tha_offset_and_reserved -= (3 << 4);
-		/*
-		 * Adjust the tcp_mss and tcp_cwnd accordingly. We avoid
-		 * doing a slow start here so as to not to lose on the
-		 * transfer rate built up so far.
-		 */
-		tcp_mss_set(tcp, tcp->tcp_mss + TCPOPT_REAL_TS_LEN);
-		if (tcp->tcp_snd_sack_ok)
-			tcp->tcp_max_sack_blk = 4;
 	}
 	return (B_TRUE);
 }
@@ -2912,23 +2860,47 @@ tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
 	new_swnd = ntohs(tcpha->tha_win) <<
 	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
 
-	if (tcp->tcp_snd_ts_ok) {
-		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
+	/*
+	 * We are interested in two TCP options: timestamps (if negotiated) and
+	 * SACK (if negotiated). Skip option parsing if neither is negotiated.
+	 */
+	if (tcp->tcp_snd_ts_ok || tcp->tcp_snd_sack_ok) {
+		int options;
+		if (tcp->tcp_snd_sack_ok)
+			tcpopt.tcp = tcp;
+		else
+			tcpopt.tcp = NULL;
+		options = tcp_parse_options(tcpha, &tcpopt);
+		/*
+		 * RST segments must not be subject to PAWS and are not
+		 * required to have timestamps.
+		 */
+		if (tcp->tcp_snd_ts_ok && !(flags & TH_RST)) {
 			/*
-			 * This segment is not acceptable.
-			 * Drop it and send back an ACK.
+			 * Per RFC 7323 section 3.2., silently drop non-RST
+			 * segments without expected TSopt. This is a 'SHOULD'
+			 * requirement.
 			 */
-			freemsg(mp);
-			flags |= TH_ACK_NEEDED;
-			goto ack_check;
+			if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
+				/*
+				 * Leave a breadcrumb for people to detect this
+				 * behavior.
+				 */
+				DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
+				freemsg(mp);
+				return;
+			}
+
+			if (!tcp_paws_check(tcp, &tcpopt)) {
+				/*
+				 * This segment is not acceptable.
+				 * Drop it and send back an ACK.
+				 */
+				freemsg(mp);
+				flags |= TH_ACK_NEEDED;
+				goto ack_check;
+			}
 		}
-	} else if (tcp->tcp_snd_sack_ok) {
-		tcpopt.tcp = tcp;
-		/*
-		 * SACK info in already updated in tcp_parse_options. Ignore
-		 * all other TCP options...
-		 */
-		(void) tcp_parse_options(tcpha, &tcpopt);
 	}
 try_again:;
 	mss = tcp->tcp_mss;
@@ -3221,11 +3193,10 @@ ok:;
 	}
 
 	/*
-	 * Check whether we can update tcp_ts_recent. This test is
-	 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
-	 * Extensions for High Performance: An Update", Internet Draft.
+	 * Check whether we can update tcp_ts_recent. This test is from RFC
+	 * 7323, section 5.3.
 	 */
-	if (tcp->tcp_snd_ts_ok &&
+	if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
 	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
 	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
 		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
diff --git a/usr/src/uts/common/inet/tcp/tcp_time_wait.c b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
index adde51e745..b470934da0 100644
--- a/usr/src/uts/common/inet/tcp/tcp_time_wait.c
+++ b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
@@ -517,10 +517,20 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
 	flags = (unsigned int)tcpha->tha_flags & 0xFF;
 	new_swnd = ntohs(tcpha->tha_win) <<
 	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
-	if (tcp->tcp_snd_ts_ok) {
-		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
-			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
-			    tcp->tcp_rnxt, TH_ACK);
+
+	if (tcp->tcp_snd_ts_ok && !(tcpha->tha_flags & TH_RST)) {
+		int options;
+		if (tcp->tcp_snd_sack_ok)
+			tcpopt.tcp = tcp;
+		else
+			tcpopt.tcp = NULL;
+		options = tcp_parse_options(tcpha, &tcpopt);
+		if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
+			DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
+			goto done;
+		} else if (!tcp_paws_check(tcp, &tcpopt)) {
+			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt,
+			    TH_ACK);
 			goto done;
 		}
 	}
@@ -667,11 +677,10 @@ tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
 		}
 	}
 	/*
-	 * Check whether we can update tcp_ts_recent. This test is
-	 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
-	 * Extensions for High Performance: An Update", Internet Draft.
+	 * Check whether we can update tcp_ts_recent. This test is from RFC
+	 * 7323, section 5.3.
 	 */
-	if (tcp->tcp_snd_ts_ok &&
+	if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
 	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
 	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
 		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h
index 1b20e40aca..0f0f915a2b 100644
--- a/usr/src/uts/common/inet/tcp_impl.h
+++ b/usr/src/uts/common/inet/tcp_impl.h
@@ -291,6 +291,15 @@ typedef struct tcp_opt_s {
 } tcp_opt_t;
 
 /*
+ * Flags returned from tcp_parse_options.
+ */
+#define TCP_OPT_MSS_PRESENT 1
+#define TCP_OPT_WSCALE_PRESENT 2
+#define TCP_OPT_TSTAMP_PRESENT 4
+#define TCP_OPT_SACK_OK_PRESENT 8
+#define TCP_OPT_SACK_PRESENT 16
+
+/*
  * Write-side flow-control is implemented via the per instance STREAMS
  * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s)
  * and clearing QFULL and calling qbackenable() to restart the flow based
@@ -653,7 +662,8 @@ extern void tcp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
 extern void tcp_input_data(void *, mblk_t *, void *, ip_recv_attr_t *);
 extern void tcp_input_listener_unbound(void *, mblk_t *, void *,
 		    ip_recv_attr_t *);
-extern boolean_t tcp_paws_check(tcp_t *, tcpha_t *, tcp_opt_t *);
+extern boolean_t tcp_paws_check(tcp_t *, const tcp_opt_t *);
+extern int tcp_parse_options(tcpha_t *, tcp_opt_t *);
 extern uint_t tcp_rcv_drain(tcp_t *);
 extern void tcp_rcv_enqueue(tcp_t *, mblk_t *, uint_t, cred_t *);
 extern boolean_t tcp_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,