diff options
Diffstat (limited to 'usr/src/uts/common/inet/tcp')
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 39 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_bind.c | 226 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_input.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_opt_data.c | 110 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_socket.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_stats.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_timers.c | 2 |
7 files changed, 359 insertions, 56 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 9348ea3d0f..427a6df274 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -961,8 +961,7 @@ void tcp_stop_lingering(tcp_t *tcp) { clock_t delta = 0; - tcp_stack_t *tcps = tcp->tcp_tcps; - conn_t *connp = tcp->tcp_connp; + conn_t *connp = tcp->tcp_connp; tcp->tcp_linger_tid = 0; if (tcp->tcp_state > TCPS_LISTEN) { @@ -990,7 +989,7 @@ tcp_stop_lingering(tcp_t *tcp) if (tcp->tcp_state == TCPS_TIME_WAIT) { tcp_time_wait_append(tcp); - TCP_DBGSTAT(tcps, tcp_detach_time_wait); + TCP_DBGSTAT(tcp->tcp_tcps, tcp_detach_time_wait); goto finish; } @@ -1429,6 +1428,21 @@ tcp_free(tcp_t *tcp) tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv); /* + * Destroy any association with SO_REUSEPORT group. + */ + if (tcp->tcp_rg_bind != NULL) { + /* + * This is only necessary for connections which enabled + * SO_REUSEPORT but were never bound. Such connections should + * be the one and only member of the tcp_rg_tp to which they + * have been associated. + */ + VERIFY(tcp_rg_remove(tcp->tcp_rg_bind, tcp)); + tcp_rg_destroy(tcp->tcp_rg_bind); + tcp->tcp_rg_bind = NULL; + } + + /* * If this is a non-STREAM socket still holding on to an upper * handle, release it. As a result of fallback we might also see * STREAMS based conns with upper handles, in which case there is @@ -2477,8 +2491,10 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent) * Path MTU might have changed by either increase or decrease, so need to * adjust the MSS based on the value of ixa_pmtu. No need to handle tiny * or negative MSS, since tcp_mss_set() will do it. + * + * Returns B_TRUE when the connection PMTU changes, otherwise B_FALSE. */ -void +boolean_t tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only) { uint32_t pmtu; @@ -2488,10 +2504,10 @@ tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only) iaflags_t ixaflags; if (tcp->tcp_tcps->tcps_ignore_path_mtu) - return; + return (B_FALSE); if (tcp->tcp_state < TCPS_ESTABLISHED) - return; + return (B_FALSE); /* * Always call ip_get_pmtu() to make sure that IP has updated @@ -2511,13 +2527,13 @@ tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only) * Nothing to change, so just return. */ if (mss == tcp->tcp_mss) - return; + return (B_FALSE); /* * Currently, for ICMP errors, only PMTU decrease is handled. */ if (mss > tcp->tcp_mss && decrease_only) - return; + return (B_FALSE); DTRACE_PROBE2(tcp_update_pmtu, int32_t, tcp->tcp_mss, uint32_t, mss); @@ -2552,6 +2568,7 @@ tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only) tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0; } ixa->ixa_flags = ixaflags; + return (B_TRUE); } int @@ -3424,7 +3441,7 @@ tcp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, tcp_update_lso(tcp, connp->conn_ixa); break; case IXAN_PMTU: - tcp_update_pmtu(tcp, B_FALSE); + (void) tcp_update_pmtu(tcp, B_FALSE); break; case IXAN_ZCOPY: tcp_update_zcopy(tcp); @@ -3755,7 +3772,6 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns) { tcp_stack_t *tcps; int i; - int error = 0; major_t major; size_t arrsz; @@ -3819,8 +3835,7 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns) tcps->tcps_mibkp = tcp_kstat_init(stackid); major = mod_name_to_major(INET_NAME); - error = ldi_ident_from_major(major, &tcps->tcps_ldi_ident); - ASSERT(error == 0); + VERIFY0(ldi_ident_from_major(major, &tcps->tcps_ldi_ident)); tcps->tcps_ixa_cleanup_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); ASSERT(tcps->tcps_ixa_cleanup_mp != NULL); cv_init(&tcps->tcps_ixa_cleanup_ready_cv, NULL, CV_DEFAULT, NULL); diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c index 86242fc944..5c2e1e1932 100644 --- a/usr/src/uts/common/inet/tcp/tcp_bind.c +++ b/usr/src/uts/common/inet/tcp/tcp_bind.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ @@ -56,6 +57,7 @@ static uint32_t tcp_random_anon_port = 1; static int tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t, cred_t *cr); static in_port_t tcp_get_next_priv_port(const tcp_t *); +static int tcp_rg_insert(tcp_rg_t *, struct tcp_s *); /* * Hash list insertion routine for tcp_t structures. Each hash bucket @@ -173,6 +175,16 @@ tcp_bind_hash_remove(tcp_t *tcp) ASSERT(lockp != NULL); mutex_enter(lockp); + + /* destroy any association with SO_REUSEPORT group */ + if (tcp->tcp_rg_bind != NULL) { + if (tcp_rg_remove(tcp->tcp_rg_bind, tcp)) { + /* Last one out turns off the lights */ + tcp_rg_destroy(tcp->tcp_rg_bind); + } + tcp->tcp_rg_bind = NULL; + } + if (tcp->tcp_ptpbhn) { tcpnext = tcp->tcp_bind_hash_port; if (tcpnext != NULL) { @@ -638,13 +650,12 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, } /* - * If the "bind_to_req_port_only" parameter is set, if the requested port - * number is available, return it, If not return 0 + * If the "bind_to_req_port_only" parameter is set and the requested port + * number is available, return it (else return 0). * - * If "bind_to_req_port_only" parameter is not set and - * If the requested port number is available, return it. If not, return - * the first anonymous port we happen across. If no anonymous ports are - * available, return 0. addr is the requested local address, if any. + * If "bind_to_req_port_only" parameter is not set and the requested port + * number is available, return it. If not, return the first anonymous port we + * happen across. If no anonymous ports are available, return 0. * * In either case, when succeeding update the tcp_t to record the port number * and insert it in the bind hash table. @@ -664,6 +675,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, int loopmax; conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; + boolean_t reuseport = connp->conn_reuseport; /* * Lookup for free addresses is done in a loop and "loopmax" @@ -700,6 +712,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, tf_t *tbf; tcp_t *ltcp; conn_t *lconnp; + boolean_t attempt_reuse = B_FALSE; lport = htons(port); @@ -726,6 +739,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) { boolean_t not_socket; boolean_t exclbind; + boolean_t addrmatch; lconnp = ltcp->tcp_connp; @@ -831,22 +845,35 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, &lconnp->conn_faddr_v6))) continue; + addrmatch = IN6_ARE_ADDR_EQUAL(laddr, + &lconnp->conn_bound_addr_v6); + + if (addrmatch && reuseport && bind_to_req_port_only && + (ltcp->tcp_state == TCPS_BOUND || + ltcp->tcp_state == TCPS_LISTEN)) { + /* + * This entry is bound to the exact same + * address and port. If SO_REUSEPORT is set on + * the calling socket, attempt to reuse this + * binding if it too had SO_REUSEPORT enabled + * when it was bound. + */ + attempt_reuse = (ltcp->tcp_rg_bind != NULL); + break; + } + if (!reuseaddr) { /* - * No socket option SO_REUSEADDR. - * If existing port is bound to - * a non-wildcard IP address - * and the requesting stream is - * bound to a distinct - * different IP addresses - * (non-wildcard, also), keep - * going. + * No socket option SO_REUSEADDR. If an + * existing port is bound to a non-wildcard IP + * address and the requesting stream is bound + * to a distinct different IP address + * (non-wildcard, also), keep going. */ if (!V6_OR_V4_INADDR_ANY(*laddr) && !V6_OR_V4_INADDR_ANY( lconnp->conn_bound_addr_v6) && - !IN6_ARE_ADDR_EQUAL(laddr, - &lconnp->conn_bound_addr_v6)) + !addrmatch) continue; if (ltcp->tcp_state >= TCPS_BOUND) { /* @@ -861,27 +888,49 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * socket option SO_REUSEADDR is set on the * binding tcp_t. * - * If two streams are bound to - * same IP address or both addr - * and bound source are wildcards - * (INADDR_ANY), we want to stop - * searching. - * We have found a match of IP source - * address and source port, which is - * refused regardless of the - * SO_REUSEADDR setting, so we break. + * If two streams are bound to the same IP + * address or both addr and bound source are + * wildcards (INADDR_ANY), we want to stop + * searching. We have found a match of IP + * source address and source port, which is + * refused regardless of the SO_REUSEADDR + * setting, so we break. */ - if (IN6_ARE_ADDR_EQUAL(laddr, - &lconnp->conn_bound_addr_v6) && + if (addrmatch && (ltcp->tcp_state == TCPS_LISTEN || ltcp->tcp_state == TCPS_BOUND)) break; } } - if (ltcp != NULL) { + if (ltcp != NULL && !attempt_reuse) { /* The port number is busy */ mutex_exit(&tbf->tf_lock); } else { + if (attempt_reuse) { + int err; + struct tcp_rg_s *rg; + + ASSERT(ltcp != NULL); + ASSERT(ltcp->tcp_rg_bind != NULL); + ASSERT(tcp->tcp_rg_bind != NULL); + ASSERT(ltcp->tcp_rg_bind != tcp->tcp_rg_bind); + + err = tcp_rg_insert(ltcp->tcp_rg_bind, tcp); + if (err != 0) { + mutex_exit(&tbf->tf_lock); + return (0); + } + /* + * Now that the newly-binding socket has joined + * the existing reuseport group on ltcp, it + * should clean up its own (empty) group. + */ + rg = tcp->tcp_rg_bind; + tcp->tcp_rg_bind = ltcp->tcp_rg_bind; + VERIFY(tcp_rg_remove(rg, tcp)); + tcp_rg_destroy(rg); + } + /* * This port is ours. Insert in fanout and mark as * bound to prevent others from getting the port @@ -946,3 +995,124 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, } while (++count < loopmax); return (0); } + +/* Max number of members in TCP SO_REUSEPORT group */ +#define TCP_RG_SIZE_MAX 64 +/* Step size when expanding members array */ +#define TCP_RG_SIZE_STEP 2 + + +tcp_rg_t * +tcp_rg_init(tcp_t *tcp) +{ + tcp_rg_t *rg; + rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP_LAZY); + if (rg == NULL) + return (NULL); + rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *), KM_NOSLEEP_LAZY); + if (rg->tcprg_members == NULL) { + kmem_free(rg, sizeof (tcp_rg_t)); + return (NULL); + } + + mutex_init(&rg->tcprg_lock, NULL, MUTEX_DEFAULT, NULL); + rg->tcprg_size = 2; + rg->tcprg_count = 1; + rg->tcprg_active = 1; + rg->tcprg_members[0] = tcp; + return (rg); +} + +void +tcp_rg_destroy(tcp_rg_t *rg) +{ + mutex_enter(&rg->tcprg_lock); + ASSERT(rg->tcprg_count == 0); + ASSERT(rg->tcprg_active == 0); + kmem_free(rg->tcprg_members, rg->tcprg_size * sizeof (tcp_t *)); + mutex_destroy(&rg->tcprg_lock); + kmem_free(rg, sizeof (struct tcp_rg_s)); +} + +static int +tcp_rg_insert(tcp_rg_t *rg, tcp_t *tcp) +{ + mutex_enter(&rg->tcprg_lock); + + VERIFY(rg->tcprg_size > 0); + VERIFY(rg->tcprg_count <= rg->tcprg_size); + if (rg->tcprg_count != 0) { + cred_t *oldcred = rg->tcprg_members[0]->tcp_connp->conn_cred; + cred_t *newcred = tcp->tcp_connp->conn_cred; + + if (crgetuid(oldcred) != crgetuid(newcred) || + crgetzoneid(oldcred) != crgetzoneid(newcred)) { + mutex_exit(&rg->tcprg_lock); + return (EPERM); + } + } + + if (rg->tcprg_count == rg->tcprg_size) { + unsigned int oldalloc = rg->tcprg_size * sizeof (tcp_t *); + unsigned int newsize = rg->tcprg_size + TCP_RG_SIZE_STEP; + tcp_t **newmembers; + + if (newsize > TCP_RG_SIZE_MAX) { + mutex_exit(&rg->tcprg_lock); + return (EINVAL); + } + newmembers = kmem_zalloc(newsize * sizeof (tcp_t *), + KM_NOSLEEP_LAZY); + if (newmembers == NULL) { + mutex_exit(&rg->tcprg_lock); + return (ENOMEM); + } + bcopy(rg->tcprg_members, newmembers, oldalloc); + kmem_free(rg->tcprg_members, oldalloc); + rg->tcprg_members = newmembers; + rg->tcprg_size = newsize; + } + + rg->tcprg_members[rg->tcprg_count] = tcp; + rg->tcprg_count++; + rg->tcprg_active++; + + mutex_exit(&rg->tcprg_lock); + return (0); +} + +boolean_t +tcp_rg_remove(tcp_rg_t *rg, tcp_t *tcp) +{ + int i; + boolean_t is_empty; + + mutex_enter(&rg->tcprg_lock); + for (i = 0; i < rg->tcprg_count; i++) { + if (rg->tcprg_members[i] == tcp) + break; + } + /* The item should be present */ + ASSERT(i < rg->tcprg_count); + /* Move the last member into this position */ + rg->tcprg_count--; + rg->tcprg_members[i] = rg->tcprg_members[rg->tcprg_count]; + rg->tcprg_members[rg->tcprg_count] = NULL; + if (tcp->tcp_connp->conn_reuseport != 0) + rg->tcprg_active--; + is_empty = (rg->tcprg_count == 0); + mutex_exit(&rg->tcprg_lock); + return (is_empty); +} + +void +tcp_rg_setactive(tcp_rg_t *rg, boolean_t is_active) +{ + mutex_enter(&rg->tcprg_lock); + if (is_active) { + rg->tcprg_active++; + } else { + rg->tcprg_active--; + } + mutex_exit(&rg->tcprg_lock); +} diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c index dd264528fc..22b0019a6a 100644 --- a/usr/src/uts/common/inet/tcp/tcp_input.c +++ b/usr/src/uts/common/inet/tcp/tcp_input.c @@ -5715,10 +5715,12 @@ noticmpv4: switch (icmph->icmph_code) { case ICMP_FRAGMENTATION_NEEDED: /* - * Update Path MTU, then try to send something out. + * Attempt to update path MTU and, if the MSS of the + * connection is altered, retransmit outstanding data. */ - tcp_update_pmtu(tcp, B_TRUE); - tcp_rexmit_after_error(tcp); + if (tcp_update_pmtu(tcp, B_TRUE)) { + tcp_rexmit_after_error(tcp); + } break; case ICMP_PORT_UNREACHABLE: case ICMP_PROTOCOL_UNREACHABLE: @@ -5761,7 +5763,7 @@ noticmpv4: break; } break; - case ICMP_SOURCE_QUENCH: { + case ICMP_SOURCE_QUENCH: /* * use a global boolean to control * whether TCP should respond to ICMP_SOURCE_QUENCH. @@ -5786,7 +5788,6 @@ noticmpv4: } break; } - } freemsg(mp); } @@ -5839,10 +5840,12 @@ noticmpv6: switch (icmp6->icmp6_type) { case ICMP6_PACKET_TOO_BIG: /* - * Update Path MTU, then try to send something out. + * Attempt to update path MTU and, if the MSS of the connection + * is altered, retransmit outstanding data. */ - tcp_update_pmtu(tcp, B_TRUE); - tcp_rexmit_after_error(tcp); + if (tcp_update_pmtu(tcp, B_TRUE)) { + tcp_rexmit_after_error(tcp); + } break; case ICMP6_DST_UNREACH: switch (icmp6->icmp6_code) { diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index 8687b52d53..15e49ae070 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -67,7 +67,8 @@ opdes_t tcp_opt_arr[] = { { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, -{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, @@ -505,6 +506,104 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) } /* + * Set a TCP connection's participation in SO_REUSEPORT. This operation is + * performed under the protection of the squeue via tcp_setsockopt. + * The manipulation of tcp_rg_bind, as part of this operation, is subject to + * these constraints: + * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport + * under the protection of the squeue. + * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be + * altered until such time as tcp_free() cleans up the connection. + * 3. A connection undergoing bind, which matches to a connection participating + * in port-reuse, will switch its tcp_rg_bind pointer when it joins the + * group of an existing connection in tcp_bindi(). + */ +static int +tcp_set_reuseport(conn_t *connp, boolean_t do_enable) +{ + tcp_t *tcp = connp->conn_tcp; + struct tcp_rg_s *rg; + + if (!IPCL_IS_NONSTR(connp)) { + if (do_enable) { + /* + * SO_REUSEPORT cannot be enabled on sockets which have + * fallen back to the STREAMS API. + */ + return (EINVAL); + } else { + /* + * A connection with SO_REUSEPORT enabled should be + * prevented from falling back to STREAMS mode via + * logic in tcp_fallback. It is legal, however, for + * fallen-back connections to affirm the disabled state + * of SO_REUSEPORT. + */ + ASSERT(connp->conn_reuseport == 0); + return (0); + } + } + if (tcp->tcp_state <= TCPS_CLOSED) { + return (EINVAL); + } + if (connp->conn_reuseport == 0 && do_enable) { + /* disabled -> enabled */ + if (tcp->tcp_rg_bind != NULL) { + tcp_rg_setactive(tcp->tcp_rg_bind, do_enable); + } else { + /* + * Connection state is not a concern when initially + * populating tcp_rg_bind. Setting it to non-NULL on a + * bound or listening connection would only mean that + * new reused-port binds become a possibility. + */ + if ((rg = tcp_rg_init(tcp)) == NULL) { + return (ENOMEM); + } + tcp->tcp_rg_bind = rg; + } + connp->conn_reuseport = 1; + } else if (connp->conn_reuseport != 0 && !do_enable) { + /* enabled -> disabled */ + ASSERT(tcp->tcp_rg_bind != NULL); + if (tcp->tcp_state == TCPS_IDLE) { + /* + * If the connection has not been bound yet, discard + * the reuse group state. Since disabling SO_REUSEPORT + * on a bound socket will _not_ prevent others from + * reusing the port, the presence of tcp_rg_bind is + * used to determine reuse availability, not + * conn_reuseport. + * + * This allows proper behavior for examples such as: + * + * setsockopt(fd1, ... SO_REUSEPORT, &on_val...); + * bind(fd1, &myaddr, ...); + * setsockopt(fd1, ... SO_REUSEPORT, &off_val...); + * + * setsockopt(fd2, ... SO_REUSEPORT, &on_val...); + * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED + * + */ + rg = tcp->tcp_rg_bind; + tcp->tcp_rg_bind = NULL; + VERIFY(tcp_rg_remove(rg, tcp)); + tcp_rg_destroy(rg); + } else { + /* + * If a connection has been bound, it's no longer safe + * to manipulate tcp_rg_bind until connection clean-up + * during tcp_free. Just mark the member status of the + * connection as inactive. + */ + tcp_rg_setactive(tcp->tcp_rg_bind, do_enable); + } + connp->conn_reuseport = 0; + } + return (0); +} + +/* * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements. * Parameters are assumed to be verified by the caller. */ @@ -674,6 +773,11 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } *outlenp = inlen; return (0); + case SO_REUSEPORT: + if (!checkonly) { + return (tcp_set_reuseport(connp, *i1 != 0)); + } + return (0); } break; case IPPROTO_TCP: @@ -1031,10 +1135,6 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } break; case IPPROTO_IP: - if (connp->conn_family != AF_INET) { - *outlenp = 0; - return (EINVAL); - } switch (name) { case IP_SEC_OPT: /* diff --git a/usr/src/uts/common/inet/tcp/tcp_socket.c b/usr/src/uts/common/inet/tcp/tcp_socket.c index 9b6c0daac3..32422be675 100644 --- a/usr/src/uts/common/inet/tcp/tcp_socket.c +++ b/usr/src/uts/common/inet/tcp/tcp_socket.c @@ -1029,6 +1029,16 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, } /* + * Do not allow fallback on connections making use of SO_REUSEPORT. + */ + if (tcp->tcp_rg_bind != NULL) { + freeb(stropt_mp); + freeb(ordrel_mp); + squeue_synch_exit(connp, SQ_NODRAIN); + return (EINVAL); + } + + /* * Both endpoints must be of the same type (either STREAMS or * non-STREAMS) for fusion to be enabled. So if we are fused, * we have to unfuse. diff --git a/usr/src/uts/common/inet/tcp/tcp_stats.c b/usr/src/uts/common/inet/tcp/tcp_stats.c index e29c76a696..226467e167 100644 --- a/usr/src/uts/common/inet/tcp/tcp_stats.c +++ b/usr/src/uts/common/inet/tcp/tcp_stats.c @@ -21,8 +21,8 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent Inc. All rights reserved. * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. */ @@ -131,9 +131,14 @@ tcp_set_conninfo(tcp_t *tcp, struct tcpConnEntryInfo_s *tcei, boolean_t ispriv) tcei->ce_rto = tcp->tcp_rto; tcei->ce_mss = tcp->tcp_mss; tcei->ce_state = tcp->tcp_state; - tcei->ce_rtt_sa = NSEC2USEC(tcp->tcp_rtt_sa >> 3); tcei->ce_rtt_sum = NSEC2USEC(tcp->tcp_rtt_sum); tcei->ce_rtt_cnt = tcp->tcp_rtt_cnt; + + /* tcp_rtt_sa is stored as 8 times the average RTT */ + tcei->ce_rtt_sa = NSEC2USEC(tcp->tcp_rtt_sa >> 3); + + /* tcp_rtt_sd is stored as 4 times the average RTTVAR */ + tcei->ce_rtt_sd = NSEC2USEC(tcp->tcp_rtt_sd >> 2); } /* diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c index 5793a7fd27..7d9b449392 100644 --- a/usr/src/uts/common/inet/tcp/tcp_timers.c +++ b/usr/src/uts/common/inet/tcp/tcp_timers.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright 2011 Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright (c) 2014, 2017 by Delphix. All rights reserved. */ |