diff options
Diffstat (limited to 'usr/src/uts/common/inet/tcp')
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 48 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_bind.c | 9 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_input.c | 34 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_misc.c | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_opt_data.c | 27 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_output.c | 20 | ||||
| -rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_socket.c | 10 |
7 files changed, 123 insertions, 27 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index ef4c96db1c..427a6df274 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -21,10 +21,11 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, 2017 by Delphix. All rights reserved. * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright 2020 Joyent, Inc. + * Copyright 2022 Oxide Computer Company */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1018,10 +1019,23 @@ finish: /* If we have an upper handle (socket), release it */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. + */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } @@ -1435,13 +1449,26 @@ tcp_free(tcp_t *tcp) * nothing to do other than clearing the field. */ if (connp->conn_upper_handle != NULL) { + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. + */ + mutex_enter(&connp->conn_lock); + connp->conn_upper_handle = NULL; + connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); if (IPCL_IS_NONSTR(connp)) { - (*connp->conn_upcalls->su_closed)( - connp->conn_upper_handle); + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + upcalls->su_closed(handle); tcp->tcp_detached = B_TRUE; } - connp->conn_upper_handle = NULL; - connp->conn_upcalls = NULL; } } @@ -2394,6 +2421,7 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent) tcp->tcp_fin_wait_2_flush_interval = parent->tcp_fin_wait_2_flush_interval; + tcp->tcp_quickack = parent->tcp_quickack; tcp->tcp_ka_interval = parent->tcp_ka_interval; tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres; @@ -3332,9 +3360,11 @@ tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa) */ if (ixa->ixa_flags & IXAF_LSO_CAPAB) { ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab; + uint_t lso_max = (ixa->ixa_flags & IXAF_IS_IPV4) ? + lsoc->ill_lso_max_tcpv4 : lsoc->ill_lso_max_tcpv6; - ASSERT(lsoc->ill_lso_max > 0); - tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lsoc->ill_lso_max); + ASSERT3U(lso_max, >, 0); + tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lso_max); DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso, boolean_t, B_TRUE, uint32_t, tcp->tcp_lso_max); diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c index 876e7d48e6..5c2e1e1932 100644 --- a/usr/src/uts/common/inet/tcp/tcp_bind.c +++ b/usr/src/uts/common/inet/tcp/tcp_bind.c @@ -291,7 +291,7 @@ retry: * Return the next anonymous port in the privileged port range for * bind checking. It starts at IPPORT_RESERVED - 1 and goes * downwards. This is the same behavior as documented in the userland - * library call rresvport(3N). + * library call rresvport(3SOCKET). * * TS note: skip multilevel ports. */ @@ -1006,11 +1006,10 @@ tcp_rg_t * tcp_rg_init(tcp_t *tcp) { tcp_rg_t *rg; - rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP|KM_NORMALPRI); + rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP_LAZY); if (rg == NULL) return (NULL); - rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *), - KM_NOSLEEP|KM_NORMALPRI); + rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *), KM_NOSLEEP_LAZY); if (rg->tcprg_members == NULL) { kmem_free(rg, sizeof (tcp_rg_t)); return (NULL); @@ -1063,7 +1062,7 @@ tcp_rg_insert(tcp_rg_t *rg, tcp_t *tcp) return (EINVAL); } newmembers = kmem_zalloc(newsize * sizeof (tcp_t *), - KM_NOSLEEP|KM_NORMALPRI); + KM_NOSLEEP_LAZY); if (newmembers == NULL) { mutex_exit(&rg->tcprg_lock); return (ENOMEM); diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c index 0aaad871ba..22b0019a6a 100644 --- a/usr/src/uts/common/inet/tcp/tcp_input.c +++ b/usr/src/uts/common/inet/tcp/tcp_input.c @@ -24,6 +24,8 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2014, 2016 by Delphix. All rights reserved. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 Oxide Computer Company */ /* This file contains all TCP input processing functions. */ @@ -4753,6 +4755,9 @@ update_ack: tcp->tcp_rack_cur_max = tcp->tcp_rack_abs_max; else tcp->tcp_rack_cur_max = cur_max; + } else if (tcp->tcp_quickack) { + /* The executable asked that we ack each packet */ + flags |= TH_ACK_NEEDED; } else if (TCP_IS_DETACHED(tcp)) { /* We don't have an ACK timer for detached TCP. */ flags |= TH_ACK_NEEDED; @@ -5108,6 +5113,15 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp, optlen = 0; addflag.crb_all = 0; + + /* If app asked for TOS and it has changed ... */ + if (connp->conn_recv_ancillary.crb_recvtos && + ipp->ipp_type_of_service != tcp->tcp_recvtos && + (ira->ira_flags & IRAF_IS_IPV4)) { + optlen += sizeof (struct T_opthdr) + + P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); + addflag.crb_recvtos = 1; + } /* If app asked for pktinfo and the index has changed ... */ if (connp->conn_recv_ancillary.crb_ip_recvpktinfo && ira->ira_ruifindex != tcp->tcp_recvifindex) { @@ -5127,8 +5141,9 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp, optlen += sizeof (struct T_opthdr) + sizeof (uint_t); addflag.crb_ipv6_recvtclass = 1; } + /* - * If app asked for hopbyhop headers and it has changed ... + * If app asked for hop-by-hop headers and it has changed ... * For security labels, note that (1) security labels can't change on * a connected socket at all, (2) we're connected to at most one peer, * (3) if anything changes, then it must be some other extra option. @@ -5206,6 +5221,23 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp, todi->OPT_length = optlen; todi->OPT_offset = sizeof (*todi); optptr = (uchar_t *)&todi[1]; + + /* If app asked for TOS and it has changed ... */ + if (addflag.crb_recvtos) { + toh = (struct T_opthdr *)optptr; + toh->level = IPPROTO_IP; + toh->name = IP_RECVTOS; + toh->len = sizeof (*toh) + + P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); + toh->status = 0; + optptr += sizeof (*toh); + *(uint8_t *)optptr = ipp->ipp_type_of_service; + optptr = (uchar_t *)toh + toh->len; + ASSERT(__TPI_TOPT_ISALIGNED(optptr)); + /* Save as "last" value */ + tcp->tcp_recvtos = ipp->ipp_type_of_service; + } + /* * If app asked for pktinfo and the index has changed ... * Note that the local address never changes for the connection. diff --git a/usr/src/uts/common/inet/tcp/tcp_misc.c b/usr/src/uts/common/inet/tcp/tcp_misc.c index 0896dd7611..423d3003cf 100644 --- a/usr/src/uts/common/inet/tcp/tcp_misc.c +++ b/usr/src/uts/common/inet/tcp/tcp_misc.c @@ -44,7 +44,7 @@ static boolean_t tcp_do_reclaim = B_TRUE; * TCP_IOC_ABORT_CONN is a non-transparent ioctl command used for aborting * TCP connections. To invoke this ioctl, a tcp_ioc_abort_conn_t structure * (defined in tcp.h) needs to be filled in and passed into the kernel - * via an I_STR ioctl command (see streamio(7I)). The tcp_ioc_abort_conn_t + * via an I_STR ioctl command (see streamio(4I)). The tcp_ioc_abort_conn_t * structure contains the four-tuple of a TCP connection and a range of TCP * states (specified by ac_start and ac_end). The use of wildcard addresses * and ports is allowed. Connections with a matching four tuple and a state diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index ea4760e6bb..15e49ae070 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -23,6 +23,8 @@ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -135,6 +137,8 @@ opdes_t tcp_opt_arr[] = { { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ TCP_QUICKACK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 }, { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 }, @@ -157,6 +161,7 @@ opdes_t tcp_opt_arr[] = { { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, +{ IP_RECVTOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT, sizeof (ipsec_req_t), -1 /* not initialized */ }, @@ -448,6 +453,9 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) case TCP_CORK: *i1 = tcp->tcp_cork; return (sizeof (int)); + case TCP_QUICKACK: + *i1 = tcp->tcp_quickack; + return (sizeof (int)); case TCP_RTO_INITIAL: *i1 = tcp->tcp_rto_initial; return (sizeof (uint32_t)); @@ -626,9 +634,9 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, /* * Note: Implies T_CHECK semantics for T_OPTCOM_REQ * inlen != 0 implies value supplied and - * we have to "pretend" to set it. + * we have to "pretend" to set it. * inlen == 0 implies that there is no - * value part in T_CHECK request and just validation + * value part in T_CHECK request and just validation * done elsewhere should be enough, we just return here. */ if (inlen == 0) { @@ -1021,6 +1029,11 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, tcp->tcp_cork = onoff; } break; + case TCP_QUICKACK: + if (!checkonly) { + tcp->tcp_quickack = onoff; + } + break; case TCP_RTO_INITIAL: if (checkonly || val == 0) break; @@ -1132,6 +1145,16 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, return (EINVAL); } break; + case IP_RECVTOS: + if (!checkonly) { + /* + * Force it to be sent up with the next msg + * by setting it to a value which cannot + * appear in a packet (TOS is only 8-bits) + */ + tcp->tcp_recvtos = 0xffffffffU; + } + break; } break; case IPPROTO_IPV6: diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c index 7a0472f3dd..086668f435 100644 --- a/usr/src/uts/common/inet/tcp/tcp_output.c +++ b/usr/src/uts/common/inet/tcp/tcp_output.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2017 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ /* This file contains all TCP output processing functions. */ @@ -1677,11 +1677,23 @@ finish: /* non-STREAM socket, release the upper handle */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed) - (connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free + * upper structures. Acquire conn_lock because an + * external caller like conn_get_socket_info() will + * upcall if these are non-NULL. + */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } diff --git a/usr/src/uts/common/inet/tcp/tcp_socket.c b/usr/src/uts/common/inet/tcp/tcp_socket.c index 2de76ea060..32422be675 100644 --- a/usr/src/uts/common/inet/tcp/tcp_socket.c +++ b/usr/src/uts/common/inet/tcp/tcp_socket.c @@ -199,7 +199,7 @@ static int tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, socklen_t len, cred_t *cr) { - int error; + int error; conn_t *connp = (conn_t *)proto_handle; /* All Solaris components should pass a cred for this operation. */ @@ -240,7 +240,7 @@ tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) { conn_t *connp = (conn_t *)proto_handle; tcp_t *tcp = connp->conn_tcp; - int error; + int error; ASSERT(connp->conn_upper_handle != NULL); @@ -660,7 +660,7 @@ static int tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, int mode, int32_t *rvalp, cred_t *cr) { - conn_t *connp = (conn_t *)proto_handle; + conn_t *connp = (conn_t *)proto_handle; int error; ASSERT(connp->conn_upper_handle != NULL); @@ -825,7 +825,7 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, struct stroptions *stropt; struct T_capability_ack tca; struct sockaddr_in6 laddr, faddr; - socklen_t laddrlen, faddrlen; + socklen_t laddrlen, faddrlen; short opts; int error; mblk_t *mp, *mpnext; @@ -999,7 +999,7 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, sock_quiesce_arg_t *arg) { tcp_t *tcp; - conn_t *connp = (conn_t *)proto_handle; + conn_t *connp = (conn_t *)proto_handle; int error; mblk_t *stropt_mp; mblk_t *ordrel_mp; |
