summary refs log tree commit diff
path: root/usr/src/uts/common/inet/tcp
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/tcp')
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 48
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_bind.c 9
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_input.c 34
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_misc.c 2
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_opt_data.c 27
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_output.c 20
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_socket.c 10
7 files changed, 123 insertions, 27 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index ef4c96db1c..427a6df274 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -21,10 +21,11 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -1018,10 +1019,23 @@ finish:
/* If we have an upper handle (socket), release it */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
@@ -1435,13 +1449,26 @@ tcp_free(tcp_t *tcp)
* nothing to do other than clearing the field.
*/
if (connp->conn_upper_handle != NULL) {
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_upper_handle = NULL;
+ connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
if (IPCL_IS_NONSTR(connp)) {
- (*connp->conn_upcalls->su_closed)(
- connp->conn_upper_handle);
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ upcalls->su_closed(handle);
tcp->tcp_detached = B_TRUE;
}
- connp->conn_upper_handle = NULL;
- connp->conn_upcalls = NULL;
}
}
@@ -2394,6 +2421,7 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
tcp->tcp_fin_wait_2_flush_interval =
parent->tcp_fin_wait_2_flush_interval;
+ tcp->tcp_quickack = parent->tcp_quickack;
tcp->tcp_ka_interval = parent->tcp_ka_interval;
tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres;
@@ -3332,9 +3360,11 @@ tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa)
*/
if (ixa->ixa_flags & IXAF_LSO_CAPAB) {
ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab;
+ uint_t lso_max = (ixa->ixa_flags & IXAF_IS_IPV4) ?
+ lsoc->ill_lso_max_tcpv4 : lsoc->ill_lso_max_tcpv6;
- ASSERT(lsoc->ill_lso_max > 0);
- tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lsoc->ill_lso_max);
+ ASSERT3U(lso_max, >, 0);
+ tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lso_max);
DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso,
boolean_t, B_TRUE, uint32_t, tcp->tcp_lso_max);
diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c
index 876e7d48e6..5c2e1e1932 100644
--- a/usr/src/uts/common/inet/tcp/tcp_bind.c
+++ b/usr/src/uts/common/inet/tcp/tcp_bind.c
@@ -291,7 +291,7 @@ retry:
* Return the next anonymous port in the privileged port range for
* bind checking. It starts at IPPORT_RESERVED - 1 and goes
* downwards. This is the same behavior as documented in the userland
- * library call rresvport(3N).
+ * library call rresvport(3SOCKET).
*
* TS note: skip multilevel ports.
*/
@@ -1006,11 +1006,10 @@ tcp_rg_t *
tcp_rg_init(tcp_t *tcp)
{
tcp_rg_t *rg;
- rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP|KM_NORMALPRI);
+ rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP_LAZY);
if (rg == NULL)
return (NULL);
- rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *),
- KM_NOSLEEP|KM_NORMALPRI);
+ rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *), KM_NOSLEEP_LAZY);
if (rg->tcprg_members == NULL) {
kmem_free(rg, sizeof (tcp_rg_t));
return (NULL);
@@ -1063,7 +1062,7 @@ tcp_rg_insert(tcp_rg_t *rg, tcp_t *tcp)
return (EINVAL);
}
newmembers = kmem_zalloc(newsize * sizeof (tcp_t *),
- KM_NOSLEEP|KM_NORMALPRI);
+ KM_NOSLEEP_LAZY);
if (newmembers == NULL) {
mutex_exit(&rg->tcprg_lock);
return (ENOMEM);
diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c
index 0aaad871ba..22b0019a6a 100644
--- a/usr/src/uts/common/inet/tcp/tcp_input.c
+++ b/usr/src/uts/common/inet/tcp/tcp_input.c
@@ -24,6 +24,8 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
*/
/* This file contains all TCP input processing functions. */
@@ -4753,6 +4755,9 @@ update_ack:
tcp->tcp_rack_cur_max = tcp->tcp_rack_abs_max;
else
tcp->tcp_rack_cur_max = cur_max;
+ } else if (tcp->tcp_quickack) {
+ /* The executable asked that we ack each packet */
+ flags |= TH_ACK_NEEDED;
} else if (TCP_IS_DETACHED(tcp)) {
/* We don't have an ACK timer for detached TCP. */
flags |= TH_ACK_NEEDED;
@@ -5108,6 +5113,15 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp,
optlen = 0;
addflag.crb_all = 0;
+
+ /* If app asked for TOS and it has changed ... */
+ if (connp->conn_recv_ancillary.crb_recvtos &&
+ ipp->ipp_type_of_service != tcp->tcp_recvtos &&
+ (ira->ira_flags & IRAF_IS_IPV4)) {
+ optlen += sizeof (struct T_opthdr) +
+ P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
+ addflag.crb_recvtos = 1;
+ }
/* If app asked for pktinfo and the index has changed ... */
if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
ira->ira_ruifindex != tcp->tcp_recvifindex) {
@@ -5127,8 +5141,9 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp,
optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
addflag.crb_ipv6_recvtclass = 1;
}
+
/*
- * If app asked for hopbyhop headers and it has changed ...
+ * If app asked for hop-by-hop headers and it has changed ...
* For security labels, note that (1) security labels can't change on
* a connected socket at all, (2) we're connected to at most one peer,
* (3) if anything changes, then it must be some other extra option.
@@ -5206,6 +5221,23 @@ tcp_input_add_ancillary(tcp_t *tcp, mblk_t *mp, ip_pkt_t *ipp,
todi->OPT_length = optlen;
todi->OPT_offset = sizeof (*todi);
optptr = (uchar_t *)&todi[1];
+
+ /* If app asked for TOS and it has changed ... */
+ if (addflag.crb_recvtos) {
+ toh = (struct T_opthdr *)optptr;
+ toh->level = IPPROTO_IP;
+ toh->name = IP_RECVTOS;
+ toh->len = sizeof (*toh) +
+ P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
+ toh->status = 0;
+ optptr += sizeof (*toh);
+ *(uint8_t *)optptr = ipp->ipp_type_of_service;
+ optptr = (uchar_t *)toh + toh->len;
+ ASSERT(__TPI_TOPT_ISALIGNED(optptr));
+ /* Save as "last" value */
+ tcp->tcp_recvtos = ipp->ipp_type_of_service;
+ }
+
/*
* If app asked for pktinfo and the index has changed ...
* Note that the local address never changes for the connection.
diff --git a/usr/src/uts/common/inet/tcp/tcp_misc.c b/usr/src/uts/common/inet/tcp/tcp_misc.c
index 0896dd7611..423d3003cf 100644
--- a/usr/src/uts/common/inet/tcp/tcp_misc.c
+++ b/usr/src/uts/common/inet/tcp/tcp_misc.c
@@ -44,7 +44,7 @@ static boolean_t tcp_do_reclaim = B_TRUE;
* TCP_IOC_ABORT_CONN is a non-transparent ioctl command used for aborting
* TCP connections. To invoke this ioctl, a tcp_ioc_abort_conn_t structure
* (defined in tcp.h) needs to be filled in and passed into the kernel
- * via an I_STR ioctl command (see streamio(7I)). The tcp_ioc_abort_conn_t
+ * via an I_STR ioctl command (see streamio(4I)). The tcp_ioc_abort_conn_t
* structure contains the four-tuple of a TCP connection and a range of TCP
* states (specified by ac_start and ac_end). The use of wildcard addresses
* and ports is allowed. Connections with a matching four tuple and a state
diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
index ea4760e6bb..15e49ae070 100644
--- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
@@ -23,6 +23,8 @@
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -135,6 +137,8 @@ opdes_t tcp_opt_arr[] = {
{ TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+{ TCP_QUICKACK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+
{ TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
{ TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
@@ -157,6 +161,7 @@ opdes_t tcp_opt_arr[] = {
{ T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
sizeof (int), -1 /* not initialized */ },
+{ IP_RECVTOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
sizeof (ipsec_req_t), -1 /* not initialized */ },
@@ -448,6 +453,9 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
case TCP_CORK:
*i1 = tcp->tcp_cork;
return (sizeof (int));
+ case TCP_QUICKACK:
+ *i1 = tcp->tcp_quickack;
+ return (sizeof (int));
case TCP_RTO_INITIAL:
*i1 = tcp->tcp_rto_initial;
return (sizeof (uint32_t));
@@ -626,9 +634,9 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
/*
* Note: Implies T_CHECK semantics for T_OPTCOM_REQ
* inlen != 0 implies value supplied and
- * we have to "pretend" to set it.
+ * we have to "pretend" to set it.
* inlen == 0 implies that there is no
- * value part in T_CHECK request and just validation
+ * value part in T_CHECK request and just validation
* done elsewhere should be enough, we just return here.
*/
if (inlen == 0) {
@@ -1021,6 +1029,11 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
tcp->tcp_cork = onoff;
}
break;
+ case TCP_QUICKACK:
+ if (!checkonly) {
+ tcp->tcp_quickack = onoff;
+ }
+ break;
case TCP_RTO_INITIAL:
if (checkonly || val == 0)
break;
@@ -1132,6 +1145,16 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
return (EINVAL);
}
break;
+ case IP_RECVTOS:
+ if (!checkonly) {
+ /*
+ * Force it to be sent up with the next msg
+ * by setting it to a value which cannot
+ * appear in a packet (TOS is only 8-bits)
+ */
+ tcp->tcp_recvtos = 0xffffffffU;
+ }
+ break;
}
break;
case IPPROTO_IPV6:
diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c
index 7a0472f3dd..086668f435 100644
--- a/usr/src/uts/common/inet/tcp/tcp_output.c
+++ b/usr/src/uts/common/inet/tcp/tcp_output.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/* This file contains all TCP output processing functions. */
@@ -1677,11 +1677,23 @@ finish:
/* non-STREAM socket, release the upper handle */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)
- (connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free
+ * upper structures. Acquire conn_lock because an
+ * external caller like conn_get_socket_info() will
+ * upcall if these are non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
diff --git a/usr/src/uts/common/inet/tcp/tcp_socket.c b/usr/src/uts/common/inet/tcp/tcp_socket.c
index 2de76ea060..32422be675 100644
--- a/usr/src/uts/common/inet/tcp/tcp_socket.c
+++ b/usr/src/uts/common/inet/tcp/tcp_socket.c
@@ -199,7 +199,7 @@ static int
tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
socklen_t len, cred_t *cr)
{
- int error;
+ int error;
conn_t *connp = (conn_t *)proto_handle;
/* All Solaris components should pass a cred for this operation. */
@@ -240,7 +240,7 @@ tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
{
conn_t *connp = (conn_t *)proto_handle;
tcp_t *tcp = connp->conn_tcp;
- int error;
+ int error;
ASSERT(connp->conn_upper_handle != NULL);
@@ -660,7 +660,7 @@ static int
tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
int mode, int32_t *rvalp, cred_t *cr)
{
- conn_t *connp = (conn_t *)proto_handle;
+ conn_t *connp = (conn_t *)proto_handle;
int error;
ASSERT(connp->conn_upper_handle != NULL);
@@ -825,7 +825,7 @@ tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q,
struct stroptions *stropt;
struct T_capability_ack tca;
struct sockaddr_in6 laddr, faddr;
- socklen_t laddrlen, faddrlen;
+ socklen_t laddrlen, faddrlen;
short opts;
int error;
mblk_t *mp, *mpnext;
@@ -999,7 +999,7 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
sock_quiesce_arg_t *arg)
{
tcp_t *tcp;
- conn_t *connp = (conn_t *)proto_handle;
+ conn_t *connp = (conn_t *)proto_handle;
int error;
mblk_t *stropt_mp;
mblk_t *ordrel_mp;