diff options
author | Rao Shoaib <Rao.Shoaib@Sun.COM> | 2009-04-18 01:13:46 -0700 |
---|---|---|
committer | Rao Shoaib <Rao.Shoaib@Sun.COM> | 2009-04-18 01:13:46 -0700 |
commit | eead73cfdc384282a25862c27aed73c597fc10a9 (patch) | |
tree | 89f6ac078af707a4c9ea3d7944096b457a9586fb /usr/src | |
parent | 06519974150162adc2eac151382bd7bd487b09c3 (diff) | |
download | illumos-joyent-eead73cfdc384282a25862c27aed73c597fc10a9.tar.gz |
6792479 tcp_tpi_bind() should call common functions to do bind()/listen().
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/inet/tcp.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 236 |
2 files changed, 130 insertions, 108 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index d286fc7d5f..fd5efb2b64 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -676,7 +676,7 @@ extern void tcp_conn_request(void *arg, mblk_t *mp, void *arg2); extern void tcp_conn_request_unbound(void *arg, mblk_t *mp, void *arg2); extern void tcp_input(void *arg, mblk_t *mp, void *arg2); extern void tcp_rput_data(void *arg, mblk_t *mp, void *arg2); -extern void *tcp_get_conn(void *arg, tcp_stack_t *); +extern void *tcp_get_conn(void *arg, tcp_stack_t *, boolean_t); extern void tcp_time_wait_collector(void *arg); extern mblk_t *tcp_snmp_get(queue_t *, mblk_t *); extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len); diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 49b3ca5a0e..cda1386fcb 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -918,7 +918,7 @@ static int tcp_open(queue_t *, dev_t *, int, int, cred_t *, boolean_t); static int tcp_openv4(queue_t *, dev_t *, int, int, cred_t *); static int tcp_openv6(queue_t *, dev_t *, int, int, cred_t *); static int tcp_tpi_close(queue_t *, int); -static int tcpclose_accept(queue_t *); +static int tcp_tpi_close_accept(queue_t *); static void tcp_squeue_add(squeue_t *); static boolean_t tcp_zcopy_check(tcp_t *); @@ -936,7 +936,8 @@ static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, sock_upper_handle_t, cred_t *); static int tcp_listen(sock_lower_handle_t, int, cred_t *); static int tcp_post_ip_bind(tcp_t *, mblk_t *, int, cred_t *, pid_t); -static int tcp_do_listen(conn_t *, int, cred_t *); +static int tcp_do_listen(conn_t *, struct sockaddr *, socklen_t, int, cred_t *, + boolean_t); static int tcp_do_connect(conn_t *, const struct sockaddr *, socklen_t, cred_t *, pid_t); static int tcp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, @@ -1017,7 +1018,7 @@ struct qinit tcp_fallback_sock_winit = { * been 
created. */ struct qinit tcp_acceptor_rinit = { - NULL, (pfi_t)tcp_rsrv, NULL, tcpclose_accept, NULL, &tcp_winfo + NULL, (pfi_t)tcp_rsrv, NULL, tcp_tpi_close_accept, NULL, &tcp_winfo }; struct qinit tcp_acceptor_winit = { @@ -3069,20 +3070,12 @@ tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) return; } - error = tcp_bind_check(connp, sa, len, cr, - tbr->PRIM_type != O_T_BIND_REQ); - if (error == 0) { - if (tcp->tcp_family == AF_INET) { - sin = (sin_t *)sa; - sin->sin_port = tcp->tcp_lport; - } else { - sin6 = (sin6_t *)sa; - sin6->sin6_port = tcp->tcp_lport; - } - - if (backlog > 0) { - error = tcp_do_listen(connp, backlog, cr); - } + if (backlog > 0) { + error = tcp_do_listen(connp, sa, len, backlog, DB_CRED(mp), + tbr->PRIM_type != O_T_BIND_REQ); + } else { + error = tcp_do_bind(connp, sa, len, DB_CRED(mp), + tbr->PRIM_type != O_T_BIND_REQ); } done: if (error > 0) { @@ -3090,6 +3083,16 @@ done: } else if (error < 0) { tcp_err_ack(tcp, mp, -error, 0); } else { + /* + * Update port information as sockfs/tpi needs it for checking + */ + if (tcp->tcp_family == AF_INET) { + sin = (sin_t *)sa; + sin->sin_port = tcp->tcp_lport; + } else { + sin6 = (sin6_t *)sa; + sin6->sin6_port = tcp->tcp_lport; + } mp->b_datap->db_type = M_PCPROTO; tbr->PRIM_type = T_BIND_ACK; putnext(tcp->tcp_rq, mp); @@ -3785,7 +3788,7 @@ done: } static int -tcpclose_accept(queue_t *q) +tcp_tpi_close_accept(queue_t *q) { vmem_t *minor_arena; dev_t conn_dev; @@ -4059,7 +4062,6 @@ finish: mutex_exit(&tcp->tcp_closelock); } - /* * Clean up the b_next and b_prev fields of every mblk pointed at by *mpp. * Some stream heads get upset if they see these later on as anything but NULL. @@ -4894,7 +4896,8 @@ tcp_conn_create_v4(conn_t *lconnp, conn_t *connp, ipha_t *ipha, * in case of error mpp is freed. 
*/ conn_t * -tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) +tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp, + boolean_t is_streams) { conn_t *connp = tcp->tcp_connp; conn_t *econnp; @@ -4904,7 +4907,7 @@ tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) boolean_t mctl_present = B_FALSE; uint_t ipvers; - econnp = tcp_get_conn(sqp, tcp->tcp_tcps); + econnp = tcp_get_conn(sqp, tcp->tcp_tcps, is_streams); if (econnp == NULL) { freemsg(first_mp); return (NULL); @@ -5041,14 +5044,14 @@ tcp_get_ipsec_conn(tcp_t *tcp, squeue_t *sqp, mblk_t **mpp) * there for too long. */ void * -tcp_get_conn(void *arg, tcp_stack_t *tcps) +tcp_get_conn(void *arg, tcp_stack_t *tcps, boolean_t is_streams) { tcp_t *tcp = NULL; conn_t *connp = NULL; squeue_t *sqp = (squeue_t *)arg; tcp_squeue_priv_t *tcp_time_wait; netstack_t *ns; - mblk_t *rsrv_mp; + mblk_t *tcp_rsrv_mp = NULL; tcp_time_wait = *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); @@ -5057,6 +5060,19 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps) tcp = tcp_time_wait->tcp_free_list; ASSERT((tcp != NULL) ^ (tcp_time_wait->tcp_free_list_cnt == 0)); if (tcp != NULL) { + if (is_streams && tcp->tcp_rsrv_mp == NULL) { + /* + * Pre-allocate the tcp_rsrv_mp if necessary. + * This mblk will not be freed until this conn_t/tcp_t + * is freed at ipcl_conn_destroy(). 
+ */ + if ((tcp->tcp_rsrv_mp = allocb(0, BPRI_HI)) == NULL) { + mutex_exit(&tcp_time_wait->tcp_time_wait_lock); + return (NULL); + } + mutex_init(&tcp->tcp_rsrv_mp_lock, + NULL, MUTEX_DEFAULT, NULL); + } tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next; tcp_time_wait->tcp_free_list_cnt--; mutex_exit(&tcp_time_wait->tcp_time_wait_lock); @@ -5066,7 +5082,7 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps) ASSERT(tcp->tcp_tcps == NULL); ASSERT(connp->conn_netstack == NULL); - ASSERT(tcp->tcp_rsrv_mp != NULL); + ASSERT(!is_streams || tcp->tcp_rsrv_mp != NULL); ns = tcps->tcps_netstack; netstack_hold(ns); connp->conn_netstack = ns; @@ -5076,21 +5092,29 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps) return ((void *)connp); } mutex_exit(&tcp_time_wait->tcp_time_wait_lock); - /* - * Pre-allocate the tcp_rsrv_mp. This mblk will not be freed - * until this conn_t/tcp_t is freed at ipcl_conn_destroy(). - */ - if ((rsrv_mp = allocb(0, BPRI_HI)) == NULL) - return (NULL); + if (is_streams) { + /* + * Pre-allocate the tcp_rsrv_mp if necessary. + * This mblk will not be freed until this conn_t/tcp_t + * is freed at ipcl_conn_destroy(). 
+ */ + tcp_rsrv_mp = allocb(0, BPRI_HI); + if (tcp_rsrv_mp == NULL) + return (NULL); + } if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP, tcps->tcps_netstack)) == NULL) { - freeb(rsrv_mp); + if (is_streams) { + ASSERT(tcp_rsrv_mp != NULL); + freeb(tcp_rsrv_mp); + } return (NULL); } - tcp = connp->conn_tcp; - tcp->tcp_rsrv_mp = rsrv_mp; + tcp = connp->conn_tcp; + tcp->tcp_rsrv_mp = tcp_rsrv_mp; mutex_init(&tcp->tcp_rsrv_mp_lock, NULL, MUTEX_DEFAULT, NULL); + tcp->tcp_tcps = tcps; TCPS_REFHOLD(tcps); @@ -5351,7 +5375,8 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) new_sqp = (squeue_t *)DB_CKSUMSTART(mp); DB_CKSUMSTART(mp) = 0; mp->b_datap->db_struioflag &= ~STRUIO_EAGER; - econnp = (conn_t *)tcp_get_conn(arg2, tcps); + econnp = (conn_t *)tcp_get_conn(arg2, tcps, + !IPCL_IS_NONSTR(connp)); if (econnp == NULL) goto error2; ASSERT(econnp->conn_netstack == connp->conn_netstack); @@ -5361,7 +5386,8 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) /* * mp is updated in tcp_get_ipsec_conn(). */ - econnp = tcp_get_ipsec_conn(tcp, arg2, &mp); + econnp = tcp_get_ipsec_conn(tcp, arg2, &mp, + !IPCL_IS_NONSTR(connp)); if (econnp == NULL) { /* * mp freed by tcp_get_ipsec_conn. @@ -5399,28 +5425,25 @@ tcp_conn_request(void *arg, mblk_t *mp, void *arg2) goto error3; eager = econnp->conn_tcp; - - /* - * Pre-allocate the T_ordrel_ind mblk for TPI socket so that at close - * time, we will always have that to send up. Otherwise, we need to do - * special handling in case the allocation fails at that time. - */ ASSERT(eager->tcp_ordrel_mp == NULL); - if (!IPCL_IS_NONSTR(econnp) && - (eager->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) - goto error3; + if (!IPCL_IS_NONSTR(econnp)) { + /* + * Pre-allocate the T_ordrel_ind mblk for TPI socket so that + * at close time, we will always have that to send up. + * Otherwise, we need to do special handling in case the + * allocation fails at that time. 
+ */ + if ((eager->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) + goto error3; + } /* Inherit various TCP parameters from the listener */ eager->tcp_naglim = tcp->tcp_naglim; - eager->tcp_first_timer_threshold = - tcp->tcp_first_timer_threshold; - eager->tcp_second_timer_threshold = - tcp->tcp_second_timer_threshold; + eager->tcp_first_timer_threshold = tcp->tcp_first_timer_threshold; + eager->tcp_second_timer_threshold = tcp->tcp_second_timer_threshold; - eager->tcp_first_ctimer_threshold = - tcp->tcp_first_ctimer_threshold; - eager->tcp_second_ctimer_threshold = - tcp->tcp_second_ctimer_threshold; + eager->tcp_first_ctimer_threshold = tcp->tcp_first_ctimer_threshold; + eager->tcp_second_ctimer_threshold = tcp->tcp_second_ctimer_threshold; /* * tcp_adapt_ire() may change tcp_rwnd according to the ire metrics. @@ -9208,7 +9231,7 @@ tcp_create_common(queue_t *q, cred_t *credp, boolean_t isv6, } sqp = IP_SQUEUE_GET((uint_t)gethrtime()); - connp = (conn_t *)tcp_get_conn(sqp, tcps); + connp = (conn_t *)tcp_get_conn(sqp, tcps, q != NULL ? B_TRUE : B_FALSE); /* * Both tcp_get_conn and netstack_find_by_cred incremented refcnt, * so we drop it by one. @@ -9353,8 +9376,8 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, q->q_qinfo = &tcp_acceptor_rinit; /* * the conn_dev and minor_arena will be subsequently used by - * tcp_wput_accept() and tcpclose_accept() to figure out the - * minor device number for this connection from the q_ptr. + * tcp_wput_accept() and tcp_tpi_close_accept() to figure out + * the minor device number for this connection from the q_ptr. 
*/ RD(q)->q_ptr = (void *)conn_dev; WR(q)->q_qinfo = &tcp_acceptor_winit; @@ -9380,10 +9403,11 @@ tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, ASSERT(q->q_qinfo == &tcp_rinitv4 || q->q_qinfo == &tcp_rinitv6); ASSERT(WR(q)->q_qinfo == &tcp_winit); + tcp = connp->conn_tcp; + if (issocket) { WR(q)->q_qinfo = &tcp_sock_winit; } else { - tcp = connp->conn_tcp; #ifdef _ILP32 tcp->tcp_acceptor_id = (t_uscalar_t)RD(q); #else @@ -15840,6 +15864,7 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2) sodirect_t *sodp; boolean_t fc; + ASSERT(!IPCL_IS_NONSTR(connp)); mutex_enter(&tcp->tcp_rsrv_mp_lock); tcp->tcp_rsrv_mp = mp; mutex_exit(&tcp->tcp_rsrv_mp_lock); @@ -18109,7 +18134,7 @@ tcp_tpi_accept(queue_t *q, mblk_t *mp) * rq->q_qinfo->qi_qclose to cleanup the acceptor stream. * we need to do the allocb up here because we have to * make sure rq->q_qinfo->qi_qclose still points to the - * correct function (tcpclose_accept) in case allocb + * correct function (tcp_tpi_close_accept) in case allocb * fails. 
*/ bcopy(mp->b_rptr + conn_res->OPT_offset, @@ -26406,7 +26431,6 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, tcp_t *tcp = connp->conn_tcp; sin_t *sin; sin6_t *sin6; - sin6_t sin6addr; in_port_t requested_port; ipaddr_t v4addr; in6_addr_t v6addr; @@ -26426,7 +26450,9 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, } origipversion = tcp->tcp_ipversion; - if (sa != NULL && !OK_32PTR((char *)sa)) { + ASSERT(sa != NULL && len != 0); + + if (!OK_32PTR((char *)sa)) { if (tcp->tcp_debug) { (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, @@ -26438,24 +26464,6 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, } switch (len) { - case 0: /* request for a generic port */ - if (tcp->tcp_family == AF_INET) { - sin = (sin_t *)&sin6addr; - *sin = sin_null; - sin->sin_family = AF_INET; - tcp->tcp_ipversion = IPV4_VERSION; - IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &v6addr); - } else { - ASSERT(tcp->tcp_family == AF_INET6); - sin6 = (sin6_t *)&sin6addr; - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - tcp->tcp_ipversion = IPV6_VERSION; - V6_SET_ZERO(v6addr); - } - requested_port = 0; - break; - case sizeof (sin_t): /* Complete IPv4 address */ sin = (sin_t *)sa; /* @@ -26552,14 +26560,6 @@ tcp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, tcp->tcp_conn_req_max = 0; - /* - * We need to make sure that the conn_recv is set to a non-null - * value before we insert the conn into the classifier table. - * This is to avoid a race with an incoming packet which does an - * ipcl_classify(). 
- */ - connp->conn_recv = tcp_conn_request; - if (tcp->tcp_family == AF_INET6) { ASSERT(tcp->tcp_connp->conn_af_isv6); error = ip_proto_bind_laddr_v6(connp, NULL, IPPROTO_TCP, @@ -27199,6 +27199,7 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, mblk_t *stropt_mp; mblk_t *ordrel_mp; mblk_t *fused_sigurp_mp; + mblk_t *tcp_rsrv_mp; tcp = connp->conn_tcp; @@ -27217,6 +27218,13 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, fused_sigurp_mp = allocb_wait(1, BPRI_HI, STR_NOSIG, NULL); /* + * Pre-allocate the tcp_rsrv_mp mblk. + * It is possible that this conn was previously used for a streams + * socket and already has tcp_rsrv_mp + */ + tcp_rsrv_mp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); + + /* * Enter the squeue so that no new packets can come in */ error = squeue_synch_enter(connp->conn_sqp, connp, 0); @@ -27225,6 +27233,7 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, freeb(stropt_mp); freeb(ordrel_mp); freeb(fused_sigurp_mp); + freeb(tcp_rsrv_mp); /* * We cannot process the eager, so at least send out a * RST so the peer can reconnect. 
@@ -27250,6 +27259,14 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, freeb(fused_sigurp_mp); } + if (tcp->tcp_rsrv_mp == NULL) { + tcp->tcp_rsrv_mp = tcp_rsrv_mp; + } else { + /* + * reusing a conn that was previously used for streams socket + */ + freeb(tcp_rsrv_mp); + } if (tcp->tcp_listener != NULL) { /* The eager will deal with opts when accept() is called */ freeb(stropt_mp); @@ -27355,7 +27372,7 @@ tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) return (ENOBUFS); } - error = tcp_do_listen(connp, backlog, cr); + error = tcp_do_listen(connp, NULL, 0, backlog, cr, FALSE); if (error == 0) { (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, SOCK_OPCTL_ENAB_ACCEPT, (uintptr_t)backlog); @@ -27370,11 +27387,10 @@ tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) } static int -tcp_do_listen(conn_t *connp, int backlog, cred_t *cr) +tcp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len, + int backlog, cred_t *cr, boolean_t bind_to_req_port_only) { tcp_t *tcp = connp->conn_tcp; - sin_t *sin; - sin6_t *sin6; int error = 0; tcp_stack_t *tcps = tcp->tcp_tcps; @@ -27399,27 +27415,33 @@ tcp_do_listen(conn_t *connp, int backlog, cred_t *cr) } return (-TOUTSTATE); } else { - int32_t len; - sin6_t addr; + if (sa == NULL) { + sin6_t addr; + sin_t *sin; + sin6_t *sin6; - /* Do an implicit bind: Request for a generic port. */ - if (tcp->tcp_family == AF_INET) { - len = sizeof (sin_t); - sin = (sin_t *)&addr; - *sin = sin_null; - sin->sin_family = AF_INET; - tcp->tcp_ipversion = IPV4_VERSION; - } else { - ASSERT(tcp->tcp_family == AF_INET6); - len = sizeof (sin6_t); - sin6 = (sin6_t *)&addr; - *sin6 = sin6_null; - sin6->sin6_family = AF_INET6; - tcp->tcp_ipversion = IPV6_VERSION; + ASSERT(IPCL_IS_NONSTR(connp)); + + /* Do an implicit bind: Request for a generic port. 
*/ + if (tcp->tcp_family == AF_INET) { + len = sizeof (sin_t); + sin = (sin_t *)&addr; + *sin = sin_null; + sin->sin_family = AF_INET; + tcp->tcp_ipversion = IPV4_VERSION; + } else { + ASSERT(tcp->tcp_family == AF_INET6); + len = sizeof (sin6_t); + sin6 = (sin6_t *)&addr; + *sin6 = sin6_null; + sin6->sin6_family = AF_INET6; + tcp->tcp_ipversion = IPV6_VERSION; + } + sa = (struct sockaddr *)&addr; } - error = tcp_bind_check(connp, (struct sockaddr *)&addr, len, - cr, B_FALSE); + error = tcp_bind_check(connp, sa, len, cr, + bind_to_req_port_only); if (error) return (error); /* Fall through and do the fanout insertion */ |