diff options
-rw-r--r-- | usr/src/cmd/mdb/common/modules/sctp/sctp.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/sockfs/sockcommon.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/sockfs/sockcommon_subr.c | 14 | ||||
-rw-r--r-- | usr/src/uts/common/fs/sockfs/sockfilter.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp.c | 23 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_conn.c | 22 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_impl.h | 11 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_input.c | 87 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_opt_data.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sctp/sctp_output.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sockmods/socksctp.c | 150 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sockmods/socksctp.h | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/sockmods/socksctpsubr.c | 52 |
13 files changed, 201 insertions, 181 deletions
diff --git a/usr/src/cmd/mdb/common/modules/sctp/sctp.c b/usr/src/cmd/mdb/common/modules/sctp/sctp.c index ad08fe4eb7..5901efcab8 100644 --- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c +++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c @@ -830,10 +830,10 @@ sctp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%<b>Flow Control%</b>\n"); mdb_printf("tconn_sndbuf\t%?d\n" "conn_sndlowat\t%?d\tfrwnd\t\t%?u\n" - "rwnd\t\t%?u\tinitial rwnd\t%?u\n" + "rwnd\t\t%?u\tlast advertised rwnd\t%?u\n" "rxqueued\t%?u\tcwnd_max\t%?u\n", connp->conn_sndbuf, connp->conn_sndlowat, sctp->sctp_frwnd, - sctp->sctp_rwnd, sctp->sctp_irwnd, sctp->sctp_rxqueued, + sctp->sctp_rwnd, sctp->sctp_arwnd, sctp->sctp_rxqueued, sctp->sctp_cwnd_max); } diff --git a/usr/src/uts/common/fs/sockfs/sockcommon.h b/usr/src/uts/common/fs/sockfs/sockcommon.h index d4e1883b1d..4e333be0aa 100644 --- a/usr/src/uts/common/fs/sockfs/sockcommon.h +++ b/usr/src/uts/common/fs/sockfs/sockcommon.h @@ -186,7 +186,7 @@ extern int so_dequeue_msg(struct sonode *, mblk_t **, struct uio *, rval_t *, int); extern void so_enqueue_msg(struct sonode *, mblk_t *, size_t); extern void so_process_new_message(struct sonode *, mblk_t *, mblk_t *); -extern void so_check_flow_control(struct sonode *); +extern boolean_t so_check_flow_control(struct sonode *); extern mblk_t *socopyinuio(uio_t *, ssize_t, size_t, ssize_t, size_t, int *); extern mblk_t *socopyoutuio(mblk_t *, struct uio *, ssize_t, int *); diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c index a44d389855..00ab44e44c 100644 --- a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c +++ b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c @@ -613,9 +613,10 @@ so_process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head) /* * Check flow control on a given sonode. Must have so_lock held, and - * this function will release the hold. + * this function will release the hold. 
Return true if flow control + * is cleared. */ -void +boolean_t so_check_flow_control(struct sonode *so) { ASSERT(MUTEX_HELD(&so->so_lock)); @@ -635,8 +636,10 @@ so_check_flow_control(struct sonode *so) } /* filters can start injecting data */ sof_sonode_notify_filters(so, SOF_EV_INJECT_DATA_IN_OK, 0); + return (B_TRUE); } else { mutex_exit(&so->so_lock); + return (B_FALSE); } } @@ -709,7 +712,7 @@ again1: so_process_new_message(so, new_msg_head, new_msg_last_head); } savemp = savemptail = NULL; - rvalp->r_val1 = 0; + rvalp->r_vals = 0; error = 0; mp = so->so_rcv_q_head; @@ -822,7 +825,7 @@ again1: * so_check_flow_control() will drop * so->so_lock. */ - so_check_flow_control(so); + rvalp->r_val2 = so_check_flow_control(so); } } if (mp != NULL) { /* more data blocks in msg */ @@ -840,7 +843,8 @@ again1: * so_check_flow_control() will drop * so->so_lock. */ - so_check_flow_control(so); + rvalp->r_val2 = + so_check_flow_control(so); } } else if (partial_read && !somsghasdata(mp)) { /* diff --git a/usr/src/uts/common/fs/sockfs/sockfilter.c b/usr/src/uts/common/fs/sockfs/sockfilter.c index 8842d46a73..3de36a768e 100644 --- a/usr/src/uts/common/fs/sockfs/sockfilter.c +++ b/usr/src/uts/common/fs/sockfs/sockfilter.c @@ -1344,7 +1344,7 @@ sof_filter_data_in_proc(struct sonode *so, mblk_t *mp, mblk_t **lastmp) mutex_enter(&so->so_lock); so->so_rcv_queued += diff; /* so_check_flow_control drops so_lock */ - so_check_flow_control(so); + (void) so_check_flow_control(so); } return (retmp); @@ -1612,7 +1612,7 @@ sof_rcv_flowctrl(sof_handle_t handle, boolean_t enable) } so->so_state &= ~SS_FIL_RCV_FLOWCTRL; /* so_check_flow_control drops so_lock */ - so_check_flow_control(so); + (void) so_check_flow_control(so); } ASSERT(MUTEX_NOT_HELD(&so->so_lock)); } diff --git a/usr/src/uts/common/inet/sctp/sctp.c b/usr/src/uts/common/inet/sctp/sctp.c index 6bed139c2f..259cd2baf1 100644 --- a/usr/src/uts/common/inet/sctp/sctp.c +++ b/usr/src/uts/common/inet/sctp/sctp.c @@ -336,11 +336,10 @@ 
sctp_disconnect(sctp_t *sctp) } /* - * In there is unread data, send an ABORT and terminate the + * If there is unread data, send an ABORT and terminate the * association. */ - if (sctp->sctp_rxqueued > 0 || sctp->sctp_irwnd > - sctp->sctp_rwnd) { + if (sctp->sctp_rxqueued > 0 || sctp->sctp_ulp_rxqueued > 0) { sctp_user_abort(sctp, NULL); WAKE_SCTP(sctp); return (error); @@ -807,7 +806,8 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; sctp->sctp_sack_gaps = 0; - sctp->sctp_sack_toggle = 2; + /* So we will not delay sending the first SACK. */ + sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max; /* Only need to do the allocation if there is no "cached" one. */ if (sctp->sctp_pad_mp == NULL) { @@ -833,11 +833,13 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) if (err != 0) goto failure; + sctp->sctp_upcalls = psctp->sctp_upcalls; + sctp->sctp_cookie_lifetime = psctp->sctp_cookie_lifetime; sctp->sctp_cwnd_max = psctp->sctp_cwnd_max; sctp->sctp_rwnd = psctp->sctp_rwnd; - sctp->sctp_irwnd = psctp->sctp_rwnd; + sctp->sctp_arwnd = psctp->sctp_arwnd; sctp->sctp_pd_point = psctp->sctp_pd_point; sctp->sctp_rto_max = psctp->sctp_rto_max; sctp->sctp_rto_max_init = psctp->sctp_rto_max_init; @@ -878,7 +880,7 @@ sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; sctp->sctp_rwnd = connp->conn_rcvbuf; - sctp->sctp_irwnd = sctp->sctp_rwnd; + sctp->sctp_arwnd = connp->conn_rcvbuf; sctp->sctp_pd_point = sctp->sctp_rwnd; sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); sctp->sctp_rto_max_init = sctp->sctp_rto_max; @@ -1661,6 +1663,13 @@ sctp_rq_tq_init(sctp_stack_t *sctps) int thrs; int max_tasks; + mutex_enter(&sctps->sctps_g_lock); + /* Someone may have beaten us in creating the taskqs. 
*/ + if (sctps->sctps_recvq_tq_list_cur_sz > 0) { + mutex_exit(&sctps->sctps_g_lock); + return; + } + thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, MAX(ncpus, boot_ncpus))); /* @@ -1688,6 +1697,8 @@ sctp_rq_tq_init(sctp_stack_t *sctps) sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE); mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); + + mutex_exit(&sctps->sctps_g_lock); } static void diff --git a/usr/src/uts/common/inet/sctp/sctp_conn.c b/usr/src/uts/common/inet/sctp/sctp_conn.c index 6d41675d6b..a2bf44e3f4 100644 --- a/usr/src/uts/common/inet/sctp/sctp_conn.c +++ b/usr/src/uts/common/inet/sctp/sctp_conn.c @@ -128,16 +128,6 @@ sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0); SCTP_ASSOC_EST(sctps, acceptor); - - /* - * listener->sctp_rwnd should be the default window size or a - * window size changed via SO_RCVBUF option. 
- */ - acceptor->sctp_rwnd = listener->sctp_rwnd; - acceptor->sctp_irwnd = acceptor->sctp_rwnd; - acceptor->sctp_pd_point = acceptor->sctp_rwnd; - acceptor->sctp_upcalls = listener->sctp_upcalls; - return (0); } @@ -151,7 +141,6 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, int err; conn_t *connp, *econnp; sctp_stack_t *sctps; - struct sock_proto_props sopp; cred_t *cr; pid_t cpid; in6_addr_t faddr, laddr; @@ -348,17 +337,6 @@ sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, } ASSERT(SCTP_IS_DETACHED(eager)); eager->sctp_detached = B_FALSE; - bzero(&sopp, sizeof (sopp)); - sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF; - sopp.sopp_maxblk = strmsgsz; - if (econnp->conn_family == AF_INET) { - sopp.sopp_wroff = sctps->sctps_wroff_xtra + - sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len; - } else { - sopp.sopp_wroff = sctps->sctps_wroff_xtra + - sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len; - } - eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp); return (eager); } diff --git a/usr/src/uts/common/inet/sctp/sctp_impl.h b/usr/src/uts/common/inet/sctp/sctp_impl.h index 514ae63e96..cf69efc382 100644 --- a/usr/src/uts/common/inet/sctp/sctp_impl.h +++ b/usr/src/uts/common/inet/sctp/sctp_impl.h @@ -660,7 +660,7 @@ typedef struct sctp_s { #define sctp_ulp_disconnected sctp_upcalls->su_disconnected #define sctp_ulp_opctl sctp_upcalls->su_opctl #define sctp_ulp_recv sctp_upcalls->su_recv -#define sctp_ulp_xmitted sctp_upcalls->su_txq_full +#define sctp_ulp_txq_full sctp_upcalls->su_txq_full #define sctp_ulp_prop sctp_upcalls->su_set_proto_props int32_t sctp_state; @@ -739,8 +739,9 @@ typedef struct sctp_s { /* Inbound flow control */ int32_t sctp_rwnd; /* Current receive window */ - int32_t sctp_irwnd; /* Initial receive window */ + int32_t sctp_arwnd; /* Last advertised window */ int32_t sctp_rxqueued; /* No. 
of bytes in RX q's */ + int32_t sctp_ulp_rxqueued; /* Data in ULP */ /* Pre-initialized composite headers */ uchar_t *sctp_iphc; /* v4 sctp/ip hdr template buffer */ @@ -800,7 +801,8 @@ typedef struct sctp_s { sctp_txq_full : 1, /* the tx queue is full */ sctp_ulp_discon_done : 1, /* ulp_disconnecting done */ - sctp_dummy : 6; + sctp_flowctrld : 1, /* upper layer flow controlled */ + sctp_dummy : 5; } sctp_bits; struct { uint32_t @@ -838,6 +840,7 @@ typedef struct sctp_s { #define sctp_zero_win_probe sctp_bits.sctp_zero_win_probe #define sctp_txq_full sctp_bits.sctp_txq_full #define sctp_ulp_discon_done sctp_bits.sctp_ulp_discon_done +#define sctp_flowctrld sctp_bits.sctp_flowctrld #define sctp_recvsndrcvinfo sctp_events.sctp_recvsndrcvinfo #define sctp_recvassocevnt sctp_events.sctp_recvassocevnt @@ -960,7 +963,7 @@ typedef struct sctp_s { if ((sctp)->sctp_txq_full && SCTP_TXQ_LEN(sctp) <= \ (sctp)->sctp_connp->conn_sndlowat) { \ (sctp)->sctp_txq_full = 0; \ - (sctp)->sctp_ulp_xmitted((sctp)->sctp_ulpd, \ + (sctp)->sctp_ulp_txq_full((sctp)->sctp_ulpd, \ B_FALSE); \ } diff --git a/usr/src/uts/common/inet/sctp/sctp_input.c b/usr/src/uts/common/inet/sctp/sctp_input.c index d9a249ba3d..1b6449cfab 100644 --- a/usr/src/uts/common/inet/sctp/sctp_input.c +++ b/usr/src/uts/common/inet/sctp/sctp_input.c @@ -1295,7 +1295,6 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, uint32_t tsn; int dlen; boolean_t tpfinished = B_TRUE; - int32_t new_rwnd; sctp_stack_t *sctps = sctp->sctp_sctps; int error; @@ -1542,31 +1541,27 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, sctp->sctp_rxqueued -= dlen; if (can_deliver) { - /* step past header to the payload */ dmp->b_rptr = (uchar_t *)(dc + 1); if (sctp_input_add_ancillary(sctp, &dmp, dc, fp, ipp, ira) == 0) { dprint(1, ("sctp_data_chunk: delivering %lu bytes\n", msgdsize(dmp))); - sctp->sctp_rwnd -= dlen; /* * We overload the meaning of b_flag for SCTP sockfs * internal 
use, to advise sockfs of partial delivery * semantics. */ dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA; - new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, - msgdsize(dmp), 0, &error, NULL); - /* - * Since we always deliver the next TSN data chunk, - * we may buffer a little more than allowed. In - * that case, just mark the window as 0. - */ - if (new_rwnd < 0) - sctp->sctp_rwnd = 0; - else if (new_rwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = new_rwnd; + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, + msgdsize(dmp), 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } SCTP_ACK_IT(sctp, tsn); } else { /* No memory don't ack, the peer will retransmit. */ @@ -1689,7 +1684,6 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, ipp, ira) == 0) { dprint(1, ("sctp_data_chunk: delivering %lu " "bytes\n", msgdsize(dmp))); - sctp->sctp_rwnd -= dlen; /* * Meaning of b_flag overloaded for SCTP sockfs * internal use, advise sockfs of partial @@ -1697,12 +1691,15 @@ sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups, */ dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA; - new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, - dmp, msgdsize(dmp), 0, &error, NULL); - if (new_rwnd < 0) - sctp->sctp_rwnd = 0; - else if (new_rwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = new_rwnd; + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp, + msgdsize(dmp), 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } SCTP_ACK_IT(sctp, tsn); } else { /* don't ack, the peer will retransmit */ @@ -1772,6 +1769,8 @@ sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen) } else { sc->ssc_a_rwnd = 0; } + /* Remember the last window sent to peer. 
*/ + sctp->sctp_arwnd = sc->ssc_a_rwnd; sc->ssc_numfrags = htons(num_gaps); sc->ssc_numdups = 0; @@ -2359,7 +2358,6 @@ sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp, dlen += MBLKL(pmp); } if (can_deliver) { - int32_t nrwnd; int error; dmp->b_rptr = (uchar_t *)(dc + 1); @@ -2368,20 +2366,22 @@ sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp, if (sctp_input_add_ancillary(sctp, &dmp, dc, fp, ipp, ira) == 0) { sctp->sctp_rxqueued -= dlen; - sctp->sctp_rwnd -= dlen; /* * Override b_flag for SCTP sockfs * internal use */ dmp->b_flag = 0; - nrwnd = sctp->sctp_ulp_recv( + if (sctp->sctp_flowctrld) { + sctp->sctp_rwnd -= dlen; + if (sctp->sctp_rwnd < 0) + sctp->sctp_rwnd = 0; + } + if (sctp->sctp_ulp_recv( sctp->sctp_ulpd, dmp, msgdsize(dmp), - 0, &error, NULL); - if (nrwnd < 0) - sctp->sctp_rwnd = 0; - else if (nrwnd > sctp->sctp_rwnd) - sctp->sctp_rwnd = nrwnd; + 0, &error, NULL) <= 0) { + sctp->sctp_flowctrld = B_TRUE; + } } else { /* * We will resume processing when @@ -4409,33 +4409,30 @@ done: } /* - * Some amount of data got removed from rx q. - * Check if we should send a window update. - * - * Due to way sctp_rwnd updates are made, ULP can give reports out-of-order. - * To keep from dropping incoming data due to this, we only update - * sctp_rwnd when if it's larger than what we've reported to peer earlier. + * Some amount of data got removed from ULP's receive queue and we can + * push messages up if we are flow controlled before. Reset the receive + * window to full capacity (conn_rcvbuf) and check if we should send a + * window update. */ void sctp_recvd(sctp_t *sctp, int len) { - int32_t old, new; sctp_stack_t *sctps = sctp->sctp_sctps; + conn_t *connp = sctp->sctp_connp; + boolean_t send_sack = B_FALSE; ASSERT(sctp != NULL); RUN_SCTP(sctp); - if (len < sctp->sctp_rwnd) { - WAKE_SCTP(sctp); - return; - } + sctp->sctp_flowctrld = B_FALSE; + /* This is the amount of data queued in ULP. 
*/ + sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len; - old = sctp->sctp_rwnd - sctp->sctp_rxqueued; - new = len - sctp->sctp_rxqueued; - sctp->sctp_rwnd = len; + if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss) + send_sack = B_TRUE; + sctp->sctp_rwnd = connp->conn_rcvbuf; - if (sctp->sctp_state >= SCTPS_ESTABLISHED && - ((old <= new >> 1) || (old < sctp->sctp_mss))) { + if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) { sctp->sctp_force_sack = 1; SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate); (void) sctp_sack(sctp, NULL); diff --git a/usr/src/uts/common/inet/sctp/sctp_opt_data.c b/usr/src/uts/common/inet/sctp/sctp_opt_data.c index ef183ac1c3..6d6d4bdf3c 100644 --- a/usr/src/uts/common/inet/sctp/sctp_opt_data.c +++ b/usr/src/uts/common/inet/sctp/sctp_opt_data.c @@ -1119,7 +1119,7 @@ sctp_set_opt(sctp_t *sctp, int level, int name, const void *invalp, * protocol and here we just whack it. */ connp->conn_rcvbuf = sctp->sctp_rwnd = *i1; - sctp->sctp_irwnd = sctp->sctp_rwnd; + sctp->sctp_arwnd = sctp->sctp_rwnd; sctp->sctp_pd_point = sctp->sctp_rwnd; sopp.sopp_flags = SOCKOPT_RCVHIWAT; diff --git a/usr/src/uts/common/inet/sctp/sctp_output.c b/usr/src/uts/common/inet/sctp/sctp_output.c index e6a991dc1e..eced6eccba 100644 --- a/usr/src/uts/common/inet/sctp/sctp_output.c +++ b/usr/src/uts/common/inet/sctp/sctp_output.c @@ -295,7 +295,7 @@ sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags) */ if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) { sctp->sctp_txq_full = 1; - sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE); + sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE); } if (sctp->sctp_state == SCTPS_ESTABLISHED) sctp_output(sctp, UINT_MAX); diff --git a/usr/src/uts/common/inet/sockmods/socksctp.c b/usr/src/uts/common/inet/sockmods/socksctp.c index 871e9f71e5..bcd8e8d8fa 100644 --- a/usr/src/uts/common/inet/sockmods/socksctp.c +++ b/usr/src/uts/common/inet/sockmods/socksctp.c @@ -145,6 +145,7 @@ sonodeops_t sosctp_seq_sonodeops = { sosctp_close, /* sop_close */ 
}; +/* All the upcalls expect the upper handle to be sonode. */ sock_upcalls_t sosctp_sock_upcalls = { so_newconn, so_connected, @@ -156,6 +157,7 @@ sock_upcalls_t sosctp_sock_upcalls = { NULL, /* su_signal_oob */ }; +/* All the upcalls expect the upper handle to be sctp_sonode/sctp_soassoc. */ sock_upcalls_t sosctp_assoc_upcalls = { sctp_assoc_newconn, sctp_assoc_connected, @@ -175,7 +177,6 @@ sosctp_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) struct sctp_sonode *ss; struct sctp_sonode *pss; sctp_sockbuf_limits_t sbl; - sock_upcalls_t *upcalls; int err; ss = SOTOSSO(so); @@ -200,19 +201,21 @@ sosctp_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) return (0); } + if ((err = secpolicy_basic_net_access(cr)) != 0) + return (err); + if (so->so_type == SOCK_STREAM) { - upcalls = &sosctp_sock_upcalls; + so->so_proto_handle = (sock_lower_handle_t)sctp_create(so, + NULL, so->so_family, so->so_type, SCTP_CAN_BLOCK, + &sosctp_sock_upcalls, &sbl, cr); so->so_mode = SM_CONNREQUIRED; } else { ASSERT(so->so_type == SOCK_SEQPACKET); - upcalls = &sosctp_assoc_upcalls; + so->so_proto_handle = (sock_lower_handle_t)sctp_create(ss, + NULL, so->so_family, so->so_type, SCTP_CAN_BLOCK, + &sosctp_assoc_upcalls, &sbl, cr); } - if ((err = secpolicy_basic_net_access(cr)) != 0) - return (err); - - so->so_proto_handle = (sock_lower_handle_t)sctp_create(so, NULL, - so->so_family, so->so_type, SCTP_CAN_BLOCK, upcalls, &sbl, cr); if (so->so_proto_handle == NULL) return (ENOMEM); @@ -482,7 +485,7 @@ sosctp_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags, error = 0; struct T_unitdata_ind *tind; ssize_t orig_resid = uiop->uio_resid; - int len, count, readcnt = 0, rxqueued; + int len, count, readcnt = 0; socklen_t controllen, namelen; void *opt; mblk_t *mp; @@ -591,8 +594,10 @@ again: msg->msg_flags |= MSG_NOTIFICATION; } - if (!(mp->b_flag & SCTP_PARTIAL_DATA)) + if (!(mp->b_flag & SCTP_PARTIAL_DATA) && + !(rval.r_val1 & 
MOREDATA)) { msg->msg_flags |= MSG_EOR; + } freemsg(mp); } done: @@ -606,7 +611,6 @@ done: */ if (ssa == NULL) { mutex_enter(&so->so_lock); - rxqueued = so->so_rcv_queued; count = so->so_rcvbuf - so->so_rcv_queued; ASSERT(so->so_rcv_q_head != NULL || @@ -614,16 +618,17 @@ done: so->so_rcv_queued == 0); so_unlock_read(so); - mutex_exit(&so->so_lock); - if (readcnt > 0 && (((count > 0) && - ((rxqueued + readcnt) >= so->so_rcvlowat)) || - (rxqueued == 0))) { - /* - * If amount of queued data is higher than watermark, - * updata SCTP's idea of available buffer space. - */ + /* + * so_dequeue_msg() sets r_val2 to true if flow control was + * cleared and we need to update SCTP. so_flowctrld was + * cleared in so_dequeue_msg() via so_check_flow_control(). + */ + if (rval.r_val2) { + mutex_exit(&so->so_lock); sctp_recvd((struct sctp_s *)so->so_proto_handle, count); + } else { + mutex_exit(&so->so_lock); } } else { /* @@ -634,26 +639,23 @@ done: * done in so_dequeue_msg(). */ mutex_enter(&so->so_lock); - rxqueued = ssa->ssa_rcv_queued; - - ssa->ssa_rcv_queued = rxqueued - readcnt; + ssa->ssa_rcv_queued -= readcnt; count = so->so_rcvbuf - ssa->ssa_rcv_queued; so_unlock_read(so); - if (readcnt > 0 && - (((count > 0) && (rxqueued >= so->so_rcvlowat)) || - (ssa->ssa_rcv_queued == 0))) { + if (readcnt > 0 && ssa->ssa_flowctrld && + ssa->ssa_rcv_queued < so->so_rcvlowat) { /* - * If amount of queued data is higher than watermark, - * updata SCTP's idea of available buffer space. + * Need to clear ssa_flowctrld, different from 1-1 + * style. 
*/ + ssa->ssa_flowctrld = B_FALSE; mutex_exit(&so->so_lock); - - sctp_recvd((struct sctp_s *)ssa->ssa_conn, count); - + sctp_recvd(ssa->ssa_conn, count); mutex_enter(&so->so_lock); } + /* * MOREDATA flag is set if all data could not be copied */ @@ -723,7 +725,6 @@ static int sosctp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, struct cred *cr) { - struct sctp_sonode *ss = SOTOSSO(so); mblk_t *mctl; struct cmsghdr *cmsg; struct sctp_sndrcvinfo *sinfo; @@ -891,8 +892,8 @@ sosctp_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, } /* Copy in the message. */ - if ((error = sosctp_uiomove(mctl, count, ss->ss_wrsize, ss->ss_wroff, - uiop, flags)) != 0) { + if ((error = sosctp_uiomove(mctl, count, so->so_proto_props.sopp_maxblk, + so->so_proto_props.sopp_wroff, uiop, flags)) != 0) { goto error_ret; } error = sctp_sendmsg((struct sctp_s *)so->so_proto_handle, mctl, 0); @@ -1031,9 +1032,8 @@ sosctp_seq_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, } else { mutex_exit(&so->so_lock); ssa->ssa_state |= SS_ISDISCONNECTING; - sctp_recvd((struct sctp_s *)ssa->ssa_conn, - so->so_rcvbuf); - error = sctp_disconnect((struct sctp_s *)ssa->ssa_conn); + sctp_recvd(ssa->ssa_conn, so->so_rcvbuf); + error = sctp_disconnect(ssa->ssa_conn); mutex_enter(&so->so_lock); } goto refrele; @@ -1825,8 +1825,8 @@ sosctp_close(struct sonode *so, int flag, struct cred *cr) ss = SOTOSSO(so); /* - * Initiate connection shutdown. Update SCTP's receive - * window. + * Initiate connection shutdown. Tell SCTP if there is any data + * left unread. 
*/ sctp_recvd((struct sctp_s *)so->so_proto_handle, so->so_rcvbuf - so->so_rcv_queued); @@ -1845,9 +1845,9 @@ sosctp_close(struct sonode *so, int flag, struct cred *cr) sosctp_assoc_isdisconnected(ssa, 0); mutex_exit(&so->so_lock); - sctp_recvd((struct sctp_s *)ssa->ssa_conn, - so->so_rcvbuf - ssa->ssa_rcv_queued); - (void) sctp_disconnect((struct sctp_s *)ssa->ssa_conn); + sctp_recvd(ssa->ssa_conn, so->so_rcvbuf - + ssa->ssa_rcv_queued); + (void) sctp_disconnect(ssa->ssa_conn); mutex_enter(&so->so_lock); SSA_REFRELE(ss, ssa); @@ -1879,8 +1879,6 @@ sosctp_fini(struct sonode *so, struct cred *cr) /* We are the sole owner of so now */ mutex_enter(&so->so_lock); - so_rcv_flush(so); - /* Free all pending connections */ so_acceptq_flush(so, B_TRUE); @@ -1908,6 +1906,15 @@ sosctp_fini(struct sonode *so, struct cred *cr) sctp_close((struct sctp_s *)so->so_proto_handle); so->so_proto_handle = NULL; + /* + * Note until sctp_close() is called, SCTP can still send up + * messages, such as event notifications. So we should flush + * the receive buffer after calling sctp_close(). 
+ */ + mutex_enter(&so->so_lock); + so_rcv_flush(so); + mutex_exit(&so->so_lock); + sonode_fini(so); } @@ -1929,8 +1936,8 @@ sctp_assoc_newconn(sock_upper_handle_t parenthandle, sock_lower_handle_t connind, sock_downcalls_t *dc, struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **ucp) { - struct sonode *lso = (struct sonode *)parenthandle; - struct sctp_sonode *lss = SOTOSSO(lso); + struct sctp_sonode *lss = (struct sctp_sonode *)parenthandle; + struct sonode *lso = &lss->ss_so; struct sctp_soassoc *ssa; sctp_assoc_t id; @@ -2144,6 +2151,9 @@ sctp_assoc_recv(sock_upper_handle_t handle, mblk_t *mp, size_t len, int flags, ssa->ssa_rcv_queued += len; space_available = so->so_rcvbuf - ssa->ssa_rcv_queued; + if (space_available <= 0) + ssa->ssa_flowctrld = B_TRUE; + so_enqueue_msg(so, mp, len); /* so_notify_data drops so_lock */ @@ -2179,32 +2189,44 @@ sctp_assoc_properties(sock_upper_handle_t handle, struct sock_proto_props *soppp) { struct sctp_soassoc *ssa = (struct sctp_soassoc *)handle; - struct sctp_sonode *ss; + struct sonode *so; if (ssa->ssa_type == SOSCTP_ASSOC) { - ss = ssa->ssa_sonode; - mutex_enter(&ss->ss_so.so_lock); + so = &ssa->ssa_sonode->ss_so; - /* - * Only change them if they're set. - */ - if (soppp->sopp_wroff != 0) { + mutex_enter(&so->so_lock); + + /* Per assoc_id properties. 
*/ + if (soppp->sopp_flags & SOCKOPT_WROFF) ssa->ssa_wroff = soppp->sopp_wroff; - } - if (soppp->sopp_maxblk != 0) { + if (soppp->sopp_flags & SOCKOPT_MAXBLK) ssa->ssa_wrsize = soppp->sopp_maxblk; - } } else { - ss = (struct sctp_sonode *)handle; - mutex_enter(&ss->ss_so.so_lock); + so = &((struct sctp_sonode *)handle)->ss_so; + mutex_enter(&so->so_lock); - if (soppp->sopp_wroff != 0) { - ss->ss_wroff = soppp->sopp_wroff; - } - if (soppp->sopp_maxblk != 0) { - ss->ss_wrsize = soppp->sopp_maxblk; + if (soppp->sopp_flags & SOCKOPT_WROFF) + so->so_proto_props.sopp_wroff = soppp->sopp_wroff; + if (soppp->sopp_flags & SOCKOPT_MAXBLK) + so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; + if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) { + ssize_t lowat; + + so->so_rcvbuf = soppp->sopp_rxhiwat; + /* + * The low water mark should be adjusted properly + * if the high water mark is changed. It should + * not be bigger than 1/4 of high water mark. + */ + lowat = soppp->sopp_rxhiwat >> 2; + if (so->so_rcvlowat > lowat) { + /* Sanity check... */ + if (lowat == 0) + so->so_rcvlowat = soppp->sopp_rxhiwat; + else + so->so_rcvlowat = lowat; + } } } - - mutex_exit(&ss->ss_so.so_lock); + mutex_exit(&so->so_lock); } diff --git a/usr/src/uts/common/inet/sockmods/socksctp.h b/usr/src/uts/common/inet/sockmods/socksctp.h index 2ac7058821..1aadcdf768 100644 --- a/usr/src/uts/common/inet/sockmods/socksctp.h +++ b/usr/src/uts/common/inet/sockmods/socksctp.h @@ -18,9 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #ifndef _SOCKSCTP_H_ @@ -59,9 +59,10 @@ struct sctp_soassoc { uint_t ssa_state; /* same as so_state */ int ssa_error; /* same as so_error */ boolean_t ssa_snd_qfull; - int ssa_wroff; - size_t ssa_wrsize; + ushort_t ssa_wroff; + ssize_t ssa_wrsize; int ssa_rcv_queued; /* queued rx bytes/# of conn */ + boolean_t ssa_flowctrld; /* receive flow controlled */ }; /* 1-N socket association cache defined in socksctp.c */ diff --git a/usr/src/uts/common/inet/sockmods/socksctpsubr.c b/usr/src/uts/common/inet/sockmods/socksctpsubr.c index a647cbe4f2..9b2e4f1de5 100644 --- a/usr/src/uts/common/inet/sockmods/socksctpsubr.c +++ b/usr/src/uts/common/inet/sockmods/socksctpsubr.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -177,6 +176,7 @@ sosctp_assoc_create(struct sctp_sonode *ss, int kmflag) ssa->ssa_error = 0; ssa->ssa_snd_qfull = 0; ssa->ssa_rcv_queued = 0; + ssa->ssa_flowctrld = B_FALSE; } dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa)); return (ssa); @@ -515,32 +515,37 @@ sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, { mblk_t *mp, **nmp, *last_mp; struct sctp_soassoc *tmp; + struct sonode *nso, *sso; sosctp_so_inherit(ss, nss); - nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK)); - nss->ss_so.so_state |= + sso = &ss->ss_so; + nso = &nss->ss_so; + + nso->so_state |= (sso->so_state & (SS_NDELAY|SS_NONBLOCK)); + nso->so_state |= (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING| SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND)); - nss->ss_so.so_error = ssa->ssa_error; - nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull; - nss->ss_wroff = ssa->ssa_wroff; - nss->ss_wrsize = ssa->ssa_wrsize; - nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued; - nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; + nso->so_error = 
ssa->ssa_error; + nso->so_snd_qfull = ssa->ssa_snd_qfull; + nso->so_proto_props.sopp_wroff = ssa->ssa_wroff; + nso->so_proto_props.sopp_maxblk = ssa->ssa_wrsize; + nso->so_rcv_queued = ssa->ssa_rcv_queued; + nso->so_flowctrld = ssa->ssa_flowctrld; + nso->so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; /* The peeled off socket is connection oriented */ - nss->ss_so.so_mode |= SM_CONNREQUIRED; + nso->so_mode |= SM_CONNREQUIRED; /* Consolidate all data on a single rcv list */ - if (ss->ss_so.so_rcv_head != NULL) { - so_process_new_message(&ss->ss_so, ss->ss_so.so_rcv_head, - ss->ss_so.so_rcv_last_head); - ss->ss_so.so_rcv_head = NULL; - ss->ss_so.so_rcv_last_head = NULL; + if (sso->so_rcv_head != NULL) { + so_process_new_message(&ss->ss_so, sso->so_rcv_head, + sso->so_rcv_last_head); + sso->so_rcv_head = NULL; + sso->so_rcv_last_head = NULL; } - if (nss->ss_so.so_rcv_queued > 0) { - nmp = &ss->ss_so.so_rcv_q_head; + if (nso->so_rcv_queued > 0) { + nmp = &sso->so_rcv_q_head; last_mp = NULL; while ((mp = *nmp) != NULL) { tmp = *(struct sctp_soassoc **)DB_BASE(mp); @@ -560,13 +565,12 @@ sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, if (tmp == ssa) { *nmp = mp->b_next; ASSERT(DB_TYPE(mp) != M_DATA); - if (nss->ss_so.so_rcv_q_last_head == NULL) { - nss->ss_so.so_rcv_q_head = mp; + if (nso->so_rcv_q_last_head == NULL) { + nso->so_rcv_q_head = mp; } else { - nss->ss_so.so_rcv_q_last_head->b_next = - mp; + nso->so_rcv_q_last_head->b_next = mp; } - nss->ss_so.so_rcv_q_last_head = mp; + nso->so_rcv_q_last_head = mp; mp->b_next = NULL; } else { nmp = &mp->b_next; @@ -574,7 +578,7 @@ sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, } } - ss->ss_so.so_rcv_q_last_head = last_mp; + sso->so_rcv_q_last_head = last_mp; } } |