author | xy158873 <none@none> | 2005-10-17 11:11:52 -0700
committer | xy158873 <none@none> | 2005-10-17 11:11:52 -0700
commit | a66ba7b33369c7bd064dcae0e279fd236eee6dbf (patch)
tree | 111927198ae119f055eb30d6402cfa5c72c5ce6e /usr/src/uts/common/inet/tcp/tcp.c
parent | c0f937da2a16c696966e32fa43ce9d1eeda946ec (diff)
download | illumos-joyent-a66ba7b33369c7bd064dcae0e279fd236eee6dbf.tar.gz
6275398 Galaxy hangs when running lmbench
Diffstat (limited to 'usr/src/uts/common/inet/tcp/tcp.c')
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 130
1 file changed, 102 insertions, 28 deletions
```diff
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 8c651d1443..b3ab0208ac 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -1155,10 +1155,39 @@ static void tcp_ioctl_abort_conn(queue_t *, mblk_t *);
 static int	tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *,
     boolean_t);
+/*
+ * Write-side flow-control is implemented via the per-instance STREAMS
+ * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s),
+ * and by clearing QFULL and calling qbackenable() to restart the flow,
+ * based on the number of TCP unsent bytes (i.e. those not on the wire
+ * waiting for a remote ACK).
+ *
+ * This is different from a standard STREAMS kmod, where the framework
+ * would automatically flow-control based on the defined hiwat/lowat
+ * values as mblk_t's are enqueued/dequeued on the STREAMS Q.
+ *
+ * As of FireEngine, TCP write-side flow-control needs to take into
+ * account not only the unsent tcp_xmit list bytes but also any squeue_t
+ * enqueued bytes (i.e. from tcp_wput() -> tcp_output()).
+ *
+ * This is accomplished by adding a new tcp_t field, tcp_squeue_bytes, to
+ * count the number of bytes enqueued by tcp_wput() and the number of bytes
+ * dequeued and processed by tcp_output().
+ *
+ * So, the total number of bytes unsent is (squeue_bytes + unsent), with
+ * all flow-control uses of unsent replaced with the macro
+ * TCP_UNSENT_BYTES.
+ */
 static void	tcp_clrqfull(tcp_t *);
 static void	tcp_setqfull(tcp_t *);
 
+#define	TCP_UNSENT_BYTES(tcp) \
+	((tcp)->tcp_squeue_bytes + (tcp)->tcp_unsent)
+
+/*
+ * STREAMS kmod stuff ...
+ */
+
 static struct module_info tcp_rinfo = {
 #define	TCP_MODULE_ID	5105
 	TCP_MODULE_ID, "tcp", 0, INFPSZ, TCP_RECV_HIWATER, TCP_RECV_LOWATER
@@ -1968,12 +1997,10 @@ tcp_unfuse(tcp_t *tcp)
 	/* Lift up any flow-control conditions */
 	if (tcp->tcp_flow_stopped) {
 		tcp_clrqfull(tcp);
-		tcp->tcp_flow_stopped = B_FALSE;
 		TCP_STAT(tcp_fusion_backenabled);
 	}
 	if (peer_tcp->tcp_flow_stopped) {
 		tcp_clrqfull(peer_tcp);
-		peer_tcp->tcp_flow_stopped = B_FALSE;
 		TCP_STAT(tcp_fusion_backenabled);
 	}
 
@@ -2165,7 +2192,6 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp)
 		if (TCP_IS_DETACHED(peer_tcp) &&
 		    peer_tcp->tcp_rcv_cnt > peer_rq->q_hiwat) {
 			tcp_setqfull(tcp);
-			tcp->tcp_flow_stopped = B_TRUE;
 			TCP_STAT(tcp_fusion_flowctl);
 		}
 
@@ -2203,7 +2229,6 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp)
 	} else if (!tcp->tcp_flow_stopped) {
 		if (!canputnext(peer_rq)) {
 			tcp_setqfull(tcp);
-			tcp->tcp_flow_stopped = B_TRUE;
 			TCP_STAT(tcp_fusion_flowctl);
 		} else {
 			ASSERT(peer_tcp->tcp_rcv_list != NULL);
@@ -2211,6 +2236,8 @@ tcp_fuse_output(tcp_t *tcp, mblk_t *mp)
 			    peer_tcp, NULL);
 			TCP_STAT(tcp_fusion_putnext);
 		}
+	} else if (TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) {
+		tcp_clrqfull(tcp);
 	}
 	return (B_TRUE);
 }
@@ -4476,7 +4503,6 @@ tcp_stop_lingering(tcp_t *tcp)
 	if (tcp->tcp_state > TCPS_LISTEN) {
 		tcp_acceptor_hash_remove(tcp);
 		if (tcp->tcp_flow_stopped) {
-			tcp->tcp_flow_stopped = B_FALSE;
 			tcp_clrqfull(tcp);
 		}
 
@@ -4798,7 +4824,6 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2)
 		tcp_acceptor_hash_remove(tcp);
 		if (tcp->tcp_flow_stopped) {
-			tcp->tcp_flow_stopped = B_FALSE;
 			tcp_clrqfull(tcp);
 		}
 
@@ -7989,7 +8014,6 @@ tcp_reinit(tcp_t *tcp)
 	tcp_timers_stop(tcp);
 
 	if (tcp->tcp_flow_stopped) {
-		tcp->tcp_flow_stopped = B_FALSE;
 		tcp_clrqfull(tcp);
 	}
 	/*
@@ -8398,6 +8422,8 @@ tcp_reinit_values(tcp)
 	tcp->tcp_in_ack_unsent = 0;
 	tcp->tcp_cork = B_FALSE;
 
+	tcp->tcp_squeue_bytes = 0;
+
 #undef	DONTCARE
 #undef	PRESERVE
 }
@@ -10472,8 +10498,8 @@ tcp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
 				 * condition to be lifted right away.
 				 */
 				if (tcp->tcp_flow_stopped &&
-				    tcp->tcp_unsent < tcp->tcp_xmit_hiwater) {
-					tcp->tcp_flow_stopped = B_FALSE;
+				    TCP_UNSENT_BYTES(tcp)
+				    < tcp->tcp_xmit_hiwater) {
 					tcp_clrqfull(tcp);
 				}
 			}
@@ -15988,7 +16014,6 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
 		(void) tcp_rcv_drain(tcp->tcp_rq, tcp);
 
 		tcp_clrqfull(peer_tcp);
-		peer_tcp->tcp_flow_stopped = B_FALSE;
 		TCP_STAT(tcp_fusion_backenabled);
 		return;
 	}
@@ -17441,6 +17466,7 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
 	int		usable;
 	conn_t		*connp = (conn_t *)arg;
 	tcp_t		*tcp = connp->conn_tcp;
+	uint32_t	msize;
 
 	/*
 	 * Try and ASSERT the minimum possible references on the
@@ -17455,8 +17481,15 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
 	    (connp->conn_fanout == NULL && connp->conn_ref >= 3));
 
 	/* Bypass tcp protocol for fused tcp loopback */
-	if (tcp->tcp_fused && tcp_fuse_output(tcp, mp))
-		return;
+	if (tcp->tcp_fused) {
+		msize = msgdsize(mp);
+		mutex_enter(&connp->conn_lock);
+		tcp->tcp_squeue_bytes -= msize;
+		mutex_exit(&connp->conn_lock);
+
+		if (tcp_fuse_output(tcp, mp))
+			return;
+	}
 
 	mss = tcp->tcp_mss;
 	if (tcp->tcp_xmit_zc_clean)
@@ -17482,6 +17515,11 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
 	    (len == 0) ||
 	    (len > mss) ||
 	    (tcp->tcp_valid_bits != 0)) {
+		msize = msgdsize(mp);
+		mutex_enter(&connp->conn_lock);
+		tcp->tcp_squeue_bytes -= msize;
+		mutex_exit(&connp->conn_lock);
+
 		tcp_wput_data(tcp, mp, B_FALSE);
 		return;
 	}
@@ -17489,6 +17527,10 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
 	ASSERT(tcp->tcp_xmit_tail_unsent == 0);
 	ASSERT(tcp->tcp_fin_sent == 0);
 
+	mutex_enter(&connp->conn_lock);
+	tcp->tcp_squeue_bytes -= len;
+	mutex_exit(&connp->conn_lock);
+
 	/* queue new packet onto retransmission queue */
 	if (tcp->tcp_xmit_head == NULL) {
 		tcp->tcp_xmit_head = mp;
@@ -17536,6 +17578,11 @@ tcp_output(void *arg, mblk_t *mp, void *arg2)
 		goto slow;
 	}
 
+	if (tcp->tcp_flow_stopped &&
+	    TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) {
+		tcp_clrqfull(tcp);
+	}
+
 	/*
 	 * determine if anything to send (Nagle).
 	 *
@@ -17909,7 +17956,6 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
 			ASSERT(peer_tcp->tcp_fused);
 
 			tcp_clrqfull(peer_tcp);
-			peer_tcp->tcp_flow_stopped = B_FALSE;
 			TCP_STAT(tcp_fusion_backenabled);
 		}
 	}
@@ -18245,12 +18291,27 @@ tcp_wput(queue_t *q, mblk_t *mp)
 	t_scalar_t type;
 	uchar_t *rptr;
 	struct iocblk *iocp;
+	uint32_t msize;
 
 	ASSERT(connp->conn_ref >= 2);
 
 	switch (DB_TYPE(mp)) {
 	case M_DATA:
-		CONN_INC_REF(connp);
+		tcp = connp->conn_tcp;
+		ASSERT(tcp != NULL);
+
+		msize = msgdsize(mp);
+
+		mutex_enter(&connp->conn_lock);
+		CONN_INC_REF_LOCKED(connp);
+
+		tcp->tcp_squeue_bytes += msize;
+		if (TCP_UNSENT_BYTES(tcp) > tcp->tcp_xmit_hiwater) {
+			mutex_exit(&connp->conn_lock);
+			tcp_setqfull(tcp);
+		} else
+			mutex_exit(&connp->conn_lock);
+
 		(*tcp_squeue_wput_proc)(connp->conn_sqp, mp,
 		    tcp_output, connp, SQTAG_TCP_OUTPUT);
 		return;
@@ -19214,15 +19275,12 @@ done:;
 		TCP_TIMER_RESTART(tcp, tcp->tcp_rto);
 	}
 	/* Note that len is the amount we just sent but with a negative sign */
-	len += tcp->tcp_unsent;
-	tcp->tcp_unsent = len;
+	tcp->tcp_unsent += len;
 	if (tcp->tcp_flow_stopped) {
-		if (len <= tcp->tcp_xmit_lowater) {
-			tcp->tcp_flow_stopped = B_FALSE;
+		if (TCP_UNSENT_BYTES(tcp) <= tcp->tcp_xmit_lowater) {
 			tcp_clrqfull(tcp);
 		}
-	} else if (len >= tcp->tcp_xmit_hiwater) {
-		tcp->tcp_flow_stopped = B_TRUE;
+	} else if (TCP_UNSENT_BYTES(tcp) >= tcp->tcp_xmit_hiwater) {
 		tcp_setqfull(tcp);
 	}
 }
@@ -21112,7 +21170,6 @@ tcp_wput_flush(tcp_t *tcp, mblk_t *mp)
 	 * tcp_xmit_lowater, so re-enable flow.
	 */
 	if (tcp->tcp_flow_stopped) {
-		tcp->tcp_flow_stopped = B_FALSE;
 		tcp_clrqfull(tcp);
 	}
 }
@@ -25165,16 +25222,29 @@ tcp_timer_free(tcp_t *tcp, mblk_t *mp)
  * End of TCP Timers implementation.
  */
 
+/*
+ * The tcp_{set,clr}qfull() functions are used to either set or clear QFULL
+ * on the specified backing STREAMS q. Note that the caller may make the
+ * decision to call based on the tcp_t.tcp_flow_stopped value, which when
+ * checked outside the q's lock is only an advisory check ...
+ */
+
 static void
 tcp_setqfull(tcp_t *tcp)
 {
 	queue_t *q = tcp->tcp_wq;
 
 	if (!(q->q_flag & QFULL)) {
-		TCP_STAT(tcp_flwctl_on);
 		mutex_enter(QLOCK(q));
-		q->q_flag |= QFULL;
-		mutex_exit(QLOCK(q));
+		if (!(q->q_flag & QFULL)) {
+			/* still need to set QFULL */
+			q->q_flag |= QFULL;
+			tcp->tcp_flow_stopped = B_TRUE;
+			mutex_exit(QLOCK(q));
+			TCP_STAT(tcp_flwctl_on);
+		} else {
+			mutex_exit(QLOCK(q));
+		}
 	}
 }
 
@@ -25185,10 +25255,14 @@ tcp_clrqfull(tcp_t *tcp)
 	if (q->q_flag & QFULL) {
 		mutex_enter(QLOCK(q));
-		q->q_flag &= ~QFULL;
-		mutex_exit(QLOCK(q));
-		if (q->q_flag & QWANTW)
-			qbackenable(q, 0);
+		if (q->q_flag & QFULL) {
+			q->q_flag &= ~QFULL;
+			tcp->tcp_flow_stopped = B_FALSE;
+			mutex_exit(QLOCK(q));
+			if (q->q_flag & QWANTW)
+				qbackenable(q, 0);
+		} else
+			mutex_exit(QLOCK(q));
 	}
 }
```
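Taken together, the hunks above implement the byte accounting that the new block comment describes: tcp_wput() adds each message's msgdsize() to tcp_squeue_bytes under conn_lock before enqueuing it on the squeue, tcp_output() subtracts it again on the processing side, and both ends compare TCP_UNSENT_BYTES() against the xmit hiwat/lowat marks. Below is a minimal user-space sketch of that producer/consumer pattern; the conn_t fields and the producer_enqueue()/consumer_dequeue() helpers are hypothetical stand-ins that mirror the kernel logic, not the illumos API.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical, user-space stand-ins for the tcp_t/conn_t fields the
 * patch touches; the names echo the kernel ones but nothing here is
 * the actual illumos API.
 */
typedef struct conn {
	pthread_mutex_t	lock;		/* plays the role of conn_lock */
	uint32_t	squeue_bytes;	/* enqueued, not yet processed */
	uint32_t	unsent;		/* on the xmit list, not on the wire */
	uint32_t	xmit_hiwater;	/* stop-flow threshold */
	uint32_t	xmit_lowater;	/* restart-flow threshold */
	bool		flow_stopped;	/* advisory copy of the QFULL state */
} conn_t;

/* Both components of "unsent", as in the patch's TCP_UNSENT_BYTES macro. */
#define	UNSENT_BYTES(c)	((c)->squeue_bytes + (c)->unsent)

void set_qfull(conn_t *c);	/* defined in the next sketch */
void clr_qfull(conn_t *c);

/* Producer side, as in tcp_wput(): count the message before enqueuing. */
void
producer_enqueue(conn_t *c, uint32_t msize)
{
	bool over;

	pthread_mutex_lock(&c->lock);
	c->squeue_bytes += msize;
	over = UNSENT_BYTES(c) > c->xmit_hiwater;
	pthread_mutex_unlock(&c->lock);
	if (over)
		set_qfull(c);	/* assert flow control past hiwat */
}

/* Consumer side, as in tcp_output(): uncount it once it is processed. */
void
consumer_dequeue(conn_t *c, uint32_t msize)
{
	pthread_mutex_lock(&c->lock);
	c->squeue_bytes -= msize;
	pthread_mutex_unlock(&c->lock);
	/* Like the kernel's check, this unlocked test is only advisory. */
	if (c->flow_stopped && UNSENT_BYTES(c) <= c->xmit_lowater)
		clr_qfull(c);	/* release flow control below lowat */
}
```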
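The other half of the change makes tcp_setqfull() and tcp_clrqfull() flip QFULL and tcp_flow_stopped together under QLOCK, keeping the unlocked q_flag test as the fast-path advisory check the new comment mentions and re-testing it once the lock is held. A sketch of that double-checked toggle, continuing the hypothetical conn_t above (a mutex-guarded bool stands in for the QFULL bit in q_flag, and backenable() is a stub for qbackenable()):

```c
/* Hypothetical stand-ins for the STREAMS write-side queue state. */
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;	/* QLOCK(q) */
static bool qfull;	/* the QFULL flag */
static bool qwantw;	/* QWANTW: a writer is blocked, waiting */

static void
backenable(void)
{
	/* In the kernel this would be qbackenable(q, 0); here, a stub. */
}

void
set_qfull(conn_t *c)
{
	/* Unlocked peek is only advisory; recheck under the lock. */
	if (!qfull) {
		pthread_mutex_lock(&qlock);
		if (!qfull) {
			/* still need to set QFULL */
			qfull = true;
			c->flow_stopped = true;	/* kept in sync under qlock */
		}
		pthread_mutex_unlock(&qlock);
	}
}

void
clr_qfull(conn_t *c)
{
	bool cleared = false;

	if (qfull) {
		pthread_mutex_lock(&qlock);
		if (qfull) {
			qfull = false;
			c->flow_stopped = false;
			cleared = true;
		}
		pthread_mutex_unlock(&qlock);
		/* Wake blocked writers only if we did the clearing. */
		if (cleared && qwantw)
			backenable();
	}
}
```

The re-test under the lock is the point of the rework: two racing callers may both observe a stale flag value, but only the thread that actually changes it updates tcp_flow_stopped and, on the clear side, back-enables any waiting writer.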