diff options
Diffstat (limited to 'usr/src')
 usr/src/uts/common/inet/ip.h            | 37
 usr/src/uts/common/inet/ip/ip.c         | 20
 usr/src/uts/common/inet/ip/ip_if.c      | 28
 usr/src/uts/common/inet/ipclassifier.h  |  1
 usr/src/uts/common/inet/tcp/tcp.c       | 81
 5 files changed, 139 insertions(+), 28 deletions(-)
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index 7baee80006..5557f941e2 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -232,6 +232,43 @@ typedef struct ipoptp_s } /* + * Ref counter macros for ioctls. This provides a guard for TCP to stop + * tcp_close from removing the rq/wq whilst an ioctl is still in flight on the + * stream. The ioctl could have been queued on e.g. an ipsq. tcp_close will wait + * until the ioctlref count is zero before proceeding. + * Ideally conn_oper_pending_ill would be used for this purpose. However, in the + * case where an ioctl is aborted or interrupted, it can be cleared prematurely. + * There are also some race possibilities between ip and the stream head which + * can also end up with conn_oper_pending_ill being cleared prematurely. So, to + * avoid these situations, we use a dedicated ref counter for ioctls which is + * used in addition to and in parallel with the normal conn_ref count. + */ +#define CONN_INC_IOCTLREF_LOCKED(connp) { \ + ASSERT(MUTEX_HELD(&(connp)->conn_lock)); \ + DTRACE_PROBE1(conn__inc__ioctlref, conn_t *, (connp)); \ + (connp)->conn_ioctlref++; \ + mutex_exit(&(connp)->conn_lock); \ +} + +#define CONN_INC_IOCTLREF(connp) { \ + mutex_enter(&(connp)->conn_lock); \ + CONN_INC_IOCTLREF_LOCKED(connp); \ +} + +#define CONN_DEC_IOCTLREF(connp) { \ + mutex_enter(&(connp)->conn_lock); \ + DTRACE_PROBE1(conn__dec__ioctlref, conn_t *, (connp)); \ + /* Make sure conn_ioctlref will not underflow. */ \ + ASSERT((connp)->conn_ioctlref != 0); \ + if ((--(connp)->conn_ioctlref == 0) && \ + ((connp)->conn_state_flags & CONN_CLOSING)) { \ + cv_broadcast(&(connp)->conn_cv); \ + } \ + mutex_exit(&(connp)->conn_lock); \ +} + + +/* * Complete the pending operation. Usually an ioctl. Can also * be a bind or option management request that got enqueued * in an ipsq_t. Called on completion of the operation. 
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index dc67cd5e2e..276711d737 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -12805,10 +12805,13 @@ ip_ioctl_finish(queue_t *q, mblk_t *mp, int err, int mode, ipsq_t *ipsq) } /* - * The refhold placed at the start of the ioctl is released here. + * The conn refhold and ioctlref placed on the conn at the start of the + * ioctl are released here. */ - if (connp != NULL) + if (connp != NULL) { + CONN_DEC_IOCTLREF(connp); CONN_OPER_PENDING_DONE(connp); + } if (ipsq != NULL) ipsq_current_finish(ipsq); @@ -12895,16 +12898,19 @@ ip_wput_nondata(queue_t *q, mblk_t *mp) /* * Refhold the conn, till the ioctl completes. This is * needed in case the ioctl ends up in the pending mp - * list. Every mp in the ipx_pending_mp list - * must have a refhold on the conn - * to resume processing. The refhold is released when - * the ioctl completes. (normally or abnormally) + * list. Every mp in the ipx_pending_mp list must have + * a refhold on the conn to resume processing. The + * refhold is released when the ioctl completes + * (whether normally or abnormally). An ioctlref is also + * placed on the conn to prevent TCP from removing the + * queue needed to send the ioctl reply back. * In all cases ip_ioctl_finish is called to finish - * the ioctl. + * the ioctl and release the refholds. 
*/ if (connp != NULL) { /* This is not a reentry */ CONN_INC_REF(connp); + CONN_INC_IOCTLREF(connp); } else { if (!(ipip->ipi_flags & IPI_MODOK)) { mi_copy_done(q, mp, EINVAL); diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 3d7b12f49e..6e7566bffa 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -899,14 +899,15 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp) mblk_t *prev; mblk_t *curr; mblk_t *next; - queue_t *q; + queue_t *rq, *wq; mblk_t *tmp_list = NULL; ASSERT(IAM_WRITER_ILL(ill)); if (connp != NULL) - q = CONNP_TO_WQ(connp); + wq = CONNP_TO_WQ(connp); else - q = ill->ill_wq; + wq = ill->ill_wq; + rq = RD(wq); ipsq = ill->ill_phyint->phyint_ipsq; /* @@ -922,7 +923,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp) for (prev = NULL, curr = ipsq->ipsq_xopq_mphead; curr != NULL; curr = next) { next = curr->b_next; - if (curr->b_queue == q || curr->b_queue == RD(q)) { + if (curr->b_queue == wq || curr->b_queue == rq) { /* Unlink the mblk from the pending mp list */ if (prev != NULL) { prev->b_next = curr->b_next; @@ -954,7 +955,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp) DTRACE_PROBE4(ipif__ioctl, char *, "ipsq_xopq_mp_cleanup", int, 0, ill_t *, NULL, ipif_t *, NULL); - ip_ioctl_finish(q, curr, ENXIO, connp != NULL ? + ip_ioctl_finish(wq, curr, ENXIO, connp != NULL ? CONN_CLOSE : NO_COPYOUT, NULL); } else { /* @@ -969,7 +970,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp) /* * This conn has started closing. Cleanup any pending ioctl from this conn. - * STREAMS ensures that there can be at most 1 ioctl pending on a stream. + * STREAMS ensures that there can be at most 1 active ioctl on a stream. */ void conn_ioctl_cleanup(conn_t *connp) @@ -979,13 +980,14 @@ conn_ioctl_cleanup(conn_t *connp) boolean_t refheld; /* - * Is any exclusive ioctl pending ? If so clean it up. 
If the - * ioctl has not yet started, the mp is pending in the list headed by - * ipsq_xopq_head. If the ioctl has started the mp could be present in - * ipx_pending_mp. If the ioctl timed out in the streamhead but - * is currently executing now the mp is not queued anywhere but - * conn_oper_pending_ill is null. The conn close will wait - * till the conn_ref drops to zero. + * Check for a queued ioctl. If the ioctl has not yet started, the mp + * is pending in the list headed by ipsq_xopq_head. If the ioctl has + * started the mp could be present in ipx_pending_mp. Note that if + * conn_oper_pending_ill is NULL, the ioctl may still be in flight and + * not yet queued anywhere. In this case, the conn close code will wait + * until the conn_ref is dropped. If the stream was a tcp stream, then + * tcp_close will wait first until all ioctls have completed for this + * conn. */ mutex_enter(&connp->conn_lock); ill = connp->conn_oper_pending_ill; diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h index c9b1e60753..b7f92b94f4 100644 --- a/usr/src/uts/common/inet/ipclassifier.h +++ b/usr/src/uts/common/inet/ipclassifier.h @@ -445,6 +445,7 @@ struct conn_s { /* Checksum to compensate for source routed packets. Host byte order */ uint32_t conn_sum; + uint32_t conn_ioctlref; /* ioctl ref count */ #ifdef CONN_DEBUG #define CONN_TRACE_MAX 10 int conn_trace_last; /* ndx of last used tracebuf */ diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index f4cba5c666..759c13b39c 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -3342,6 +3342,28 @@ tcp_close_common(conn_t *connp, int flags) TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); + /* + * Cleanup any queued ioctls here. This must be done before the wq/rq + * are re-written by tcp_close_output(). 
+ */ + if (conn_ioctl_cleanup_reqd) + conn_ioctl_cleanup(connp); + + /* + * As CONN_CLOSING is set, no further ioctls should be passed down to + * IP for this conn (see the guards in tcp_ioctl, tcp_wput_ioctl and + * tcp_wput_iocdata). If the ioctl was queued on an ipsq, + * conn_ioctl_cleanup should have found it and removed it. If the ioctl + * was still in flight at the time, we wait for it here. See comments + * for CONN_INC_IOCTLREF in ip.h for details. + */ + mutex_enter(&connp->conn_lock); + while (connp->conn_ioctlref > 0) + cv_wait(&connp->conn_cv, &connp->conn_lock); + ASSERT(connp->conn_ioctlref == 0); + ASSERT(connp->conn_oper_pending_ill == NULL); + mutex_exit(&connp->conn_lock); + SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_close_output, connp, NULL, tcp_squeue_flag, SQTAG_IP_TCP_CLOSE); @@ -3394,11 +3416,6 @@ tcp_close_common(conn_t *connp, int flags) } mutex_exit(&connp->conn_lock); } - /* - * ioctl cleanup. The mp is queued in the ipx_pending_mp. - */ - if (conn_ioctl_cleanup_reqd) - conn_ioctl_cleanup(connp); connp->conn_cpid = NOPID; } @@ -3857,7 +3874,6 @@ tcp_closei_local(tcp_t *tcp) connp->conn_state_flags |= CONN_CONDEMNED; mutex_exit(&connp->conn_lock); - /* Need to cleanup any pending ioctls */ ASSERT(tcp->tcp_time_wait_next == NULL); ASSERT(tcp->tcp_time_wait_prev == NULL); ASSERT(tcp->tcp_time_wait_expire == 0); @@ -15283,7 +15299,9 @@ tcp_wput(queue_t *q, mblk_t *mp) qreply(q, mp); return; } + CONN_INC_IOCTLREF(connp); ip_wput_nondata(q, mp); + CONN_DEC_IOCTLREF(connp); return; default: @@ -16679,7 +16697,26 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp) case TI_GETPEERNAME: break; default: + /* + * If the conn is closing, then error the ioctl here. Otherwise + * use the CONN_IOCTLREF_* macros to hold off tcp_close until + * we're done here. We also need to decrement the ioctlref which + * was bumped in either tcp_ioctl or tcp_wput_ioctl. 
+ */ + mutex_enter(&connp->conn_lock); + if (connp->conn_state_flags & CONN_CLOSING) { + mutex_exit(&connp->conn_lock); + iocp = (struct iocblk *)mp->b_rptr; + iocp->ioc_error = EINVAL; + mp->b_datap->db_type = M_IOCNAK; + iocp->ioc_count = 0; + qreply(q, mp); + return; + } + + CONN_INC_IOCTLREF_LOCKED(connp); ip_wput_nondata(q, mp); + CONN_DEC_IOCTLREF(connp); return; } switch (mi_copy_state(q, mp, &mp1)) { @@ -16817,7 +16854,24 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) qreply(q, mp); return; } + + /* + * If the conn is closing, then error the ioctl here. Otherwise bump the + * conn_ioctlref to hold off tcp_close until we're done here. + */ + mutex_enter(&(connp)->conn_lock); + if ((connp)->conn_state_flags & CONN_CLOSING) { + mutex_exit(&(connp)->conn_lock); + iocp->ioc_error = EINVAL; + mp->b_datap->db_type = M_IOCNAK; + iocp->ioc_count = 0; + qreply(q, mp); + return; + } + + CONN_INC_IOCTLREF_LOCKED(connp); ip_wput_nondata(q, mp); + CONN_DEC_IOCTLREF(connp); } /* @@ -21876,16 +21930,27 @@ tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, case TCP_IOC_ABORT_CONN: case TI_GETPEERNAME: case TI_GETMYNAME: - ip1dbg(("tcp_ioctl: cmd 0x%x on non sreams socket", + ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket", cmd)); error = EINVAL; break; default: /* - * Pass on to IP using helper stream + * If the conn is not closing, pass on to IP using + * helper stream. Bump the ioctlref to prevent tcp_close + * from closing the rq/wq out from underneath the ioctl + * if it ends up queued or aborted/interrupted. */ + mutex_enter(&connp->conn_lock); + if (connp->conn_state_flags & (CONN_CLOSING)) { + mutex_exit(&connp->conn_lock); + error = EINVAL; + break; + } + CONN_INC_IOCTLREF_LOCKED(connp); error = ldi_ioctl(connp->conn_helper_info->iphs_handle, cmd, arg, mode, cr, rvalp); + CONN_DEC_IOCTLREF(connp); break; } return (error); |