summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/inet/ip.h            | 37
-rw-r--r--  usr/src/uts/common/inet/ip/ip.c         | 20
-rw-r--r--  usr/src/uts/common/inet/ip/ip_if.c      | 28
-rw-r--r--  usr/src/uts/common/inet/ipclassifier.h  |  1
-rw-r--r--  usr/src/uts/common/inet/tcp/tcp.c       | 81
5 files changed, 139 insertions(+), 28 deletions(-)
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 7baee80006..5557f941e2 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -232,6 +232,43 @@ typedef struct ipoptp_s
}
/*
+ * Ref counter macros for ioctls. This provides a guard for TCP to stop
+ * tcp_close from removing the rq/wq whilst an ioctl is still in flight on the
+ * stream. The ioctl could have been queued on e.g. an ipsq. tcp_close will wait
+ * until the ioctlref count is zero before proceeding.
+ * Ideally conn_oper_pending_ill would be used for this purpose. However, in the
+ * case where an ioctl is aborted or interrupted, it can be cleared prematurely.
+ * There are also some race possibilities between ip and the stream head which
+ * can also end up with conn_oper_pending_ill being cleared prematurely. So, to
+ * avoid these situations, we use a dedicated ref counter for ioctls which is
+ * used in addition to and in parallel with the normal conn_ref count.
+ */
+#define CONN_INC_IOCTLREF_LOCKED(connp) { \
+ ASSERT(MUTEX_HELD(&(connp)->conn_lock)); \
+ DTRACE_PROBE1(conn__inc__ioctlref, conn_t *, (connp)); \
+ (connp)->conn_ioctlref++; \
+ mutex_exit(&(connp)->conn_lock); \
+}
+
+#define CONN_INC_IOCTLREF(connp) { \
+ mutex_enter(&(connp)->conn_lock); \
+ CONN_INC_IOCTLREF_LOCKED(connp); \
+}
+
+#define CONN_DEC_IOCTLREF(connp) { \
+ mutex_enter(&(connp)->conn_lock); \
+ DTRACE_PROBE1(conn__dec__ioctlref, conn_t *, (connp)); \
+ /* Make sure conn_ioctlref will not underflow. */ \
+ ASSERT((connp)->conn_ioctlref != 0); \
+ if ((--(connp)->conn_ioctlref == 0) && \
+ ((connp)->conn_state_flags & CONN_CLOSING)) { \
+ cv_broadcast(&(connp)->conn_cv); \
+ } \
+ mutex_exit(&(connp)->conn_lock); \
+}
+
+
+/*
* Complete the pending operation. Usually an ioctl. Can also
* be a bind or option management request that got enqueued
* in an ipsq_t. Called on completion of the operation.
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index dc67cd5e2e..276711d737 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -12805,10 +12805,13 @@ ip_ioctl_finish(queue_t *q, mblk_t *mp, int err, int mode, ipsq_t *ipsq)
}
/*
- * The refhold placed at the start of the ioctl is released here.
+ * The conn refhold and ioctlref placed on the conn at the start of the
+ * ioctl are released here.
*/
- if (connp != NULL)
+ if (connp != NULL) {
+ CONN_DEC_IOCTLREF(connp);
CONN_OPER_PENDING_DONE(connp);
+ }
if (ipsq != NULL)
ipsq_current_finish(ipsq);
@@ -12895,16 +12898,19 @@ ip_wput_nondata(queue_t *q, mblk_t *mp)
/*
* Refhold the conn, till the ioctl completes. This is
* needed in case the ioctl ends up in the pending mp
- * list. Every mp in the ipx_pending_mp list
- * must have a refhold on the conn
- * to resume processing. The refhold is released when
- * the ioctl completes. (normally or abnormally)
+ * list. Every mp in the ipx_pending_mp list must have
+ * a refhold on the conn to resume processing. The
+ * refhold is released when the ioctl completes
+ * (whether normally or abnormally). An ioctlref is also
+ * placed on the conn to prevent TCP from removing the
+ * queue needed to send the ioctl reply back.
* In all cases ip_ioctl_finish is called to finish
- * the ioctl.
+ * the ioctl and release the refholds.
*/
if (connp != NULL) {
/* This is not a reentry */
CONN_INC_REF(connp);
+ CONN_INC_IOCTLREF(connp);
} else {
if (!(ipip->ipi_flags & IPI_MODOK)) {
mi_copy_done(q, mp, EINVAL);
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index 3d7b12f49e..6e7566bffa 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -899,14 +899,15 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp)
mblk_t *prev;
mblk_t *curr;
mblk_t *next;
- queue_t *q;
+ queue_t *rq, *wq;
mblk_t *tmp_list = NULL;
ASSERT(IAM_WRITER_ILL(ill));
if (connp != NULL)
- q = CONNP_TO_WQ(connp);
+ wq = CONNP_TO_WQ(connp);
else
- q = ill->ill_wq;
+ wq = ill->ill_wq;
+ rq = RD(wq);
ipsq = ill->ill_phyint->phyint_ipsq;
/*
@@ -922,7 +923,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp)
for (prev = NULL, curr = ipsq->ipsq_xopq_mphead; curr != NULL;
curr = next) {
next = curr->b_next;
- if (curr->b_queue == q || curr->b_queue == RD(q)) {
+ if (curr->b_queue == wq || curr->b_queue == rq) {
/* Unlink the mblk from the pending mp list */
if (prev != NULL) {
prev->b_next = curr->b_next;
@@ -954,7 +955,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp)
DTRACE_PROBE4(ipif__ioctl,
char *, "ipsq_xopq_mp_cleanup",
int, 0, ill_t *, NULL, ipif_t *, NULL);
- ip_ioctl_finish(q, curr, ENXIO, connp != NULL ?
+ ip_ioctl_finish(wq, curr, ENXIO, connp != NULL ?
CONN_CLOSE : NO_COPYOUT, NULL);
} else {
/*
@@ -969,7 +970,7 @@ ipsq_xopq_mp_cleanup(ill_t *ill, conn_t *connp)
/*
* This conn has started closing. Cleanup any pending ioctl from this conn.
- * STREAMS ensures that there can be at most 1 ioctl pending on a stream.
+ * STREAMS ensures that there can be at most 1 active ioctl on a stream.
*/
void
conn_ioctl_cleanup(conn_t *connp)
@@ -979,13 +980,14 @@ conn_ioctl_cleanup(conn_t *connp)
boolean_t refheld;
/*
- * Is any exclusive ioctl pending ? If so clean it up. If the
- * ioctl has not yet started, the mp is pending in the list headed by
- * ipsq_xopq_head. If the ioctl has started the mp could be present in
- * ipx_pending_mp. If the ioctl timed out in the streamhead but
- * is currently executing now the mp is not queued anywhere but
- * conn_oper_pending_ill is null. The conn close will wait
- * till the conn_ref drops to zero.
+ * Check for a queued ioctl. If the ioctl has not yet started, the mp
+ * is pending in the list headed by ipsq_xopq_head. If the ioctl has
+ * started the mp could be present in ipx_pending_mp. Note that if
+ * conn_oper_pending_ill is NULL, the ioctl may still be in flight and
+ * not yet queued anywhere. In this case, the conn close code will wait
+ * until the conn_ref is dropped. If the stream was a tcp stream, then
+ * tcp_close will wait first until all ioctls have completed for this
+ * conn.
*/
mutex_enter(&connp->conn_lock);
ill = connp->conn_oper_pending_ill;
diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h
index c9b1e60753..b7f92b94f4 100644
--- a/usr/src/uts/common/inet/ipclassifier.h
+++ b/usr/src/uts/common/inet/ipclassifier.h
@@ -445,6 +445,7 @@ struct conn_s {
/* Checksum to compensate for source routed packets. Host byte order */
uint32_t conn_sum;
+ uint32_t conn_ioctlref; /* ioctl ref count */
#ifdef CONN_DEBUG
#define CONN_TRACE_MAX 10
int conn_trace_last; /* ndx of last used tracebuf */
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index f4cba5c666..759c13b39c 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -3342,6 +3342,28 @@ tcp_close_common(conn_t *connp, int flags)
TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
+ /*
+ * Cleanup any queued ioctls here. This must be done before the wq/rq
+ * are re-written by tcp_close_output().
+ */
+ if (conn_ioctl_cleanup_reqd)
+ conn_ioctl_cleanup(connp);
+
+ /*
+ * As CONN_CLOSING is set, no further ioctls should be passed down to
+ * IP for this conn (see the guards in tcp_ioctl, tcp_wput_ioctl and
+ * tcp_wput_iocdata). If the ioctl was queued on an ipsq,
+ * conn_ioctl_cleanup should have found it and removed it. If the ioctl
+ * was still in flight at the time, we wait for it here. See comments
+ * for CONN_INC_IOCTLREF in ip.h for details.
+ */
+ mutex_enter(&connp->conn_lock);
+ while (connp->conn_ioctlref > 0)
+ cv_wait(&connp->conn_cv, &connp->conn_lock);
+ ASSERT(connp->conn_ioctlref == 0);
+ ASSERT(connp->conn_oper_pending_ill == NULL);
+ mutex_exit(&connp->conn_lock);
+
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_close_output, connp,
NULL, tcp_squeue_flag, SQTAG_IP_TCP_CLOSE);
@@ -3394,11 +3416,6 @@ tcp_close_common(conn_t *connp, int flags)
}
mutex_exit(&connp->conn_lock);
}
- /*
- * ioctl cleanup. The mp is queued in the ipx_pending_mp.
- */
- if (conn_ioctl_cleanup_reqd)
- conn_ioctl_cleanup(connp);
connp->conn_cpid = NOPID;
}
@@ -3857,7 +3874,6 @@ tcp_closei_local(tcp_t *tcp)
connp->conn_state_flags |= CONN_CONDEMNED;
mutex_exit(&connp->conn_lock);
- /* Need to cleanup any pending ioctls */
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
ASSERT(tcp->tcp_time_wait_expire == 0);
@@ -15283,7 +15299,9 @@ tcp_wput(queue_t *q, mblk_t *mp)
qreply(q, mp);
return;
}
+ CONN_INC_IOCTLREF(connp);
ip_wput_nondata(q, mp);
+ CONN_DEC_IOCTLREF(connp);
return;
default:
@@ -16679,7 +16697,26 @@ tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp)
case TI_GETPEERNAME:
break;
default:
+ /*
+ * If the conn is closing, then error the ioctl here. Otherwise
+ * use the CONN_IOCTLREF_* macros to hold off tcp_close until
+ * we're done here. We also need to decrement the ioctlref which
+ * was bumped in either tcp_ioctl or tcp_wput_ioctl.
+ */
+ mutex_enter(&connp->conn_lock);
+ if (connp->conn_state_flags & CONN_CLOSING) {
+ mutex_exit(&connp->conn_lock);
+ iocp = (struct iocblk *)mp->b_rptr;
+ iocp->ioc_error = EINVAL;
+ mp->b_datap->db_type = M_IOCNAK;
+ iocp->ioc_count = 0;
+ qreply(q, mp);
+ return;
+ }
+
+ CONN_INC_IOCTLREF_LOCKED(connp);
ip_wput_nondata(q, mp);
+ CONN_DEC_IOCTLREF(connp);
return;
}
switch (mi_copy_state(q, mp, &mp1)) {
@@ -16817,7 +16854,24 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
qreply(q, mp);
return;
}
+
+ /*
+ * If the conn is closing, then error the ioctl here. Otherwise bump the
+ * conn_ioctlref to hold off tcp_close until we're done here.
+ */
+ mutex_enter(&(connp)->conn_lock);
+ if ((connp)->conn_state_flags & CONN_CLOSING) {
+ mutex_exit(&(connp)->conn_lock);
+ iocp->ioc_error = EINVAL;
+ mp->b_datap->db_type = M_IOCNAK;
+ iocp->ioc_count = 0;
+ qreply(q, mp);
+ return;
+ }
+
+ CONN_INC_IOCTLREF_LOCKED(connp);
ip_wput_nondata(q, mp);
+ CONN_DEC_IOCTLREF(connp);
}
/*
@@ -21876,16 +21930,27 @@ tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
case TCP_IOC_ABORT_CONN:
case TI_GETPEERNAME:
case TI_GETMYNAME:
- ip1dbg(("tcp_ioctl: cmd 0x%x on non sreams socket",
+ ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
cmd));
error = EINVAL;
break;
default:
/*
- * Pass on to IP using helper stream
+ * If the conn is not closing, pass on to IP using
+ * helper stream. Bump the ioctlref to prevent tcp_close
+ * from closing the rq/wq out from underneath the ioctl
+ * if it ends up queued or aborted/interrupted.
*/
+ mutex_enter(&connp->conn_lock);
+ if (connp->conn_state_flags & (CONN_CLOSING)) {
+ mutex_exit(&connp->conn_lock);
+ error = EINVAL;
+ break;
+ }
+ CONN_INC_IOCTLREF_LOCKED(connp);
error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
cmd, arg, mode, cr, rvalp);
+ CONN_DEC_IOCTLREF(connp);
break;
}
return (error);