summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Wesolowski <keith.wesolowski@joyent.com> 2012-03-18 01:00:49 +0000
committer Keith M Wesolowski <keith.wesolowski@joyent.com> 2012-03-19 18:53:19 +0000
commit d27fed2832f8726d06a5611c7fbf5e0c061e458f (patch)
tree 0e5f2477f7aed6d8424fe9fda6292213eaedf6d2
parent 2e9c9a5042bf2e640d2716e2ffd54d2f2460e089 (diff)
download illumos-joyent-d27fed2832f8726d06a5611c7fbf5e0c061e458f.tar.gz
OS-1041 node process stuck in squeue mess
-rw-r--r-- usr/src/uts/common/inet/ip.h 10
-rw-r--r-- usr/src/uts/common/inet/ip/ip_attr.c 105
-rw-r--r-- usr/src/uts/common/inet/squeue.c 1
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 6
-rw-r--r-- usr/src/uts/common/inet/tcp_stack.h 3
5 files changed, 101 insertions, 24 deletions
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index 7fd66dab72..bd50364310 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -2196,6 +2196,8 @@ struct ip_xmit_attr_s {
*/
ixa_notify_t ixa_notify; /* Registered upcall notify function */
void *ixa_notify_cookie; /* ULP cookie for ixa_notify */
+
+ uint_t ixa_tcpcleanup; /* Used by conn_ixa_cleanup */
};
/*
@@ -2267,6 +2269,14 @@ struct ip_xmit_attr_s {
#define IXA_FREE_TSL 0x00000002 /* ixa_tsl needs to be rele */
/*
+ * Trivial state machine used to synchronize IXA cleanup for TCP connections.
+ * See conn_ixa_cleanup().
+ *
+ * The current state is kept in ixa_tcpcleanup and is read and written with
+ * tcps_ixa_cleanup_lock held:
+ *
+ *   IXATC_IDLE       -> IXATC_INPROGRESS  tcp_ixa_cleanup_getmblk(), once it
+ *                                         has taken the per-stack cleanup mblk
+ *   IXATC_INPROGRESS -> IXATC_COMPLETE    tcp_ixa_cleanup(), as it hands the
+ *                                         mblk back
+ *   IXATC_COMPLETE   -> IXATC_IDLE        tcp_ixa_cleanup_wait_and_finish()
+ */
+#define IXATC_IDLE 0x00000000
+#define IXATC_INPROGRESS 0x00000001
+#define IXATC_COMPLETE 0x00000002
+
+/*
* Simplistic way to set the ixa_xmit_hint for locally generated traffic
* and forwarded traffic. The shift amount are based on the size of the
* structs to discard the low order bits which don't have much if any variation
diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c
index 3197858f8e..c33a9b0efe 100644
--- a/usr/src/uts/common/inet/ip/ip_attr.c
+++ b/usr/src/uts/common/inet/ip/ip_attr.c
@@ -1176,6 +1176,59 @@ ixa_cleanup_stale(ip_xmit_attr_t *ixa)
}
}
+/*
+ * Claim the TCP stack's shared cleanup mblk (tcps_ixa_cleanup_mp) on behalf
+ * of connp and mark connp's IXA as IXATC_INPROGRESS.
+ *
+ * Blocks under tcps_ixa_cleanup_lock until both of the following hold:
+ *   - no cleanup of this connection is outstanding
+ *     (ixa_tcpcleanup == IXATC_IDLE), and
+ *   - the shared mblk is available (tcps_ixa_cleanup_mp != NULL).
+ *
+ * Returns the mblk with the lock dropped and tcps_ixa_cleanup_mp set to NULL;
+ * tcp_ixa_cleanup() stores an mblk back into tcps_ixa_cleanup_mp when it runs.
+ */
+static mblk_t *
+tcp_ixa_cleanup_getmblk(conn_t *connp)
+{
+ tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
+ int need_retry;
+ mblk_t *mp;
+
+ mutex_enter(&tcps->tcps_ixa_cleanup_lock);
+
+ /*
+ * Both conditions (this connection is IDLE, and the shared mblk is
+ * available) must hold at the same time, but every cv_wait() drops the
+ * lock. If we had to sleep waiting for the mblk, another thread may
+ * have started cleaning up this same connection in the meantime and
+ * may still be waiting for notification of that cleanup's completion.
+ * need_retry therefore forces another pass through the IDLE check
+ * whenever we slept for the mblk; the loop exits only after a pass in
+ * which the mblk was found available without sleeping.
+ */
+ do {
+ need_retry = 0;
+ while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
+ cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
+ &tcps->tcps_ixa_cleanup_lock);
+ }
+
+ while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
+ /*
+ * Multiple concurrent cleanups; need to have the last
+ * one run since it could be an unplumb.
+ */
+ need_retry = 1;
+ cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
+ &tcps->tcps_ixa_cleanup_lock);
+ }
+ } while (need_retry);
+
+ /*
+ * We now have the lock and the mblk; now make sure that no one else can
+ * try to clean up this connection or enqueue it for cleanup, clear the
+ * mblk pointer for this stack, drop the lock, and return the mblk.
+ */
+ ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
+ ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
+ ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
+ ASSERT(mp != NULL);
+
+ connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
+ tcps->tcps_ixa_cleanup_mp = NULL;
+ mutex_exit(&tcps->tcps_ixa_cleanup_lock);
+
+ return (mp);
+}
+
/*
* Used to run ixa_cleanup_stale inside the tcp squeue.
* When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
@@ -1195,11 +1248,39 @@ tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
mutex_enter(&tcps->tcps_ixa_cleanup_lock);
ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
+ connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
tcps->tcps_ixa_cleanup_mp = mp;
- cv_signal(&tcps->tcps_ixa_cleanup_cv);
+ cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
+ /*
+ * It is possible for any number of threads to be waiting for cleanup of
+ * different connections. Absent a per-connection (or per-IXA) CV, we
+ * need to wake them all up even though only one can be waiting on this
+ * particular cleanup.
+ */
+ cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}
+/*
+ * Second half of the handshake begun by tcp_ixa_cleanup_getmblk(): wait until
+ * tcp_ixa_cleanup() has marked connp's IXA IXATC_COMPLETE, then return it to
+ * IXATC_IDLE and wake every thread sleeping on tcps_ixa_cleanup_done_cv.
+ *
+ * The IXA must not be IXATC_IDLE on entry; all state is examined and changed
+ * with tcps_ixa_cleanup_lock held.
+ */
+static void
+tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
+{
+ tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
+
+ mutex_enter(&tcps->tcps_ixa_cleanup_lock);
+
+ ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);
+
+ /* tcp_ixa_cleanup() sets IXATC_COMPLETE and broadcasts on this CV. */
+ while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
+ cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
+ &tcps->tcps_ixa_cleanup_lock);
+ }
+
+ ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
+ connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
+ /*
+ * tcp_ixa_cleanup_getmblk() sleeps on this CV until the IXA is IDLE
+ * again, so announce the transition.
+ */
+ cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
+
+ mutex_exit(&tcps->tcps_ixa_cleanup_lock);
+}
/*
* ipcl_walk() function to help release any IRE, NCE, or DCEs that
@@ -1217,18 +1298,7 @@ conn_ixa_cleanup(conn_t *connp, void *arg)
tcp_stack_t *tcps;
tcps = connp->conn_netstack->netstack_tcp;
-
- mutex_enter(&tcps->tcps_ixa_cleanup_lock);
- while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
- /*
- * Multiple concurrent cleanups; need to have the last
- * one run since it could be an unplumb.
- */
- cv_wait(&tcps->tcps_ixa_cleanup_cv,
- &tcps->tcps_ixa_cleanup_lock);
- }
- tcps->tcps_ixa_cleanup_mp = NULL;
- mutex_exit(&tcps->tcps_ixa_cleanup_lock);
+ mp = tcp_ixa_cleanup_getmblk(connp);
if (connp->conn_sqp->sq_run == curthread) {
/* Already on squeue */
@@ -1237,15 +1307,8 @@ conn_ixa_cleanup(conn_t *connp, void *arg)
CONN_INC_REF(connp);
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
-
- /* Wait until tcp_ixa_cleanup has run */
- mutex_enter(&tcps->tcps_ixa_cleanup_lock);
- while (tcps->tcps_ixa_cleanup_mp == NULL) {
- cv_wait(&tcps->tcps_ixa_cleanup_cv,
- &tcps->tcps_ixa_cleanup_lock);
- }
- mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}
+ tcp_ixa_cleanup_wait_and_finish(connp);
} else if (IPCL_IS_SCTP(connp)) {
sctp_t *sctp;
sctp_faddr_t *fp;
diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c
index e2e6924c8f..2e08dc359b 100644
--- a/usr/src/uts/common/inet/squeue.c
+++ b/usr/src/uts/common/inet/squeue.c
@@ -555,6 +555,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
ASSERT(MUTEX_HELD(&sqp->sq_lock));
ASSERT(sqp->sq_first != NULL);
now = gethrtime();
+ sqp->sq_run = curthread;
sqp->sq_drain(sqp, SQS_ENTER, now + squeue_drain_ns);
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 1bb87e5c56..f79427e766 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -3792,7 +3792,8 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns)
ASSERT(error == 0);
tcps->tcps_ixa_cleanup_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL);
ASSERT(tcps->tcps_ixa_cleanup_mp != NULL);
- cv_init(&tcps->tcps_ixa_cleanup_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&tcps->tcps_ixa_cleanup_ready_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&tcps->tcps_ixa_cleanup_done_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&tcps->tcps_ixa_cleanup_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&tcps->tcps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -3857,7 +3858,8 @@ tcp_stack_fini(netstackid_t stackid, void *arg)
freeb(tcps->tcps_ixa_cleanup_mp);
tcps->tcps_ixa_cleanup_mp = NULL;
- cv_destroy(&tcps->tcps_ixa_cleanup_cv);
+ cv_destroy(&tcps->tcps_ixa_cleanup_ready_cv);
+ cv_destroy(&tcps->tcps_ixa_cleanup_done_cv);
mutex_destroy(&tcps->tcps_ixa_cleanup_lock);
/*
diff --git a/usr/src/uts/common/inet/tcp_stack.h b/usr/src/uts/common/inet/tcp_stack.h
index 2dccf6b78c..e46ebe08da 100644
--- a/usr/src/uts/common/inet/tcp_stack.h
+++ b/usr/src/uts/common/inet/tcp_stack.h
@@ -101,7 +101,8 @@ struct tcp_stack {
/* Used to synchronize access when reclaiming memory */
mblk_t *tcps_ixa_cleanup_mp;
kmutex_t tcps_ixa_cleanup_lock;
- kcondvar_t tcps_ixa_cleanup_cv;
+ kcondvar_t tcps_ixa_cleanup_ready_cv;
+ kcondvar_t tcps_ixa_cleanup_done_cv;
/* Variables for handling kmem reclaim call back. */
kmutex_t tcps_reclaim_lock;