summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiddheshwar Mahesh <Siddheshwar.Mahesh@Sun.COM>2010-01-17 13:06:01 -0600
committerSiddheshwar Mahesh <Siddheshwar.Mahesh@Sun.COM>2010-01-17 13:06:01 -0600
commit9c86cdcda5fc91d89b1299e529bf22d7958dbc71 (patch)
tree3bf9f7fecb1d98f33c5bf1ba6454570559a1dac8
parent7e3e5701c73b753fb9dd17a0cbe0568b4cdda39e (diff)
downloadillumos-joyent-9c86cdcda5fc91d89b1299e529bf22d7958dbc71.tar.gz
6856642 NFS/RDMA client bad mutex panic at rpcib:rib_clnt_cm_handler
6875892 NFS/RDMA client panic: rib_rbuf_free 6915315 Wrong order of rw_enter() calls in rib_detach_hca() can cause deadlock
-rw-r--r--usr/src/uts/common/rpc/rpcib.c157
1 files changed, 99 insertions, 58 deletions
diff --git a/usr/src/uts/common/rpc/rpcib.c b/usr/src/uts/common/rpc/rpcib.c
index 31dfa13f18..c40a61603b 100644
--- a/usr/src/uts/common/rpc/rpcib.c
+++ b/usr/src/uts/common/rpc/rpcib.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -318,6 +318,8 @@ static int avl_compare(const void *t1, const void *t2);
static void rib_stop_services(rib_hca_t *);
static void rib_close_channels(rib_conn_list_t *);
static void rib_conn_close(void *);
+static void rib_recv_rele(rib_qp_t *);
+static rdma_stat rib_conn_release_locked(CONN *conn);
/*
* RPCIB addressing operations
@@ -392,7 +394,7 @@ static int rib_free_svc_recv(struct svc_recv *);
static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
static void rib_free_wid(struct recv_wid *);
static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
-static void rib_detach_hca(rib_hca_t *);
+static void rib_detach_hca(ibt_hca_hdl_t);
static void rib_close_a_channel(CONN *);
static void rib_send_hold(rib_qp_t *);
static void rib_send_rele(rib_qp_t *);
@@ -1194,6 +1196,7 @@ rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
qp = rwid->qp;
+
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
uint_t xid, vers, op, find_xid = 0;
@@ -1230,6 +1233,7 @@ rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)rwid->addr);
rib_free_wid(rwid);
+ rib_recv_rele(qp);
continue;
}
@@ -1300,6 +1304,7 @@ rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
(void *)(uintptr_t)rwid->addr);
}
rib_free_wid(rwid);
+ rib_recv_rele(qp);
}
}
@@ -1332,11 +1337,6 @@ rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
qp = s_recvp->qp;
conn = qptoc(qp);
- mutex_enter(&qp->posted_rbufs_lock);
- qp->n_posted_rbufs--;
- if (qp->n_posted_rbufs == 0)
- cv_signal(&qp->posted_rbufs_cv);
- mutex_exit(&qp->posted_rbufs_lock);
if (wc.wc_status == IBT_WC_SUCCESS) {
XDR inxdrs, *xdrs;
@@ -1361,6 +1361,7 @@ rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)s_recvp->vaddr);
XDR_DESTROY(xdrs);
+ rib_recv_rele(qp);
(void) rib_free_svc_recv(s_recvp);
continue;
}
@@ -1373,6 +1374,7 @@ rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
*/
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)s_recvp->vaddr);
+ rib_recv_rele(qp);
(void) rib_free_svc_recv(s_recvp);
continue;
}
@@ -1389,6 +1391,7 @@ rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
mutex_enter(&qp->rdlist_lock);
rdma_done_notify(qp, xid);
mutex_exit(&qp->rdlist_lock);
+ rib_recv_rele(qp);
(void) rib_free_svc_recv(s_recvp);
continue;
}
@@ -1433,6 +1436,7 @@ rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
rib_rbuf_free(conn, RECV_BUFFER,
(void *)(uintptr_t)s_recvp->vaddr);
}
+ rib_recv_rele(qp);
(void) rib_free_svc_recv(s_recvp);
}
}
@@ -1459,30 +1463,11 @@ rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
rib_attach_hca();
break;
case IBT_HCA_DETACH_EVENT:
- {
- rib_hca_t *hca;
-
- rw_enter(&rib_stat->hcas_list_lock, RW_READER);
- for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
- rw_enter(&hca->state_lock, RW_READER);
- if ((hca->state != HCA_DETACHED) &&
- (hca->hca_hdl == hca_hdl)) {
- rw_exit(&hca->state_lock);
- break;
- }
- rw_exit(&hca->state_lock);
- }
- rw_exit(&rib_stat->hcas_list_lock);
-
- if (hca == NULL)
- return;
- ASSERT(hca->hca_hdl == hca_hdl);
- rib_detach_hca(hca);
+ rib_detach_hca(hca_hdl);
#ifdef DEBUG
cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
#endif
break;
- }
case IBT_EVENT_PORT_UP:
/*
* A port is up. We should call rib_listen() since there is
@@ -2068,10 +2053,10 @@ rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
mutex_enter(&qp->send_rbufs_lock);
while (qp->n_send_rbufs)
cv_wait(&qp->send_rbufs_cv, &qp->send_rbufs_lock);
- mutex_exit(&qp->send_rbufs_lock);
+ mutex_exit(&qp->send_rbufs_lock);
(void) ibt_free_channel(qp->qp_hdl);
- qp->qp_hdl = NULL;
+ qp->qp_hdl = NULL;
}
ASSERT(qp->rdlist == NULL);
@@ -2167,6 +2152,16 @@ rib_send_rele(rib_qp_t *qp)
mutex_exit(&qp->send_rbufs_lock);
}
+void
+rib_recv_rele(rib_qp_t *qp)
+{
+ mutex_enter(&qp->posted_rbufs_lock);
+ qp->n_posted_rbufs--;
+ if (qp->n_posted_rbufs == 0)
+ cv_signal(&qp->posted_rbufs_cv);
+ mutex_exit(&qp->posted_rbufs_lock);
+}
+
/*
* Wait for send completion notification. Only on receiving a
* notification be it a successful or error completion, free the
@@ -2538,6 +2533,11 @@ rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
ret = RDMA_CONNLOST;
goto done;
}
+
+ mutex_enter(&qp->posted_rbufs_lock);
+ qp->n_posted_rbufs++;
+ mutex_exit(&qp->posted_rbufs_lock);
+
mutex_exit(&conn->c_lock);
return (RDMA_SUCCESS);
@@ -2610,7 +2610,6 @@ rib_svc_post(CONN* conn, struct clist *cl)
rdma_stat
rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
{
-
return (rib_clnt_post(conn, cl, msgid));
}
@@ -4261,13 +4260,11 @@ again:
cn->c_state == C_CONN_PEND)
;
if (cv_stat == 0) {
- cn->c_ref--;
- mutex_exit(&cn->c_lock);
+ (void) rib_conn_release_locked(cn);
return (RDMA_INTR);
}
if (cv_stat < 0) {
- cn->c_ref--;
- mutex_exit(&cn->c_lock);
+ (void) rib_conn_release_locked(cn);
return (RDMA_TIMEDOUT);
}
if (cn->c_state == C_CONNECTED) {
@@ -4275,8 +4272,7 @@ again:
mutex_exit(&cn->c_lock);
return (RDMA_SUCCESS);
} else {
- cn->c_ref--;
- mutex_exit(&cn->c_lock);
+ (void) rib_conn_release_locked(cn);
return (RDMA_TIMEDOUT);
}
}
@@ -4390,6 +4386,29 @@ rib_connect(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
(void) rib_add_connlist(cn, &hca->cl_conn_list);
status = rib_conn_to_srv(hca, qp, rpt);
mutex_enter(&cn->c_lock);
+
+ if (cn->c_flags & C_CLOSE_PENDING) {
+ /*
+ * This handles a case where the module or
+ * HCA detached in the time a connection is
+ * established. In such a case close the
+ * connection immediately if this is the
+ * only reference.
+ */
+ if (cn->c_ref == 1) {
+ cn->c_ref--;
+ cn->c_state = C_DISCONN_PEND;
+ mutex_exit(&cn->c_lock);
+ rib_conn_close((void *)cn);
+ return (RDMA_FAILED);
+ }
+
+ /*
+ * Connection to be closed later when c_ref = 0
+ */
+ status = RDMA_FAILED;
+ }
+
if (status == RDMA_SUCCESS) {
cn->c_state = C_CONNECTED;
*conn = cn;
@@ -4397,7 +4416,7 @@ rib_connect(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
cn->c_state = C_ERROR_CONN;
cn->c_ref--;
}
- cv_broadcast(&cn->c_cv);
+ cv_signal(&cn->c_cv);
mutex_exit(&cn->c_lock);
return (status);
}
@@ -4412,6 +4431,7 @@ rib_conn_close(void *rarg)
if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
+
/*
* Live connection in CONNECTED state.
*/
@@ -4424,7 +4444,6 @@ rib_conn_close(void *rarg)
mutex_enter(&conn->c_lock);
conn->c_flags &= ~C_CLOSE_PENDING;
- cv_signal(&conn->c_cv);
}
mutex_exit(&conn->c_lock);
@@ -4490,8 +4509,17 @@ rib_conn_timeout_call(void *carg)
static rdma_stat
rib_conn_release(CONN *conn)
{
-
mutex_enter(&conn->c_lock);
+ return (rib_conn_release_locked(conn));
+}
+
+/*
+ * Expects conn->c_lock to be held on entry.
+ * c_lock released on return
+ */
+static rdma_stat
+rib_conn_release_locked(CONN *conn)
+{
conn->c_ref--;
conn->c_last_used = gethrestime_sec();
@@ -4641,6 +4669,11 @@ rib_close_channels(rib_conn_list_t *connlist)
tmp = conn->c_next;
if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
+ if (conn->c_state == C_CONN_PEND) {
+ conn->c_flags |= C_CLOSE_PENDING;
+ goto next;
+ }
+
conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
/*
@@ -4657,6 +4690,7 @@ rib_close_channels(rib_conn_list_t *connlist)
/* Signal a pending rib_disconnect_channel() */
cv_signal(&conn->c_cv);
}
+next:
mutex_exit(&conn->c_lock);
conn = tmp;
}
@@ -4787,10 +4821,35 @@ rib_stop_hca_services(rib_hca_t *hca)
* Cleans and closes up all uses of the HCA
*/
static void
-rib_detach_hca(rib_hca_t *hca)
+rib_detach_hca(ibt_hca_hdl_t hca_hdl)
{
+ rib_hca_t *hca = NULL;
rib_hca_t **hcap;
+ rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
+ for (hcap = &rib_stat->hcas_list; *hcap; hcap = &(*hcap)->next) {
+ hca = *hcap;
+ rw_enter(&hca->state_lock, RW_WRITER);
+ if (hca->hca_hdl == hca_hdl) {
+ /*
+ * Mark as detached and remove from
+ * hca list.
+ */
+ hca->state = HCA_DETACHED;
+ *hcap = hca->next;
+ rib_stat->nhca_inited--;
+ rib_mod.rdma_count--;
+ rw_exit(&hca->state_lock);
+ break;
+ }
+ rw_exit(&hca->state_lock);
+ }
+ rw_exit(&rib_stat->hcas_list_lock);
+
+ if (hca == NULL)
+ return;
+ ASSERT(hca->hca_hdl == hca_hdl);
+
/*
* Stop all services on the HCA
* Go through cl_conn_list and close all rc_channels
@@ -4802,24 +4861,6 @@ rib_detach_hca(rib_hca_t *hca)
* Free the protection domain
* ibt_close_hca()
*/
- rw_enter(&hca->state_lock, RW_WRITER);
- if (hca->state == HCA_DETACHED) {
- rw_exit(&hca->state_lock);
- return;
- }
-
- hca->state = HCA_DETACHED;
- rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
- for (hcap = &rib_stat->hcas_list; *hcap && (*hcap != hca);
- hcap = &(*hcap)->next)
- ;
- ASSERT(*hcap == hca);
- *hcap = hca->next;
- rib_stat->nhca_inited--;
- rib_mod.rdma_count--;
- rw_exit(&rib_stat->hcas_list_lock);
- rw_exit(&hca->state_lock);
-
rib_stop_hca_services(hca);
kmem_free(hca, sizeof (*hca));