summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorethindra <none@none>2007-08-20 17:13:57 -0700
committerethindra <none@none>2007-08-20 17:13:57 -0700
commit1f8aaf0dbb5709114b4d2925d6b4ce5f931819ed (patch)
tree993d9421b41a02789ed4656e62bbaf7a1bfaaf2a
parent68468c977801055e2f883d12181ff8a79a26a811 (diff)
downloadillumos-gate-1f8aaf0dbb5709114b4d2925d6b4ce5f931819ed.tar.gz
6489144 mi_rx_lock shouldn't be held across callbacks
-rw-r--r--usr/src/uts/common/io/aggr/aggr_grp.c6
-rw-r--r--usr/src/uts/common/io/aggr/aggr_port.c3
-rw-r--r--usr/src/uts/common/io/dls/dls_link.c9
-rw-r--r--usr/src/uts/common/io/mac/mac.c176
-rw-r--r--usr/src/uts/common/sys/mac.h4
-rw-r--r--usr/src/uts/common/sys/mac_impl.h6
-rw-r--r--usr/src/uts/sun4v/io/vsw.c4
7 files changed, 182 insertions, 26 deletions
diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c
index 8d0758fed3..6cd5e1b8ae 100644
--- a/usr/src/uts/common/io/aggr/aggr_grp.c
+++ b/usr/src/uts/common/io/aggr/aggr_grp.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -278,7 +278,7 @@ aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
return (B_FALSE);
- mac_rx_remove(port->lp_mh, port->lp_mrh);
+ mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE);
port->lp_state = AGGR_PORT_STATE_STANDBY;
aggr_grp_multicst_port(port, B_FALSE);
@@ -890,7 +890,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) {
stat = i + MACTYPE_STAT_MIN;
if (!ETHER_STAT_ISACOUNTER(stat))
- continue;
+ continue;
val = aggr_port_stat(port, stat);
val -= port->lp_ether_stat[i];
grp->lg_ether_stat[i] += val;
diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c
index 3d2fb3c0de..93d06e3e77 100644
--- a/usr/src/uts/common/io/aggr/aggr_port.c
+++ b/usr/src/uts/common/io/aggr/aggr_port.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -192,6 +192,7 @@ aggr_port_create(const char *name, aggr_port_t **pp)
void
aggr_port_delete(aggr_port_t *port)
{
+ mac_rx_remove_wait(port->lp_mh);
mac_resource_set(port->lp_mh, NULL, NULL);
mac_notify_remove(port->lp_mh, port->lp_mnh);
mac_active_clear(port->lp_mh);
diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c
index 8ecdcdd7f5..ffa17acc31 100644
--- a/usr/src/uts/common/io/dls/dls_link.c
+++ b/usr/src/uts/common/io/dls/dls_link.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -918,6 +918,7 @@ dls_mac_rele(dls_link_t *dlp)
ASSERT(dlp->dl_mh != NULL);
if (--dlp->dl_macref == 0) {
+ mac_rx_remove_wait(dlp->dl_mh);
mac_close(dlp->dl_mh);
dlp->dl_mh = NULL;
dlp->dl_mip = NULL;
@@ -999,7 +1000,7 @@ dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
/* Replace the existing receive function if there is one. */
if (dlp->dl_mrh != NULL)
- mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
+ mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE);
dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
mutex_exit(&dlp->dl_lock);
}
@@ -1075,7 +1076,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
*/
if (dlp->dl_impl_count == 0) {
rw_exit(&dlp->dl_impl_lock);
- mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
+ mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE);
dlp->dl_mrh = NULL;
} else {
boolean_t promisc = B_FALSE;
@@ -1097,7 +1098,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
else
rx = i_dls_link_rx;
- mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
+ mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE);
dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
}
mutex_exit(&dlp->dl_lock);
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 3604e02c64..26e72074f7 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -44,6 +44,7 @@
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/atomic.h>
+#include <sys/sdt.h>
#define IMPL_HASHSZ 67 /* prime */
@@ -86,6 +87,8 @@ i_mac_constructor(void *buf, void *arg, int kmflag)
mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&mip->mi_notify_ref_lock, NULL, MUTEX_DRIVER, NULL);
cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL);
+ mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL);
return (0);
}
@@ -115,6 +118,8 @@ i_mac_destructor(void *buf, void *arg)
mutex_destroy(&mip->mi_activelink_lock);
mutex_destroy(&mip->mi_notify_ref_lock);
cv_destroy(&mip->mi_notify_cv);
+ mutex_destroy(&mip->mi_lock);
+ cv_destroy(&mip->mi_rx_cv);
}
static void
@@ -943,6 +948,11 @@ mac_notify(mac_handle_t mh)
i_mac_notify(mip, type);
}
+/*
+ * Register a receive function for this mac.
+ * More information on this function's interaction with mac_rx()
+ * can be found atop mac_rx().
+ */
mac_rx_handle_t
mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
{
@@ -957,7 +967,19 @@ mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
* Add it to the head of the 'rx' callback list.
*/
rw_enter(&(mip->mi_rx_lock), RW_WRITER);
+
+ /*
+ * mac_rx() will only call callbacks that are marked inuse.
+ */
+ mrfp->mrf_inuse = B_TRUE;
mrfp->mrf_nextp = mip->mi_mrfp;
+
+ /*
+ * mac_rx() could be traversing the remainder of the list
+ * and miss the new callback we're adding here. This is not a problem
+ * because we do not guarantee the callback to take effect immediately
+ * after mac_rx_add() returns.
+ */
mip->mi_mrfp = mrfp;
rw_exit(&(mip->mi_rx_lock));
@@ -965,11 +987,14 @@ mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
}
/*
- * Unregister a receive function for this mac. This removes the function
- * from the list of receive functions for this mac.
+ * Unregister a receive function for this mac.
+ * This function does not block if wait is B_FALSE. This is useful
+ * for clients who call mac_rx_remove() from a non-blockable context.
+ * More information on this function's interaction with mac_rx()
+ * can be found atop mac_rx().
*/
void
-mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh)
+mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait)
{
mac_impl_t *mip = (mac_impl_t *)mh;
mac_rx_fn_t *mrfp = (mac_rx_fn_t *)mrh;
@@ -979,17 +1004,55 @@ mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh)
/*
* Search the 'rx' callback list for the function closure.
*/
- rw_enter(&(mip->mi_rx_lock), RW_WRITER);
+ rw_enter(&mip->mi_rx_lock, RW_WRITER);
for (pp = &(mip->mi_mrfp); (p = *pp) != NULL; pp = &(p->mrf_nextp)) {
if (p == mrfp)
break;
}
ASSERT(p != NULL);
+ /*
+ * If mac_rx() is running, mark callback for deletion
+ * and return (if wait is false), or wait until mac_rx()
+ * exits (if wait is true).
+ */
+ if (mip->mi_rx_ref > 0) {
+ DTRACE_PROBE1(defer_delete, mac_impl_t *, mip);
+ p->mrf_inuse = B_FALSE;
+ mutex_enter(&mip->mi_lock);
+ mip->mi_rx_removed++;
+ mutex_exit(&mip->mi_lock);
+
+ rw_exit(&mip->mi_rx_lock);
+ if (wait)
+ mac_rx_remove_wait(mh);
+ return;
+ }
+
/* Remove it from the list. */
*pp = p->mrf_nextp;
kmem_free(mrfp, sizeof (mac_rx_fn_t));
- rw_exit(&(mip->mi_rx_lock));
+ rw_exit(&mip->mi_rx_lock);
+}
+
+/*
+ * Wait for all pending callback removals to be completed by mac_rx().
+ * Note that if we call mac_rx_remove() immediately before this, there is no
+ * guarantee we would wait *only* on the callback that we specified.
+ * mac_rx_remove() could have been called by other threads and we would have
+ * to wait for other marked callbacks to be removed as well.
+ */
+void
+mac_rx_remove_wait(mac_handle_t mh)
+{
+ mac_impl_t *mip = (mac_impl_t *)mh;
+
+ mutex_enter(&mip->mi_lock);
+ while (mip->mi_rx_removed > 0) {
+ DTRACE_PROBE1(need_wait, mac_impl_t *, mip);
+ cv_wait(&mip->mi_rx_cv, &mip->mi_lock);
+ }
+ mutex_exit(&mip->mi_lock);
}
mac_txloop_handle_t
@@ -1385,36 +1448,119 @@ mac_unregister(mac_handle_t mh)
return (0);
}
+/*
+ * To avoid potential deadlocks, mac_rx() releases mi_rx_lock
+ * before invoking its list of upcalls. This introduces races with
+ * mac_rx_remove() and mac_rx_add(), who can potentially modify the
+ * upcall list while mi_rx_lock is not being held. The race with
+ * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering
+ * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove()
+ * to not modify the list but instead mark an upcall for deletion.
+ * before mac_rx() exits, mi_rx_ref is decremented and if it
+ * is 0, the marked upcalls will be removed from the list and freed.
+ * The race with mac_rx_add() is harmless because mac_rx_add() only
+ * prepends to the list and since mac_rx() saves the list head
+ * before releasing mi_rx_lock, any prepended upcall won't be seen
+ * until the next packet chain arrives.
+ *
+ * To minimize lock contention between multiple parallel invocations
+ * of mac_rx(), mi_rx_lock is acquired as a READER lock. The
+ * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock
+ * will be upgraded to WRITER mode when there are marked upcalls to be
+ * cleaned.
+ */
void
-mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *bp)
+mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
mac_impl_t *mip = (mac_impl_t *)mh;
+ mblk_t *bp = mp_chain;
mac_rx_fn_t *mrfp;
/*
* Call all registered receive functions.
*/
rw_enter(&mip->mi_rx_lock, RW_READER);
- mrfp = mip->mi_mrfp;
- if (mrfp == NULL) {
+ if ((mrfp = mip->mi_mrfp) == NULL) {
/* There are no registered receive functions. */
freemsgchain(bp);
rw_exit(&mip->mi_rx_lock);
return;
}
+ atomic_inc_32(&mip->mi_rx_ref);
+ rw_exit(&mip->mi_rx_lock);
+
+ /*
+ * Call registered receive functions.
+ */
do {
mblk_t *recv_bp;
- if (mrfp->mrf_nextp != NULL) {
- /* XXX Do we bump a counter if copymsgchain() fails? */
- recv_bp = copymsgchain(bp);
- } else {
- recv_bp = bp;
+ recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp;
+ if (recv_bp != NULL) {
+ if (mrfp->mrf_inuse) {
+ /*
+ * Send bp itself and keep the copy.
+ * If there's only one active receiver,
+ * it should get the original message,
+ * tagged with the hardware checksum flags.
+ */
+ mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp);
+ bp = recv_bp;
+ } else {
+ freemsgchain(recv_bp);
+ }
}
- if (recv_bp != NULL)
- mrfp->mrf_fn(mrfp->mrf_arg, mrh, recv_bp);
mrfp = mrfp->mrf_nextp;
} while (mrfp != NULL);
+
+ rw_enter(&mip->mi_rx_lock, RW_READER);
+ if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) {
+ mac_rx_fn_t **pp, *p;
+ uint32_t cnt = 0;
+
+ DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip);
+
+ /*
+ * Need to become exclusive before doing cleanup
+ */
+ if (rw_tryupgrade(&mip->mi_rx_lock) == 0) {
+ rw_exit(&mip->mi_rx_lock);
+ rw_enter(&mip->mi_rx_lock, RW_WRITER);
+ }
+
+ /*
+ * We return if another thread has already entered and cleaned
+ * up the list.
+ */
+ if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) {
+ rw_exit(&mip->mi_rx_lock);
+ return;
+ }
+
+ /*
+ * Free removed callbacks.
+ */
+ pp = &mip->mi_mrfp;
+ while (*pp != NULL) {
+ if (!(*pp)->mrf_inuse) {
+ p = *pp;
+ *pp = (*pp)->mrf_nextp;
+ kmem_free(p, sizeof (*p));
+ cnt++;
+ continue;
+ }
+ pp = &(*pp)->mrf_nextp;
+ }
+
+ /*
+ * Wake up mac_rx_remove_wait()
+ */
+ mutex_enter(&mip->mi_lock);
+ ASSERT(mip->mi_rx_removed == cnt);
+ mip->mi_rx_removed = 0;
+ cv_broadcast(&mip->mi_rx_cv);
+ mutex_exit(&mip->mi_lock);
+ }
rw_exit(&mip->mi_rx_lock);
}
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index fee2278fdf..09e4c47f86 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -520,7 +520,9 @@ extern void mac_notify_remove(mac_handle_t,
mac_notify_handle_t);
extern void mac_notify(mac_handle_t);
extern mac_rx_handle_t mac_rx_add(mac_handle_t, mac_rx_t, void *);
-extern void mac_rx_remove(mac_handle_t, mac_rx_handle_t);
+extern void mac_rx_remove(mac_handle_t, mac_rx_handle_t,
+ boolean_t);
+extern void mac_rx_remove_wait(mac_handle_t);
extern mblk_t *mac_txloop(void *, mblk_t *);
extern mac_txloop_handle_t mac_txloop_add(mac_handle_t, mac_txloop_t,
void *);
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index d1f4d6bbe6..7009c69973 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -66,6 +66,7 @@ struct mac_rx_fn_s {
mac_rx_fn_t *mrf_nextp;
mac_rx_t mrf_fn;
void *mrf_arg;
+ boolean_t mrf_inuse;
};
typedef struct mac_txloop_fn_s mac_txloop_fn_t;
@@ -132,6 +133,11 @@ typedef struct mac_impl_s {
boolean_t mi_activelink;
mac_txinfo_t mi_txinfo;
mac_txinfo_t mi_txloopinfo;
+ uint32_t mi_rx_ref; /* #threads in mac_rx() */
+ uint32_t mi_rx_removed; /* #callbacks marked */
+ /* for removal */
+ kmutex_t mi_lock;
+ kcondvar_t mi_rx_cv;
} mac_impl_t;
#define mi_getstat mi_callbacks->mc_getstat
diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c
index 60a02e6755..43fff001a1 100644
--- a/usr/src/uts/sun4v/io/vsw.c
+++ b/usr/src/uts/sun4v/io/vsw.c
@@ -732,7 +732,7 @@ vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
/* remove mac layer callback */
mutex_enter(&vswp->mac_lock);
if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
- mac_rx_remove(vswp->mh, vswp->mrh);
+ mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
vswp->mrh = NULL;
}
mutex_exit(&vswp->mac_lock);
@@ -1297,7 +1297,7 @@ vsw_mac_detach(vsw_t *vswp)
if (vswp->mstarted)
mac_stop(vswp->mh);
if (vswp->mrh != NULL)
- mac_rx_remove(vswp->mh, vswp->mrh);
+ mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
if (vswp->mresources)
mac_resource_set(vswp->mh, NULL, NULL);
mac_close(vswp->mh);