diff options
author | ethindra <none@none> | 2007-08-20 17:13:57 -0700 |
---|---|---|
committer | ethindra <none@none> | 2007-08-20 17:13:57 -0700 |
commit | 1f8aaf0dbb5709114b4d2925d6b4ce5f931819ed (patch) | |
tree | 993d9421b41a02789ed4656e62bbaf7a1bfaaf2a | |
parent | 68468c977801055e2f883d12181ff8a79a26a811 (diff) | |
download | illumos-gate-1f8aaf0dbb5709114b4d2925d6b4ce5f931819ed.tar.gz |
6489144 mi_rx_lock shouldn't be held across callbacks
-rw-r--r-- | usr/src/uts/common/io/aggr/aggr_grp.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/io/aggr/aggr_port.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/io/dls/dls_link.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/io/mac/mac.c | 176 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mac.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mac_impl.h | 6 | ||||
-rw-r--r-- | usr/src/uts/sun4v/io/vsw.c | 4 |
7 files changed, 182 insertions, 26 deletions
diff --git a/usr/src/uts/common/io/aggr/aggr_grp.c b/usr/src/uts/common/io/aggr/aggr_grp.c index 8d0758fed3..6cd5e1b8ae 100644 --- a/usr/src/uts/common/io/aggr/aggr_grp.c +++ b/usr/src/uts/common/io/aggr/aggr_grp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -278,7 +278,7 @@ aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) if (port->lp_state != AGGR_PORT_STATE_ATTACHED) return (B_FALSE); - mac_rx_remove(port->lp_mh, port->lp_mrh); + mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE); port->lp_state = AGGR_PORT_STATE_STANDBY; aggr_grp_multicst_port(port, B_FALSE); @@ -890,7 +890,7 @@ aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) { stat = i + MACTYPE_STAT_MIN; if (!ETHER_STAT_ISACOUNTER(stat)) - continue; + continue; val = aggr_port_stat(port, stat); val -= port->lp_ether_stat[i]; grp->lg_ether_stat[i] += val; diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c index 3d2fb3c0de..93d06e3e77 100644 --- a/usr/src/uts/common/io/aggr/aggr_port.c +++ b/usr/src/uts/common/io/aggr/aggr_port.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -192,6 +192,7 @@ aggr_port_create(const char *name, aggr_port_t **pp) void aggr_port_delete(aggr_port_t *port) { + mac_rx_remove_wait(port->lp_mh); mac_resource_set(port->lp_mh, NULL, NULL); mac_notify_remove(port->lp_mh, port->lp_mnh); mac_active_clear(port->lp_mh); diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c index 8ecdcdd7f5..ffa17acc31 100644 --- a/usr/src/uts/common/io/dls/dls_link.c +++ b/usr/src/uts/common/io/dls/dls_link.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -918,6 +918,7 @@ dls_mac_rele(dls_link_t *dlp) ASSERT(dlp->dl_mh != NULL); if (--dlp->dl_macref == 0) { + mac_rx_remove_wait(dlp->dl_mh); mac_close(dlp->dl_mh); dlp->dl_mh = NULL; dlp->dl_mip = NULL; @@ -999,7 +1000,7 @@ dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip) /* Replace the existing receive function if there is one. */ if (dlp->dl_mrh != NULL) - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp); mutex_exit(&dlp->dl_lock); } @@ -1075,7 +1076,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip) */ if (dlp->dl_impl_count == 0) { rw_exit(&dlp->dl_impl_lock); - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); dlp->dl_mrh = NULL; } else { boolean_t promisc = B_FALSE; @@ -1097,7 +1098,7 @@ dls_link_remove(dls_link_t *dlp, dls_impl_t *dip) else rx = i_dls_link_rx; - mac_rx_remove(dlp->dl_mh, dlp->dl_mrh); + mac_rx_remove(dlp->dl_mh, dlp->dl_mrh, B_FALSE); dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp); } mutex_exit(&dlp->dl_lock); diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c index 3604e02c64..26e72074f7 100644 --- a/usr/src/uts/common/io/mac/mac.c +++ b/usr/src/uts/common/io/mac/mac.c @@ -44,6 +44,7 @@ #include <sys/modctl.h> #include <sys/fs/dv_node.h> #include <sys/atomic.h> +#include <sys/sdt.h> #define IMPL_HASHSZ 67 /* prime */ @@ -86,6 +87,8 @@ i_mac_constructor(void *buf, void *arg, int kmflag) mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&mip->mi_notify_ref_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL); + mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL); return (0); } @@ -115,6 +118,8 @@ i_mac_destructor(void *buf, void *arg) mutex_destroy(&mip->mi_activelink_lock); mutex_destroy(&mip->mi_notify_ref_lock); cv_destroy(&mip->mi_notify_cv); + mutex_destroy(&mip->mi_lock); + cv_destroy(&mip->mi_rx_cv); } static void @@ -943,6 +948,11 @@ mac_notify(mac_handle_t mh) i_mac_notify(mip, type); } +/* + * Register a receive function for this mac. + * More information on this function's interaction with mac_rx() + * can be found atop mac_rx(). + */ mac_rx_handle_t mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg) { @@ -957,7 +967,19 @@ mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg) * Add it to the head of the 'rx' callback list. */ rw_enter(&(mip->mi_rx_lock), RW_WRITER); + + /* + * mac_rx() will only call callbacks that are marked inuse. + */ + mrfp->mrf_inuse = B_TRUE; mrfp->mrf_nextp = mip->mi_mrfp; + + /* + * mac_rx() could be traversing the remainder of the list + * and miss the new callback we're adding here. This is not a problem + * because we do not guarantee the callback to take effect immediately + * after mac_rx_add() returns. + */ mip->mi_mrfp = mrfp; rw_exit(&(mip->mi_rx_lock)); @@ -965,11 +987,14 @@ mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg) } /* - * Unregister a receive function for this mac. This removes the function - * from the list of receive functions for this mac. + * Unregister a receive function for this mac. + * This function does not block if wait is B_FALSE. This is useful + * for clients who call mac_rx_remove() from a non-blockable context. + * More information on this function's interaction with mac_rx() + * can be found atop mac_rx(). */ void -mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh) +mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait) { mac_impl_t *mip = (mac_impl_t *)mh; mac_rx_fn_t *mrfp = (mac_rx_fn_t *)mrh; @@ -979,17 +1004,55 @@ mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh) /* * Search the 'rx' callback list for the function closure. */ - rw_enter(&(mip->mi_rx_lock), RW_WRITER); + rw_enter(&mip->mi_rx_lock, RW_WRITER); for (pp = &(mip->mi_mrfp); (p = *pp) != NULL; pp = &(p->mrf_nextp)) { if (p == mrfp) break; } ASSERT(p != NULL); + /* + * If mac_rx() is running, mark callback for deletion + * and return (if wait is false), or wait until mac_rx() + * exits (if wait is true). + */ + if (mip->mi_rx_ref > 0) { + DTRACE_PROBE1(defer_delete, mac_impl_t *, mip); + p->mrf_inuse = B_FALSE; + mutex_enter(&mip->mi_lock); + mip->mi_rx_removed++; + mutex_exit(&mip->mi_lock); + + rw_exit(&mip->mi_rx_lock); + if (wait) + mac_rx_remove_wait(mh); + return; + } + /* Remove it from the list. */ *pp = p->mrf_nextp; kmem_free(mrfp, sizeof (mac_rx_fn_t)); - rw_exit(&(mip->mi_rx_lock)); + rw_exit(&mip->mi_rx_lock); +} + +/* + * Wait for all pending callback removals to be completed by mac_rx(). + * Note that if we call mac_rx_remove() immediately before this, there is no + * guarantee we would wait *only* on the callback that we specified. + * mac_rx_remove() could have been called by other threads and we would have + * to wait for other marked callbacks to be removed as well. + */ +void +mac_rx_remove_wait(mac_handle_t mh) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + mutex_enter(&mip->mi_lock); + while (mip->mi_rx_removed > 0) { + DTRACE_PROBE1(need_wait, mac_impl_t *, mip); + cv_wait(&mip->mi_rx_cv, &mip->mi_lock); + } + mutex_exit(&mip->mi_lock); } mac_txloop_handle_t @@ -1385,36 +1448,119 @@ mac_unregister(mac_handle_t mh) return (0); } +/* + * To avoid potential deadlocks, mac_rx() releases mi_rx_lock + * before invoking its list of upcalls. This introduces races with + * mac_rx_remove() and mac_rx_add(), who can potentially modify the + * upcall list while mi_rx_lock is not being held. The race with + * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering + * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove() + * to not modify the list but instead mark an upcall for deletion. + * before mac_rx() exits, mi_rx_ref is decremented and if it + * is 0, the marked upcalls will be removed from the list and freed. + * The race with mac_rx_add() is harmless because mac_rx_add() only + * prepends to the list and since mac_rx() saves the list head + * before releasing mi_rx_lock, any prepended upcall won't be seen + * until the next packet chain arrives. + * + * To minimize lock contention between multiple parallel invocations + * of mac_rx(), mi_rx_lock is acquired as a READER lock. The + * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock + * will be upgraded to WRITER mode when there are marked upcalls to be + * cleaned. + */ void -mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *bp) +mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) { mac_impl_t *mip = (mac_impl_t *)mh; + mblk_t *bp = mp_chain; mac_rx_fn_t *mrfp; /* * Call all registered receive functions. */ rw_enter(&mip->mi_rx_lock, RW_READER); - mrfp = mip->mi_mrfp; - if (mrfp == NULL) { + if ((mrfp = mip->mi_mrfp) == NULL) { /* There are no registered receive functions. */ freemsgchain(bp); rw_exit(&mip->mi_rx_lock); return; } + atomic_inc_32(&mip->mi_rx_ref); + rw_exit(&mip->mi_rx_lock); + + /* + * Call registered receive functions. + */ do { mblk_t *recv_bp; - if (mrfp->mrf_nextp != NULL) { - /* XXX Do we bump a counter if copymsgchain() fails? */ - recv_bp = copymsgchain(bp); - } else { - recv_bp = bp; + recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp; + if (recv_bp != NULL) { + if (mrfp->mrf_inuse) { + /* + * Send bp itself and keep the copy. + * If there's only one active receiver, + * it should get the original message, + * tagged with the hardware checksum flags. + */ + mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp); + bp = recv_bp; + } else { + freemsgchain(recv_bp); + } } - if (recv_bp != NULL) - mrfp->mrf_fn(mrfp->mrf_arg, mrh, recv_bp); mrfp = mrfp->mrf_nextp; } while (mrfp != NULL); + + rw_enter(&mip->mi_rx_lock, RW_READER); + if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) { + mac_rx_fn_t **pp, *p; + uint32_t cnt = 0; + + DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip); + + /* + * Need to become exclusive before doing cleanup + */ + if (rw_tryupgrade(&mip->mi_rx_lock) == 0) { + rw_exit(&mip->mi_rx_lock); + rw_enter(&mip->mi_rx_lock, RW_WRITER); + } + + /* + * We return if another thread has already entered and cleaned + * up the list. + */ + if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) { + rw_exit(&mip->mi_rx_lock); + return; + } + + /* + * Free removed callbacks. + */ + pp = &mip->mi_mrfp; + while (*pp != NULL) { + if (!(*pp)->mrf_inuse) { + p = *pp; + *pp = (*pp)->mrf_nextp; + kmem_free(p, sizeof (*p)); + cnt++; + continue; + } + pp = &(*pp)->mrf_nextp; + } + + /* + * Wake up mac_rx_remove_wait() + */ + mutex_enter(&mip->mi_lock); + ASSERT(mip->mi_rx_removed == cnt); + mip->mi_rx_removed = 0; + cv_broadcast(&mip->mi_rx_cv); + mutex_exit(&mip->mi_lock); + } rw_exit(&mip->mi_rx_lock); } diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index fee2278fdf..09e4c47f86 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -520,7 +520,9 @@ extern void mac_notify_remove(mac_handle_t, mac_notify_handle_t); extern void mac_notify(mac_handle_t); extern mac_rx_handle_t mac_rx_add(mac_handle_t, mac_rx_t, void *); -extern void mac_rx_remove(mac_handle_t, mac_rx_handle_t); +extern void mac_rx_remove(mac_handle_t, mac_rx_handle_t, + boolean_t); +extern void mac_rx_remove_wait(mac_handle_t); extern mblk_t *mac_txloop(void *, mblk_t *); extern mac_txloop_handle_t mac_txloop_add(mac_handle_t, mac_txloop_t, void *); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index d1f4d6bbe6..7009c69973 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -66,6 +66,7 @@ struct mac_rx_fn_s { mac_rx_fn_t *mrf_nextp; mac_rx_t mrf_fn; void *mrf_arg; + boolean_t mrf_inuse; }; typedef struct mac_txloop_fn_s mac_txloop_fn_t; @@ -132,6 +133,11 @@ typedef struct mac_impl_s { boolean_t mi_activelink; mac_txinfo_t mi_txinfo; mac_txinfo_t mi_txloopinfo; + uint32_t mi_rx_ref; /* #threads in mac_rx() */ + uint32_t mi_rx_removed; /* #callbacks marked */ + /* for removal */ + kmutex_t mi_lock; + kcondvar_t mi_rx_cv; } mac_impl_t; #define mi_getstat mi_callbacks->mc_getstat diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c index 60a02e6755..43fff001a1 100644 --- a/usr/src/uts/sun4v/io/vsw.c +++ b/usr/src/uts/sun4v/io/vsw.c @@ -732,7 +732,7 @@ vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) /* remove mac layer callback */ mutex_enter(&vswp->mac_lock); if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { - mac_rx_remove(vswp->mh, vswp->mrh); + mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); vswp->mrh = NULL; } mutex_exit(&vswp->mac_lock); @@ -1297,7 +1297,7 @@ vsw_mac_detach(vsw_t *vswp) if (vswp->mstarted) mac_stop(vswp->mh); if (vswp->mrh != NULL) - mac_rx_remove(vswp->mh, vswp->mrh); + mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); if (vswp->mresources) mac_resource_set(vswp->mh, NULL, NULL); mac_close(vswp->mh); |