diff options
author | Ryan Zezeski <rpz@joyent.com> | 2017-04-12 08:24:51 -0600 |
---|---|---|
committer | Ryan Zezeski <rpz@joyent.com> | 2017-05-19 13:51:50 -0600 |
commit | 87b3eba89974d328822c6ffda4259b7f3d4edc54 (patch) | |
tree | d041b4dafb043d268206b4ae5227e1bda81b8b35 | |
parent | d0151f97529dd89fa4e77bb6f9061d41248f7004 (diff) | |
download | illumos-joyent-87b3eba89974d328822c6ffda4259b7f3d4edc54.tar.gz |
OS-6056 dls_devnet_unset() and DLS temp holds can deadlock
OS-5989 OS-3506 introduced deadlock in dls_devnet_prop_task
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
-rw-r--r-- | usr/src/uts/common/io/dld/dld_drv.c | 64 | ||||
-rw-r--r-- | usr/src/uts/common/io/dls/dls_mgmt.c | 218 |
2 files changed, 207 insertions, 75 deletions
diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c index cb1857ed0c..5541103d35 100644 --- a/usr/src/uts/common/io/dld/dld_drv.c +++ b/usr/src/uts/common/io/dld/dld_drv.c @@ -344,17 +344,25 @@ drv_ioc_attr(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) zone_check_datalink(&zoneid, diap->dia_linkid) != 0) return (ENOENT); - if ((err = mac_perim_enter_by_linkid(diap->dia_linkid, &mph)) != 0) + if ((err = dls_devnet_hold_tmp(diap->dia_linkid, &dlh)) != 0) return (err); - if ((err = dls_devnet_hold_link(diap->dia_linkid, &dlh, &dlp)) != 0) { + if ((err = mac_perim_enter_by_macname(dls_devnet_mac(dlh), + &mph)) != 0) { + dls_devnet_rele_tmp(dlh); + return (err); + } + + if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) { mac_perim_exit(mph); + dls_devnet_rele_tmp(dlh); return (err); } mac_sdu_get(dlp->dl_mh, NULL, &diap->dia_max_sdu); - dls_devnet_rele_link(dlh, dlp); + dls_link_rele(dlp); mac_perim_exit(mph); + dls_devnet_rele_tmp(dlh); return (0); } @@ -662,10 +670,11 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set, goto done; } - if ((err = mac_perim_enter_by_linkid(linkid, &mph)) != 0) + if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0) goto done; - - if ((err = dls_devnet_hold_link(linkid, &dlh, &dlp)) != 0) + if ((err = mac_perim_enter_by_macname(dls_devnet_mac(dlh), &mph)) != 0) + goto done; + if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) goto done; /* @@ -796,8 +805,8 @@ done: if (!set && ddi_copyout(kprop, (void *)arg, dsize, mode) != 0) err = EFAULT; - if (dlh != NULL && dlp != NULL) - dls_devnet_rele_link(dlh, dlp); + if (dlp != NULL) + dls_link_rele(dlp); if (mph != NULL) { int32_t cpuid; @@ -814,6 +823,9 @@ done: mac_client_set_intr_cpu(mdip, dlp->dl_mch, cpuid); } + if (dlh != NULL) + dls_devnet_rele_tmp(dlh); + if (kprop != NULL) kmem_free(kprop, dsize); return (err); @@ -1319,10 +1331,13 @@ drv_ioc_gettran(void *karg, intptr_t arg, int mode, cred_t *cred, dls_link_t *dlp = NULL; dld_ioc_gettran_t *dgt = karg; - if ((ret = mac_perim_enter_by_linkid(dgt->dgt_linkid, &mph)) != 0) + if ((ret = dls_devnet_hold_tmp(dgt->dgt_linkid, &dlh)) != 0) goto done; - if ((ret = dls_devnet_hold_link(dgt->dgt_linkid, &dlh, &dlp)) != 0) + if ((ret = mac_perim_enter_by_macname(dls_devnet_mac(dlh), &mph)) != 0) + goto done; + + if ((ret = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) goto done; /* @@ -1341,13 +1356,14 @@ drv_ioc_gettran(void *karg, intptr_t arg, int mode, cred_t *cred, } done: - if (dlh != NULL && dlp != NULL) { - dls_devnet_rele_link(dlh, dlp); - } + if (dlp != NULL) + dls_link_rele(dlp); - if (mph != NULL) { + if (mph != NULL) mac_perim_exit(mph); - } + + if (dlh != NULL) + dls_devnet_rele_tmp(dlh); return (ret); } @@ -1371,10 +1387,13 @@ drv_ioc_readtran(void *karg, intptr_t arg, int mode, cred_t *cred, if (dti->dti_nbytes != 256 || dti->dti_off != 0) return (EINVAL); - if ((ret = mac_perim_enter_by_linkid(dti->dti_linkid, &mph)) != 0) + if ((ret = dls_devnet_hold_tmp(dti->dti_linkid, &dlh)) != 0) goto done; - if ((ret = dls_devnet_hold_link(dti->dti_linkid, &dlh, &dlp)) != 0) + if ((ret = mac_perim_enter_by_macname(dls_devnet_mac(dlh), &mph)) != 0) + goto done; + + if ((ret = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) goto done; /* @@ -1394,13 +1413,14 @@ drv_ioc_readtran(void *karg, intptr_t arg, int mode, cred_t *cred, } done: - if (dlh != NULL && dlp != NULL) { - dls_devnet_rele_link(dlh, dlp); - } + if (dlp != NULL) + dls_link_rele(dlp); - if (mph != NULL) { + if (mph != NULL) mac_perim_exit(mph); - } + + if (dlh != NULL) + dls_devnet_rele_tmp(dlh); return (ret); } diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c index 0e8fd6b350..8ace105ddb 100644 --- a/usr/src/uts/common/io/dls/dls_mgmt.c +++ b/usr/src/uts/common/io/dls/dls_mgmt.c @@ -21,7 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2017 Joyent, Inc. */ /* * Copyright (c) 2016 by Delphix. All rights reserved. @@ -86,6 +86,14 @@ static door_handle_t dls_mgmt_dh = NULL; /* dls_devnet_t dd_flags */ #define DD_CONDEMNED 0x1 #define DD_IMPLICIT_IPTUN 0x2 /* Implicitly-created ip*.*tun* tunnel */ +#define DD_INITIALIZING 0x4 + +/* + * If the link is marked as initializing or condemned then it should + * not be visible outside of the DLS framework. + */ +#define DD_NOT_VISIBLE(flags) ( \ + (flags & (DD_CONDEMNED | DD_INITIALIZING)) != 0) /* * This structure is used to keep the <linkid, macname> mapping. @@ -116,7 +124,7 @@ static int i_dls_devnet_create_iptun(const char *, const char *, datalink_id_t *); static int i_dls_devnet_destroy_iptun(datalink_id_t); static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t); -static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t); +static int dls_devnet_unset(mac_handle_t, datalink_id_t *, boolean_t); /*ARGSUSED*/ static int @@ -136,9 +144,9 @@ i_dls_devnet_destructor(void *buf, void *arg) { dls_devnet_t *ddp = buf; - ASSERT(ddp->dd_ksp == NULL); - ASSERT(ddp->dd_ref == 0); - ASSERT(ddp->dd_tref == 0); + VERIFY(ddp->dd_ksp == NULL); + VERIFY(ddp->dd_ref == 0); + VERIFY(ddp->dd_tref == 0); mutex_destroy(&ddp->dd_mutex); cv_destroy(&ddp->dd_cv); } @@ -840,12 +848,16 @@ dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit) } /* - * Associate a linkid with a given link (identified by macname) + * Associate the linkid with the link identified by macname. If this + * is called on behalf of a physical link then linkid may be + * DATALINK_INVALID_LINKID. Otherwise, if called on behalf of a + * virtual link, linkid must have a value. */ static int -dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid, +dls_devnet_set(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid, dls_devnet_t **ddpp) { + const char *macname = mac_name(mh); dls_devnet_t *ddp = NULL; datalink_class_t class; int err; @@ -878,17 +890,41 @@ dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid, } /* - * This might be a physical link that has already - * been created, but which does not have a linkid - * because dlmgmtd was not running when it was created. + * If we arrive here we know we are attempting to set + * the linkid on a physical link. A virtual link + * should never arrive here because it should never + * call this function without a linkid. Virtual links + * are created through dlgmtmd and thus we know + * dlmgmtd is alive to assign it a linkid (search for + * uses of dladm_create_datalink_id() to prove this to + * yourself); we don't have the same guarantee for a + * physical link which may perform an upcall for a + * linkid while dlmgmtd is down but will continue + * creating a devnet without the linkid (see + * softmac_create_datalink() to see how physical link + * creation works). That is why there is no entry in + * the id hash but there is one in the macname hash -- + * softmac couldn't acquire a linkid the first time it + * called this function. + * + * Because of the check above, we also know that + * ddp->dd_linkid is not set. Following this, the link + * must still be in the DD_INITIALIZING state because + * that flag is removed IFF dd_linkid is set. This is + * why we can ASSERT the DD_INITIALIZING flag below if + * the call to i_dls_devnet_setzid() fails. */ if (linkid == DATALINK_INVALID_LINKID || class != DATALINK_CLASS_PHYS) { err = EINVAL; goto done; } + + ASSERT(ddp->dd_flags & DD_INITIALIZING); + } else { ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP); + ddp->dd_flags = DD_INITIALIZING; ddp->dd_tref = 0; ddp->dd_ref++; ddp->dd_owner_zid = zoneid; @@ -906,12 +942,6 @@ dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid, (mod_hash_val_t)ddp) == 0); devnet_need_rebuild = B_TRUE; stat_create = B_TRUE; - mutex_enter(&ddp->dd_mutex); - if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == NULL)) { - ddp->dd_prop_taskid = taskq_dispatch(system_taskq, - dls_devnet_prop_task, ddp, TQ_SLEEP); - } - mutex_exit(&ddp->dd_mutex); } err = 0; done: @@ -926,8 +956,18 @@ done: if (err == 0) { if (zoneid != GLOBAL_ZONEID && (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE, - B_FALSE)) != 0) - (void) dls_devnet_unset(macname, &linkid, B_TRUE); + B_FALSE)) != 0) { + /* + * At this point the link is marked as + * DD_INITIALIZING -- there can be no + * outstanding temp refs and therefore no need + * to wait for them. + */ + ASSERT(ddp->dd_flags & DD_INITIALIZING); + (void) dls_devnet_unset(mh, &linkid, B_FALSE); + return (err); + } + /* * The kstat subsystem holds its own locks (rather perimeter) * before calling the ks_update (dls_devnet_stat_update) entry @@ -938,17 +978,32 @@ done: dls_devnet_stat_create(ddp, zoneid, zoneid); if (ddpp != NULL) *ddpp = ddp; + + mutex_enter(&ddp->dd_mutex); + if (linkid != DATALINK_INVALID_LINKID && + !ddp->dd_prop_loaded && ddp->dd_prop_taskid == NULL) { + ddp->dd_prop_taskid = taskq_dispatch(system_taskq, + dls_devnet_prop_task, ddp, TQ_SLEEP); + } + mutex_exit(&ddp->dd_mutex); + } return (err); } /* - * Disassociate a linkid with a given link (identified by macname) - * This waits until temporary references to the dls_devnet_t are gone. + * Disassociate the linkid from the link identified by macname. If + * wait is B_TRUE, wait until all temporary refs are released and the + * prop task is finished. + * + * If waiting then you SHOULD NOT call this from inside the MAC perim + * as deadlock will ensue. Otherwise, this function is safe to call + * from inside or outside the MAC perim. */ static int -dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) +dls_devnet_unset(mac_handle_t mh, datalink_id_t *id, boolean_t wait) { + const char *macname = mac_name(mh); dls_devnet_t *ddp; int err; mod_hash_val_t val; @@ -969,7 +1024,7 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) * deadlock. Return EBUSY if the asynchronous thread started for * property loading as part of the post attach hasn't yet completed. */ - ASSERT(ddp->dd_ref != 0); + VERIFY(ddp->dd_ref != 0); if ((ddp->dd_ref != 1) || (!wait && (ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) { int zstatus = 0; @@ -1025,6 +1080,30 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) ddp->dd_ref--; *id = ddp->dd_linkid; + /* + * Remove this dls_devnet_t from the hash table. + */ + VERIFY(mod_hash_remove(i_dls_devnet_hash, + (mod_hash_key_t)ddp->dd_mac, &val) == 0); + + if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { + VERIFY(mod_hash_remove(i_dls_devnet_id_hash, + (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0); + + devnet_need_rebuild = B_TRUE; + } + rw_exit(&i_dls_devnet_lock); + + /* + * It is important to call i_dls_devnet_setzid() WITHOUT the + * i_dls_devnet_lock held. The setzid call grabs the MAC + * perim; thus causing DLS -> MAC lock ordering if performed + * with the i_dls_devnet_lock held. This forces consumers to + * grab the MAC perim before calling dls_devnet_unset() (the + * locking rules state MAC -> DLS order). By performing the + * setzid outside of the i_dls_devnet_lock consumers can + * safely call dls_devnet_unset() outside the MAC perim. + */ if (ddp->dd_zid != GLOBAL_ZONEID) { /* * We need to release the dd_mutex before we try and destroy the @@ -1045,28 +1124,19 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait) B_FALSE); } - /* - * Remove this dls_devnet_t from the hash table. - */ - VERIFY(mod_hash_remove(i_dls_devnet_hash, - (mod_hash_key_t)ddp->dd_mac, &val) == 0); - - if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { - VERIFY(mod_hash_remove(i_dls_devnet_id_hash, - (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0); - - devnet_need_rebuild = B_TRUE; - } - rw_exit(&i_dls_devnet_lock); - if (wait) { /* * Wait until all temporary references are released. + * The holders of the tref need the MAC perim to + * perform their work and release the tref. To avoid + * deadlock, assert that the perim is never held here. */ + ASSERT0(MAC_PERIM_HELD(mh)); while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != NULL)) cv_wait(&ddp->dd_cv, &ddp->dd_mutex); } else { - ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL); + VERIFY(ddp->dd_tref == 0); + VERIFY(ddp->dd_prop_taskid == NULL); } if (ddp->dd_linkid != DATALINK_INVALID_LINKID) { @@ -1107,8 +1177,8 @@ dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp) } mutex_enter(&ddp->dd_mutex); - ASSERT(ddp->dd_ref > 0); - if (ddp->dd_flags & DD_CONDEMNED) { + VERIFY(ddp->dd_ref > 0); + if (DD_NOT_VISIBLE(ddp->dd_flags)) { mutex_exit(&ddp->dd_mutex); rw_exit(&i_dls_devnet_lock); return (ENOENT); @@ -1147,8 +1217,8 @@ dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp, } mutex_enter(&ddp->dd_mutex); - ASSERT(ddp->dd_ref > 0); - if (ddp->dd_flags & DD_CONDEMNED) { + VERIFY(ddp->dd_ref > 0); + if (DD_NOT_VISIBLE(ddp->dd_flags)) { mutex_exit(&ddp->dd_mutex); rw_exit(&i_dls_devnet_lock); softmac_rele_device(ddh); @@ -1227,8 +1297,8 @@ dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp) return (ENOENT); } mutex_enter(&ddp->dd_mutex); - ASSERT(ddp->dd_ref > 0); - if (ddp->dd_flags & DD_CONDEMNED) { + VERIFY(ddp->dd_ref > 0); + if (DD_NOT_VISIBLE(ddp->dd_flags)) { mutex_exit(&ddp->dd_mutex); rw_exit(&i_dls_devnet_lock); softmac_rele_device(ddh); @@ -1248,7 +1318,7 @@ void dls_devnet_rele(dls_devnet_t *ddp) { mutex_enter(&ddp->dd_mutex); - ASSERT(ddp->dd_ref > 1); + VERIFY(ddp->dd_ref > 1); ddp->dd_ref--; if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) { mutex_exit(&ddp->dd_mutex); @@ -1803,13 +1873,32 @@ dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid) * we need to use the linkid to get the user name for the link * when we create the MAC client. */ - if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) { + if ((err = dls_devnet_set(mh, linkid, zoneid, &ddp)) == 0) { if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) { mac_perim_exit(mph); - (void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE); + (void) dls_devnet_unset(mh, &linkid, B_FALSE); return (err); } + + /* + * If dd_linkid is set then the link was successfully + * initialized. In this case we can remove the + * initializing flag and make the link visible to the + * rest of the system. + * + * If not set then we were called by softmac and it + * was unable to obtain a linkid for the physical link + * because dlmgmtd is down. In that case softmac will + * eventually obtain a linkid and call + * dls_devnet_recreate() to complete initialization. + */ + mutex_enter(&ddp->dd_mutex); + if (ddp->dd_linkid != DATALINK_INVALID_LINKID) + ddp->dd_flags &= ~DD_INITIALIZING; + mutex_exit(&ddp->dd_mutex); + } + mac_perim_exit(mph); return (err); } @@ -1823,8 +1912,19 @@ dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid) int dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid) { - ASSERT(linkid != DATALINK_INVALID_LINKID); - return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL)); + dls_devnet_t *ddp; + int err; + + VERIFY(linkid != DATALINK_INVALID_LINKID); + if ((err = dls_devnet_set(mh, linkid, GLOBAL_ZONEID, &ddp)) == 0) { + mutex_enter(&ddp->dd_mutex); + if (ddp->dd_linkid != DATALINK_INVALID_LINKID) + ddp->dd_flags &= ~DD_INITIALIZING; + mutex_exit(&ddp->dd_mutex); + } + + return (err); + } int @@ -1834,8 +1934,7 @@ dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait) mac_perim_handle_t mph; *idp = DATALINK_INVALID_LINKID; - mac_perim_enter_by_mh(mh, &mph); - err = dls_devnet_unset(mac_name(mh), idp, wait); + err = dls_devnet_unset(mh, idp, wait); /* * We continue on in the face of ENOENT because the devnet @@ -1873,12 +1972,14 @@ dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait) * reference that dls has on the mac_impl_t. */ if (err != 0 && err != ENOENT) { - mac_perim_exit(mph); return (err); } + mac_perim_enter_by_mh(mh, &mph); err = dls_link_rele_by_name(mac_name(mh)); if (err != 0) { + dls_devnet_t *ddp; + /* * XXX It is a general GLDv3 bug that dls_devnet_set() has to * be called to re-set the link when destroy fails. The @@ -1886,8 +1987,19 @@ dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait) * called from kernel context or from a zone other than that * which initially created the link. */ - (void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()), - NULL); + (void) dls_devnet_set(mh, *idp, crgetzoneid(CRED()), &ddp); + + /* + * You might think dd_linkid should always be set + * here, but in the case where dls_devnet_unset() + * returns ENOENT it will be DATALINK_INVALID_LINKID. + * Stay consistent with the rest of DLS and only + * remove the initializing flag if linkid is set. + */ + mutex_enter(&ddp->dd_mutex); + if (ddp->dd_linkid != DATALINK_INVALID_LINKID) + ddp->dd_flags &= ~DD_INITIALIZING; + mutex_exit(&ddp->dd_mutex); } mac_perim_exit(mph); |