| field | value |
|---|---|
| author | ahrens <none@none>, 2006-06-02 11:59:16 -0700 |
| committer | ahrens <none@none>, 2006-06-02 11:59:16 -0700 |
| commit | 8a2f1b9190d1dc288470a1fd2776d79ce82cb129 |
| tree | 424c392c94b108379b82ca81f890daff61e89ec4 /usr/src |
| parent | 82d33c01b078ed404a986a369750cdb4743773fb |
| download | illumos-joyent-8a2f1b9190d1dc288470a1fd2776d79ce82cb129.tar.gz |
6430121 3-way deadlock involving tc_lock within zfs
Diffstat (limited to 'usr/src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/cmd/ztest/ztest.c | 5 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_tx.c | 439 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu.h | 1 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu_tx.h | 57 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dnode.h | 1 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_acl.c | 6 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_dir.c | 4 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_vnops.c | 70 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_znode.c | 2 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zvol.c | 5 |
10 files changed, 293 insertions, 297 deletions
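The fix moves all waiting out of the transaction-assignment path. Previously, dmu_tx_try_assign() could cv_wait() on a busy dnode while the open txg was still held, and TXG_NOWAIT callers retried with a bare txg_wait_open(dmu_objset_pool(os), 0). Now dmu_tx_try_assign() returns ERESTART immediately, recording the blocking hold in tx_needassign_txh, and callers wait through the new dmu_tx_wait(tx) after the tx has been unassigned. Note the ordering change at every call site: dmu_tx_wait() needs the un-aborted tx to know what to wait for, so the wait comes first and dmu_tx_abort() follows. A minimal sketch of the updated caller idiom, adapted from the comment block this patch rewrites in zfs_vnops.c (the held object and the zfsvfs->z_assign usage are illustrative):

```c
top:
    tx = dmu_tx_create(os);             /* os: the objset being modified */
    dmu_tx_hold_bonus(tx, zp->z_id);    /* hold each object you might modify */
    error = dmu_tx_assign(tx, zfsvfs->z_assign);
    if (error) {
        /* drop locks here before waiting */
        if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
            dmu_tx_wait(tx);            /* waits with no txg held */
            dmu_tx_abort(tx);           /* abort only after the wait */
            goto top;                   /* rebuild the tx and retry */
        }
        dmu_tx_abort(tx);               /* hard failure, e.g. ENOSPC */
        return (error);
    }
    /* ... make the changes ... */
    dmu_tx_commit(tx);
```

The complete patch is reproduced below.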
```diff
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index e8065c74f5..2b068c0e84 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -1963,12 +1963,13 @@ ztest_dmu_write_parallel(ztest_args_t *za)
 	txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
 	error = dmu_tx_assign(tx, txg_how);
 	if (error) {
-		dmu_tx_abort(tx);
 		if (error == ERESTART) {
 			ASSERT(txg_how == TXG_NOWAIT);
-			txg_wait_open(dmu_objset_pool(os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			continue;
 		}
+		dmu_tx_abort(tx);
 		ztest_record_enospc("dmu write parallel");
 		return;
 	}
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 1b4a0c2bd0..d9c232e112 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -33,16 +33,13 @@
 #include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
 #include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
 #include <sys/dsl_pool.h>
-#include <sys/zap_impl.h> /* for ZAP_BLOCK_SHIFT */
+#include <sys/zap_impl.h> /* for fzap_default_block_shift */
 #include <sys/spa.h>
 #include <sys/zfs_context.h>
 
 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
     uint64_t arg1, uint64_t arg2);
 
-#ifdef ZFS_DEBUG
-int dmu_use_tx_debug_bufs = 1;
-#endif
 
 dmu_tx_t *
 dmu_tx_create_ds(dsl_dir_t *dd)
@@ -52,9 +49,11 @@ dmu_tx_create_ds(dsl_dir_t *dd)
 	if (dd)
 		tx->tx_pool = dd->dd_pool;
 	list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
-	    offsetof(dmu_tx_hold_t, dth_node));
+	    offsetof(dmu_tx_hold_t, txh_node));
+#ifdef ZFS_DEBUG
 	refcount_create(&tx->tx_space_written);
 	refcount_create(&tx->tx_space_freed);
+#endif
 	return (tx);
 }
@@ -92,12 +91,11 @@ dmu_tx_private_ok(dmu_tx_t *tx)
 	return (tx->tx_anyobj);
 }
 
-static void
+static dmu_tx_hold_t *
 dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
-    enum dmu_tx_hold_type type, dmu_tx_hold_func_t func,
-    uint64_t arg1, uint64_t arg2)
+    enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
 {
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 	dnode_t *dn = NULL;
 	int err;
@@ -105,7 +103,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
 		err = dnode_hold(os->os, object, tx, &dn);
 		if (err) {
 			tx->tx_err = err;
-			return;
+			return (NULL);
 		}
 
 		if (err == 0 && tx->tx_txg != 0) {
@@ -116,23 +114,23 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
 			 * now, at least).
 			 */
 			ASSERT(dn->dn_assigned_txg == 0);
-			ASSERT(dn->dn_assigned_tx == NULL);
 			dn->dn_assigned_txg = tx->tx_txg;
-			dn->dn_assigned_tx = tx;
 			(void) refcount_add(&dn->dn_tx_holds, tx);
 			mutex_exit(&dn->dn_mtx);
 		}
 	}
 
-	dth = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
-	dth->dth_dnode = dn;
-	dth->dth_type = type;
-	dth->dth_arg1 = arg1;
-	dth->dth_arg2 = arg2;
-	list_insert_tail(&tx->tx_holds, dth);
+	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
+	txh->txh_tx = tx;
+	txh->txh_dnode = dn;
+#ifdef ZFS_DEBUG
+	txh->txh_type = type;
+	txh->txh_arg1 = arg1;
+	txh->txh_arg2 = arg2;
+#endif
+	list_insert_tail(&tx->tx_holds, txh);
 
-	if (func)
-		func(tx, dn, arg1, arg2);
+	return (txh);
 }
@@ -143,8 +141,8 @@ dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
 	 * the hold on the dnode_t can cause problems.
 	 */
 	if (!dmu_tx_is_syncing(tx)) {
-		dmu_tx_hold_object_impl(tx, os, object, THT_NEWOBJECT,
-		    NULL, 0, 0);
+		(void) dmu_tx_hold_object_impl(tx, os,
+		    object, THT_NEWOBJECT, 0, 0);
 	}
 }
@@ -166,10 +164,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
 
 /* ARGSUSED */
 static void
-dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 {
-	uint64_t start, end, i, space;
+	dnode_t *dn = txh->txh_dnode;
+	uint64_t start, end, i;
 	int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
+	int err = 0;
 
 	if (len == 0)
 		return;
@@ -179,24 +179,19 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	min_ibs = DN_MIN_INDBLKSHIFT;
 	max_ibs = DN_MAX_INDBLKSHIFT;
 
+
 	/*
 	 * For i/o error checking, read the first and last level-0
 	 * blocks (if they are not aligned), and all the level-1 blocks.
-	 * We needn't do this on the meta-dnode, because we've already
-	 * read it in.
 	 */
-	if (dn && dn->dn_object != DMU_META_DNODE_OBJECT) {
-		int err;
-
+	if (dn) {
 		if (dn->dn_maxblkid == 0) {
 			err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
-			if (err) {
-				tx->tx_err = err;
-				return;
-			}
+			if (err)
+				goto out;
 		} else {
-			zio_t *zio = zio_root(tx->tx_pool->dp_spa,
+			zio_t *zio = zio_root(dn->dn_objset->os_spa,
 			    NULL, NULL, ZIO_FLAG_CANFAIL);
 
 			/* first level-0 block */
@@ -204,10 +199,8 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			if (P2PHASE(off, dn->dn_datablksz) ||
 			    len < dn->dn_datablksz) {
 				err = dmu_tx_check_ioerr(zio, dn, 0, start);
-				if (err) {
-					tx->tx_err = err;
-					return;
-				}
+				if (err)
+					goto out;
 			}
 
 			/* last level-0 block */
@@ -215,10 +208,8 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			if (end != start &&
 			    P2PHASE(off+len, dn->dn_datablksz)) {
 				err = dmu_tx_check_ioerr(zio, dn, 0, end);
-				if (err) {
-					tx->tx_err = err;
-					return;
-				}
+				if (err)
+					goto out;
 			}
 
 			/* level-1 blocks */
@@ -227,18 +218,14 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 				end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 				for (i = start+1; i < end; i++) {
 					err = dmu_tx_check_ioerr(zio, dn, 1, i);
-					if (err) {
-						tx->tx_err = err;
-						return;
-					}
+					if (err)
+						goto out;
 				}
 			}
 
 			err = zio_wait(zio);
-			if (err) {
-				tx->tx_err = err;
-				return;
-			}
+			if (err)
+				goto out;
 		}
 	}
@@ -261,7 +248,7 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	 */
 	start = P2ALIGN(off, 1ULL << max_bs);
 	end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
-	space = end - start + 1;
+	txh->txh_space_towrite += end - start + 1;
 
 	start >>= min_bs;
 	end >>= min_bs;
@@ -282,60 +269,60 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	 * we can't increase the number of levels beyond that.
 	 */
 	if (start != 0 && end != 0)
-		space += 1ULL << max_ibs;
-	space += (end - start + 1) << max_ibs;
+		txh->txh_space_towrite += 1ULL << max_ibs;
+	txh->txh_space_towrite += (end - start + 1) << max_ibs;
 	}
 
-	ASSERT(space < 2 * DMU_MAX_ACCESS);
+	ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
 
-	tx->tx_space_towrite += space;
+out:
+	if (err)
+		txh->txh_tx->tx_err = err;
 }
 
 static void
-dmu_tx_count_dnode(dmu_tx_t *tx, dnode_t *dn)
+dmu_tx_count_dnode(dmu_tx_hold_t *txh)
 {
-	dnode_t *mdn = tx->tx_objset->os->os_meta_dnode;
-	uint64_t object = dn ? dn->dn_object : DN_MAX_OBJECT - 1;
-	uint64_t pre_write_space;
+	dnode_t *dn = txh->txh_dnode;
+	dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
+	uint64_t space = mdn->dn_datablksz +
+	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
 
-	ASSERT(object < DN_MAX_OBJECT);
-	pre_write_space = tx->tx_space_towrite;
-	dmu_tx_count_write(tx, mdn, object << DNODE_SHIFT, 1 << DNODE_SHIFT);
 	if (dn && dn->dn_dbuf->db_blkptr &&
 	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
 	    dn->dn_dbuf->db_blkptr->blk_birth)) {
-		tx->tx_space_tooverwrite +=
-		    tx->tx_space_towrite - pre_write_space;
-		tx->tx_space_towrite = pre_write_space;
+		txh->txh_space_tooverwrite += space;
+	} else {
+		txh->txh_space_towrite += space;
 	}
 }
 
-/* ARGSUSED */
-static void
-dmu_tx_hold_write_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
-{
-	dmu_tx_count_write(tx, dn, off, len);
-	dmu_tx_count_dnode(tx, dn);
-}
-
 void
 dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
 {
+	dmu_tx_hold_t *txh;
+
 	ASSERT(tx->tx_txg == 0);
 	ASSERT(len < DMU_MAX_ACCESS);
 	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
 
-	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_WRITE,
-	    dmu_tx_hold_write_impl, off, len);
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_WRITE, off, len);
+	if (txh == NULL)
+		return;
+
+	dmu_tx_count_write(txh, off, len);
+	dmu_tx_count_dnode(txh);
 }
 
 static void
-dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
 {
 	uint64_t blkid, nblks;
 	uint64_t space = 0;
+	dnode_t *dn = txh->txh_dnode;
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
-	spa_t *spa = tx->tx_pool->dp_spa;
+	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
 	int dirty;
@@ -349,7 +336,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	    list_link_active(&dn->dn_dirty_link[1]) |
 	    list_link_active(&dn->dn_dirty_link[2]) |
 	    list_link_active(&dn->dn_dirty_link[3]);
-	if (dirty || dn->dn_assigned_tx || dn->dn_phys->dn_nlevels == 0)
+	if (dirty || dn->dn_assigned_txg || dn->dn_phys->dn_nlevels == 0)
 		return;
 
 	/*
@@ -416,7 +403,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			err = dbuf_read(dbuf, NULL,
 			    DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
 			if (err != 0) {
-				tx->tx_err = err;
+				txh->txh_tx->tx_err = err;
 				dbuf_rele(dbuf, FTAG);
 				break;
 			}
@@ -434,8 +421,8 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			}
 			dbuf_rele(dbuf, FTAG);
 		}
-		if (err != 0 && err != ENOENT) {
-			tx->tx_err = err;
+		if (err && err != ENOENT) {
+			txh->txh_tx->tx_err = err;
 			break;
 		}
@@ -444,22 +431,32 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
-	tx->tx_space_tofree += space;
+	txh->txh_space_tofree += space;
 }
 
-static void
-dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+void
+dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
 {
+	dmu_tx_hold_t *txh;
+	dnode_t *dn;
 	uint64_t start, end, i;
 	int err, shift;
 	zio_t *zio;
 
+	ASSERT(tx->tx_txg == 0);
+
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_FREE, off, len);
+	if (txh == NULL)
+		return;
+	dn = txh->txh_dnode;
+
 	/* first block */
 	if (off != 0)
-		dmu_tx_count_write(tx, dn, off, 1);
+		dmu_tx_count_write(txh, off, 1);
 
 	/* last block */
 	if (len != DMU_OBJECT_END)
-		dmu_tx_count_write(tx, dn, off+len, 1);
+		dmu_tx_count_write(txh, off+len, 1);
 
 	if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
 		return;
@@ -503,28 +500,27 @@ dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 		}
 	}
 
-	dmu_tx_count_dnode(tx, dn);
-	dmu_tx_count_free(tx, dn, off, len);
+	dmu_tx_count_dnode(txh);
+	dmu_tx_count_free(txh, off, len);
 }
 
 void
-dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
-{
-	ASSERT(tx->tx_txg == 0);
-
-	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_FREE,
-	    dmu_tx_hold_free_impl, off, len);
-}
-
-/* ARGSUSED */
-static void
-dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
 {
+	dmu_tx_hold_t *txh;
+	dnode_t *dn;
 	uint64_t nblocks;
 	int epbs, err;
-	char *name = (char *)(uintptr_t)iname;
 
-	dmu_tx_count_dnode(tx, dn);
+	ASSERT(tx->tx_txg == 0);
+
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_ZAP, add, (uintptr_t)name);
+	if (txh == NULL)
+		return;
+	dn = txh->txh_dnode;
+
+	dmu_tx_count_dnode(txh);
 
 	if (dn == NULL) {
 		/*
@@ -532,7 +528,7 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
 		 * block.  So there will be at most 2 blocks total,
 		 * including the header block.
 		 */
-		dmu_tx_count_write(tx, dn, 0, 2 << fzap_default_block_shift);
+		dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
 		return;
 	}
@@ -551,9 +547,9 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
 
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
 		    dn->dn_phys->dn_blkptr[0].blk_birth))
-			tx->tx_space_tooverwrite += dn->dn_datablksz;
+			txh->txh_space_tooverwrite += dn->dn_datablksz;
 		else
-			tx->tx_space_towrite += dn->dn_datablksz;
+			txh->txh_space_towrite += dn->dn_datablksz;
 		return;
 	}
@@ -574,7 +570,7 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
 	 * 3 blocks overwritten: target leaf, ptrtbl block, header block
 	 * 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
 	 */
-	dmu_tx_count_write(tx, dn, dn->dn_maxblkid * dn->dn_datablksz,
+	dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
	    (3 + add ? 3 : 0) << dn->dn_datablkshift);
@@ -583,49 +579,38 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
 	 */
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 	for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
-		tx->tx_space_towrite += 3 << dn->dn_indblkshift;
-}
-
-void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
-{
-	ASSERT(tx->tx_txg == 0);
-
-	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_ZAP,
-	    dmu_tx_hold_zap_impl, add, (uintptr_t)name);
+		txh->txh_space_towrite += 3 << dn->dn_indblkshift;
 }
 
 void
 dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
 {
-	ASSERT(tx->tx_txg == 0);
-
-	dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_BONUS,
-	    dmu_tx_hold_write_impl, 0, 0);
-}
+	dmu_tx_hold_t *txh;
 
+	ASSERT(tx->tx_txg == 0);
 
-/* ARGSUSED */
-static void
-dmu_tx_hold_space_impl(dmu_tx_t *tx, dnode_t *dn,
-    uint64_t space, uint64_t unused)
-{
-	tx->tx_space_towrite += space;
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    object, THT_BONUS, 0, 0);
+	if (txh)
+		dmu_tx_count_dnode(txh);
 }
 
 void
 dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
 {
+	dmu_tx_hold_t *txh;
+
 	ASSERT(tx->tx_txg == 0);
 
-	dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, THT_SPACE,
-	    dmu_tx_hold_space_impl, space, 0);
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+	    DMU_NEW_OBJECT, THT_SPACE, space, 0);
+
+	txh->txh_space_towrite += space;
 }
 
 int
 dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
 {
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 	int holds = 0;
 
 	/*
@@ -639,9 +624,9 @@ dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
 	/* if (tx->tx_anyobj == TRUE) */
 		/* return (0); */
 
-	for (dth = list_head(&tx->tx_holds); dth;
-	    dth = list_next(&tx->tx_holds, dth)) {
-		if (dth->dth_dnode && dth->dth_dnode->dn_object == object)
+	for (txh = list_head(&tx->tx_holds); txh;
+	    txh = list_next(&tx->tx_holds, txh)) {
+		if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
 			holds++;
 	}
@@ -652,7 +637,7 @@ dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
 void
 dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
 {
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 	int match_object = FALSE, match_offset = FALSE;
 	dnode_t *dn = db->db_dnode;
@@ -667,28 +652,28 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
 	if (db->db.db_object == DMU_META_DNODE_OBJECT)
 		return;
 
-	for (dth = list_head(&tx->tx_holds); dth;
-	    dth = list_next(&tx->tx_holds, dth)) {
+	for (txh = list_head(&tx->tx_holds); txh;
+	    txh = list_next(&tx->tx_holds, txh)) {
 		ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
-		if (dth->dth_dnode == dn && dth->dth_type != THT_NEWOBJECT)
+		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
 			match_object = TRUE;
-		if (dth->dth_dnode == NULL || dth->dth_dnode == dn) {
+		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
 			int datablkshift = dn->dn_datablkshift ?
 			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
 			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 			int shift = datablkshift + epbs * db->db_level;
 			uint64_t beginblk = shift >= 64 ? 0 :
-			    (dth->dth_arg1 >> shift);
+			    (txh->txh_arg1 >> shift);
 			uint64_t endblk = shift >= 64 ? 0 :
-			    ((dth->dth_arg1 + dth->dth_arg2 - 1) >> shift);
+			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
 			uint64_t blkid = db->db_blkid;
 
-			/* XXX dth_arg2 better not be zero... */
+			/* XXX txh_arg2 better not be zero... */
 
-			dprintf("found dth type %x beginblk=%llx endblk=%llx\n",
-			    dth->dth_type, beginblk, endblk);
+			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
+			    txh->txh_type, beginblk, endblk);
 
-			switch (dth->dth_type) {
+			switch (txh->txh_type) {
 			case THT_WRITE:
 				if (blkid >= beginblk && blkid <= endblk)
 					match_offset = TRUE;
@@ -710,11 +695,11 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
 				break;
 			case THT_FREE:
 				if (blkid == beginblk &&
-				    (dth->dth_arg1 != 0 ||
+				    (txh->txh_arg1 != 0 ||
 				    dn->dn_maxblkid == 0))
 					match_offset = TRUE;
 				if (blkid == endblk &&
-				    dth->dth_arg2 != DMU_OBJECT_END)
+				    txh->txh_arg2 != DMU_OBJECT_END)
 					match_offset = TRUE;
 				break;
 			case THT_BONUS:
@@ -728,7 +713,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
 				match_object = TRUE;
 				break;
 			default:
-				ASSERT(!"bad dth_type");
+				ASSERT(!"bad txh_type");
 			}
 		}
 		if (match_object && match_offset)
@@ -741,104 +726,108 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
 #endif
 
 static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how, dmu_tx_hold_t **last_dth)
+dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 {
-	dmu_tx_hold_t *dth;
-	uint64_t lsize, asize, fsize, towrite;
+	dmu_tx_hold_t *txh;
+	uint64_t lsize, asize, fsize, towrite, tofree, tooverwrite;
 
-	*last_dth = NULL;
+	ASSERT3U(tx->tx_txg, ==, 0);
+	if (tx->tx_err)
+		return (tx->tx_err);
 
 	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
+	tx->tx_needassign_txh = NULL;
 
-	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
-		return (ERESTART);
-	if (tx->tx_err)
-		return (tx->tx_err);
+	/*
+	 * NB: No error returns are allowed after txg_hold_open, but
+	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
+	 */
 
-	for (dth = list_head(&tx->tx_holds); dth;
-	    dth = list_next(&tx->tx_holds, dth)) {
-		dnode_t *dn = dth->dth_dnode;
+	towrite = tofree = tooverwrite = 0;
+	for (txh = list_head(&tx->tx_holds); txh;
+	    txh = list_next(&tx->tx_holds, txh)) {
+		dnode_t *dn = txh->txh_dnode;
 		if (dn != NULL) {
 			mutex_enter(&dn->dn_mtx);
-			while (dn->dn_assigned_txg == tx->tx_txg - 1) {
-				if (txg_how != TXG_WAIT) {
-					mutex_exit(&dn->dn_mtx);
-					return (ERESTART);
-				}
-				cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
+			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
+				mutex_exit(&dn->dn_mtx);
+				tx->tx_needassign_txh = txh;
+				return (ERESTART);
 			}
-			if (dn->dn_assigned_txg == 0) {
-				ASSERT(dn->dn_assigned_tx == NULL);
+			if (dn->dn_assigned_txg == 0)
 				dn->dn_assigned_txg = tx->tx_txg;
-				dn->dn_assigned_tx = tx;
-			} else {
-				ASSERT(dn->dn_assigned_txg == tx->tx_txg);
-				if (dn->dn_assigned_tx != tx)
-					dn->dn_assigned_tx = NULL;
-			}
+			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
 			(void) refcount_add(&dn->dn_tx_holds, tx);
 			mutex_exit(&dn->dn_mtx);
 		}
-		*last_dth = dth;
-		if (tx->tx_err)
-			return (tx->tx_err);
+		towrite += txh->txh_space_towrite;
+		tofree += txh->txh_space_tofree;
+		tooverwrite += txh->txh_space_tooverwrite;
 	}
 
 	/*
+	 * NB: This check must be after we've held the dnodes, so that
+	 * the dmu_tx_unassign() logic will work properly
+	 */
+	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
+		return (ERESTART);
+
+	/*
 	 * If a snapshot has been taken since we made our estimates,
 	 * assume that we won't be able to free or overwrite anything.
 	 */
 	if (tx->tx_objset &&
 	    dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
 	    tx->tx_lastsnap_txg) {
-		tx->tx_space_towrite += tx->tx_space_tooverwrite;
-		tx->tx_space_tooverwrite = 0;
-		tx->tx_space_tofree = 0;
+		towrite += tooverwrite;
+		tooverwrite = tofree = 0;
 	}
 
 	/*
 	 * Convert logical size to worst-case allocated size.
 	 */
-	fsize = spa_get_asize(tx->tx_pool->dp_spa, tx->tx_space_tooverwrite) +
-	    tx->tx_space_tofree;
-	lsize = tx->tx_space_towrite + tx->tx_space_tooverwrite;
+	fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
+	lsize = towrite + tooverwrite;
 	asize = spa_get_asize(tx->tx_pool->dp_spa, lsize);
-	towrite = tx->tx_space_towrite;
+
+#ifdef ZFS_DEBUG
 	tx->tx_space_towrite = asize;
+	tx->tx_space_tofree = tofree;
+	tx->tx_space_tooverwrite = tooverwrite;
+#endif
 
 	if (tx->tx_dir && asize != 0) {
 		int err = dsl_dir_tempreserve_space(tx->tx_dir,
 		    lsize, asize, fsize, &tx->tx_tempreserve_cookie, tx);
-		if (err) {
-			tx->tx_space_towrite = towrite;
+		if (err)
 			return (err);
-		}
 	}
 
 	return (0);
 }
 
-static uint64_t
-dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
+static void
+dmu_tx_unassign(dmu_tx_t *tx)
 {
-	uint64_t txg = tx->tx_txg;
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 
-	ASSERT(txg != 0);
+	if (tx->tx_txg == 0)
+		return;
 
 	txg_rele_to_quiesce(&tx->tx_txgh);
 
-	for (dth = last_dth; dth; dth = list_prev(&tx->tx_holds, dth)) {
-		dnode_t *dn = dth->dth_dnode;
+	for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
+	    txh = list_next(&tx->tx_holds, txh)) {
+		dnode_t *dn = txh->txh_dnode;
 
 		if (dn == NULL)
 			continue;
 		mutex_enter(&dn->dn_mtx);
-		ASSERT3U(dn->dn_assigned_txg, ==, txg);
+		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
 
 		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
 			dn->dn_assigned_txg = 0;
-			dn->dn_assigned_tx = NULL;
 			cv_broadcast(&dn->dn_notxholds);
 		}
 		mutex_exit(&dn->dn_mtx);
 	}
@@ -846,8 +835,8 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
 
 	txg_rele_to_sync(&tx->tx_txgh);
 
+	tx->tx_lasttried_txg = tx->tx_txg;
 	tx->tx_txg = 0;
-	return (txg);
 }
 
 /*
@@ -860,7 +849,7 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
  * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
  *	blocking, returns immediately with ERESTART.  This should be used
  *	whenever you're holding locks.  On an ERESTART error, the caller
- *	should drop locks, do a txg_wait_open(dp, 0), and try again.
+ *	should drop locks, do a dmu_tx_wait(tx), and try again.
 *
 * (3)	A specific txg.  Use this if you need to ensure that multiple
 *	transactions all sync in the same txg.  Like TXG_NOWAIT, it
@@ -869,20 +858,19 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
 int
 dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 {
-	dmu_tx_hold_t *last_dth;
 	int err;
 
 	ASSERT(tx->tx_txg == 0);
 	ASSERT(txg_how != 0);
 	ASSERT(!dsl_pool_sync_context(tx->tx_pool));
 
-	while ((err = dmu_tx_try_assign(tx, txg_how, &last_dth)) != 0) {
-		uint64_t txg = dmu_tx_unassign(tx, last_dth);
+	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
+		dmu_tx_unassign(tx);
 
 		if (err != ERESTART || txg_how != TXG_WAIT)
 			return (err);
 
-		txg_wait_open(tx->tx_pool, txg + 1);
+		dmu_tx_wait(tx);
 	}
 
 	txg_rele_to_quiesce(&tx->tx_txgh);
@@ -891,8 +879,28 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 }
 
 void
+dmu_tx_wait(dmu_tx_t *tx)
+{
+	ASSERT(tx->tx_txg == 0);
+	ASSERT(tx->tx_lasttried_txg != 0);
+
+	if (tx->tx_needassign_txh) {
+		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;
+
+		mutex_enter(&dn->dn_mtx);
+		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
+			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
+		mutex_exit(&dn->dn_mtx);
+		tx->tx_needassign_txh = NULL;
+	} else {
+		txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
+	}
+}
+
+void
 dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
 {
+#ifdef ZFS_DEBUG
 	if (tx->tx_dir == NULL || delta == 0)
 		return;
@@ -903,20 +911,21 @@ dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
 	} else {
 		(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
 	}
+#endif
 }
 
 void
 dmu_tx_commit(dmu_tx_t *tx)
 {
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 
 	ASSERT(tx->tx_txg != 0);
 
-	while (dth = list_head(&tx->tx_holds)) {
-		dnode_t *dn = dth->dth_dnode;
+	while (txh = list_head(&tx->tx_holds)) {
+		dnode_t *dn = txh->txh_dnode;
 
-		list_remove(&tx->tx_holds, dth);
-		kmem_free(dth, sizeof (dmu_tx_hold_t));
+		list_remove(&tx->tx_holds, txh);
+		kmem_free(txh, sizeof (dmu_tx_hold_t));
 		if (dn == NULL)
 			continue;
 		mutex_enter(&dn->dn_mtx);
@@ -924,19 +933,18 @@ dmu_tx_commit(dmu_tx_t *tx)
 
 		if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
 			dn->dn_assigned_txg = 0;
-			dn->dn_assigned_tx = NULL;
 			cv_broadcast(&dn->dn_notxholds);
 		}
 		mutex_exit(&dn->dn_mtx);
 		dnode_rele(dn, tx);
 	}
 
-	if (tx->tx_dir && tx->tx_space_towrite > 0) {
+	if (tx->tx_tempreserve_cookie)
 		dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
-	}
 
 	if (tx->tx_anyobj == FALSE)
 		txg_rele_to_sync(&tx->tx_txgh);
+#ifdef ZFS_DEBUG
 	dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
 	    tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
 	    tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
@@ -944,9 +952,6 @@ dmu_tx_commit(dmu_tx_t *tx)
 	    refcount_count(&tx->tx_space_written));
 	refcount_destroy_many(&tx->tx_space_freed,
 	    refcount_count(&tx->tx_space_freed));
-#ifdef ZFS_DEBUG
-	if (tx->tx_debug_buf)
-		kmem_free(tx->tx_debug_buf, 4096);
 #endif
 	kmem_free(tx, sizeof (dmu_tx_t));
 }
@@ -954,25 +959,23 @@ dmu_tx_commit(dmu_tx_t *tx)
 void
 dmu_tx_abort(dmu_tx_t *tx)
 {
-	dmu_tx_hold_t *dth;
+	dmu_tx_hold_t *txh;
 
 	ASSERT(tx->tx_txg == 0);
 
-	while (dth = list_head(&tx->tx_holds)) {
-		dnode_t *dn = dth->dth_dnode;
+	while (txh = list_head(&tx->tx_holds)) {
+		dnode_t *dn = txh->txh_dnode;
 
-		list_remove(&tx->tx_holds, dth);
-		kmem_free(dth, sizeof (dmu_tx_hold_t));
+		list_remove(&tx->tx_holds, txh);
+		kmem_free(txh, sizeof (dmu_tx_hold_t));
 		if (dn != NULL)
 			dnode_rele(dn, tx);
 	}
+#ifdef ZFS_DEBUG
 	refcount_destroy_many(&tx->tx_space_written,
 	    refcount_count(&tx->tx_space_written));
 	refcount_destroy_many(&tx->tx_space_freed,
 	    refcount_count(&tx->tx_space_freed));
-#ifdef ZFS_DEBUG
-	if (tx->tx_debug_buf)
-		kmem_free(tx->tx_debug_buf, 4096);
 #endif
 	kmem_free(tx, sizeof (dmu_tx_t));
 }
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 88b59a1618..b24c7132e2 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -400,6 +400,7 @@ void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
 void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
 void dmu_tx_abort(dmu_tx_t *tx);
 int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+void dmu_tx_wait(dmu_tx_t *tx);
 void dmu_tx_commit(dmu_tx_t *tx);
 
 /*
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
index 9b55c56bc9..422d9d3ffb 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
@@ -38,6 +38,7 @@ extern "C" {
 #endif
 
 struct dmu_buf_impl;
+struct dmu_tx_hold;
 struct dnode_link;
 struct dsl_pool;
 struct dnode;
@@ -54,18 +55,18 @@ struct dmu_tx {
 	struct dsl_pool *tx_pool;
 	uint64_t tx_txg;
 	uint64_t tx_lastsnap_txg;
+	uint64_t tx_lasttried_txg;
 	txg_handle_t tx_txgh;
-	uint64_t tx_space_towrite;
-	refcount_t tx_space_written;
-	uint64_t tx_space_tofree;
-	refcount_t tx_space_freed;
-	uint64_t tx_space_tooverwrite;
 	void *tx_tempreserve_cookie;
+	struct dmu_tx_hold *tx_needassign_txh;
 	uint8_t tx_anyobj;
 	int tx_err;
 #ifdef ZFS_DEBUG
-	char *tx_debug_buf;
-	int tx_debug_len;
+	uint64_t tx_space_towrite;
+	uint64_t tx_space_tofree;
+	uint64_t tx_space_tooverwrite;
+	refcount_t tx_space_written;
+	refcount_t tx_space_freed;
 #endif
 };
@@ -80,12 +81,17 @@ enum dmu_tx_hold_type {
 };
 
 typedef struct dmu_tx_hold {
-	list_node_t dth_node;
-	struct dnode *dth_dnode;
-	enum dmu_tx_hold_type dth_type;
-	uint64_t dth_arg1;
-	uint64_t dth_arg2;
-	/* XXX track what the actual estimates were for this hold */
+	dmu_tx_t *txh_tx;
+	list_node_t txh_node;
+	struct dnode *txh_dnode;
+	uint64_t txh_space_towrite;
+	uint64_t txh_space_tofree;
+	uint64_t txh_space_tooverwrite;
+#ifdef ZFS_DEBUG
+	enum dmu_tx_hold_type txh_type;
+	uint64_t txh_arg1;
+	uint64_t txh_arg2;
+#endif
 } dmu_tx_hold_t;
@@ -97,6 +103,7 @@ int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
 void dmu_tx_commit(dmu_tx_t *tx);
 void dmu_tx_abort(dmu_tx_t *tx);
 uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
+void dmu_tx_wait(dmu_tx_t *tx);
 
 /*
  * These routines are defined in dmu_spa.h, and are called by the SPA.
@@ -116,33 +123,9 @@ int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
 void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
 
 #ifdef ZFS_DEBUG
-
-extern int dmu_use_tx_debug_bufs;
-
-#define	dprintf_tx(tx, fmt, ...) \
-	if (dmu_use_tx_debug_bufs) \
-	do { \
-	char *__bufp; \
-	int __len; \
-	if (tx->tx_debug_buf == NULL) { \
-		__bufp = kmem_zalloc(4096, KM_SLEEP); \
-		tx->tx_debug_buf = __bufp; \
-		tx->tx_debug_len = __len = 4096; \
-	} else { \
-		__len = tx->tx_debug_len; \
-		__bufp = &tx->tx_debug_buf[4096-__len]; \
-	} \
-	tx->tx_debug_len -= snprintf(__bufp, __len, fmt, __VA_ARGS__); \
-_NOTE(CONSTCOND) } while (0); \
-	else dprintf(fmt, __VA_ARGS__)
-
 #define	DMU_TX_DIRTY_BUF(tx, db)	dmu_tx_dirty_buf(tx, db)
-
 #else
-
-#define	dprintf_tx(tx, fmt, ...)
 #define	DMU_TX_DIRTY_BUF(tx, db)
-
 #endif
 
 #ifdef __cplusplus
diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h
index 48b06a6749..e7158bc4c1 100644
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h
@@ -167,7 +167,6 @@ typedef struct dnode {
 	uint64_t dn_allocated_txg;
 	uint64_t dn_free_txg;
 	uint64_t dn_assigned_txg;
-	struct dmu_tx *dn_assigned_tx;		/* if only one tx cares */
 	kcondvar_t dn_notxholds;
 	enum dnode_dirtycontext dn_dirtyctx;
 	uint8_t *dn_dirtyctx_firstset;		/* dbg: contents meaningless */
diff --git a/usr/src/uts/common/fs/zfs/zfs_acl.c b/usr/src/uts/common/fs/zfs/zfs_acl.c
index 8a3f63d037..42fabdda49 100644
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c
@@ -1192,15 +1192,15 @@ top:
 
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
-
 		mutex_exit(&zp->z_acl_lock);
 		mutex_exit(&zp->z_lock);
 
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		return (error);
 	}
diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c
index 8262e9d882..f51372f521 100644
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c
@@ -783,6 +783,8 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
+		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+			dmu_tx_wait(tx);
 		dmu_tx_abort(tx);
 		return (error);
 	}
@@ -858,7 +860,7 @@ top:
 		zfs_dirent_unlock(dl);
 
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			/* NB: we already did dmu_tx_wait() if necessary */
 			goto top;
 		}
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index e5562396fc..bc2ba44350 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -108,7 +108,7 @@
  *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
- *	then drop all locks, call txg_wait_open(), and try again.
+ *	then drop all locks, call dmu_tx_wait(), and try again.
 *
 * (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
@@ -130,14 +130,15 @@
 *	dmu_tx_hold_*();	// hold each object you might modify
 *	error = dmu_tx_assign(tx, zfsvfs->z_assign);	// try to assign
 *	if (error) {
- *		dmu_tx_abort(tx);	// abort DMU tx
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- *			txg_wait_open(dmu_objset_pool(os), 0);
+ *			dmu_tx_wait(tx);
+ *			dmu_tx_abort(tx);
 *			goto top;
 *		}
+ *		dmu_tx_abort(tx);	// abort DMU tx
 *		ZFS_EXIT(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
@@ -668,11 +669,12 @@ top:
 		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
 		error = dmu_tx_assign(tx, zfsvfs->z_assign);
 		if (error) {
-			dmu_tx_abort(tx);
 			if (error == ERESTART &&
 			    zfsvfs->z_assign == TXG_NOWAIT) {
-				txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
 				goto top;
 			}
+			dmu_tx_abort(tx);
 			goto no_tx_done;
 		}
@@ -776,12 +778,13 @@ top:
 		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
 		error = dmu_tx_assign(tx, zfsvfs->z_assign);
 		if (error) {
-			dmu_tx_abort(tx);
 			if (error == ERESTART &&
 			    zfsvfs->z_assign == TXG_NOWAIT) {
-				txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
 				goto top;
 			}
+			dmu_tx_abort(tx);
 			goto no_tx_done;
 		}
 	}
@@ -1109,13 +1112,14 @@ top:
 		    0, SPA_MAXBLOCKSIZE);
 		error = dmu_tx_assign(tx, zfsvfs->z_assign);
 		if (error) {
-			dmu_tx_abort(tx);
 			zfs_dirent_unlock(dl);
 			if (error == ERESTART &&
 			    zfsvfs->z_assign == TXG_NOWAIT) {
-				txg_wait_open(dmu_objset_pool(os), 0);
+				dmu_tx_wait(tx);
+				dmu_tx_abort(tx);
 				goto top;
 			}
+			dmu_tx_abort(tx);
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
@@ -1162,8 +1166,8 @@ top:
 			error = zfs_freesp(zp, 0, 0, mode, TRUE);
 
 			if (error == ERESTART &&
 			    zfsvfs->z_assign == TXG_NOWAIT) {
+				/* NB: we already did dmu_tx_wait() */
 				zfs_dirent_unlock(dl);
-				txg_wait_open(dmu_objset_pool(os), 0);
 				goto top;
 			}
 		}
@@ -1296,13 +1300,14 @@ top:
 
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		zfs_dirent_unlock(dl);
 		VN_RELE(vp);
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -1437,12 +1442,13 @@ top:
 	    0, SPA_MAXBLOCKSIZE);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -1542,14 +1548,15 @@ top:
 	dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		rw_exit(&zp->z_parent_lock);
 		zfs_dirent_unlock(dl);
 		VN_RELE(vp);
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -1962,9 +1969,8 @@ top:
 		 * should be addressed in openat().
 		 */
 		do {
-			if (err == ERESTART)
-				txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
 			err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
+			/* NB: we already did dmu_tx_wait() if necessary */
 		} while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
 		if (err) {
 			ZFS_EXIT(zfsvfs);
@@ -2088,11 +2094,12 @@ top:
 	if (err) {
 		if (attrzp)
 			VN_RELE(ZTOV(attrzp));
-		dmu_tx_abort(tx);
 		if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (err);
 	}
@@ -2411,7 +2418,6 @@ top:
 	dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		if (zl != NULL)
 			zfs_rename_unlock(&zl);
 		zfs_dirent_unlock(sdl);
@@ -2420,9 +2426,11 @@ top:
 		if (tzp)
 			VN_RELE(ZTOV(tzp));
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -2516,12 +2524,13 @@ top:
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -2715,12 +2724,13 @@ top:
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
-		dmu_tx_abort(tx);
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
@@ -2785,12 +2795,13 @@ top:
 	dmu_tx_hold_bonus(tx, zp->z_id);
 	err = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (err != 0) {
-		dmu_tx_abort(tx);
 		zfs_range_unlock(zp, rl);
 		if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+			dmu_tx_wait(tx);
+			dmu_tx_abort(tx);
 			goto top;
 		}
+		dmu_tx_abort(tx);
 		goto out;
 	}
@@ -3412,9 +3423,8 @@ top:
 	len = bfp->l_len; /* 0 means from off to end of file */
 
 	do {
-		if (error == ERESTART)
-			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
 		error = zfs_freesp(zp, off, len, flag, TRUE);
+		/* NB: we already did dmu_tx_wait() if necessary */
 	} while (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
 
 	ZFS_EXIT(zfsvfs);
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c
index 3000fc8db3..bb113ca1af 100644
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c
@@ -993,6 +993,8 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
 
 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
 	if (error) {
+		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+			dmu_tx_wait(tx);
 		dmu_tx_abort(tx);
 		zfs_range_unlock(zp, rl);
 		return (error);
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 87810d10f7..c153d25cec 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -239,16 +239,11 @@ zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
-restart:
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
 	error = dmu_tx_assign(tx, zv->zv_txg_assign);
 	if (error) {
 		dmu_tx_abort(tx);
-		if (error == ERESTART && zv->zv_txg_assign == TXG_NOWAIT) {
-			txg_wait_open(dmu_objset_pool(os), 0);
-			goto restart;
-		}
 	} else {
 		dmu_write(os, ZVOL_OBJ, off, len, data, tx);
 		dmu_tx_commit(tx);
```
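Why this breaks the deadlock: as the NB comments added to dmu_tx_try_assign() indicate, a transaction now only sleeps after dmu_tx_unassign() has dropped its txg hold (txg_rele_to_quiesce()/txg_rele_to_sync()), so a thread stuck behind a busy dnode or a closed txg no longer pins the open txg while it waits. The new dmu_tx_wait() picks one of two waits based on what the failed try-assign recorded; here is the function from the dmu_tx.c hunk above with explanatory comments added (the comments are annotation, not part of the source):

```c
void
dmu_tx_wait(dmu_tx_t *tx)
{
    ASSERT(tx->tx_txg == 0);            /* tx must be unassigned... */
    ASSERT(tx->tx_lasttried_txg != 0);  /* ...after a failed try_assign */

    if (tx->tx_needassign_txh) {
        /*
         * A dnode in our hold list was still assigned to the
         * previous txg; sleep until its tx holds drain.
         */
        dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

        mutex_enter(&dn->dn_mtx);
        while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
            cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
        mutex_exit(&dn->dn_mtx);
        tx->tx_needassign_txh = NULL;
    } else {
        /* Otherwise wait for the next txg to open. */
        txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
    }
}
```

The per-hold space accounting (txh_space_towrite/tofree/tooverwrite on each dmu_tx_hold_t, summed in dmu_tx_try_assign()) supports this restructuring: estimates no longer mutate the shared dmu_tx_t, so a failed attempt can simply be unassigned and retried.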
