summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
author ahrens <none@none> 2006-06-02 11:59:16 -0700
committer ahrens <none@none> 2006-06-02 11:59:16 -0700
commit 8a2f1b9190d1dc288470a1fd2776d79ce82cb129 (patch)
tree 424c392c94b108379b82ca81f890daff61e89ec4 /usr/src
parent 82d33c01b078ed404a986a369750cdb4743773fb (diff)
download illumos-joyent-8a2f1b9190d1dc288470a1fd2776d79ce82cb129.tar.gz
6430121 3-way deadlock involving tc_lock within zfs
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/ztest/ztest.c | 5
-rw-r--r-- usr/src/uts/common/fs/zfs/dmu_tx.c | 439
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/dmu.h | 1
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/dmu_tx.h | 57
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/dnode.h | 1
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_acl.c | 6
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_dir.c | 4
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_vnops.c | 70
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_znode.c | 2
-rw-r--r-- usr/src/uts/common/fs/zfs/zvol.c | 5
10 files changed, 293 insertions, 297 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index e8065c74f5..2b068c0e84 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -1963,12 +1963,13 @@ ztest_dmu_write_parallel(ztest_args_t *za)
txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
error = dmu_tx_assign(tx, txg_how);
if (error) {
- dmu_tx_abort(tx);
if (error == ERESTART) {
ASSERT(txg_how == TXG_NOWAIT);
- txg_wait_open(dmu_objset_pool(os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
continue;
}
+ dmu_tx_abort(tx);
ztest_record_enospc("dmu write parallel");
return;
}
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 1b4a0c2bd0..d9c232e112 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -33,16 +33,13 @@
#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
#include <sys/dsl_pool.h>
-#include <sys/zap_impl.h> /* for ZAP_BLOCK_SHIFT */
+#include <sys/zap_impl.h> /* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/zfs_context.h>
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
uint64_t arg1, uint64_t arg2);
-#ifdef ZFS_DEBUG
-int dmu_use_tx_debug_bufs = 1;
-#endif
dmu_tx_t *
dmu_tx_create_ds(dsl_dir_t *dd)
@@ -52,9 +49,11 @@ dmu_tx_create_ds(dsl_dir_t *dd)
if (dd)
tx->tx_pool = dd->dd_pool;
list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
- offsetof(dmu_tx_hold_t, dth_node));
+ offsetof(dmu_tx_hold_t, txh_node));
+#ifdef ZFS_DEBUG
refcount_create(&tx->tx_space_written);
refcount_create(&tx->tx_space_freed);
+#endif
return (tx);
}
@@ -92,12 +91,11 @@ dmu_tx_private_ok(dmu_tx_t *tx)
return (tx->tx_anyobj);
}
-static void
+static dmu_tx_hold_t *
dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
- enum dmu_tx_hold_type type, dmu_tx_hold_func_t func,
- uint64_t arg1, uint64_t arg2)
+ enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
{
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
dnode_t *dn = NULL;
int err;
@@ -105,7 +103,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
err = dnode_hold(os->os, object, tx, &dn);
if (err) {
tx->tx_err = err;
- return;
+ return (NULL);
}
if (err == 0 && tx->tx_txg != 0) {
@@ -116,23 +114,23 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
* now, at least).
*/
ASSERT(dn->dn_assigned_txg == 0);
- ASSERT(dn->dn_assigned_tx == NULL);
dn->dn_assigned_txg = tx->tx_txg;
- dn->dn_assigned_tx = tx;
(void) refcount_add(&dn->dn_tx_holds, tx);
mutex_exit(&dn->dn_mtx);
}
}
- dth = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
- dth->dth_dnode = dn;
- dth->dth_type = type;
- dth->dth_arg1 = arg1;
- dth->dth_arg2 = arg2;
- list_insert_tail(&tx->tx_holds, dth);
+ txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
+ txh->txh_tx = tx;
+ txh->txh_dnode = dn;
+#ifdef ZFS_DEBUG
+ txh->txh_type = type;
+ txh->txh_arg1 = arg1;
+ txh->txh_arg2 = arg2;
+#endif
+ list_insert_tail(&tx->tx_holds, txh);
- if (func)
- func(tx, dn, arg1, arg2);
+ return (txh);
}
void
@@ -143,8 +141,8 @@ dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
* the hold on the dnode_t can cause problems.
*/
if (!dmu_tx_is_syncing(tx)) {
- dmu_tx_hold_object_impl(tx, os, object, THT_NEWOBJECT,
- NULL, 0, 0);
+ (void) dmu_tx_hold_object_impl(tx, os,
+ object, THT_NEWOBJECT, 0, 0);
}
}
@@ -166,10 +164,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
/* ARGSUSED */
static void
-dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
- uint64_t start, end, i, space;
+ dnode_t *dn = txh->txh_dnode;
+ uint64_t start, end, i;
int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
+ int err = 0;
if (len == 0)
return;
@@ -179,24 +179,19 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
min_ibs = DN_MIN_INDBLKSHIFT;
max_ibs = DN_MAX_INDBLKSHIFT;
+
/*
* For i/o error checking, read the first and last level-0
* blocks (if they are not aligned), and all the level-1 blocks.
- * We needn't do this on the meta-dnode, because we've already
- * read it in.
*/
- if (dn && dn->dn_object != DMU_META_DNODE_OBJECT) {
- int err;
-
+ if (dn) {
if (dn->dn_maxblkid == 0) {
err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err) {
- tx->tx_err = err;
- return;
- }
+ if (err)
+ goto out;
} else {
- zio_t *zio = zio_root(tx->tx_pool->dp_spa,
+ zio_t *zio = zio_root(dn->dn_objset->os_spa,
NULL, NULL, ZIO_FLAG_CANFAIL);
/* first level-0 block */
@@ -204,10 +199,8 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
if (P2PHASE(off, dn->dn_datablksz) ||
len < dn->dn_datablksz) {
err = dmu_tx_check_ioerr(zio, dn, 0, start);
- if (err) {
- tx->tx_err = err;
- return;
- }
+ if (err)
+ goto out;
}
/* last level-0 block */
@@ -215,10 +208,8 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
if (end != start &&
P2PHASE(off+len, dn->dn_datablksz)) {
err = dmu_tx_check_ioerr(zio, dn, 0, end);
- if (err) {
- tx->tx_err = err;
- return;
- }
+ if (err)
+ goto out;
}
/* level-1 blocks */
@@ -227,18 +218,14 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
for (i = start+1; i < end; i++) {
err = dmu_tx_check_ioerr(zio, dn, 1, i);
- if (err) {
- tx->tx_err = err;
- return;
- }
+ if (err)
+ goto out;
}
}
err = zio_wait(zio);
- if (err) {
- tx->tx_err = err;
- return;
- }
+ if (err)
+ goto out;
}
}
@@ -261,7 +248,7 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
*/
start = P2ALIGN(off, 1ULL << max_bs);
end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
- space = end - start + 1;
+ txh->txh_space_towrite += end - start + 1;
start >>= min_bs;
end >>= min_bs;
@@ -282,60 +269,60 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
* we can't increase the number of levels beyond that.
*/
if (start != 0 && end != 0)
- space += 1ULL << max_ibs;
- space += (end - start + 1) << max_ibs;
+ txh->txh_space_towrite += 1ULL << max_ibs;
+ txh->txh_space_towrite += (end - start + 1) << max_ibs;
}
- ASSERT(space < 2 * DMU_MAX_ACCESS);
+ ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
- tx->tx_space_towrite += space;
+out:
+ if (err)
+ txh->txh_tx->tx_err = err;
}
static void
-dmu_tx_count_dnode(dmu_tx_t *tx, dnode_t *dn)
+dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
- dnode_t *mdn = tx->tx_objset->os->os_meta_dnode;
- uint64_t object = dn ? dn->dn_object : DN_MAX_OBJECT - 1;
- uint64_t pre_write_space;
+ dnode_t *dn = txh->txh_dnode;
+ dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
+ uint64_t space = mdn->dn_datablksz +
+ ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
- ASSERT(object < DN_MAX_OBJECT);
- pre_write_space = tx->tx_space_towrite;
- dmu_tx_count_write(tx, mdn, object << DNODE_SHIFT, 1 << DNODE_SHIFT);
if (dn && dn->dn_dbuf->db_blkptr &&
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
dn->dn_dbuf->db_blkptr->blk_birth)) {
- tx->tx_space_tooverwrite +=
- tx->tx_space_towrite - pre_write_space;
- tx->tx_space_towrite = pre_write_space;
+ txh->txh_space_tooverwrite += space;
+ } else {
+ txh->txh_space_towrite += space;
}
}
-/* ARGSUSED */
-static void
-dmu_tx_hold_write_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
-{
- dmu_tx_count_write(tx, dn, off, len);
- dmu_tx_count_dnode(tx, dn);
-}
-
void
dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
+ dmu_tx_hold_t *txh;
+
ASSERT(tx->tx_txg == 0);
ASSERT(len < DMU_MAX_ACCESS);
ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
- dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_WRITE,
- dmu_tx_hold_write_impl, off, len);
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_WRITE, off, len);
+ if (txh == NULL) /* hold not created (a dnode_hold() failure sets tx->tx_err) */
+ return;
+
+ dmu_tx_count_write(txh, off, len); /* accumulate the estimate for [off, off+len) into this hold */
+ dmu_tx_count_dnode(txh); /* charge the meta-dnode blocks for the dnode itself (towrite or tooverwrite) */
}
static void
-dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
uint64_t blkid, nblks;
uint64_t space = 0;
+ dnode_t *dn = txh->txh_dnode;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- spa_t *spa = tx->tx_pool->dp_spa;
+ spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
int dirty;
/*
@@ -349,7 +336,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
list_link_active(&dn->dn_dirty_link[1]) |
list_link_active(&dn->dn_dirty_link[2]) |
list_link_active(&dn->dn_dirty_link[3]);
- if (dirty || dn->dn_assigned_tx || dn->dn_phys->dn_nlevels == 0)
+ if (dirty || dn->dn_assigned_txg || dn->dn_phys->dn_nlevels == 0)
return;
/*
@@ -416,7 +403,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
err = dbuf_read(dbuf, NULL,
DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
if (err != 0) {
- tx->tx_err = err;
+ txh->txh_tx->tx_err = err;
dbuf_rele(dbuf, FTAG);
break;
}
@@ -434,8 +421,8 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}
dbuf_rele(dbuf, FTAG);
}
- if (err != 0 && err != ENOENT) {
- tx->tx_err = err;
+ if (err && err != ENOENT) {
+ txh->txh_tx->tx_err = err;
break;
}
@@ -444,22 +431,32 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}
rw_exit(&dn->dn_struct_rwlock);
- tx->tx_space_tofree += space;
+ txh->txh_space_tofree += space;
}
-static void
-dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
+void
+dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
{
+ dmu_tx_hold_t *txh;
+ dnode_t *dn;
uint64_t start, end, i;
int err, shift;
zio_t *zio;
+ ASSERT(tx->tx_txg == 0);
+
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_FREE, off, len);
+ if (txh == NULL)
+ return;
+ dn = txh->txh_dnode;
+
/* first block */
if (off != 0)
- dmu_tx_count_write(tx, dn, off, 1);
+ dmu_tx_count_write(txh, off, 1);
/* last block */
if (len != DMU_OBJECT_END)
- dmu_tx_count_write(tx, dn, off+len, 1);
+ dmu_tx_count_write(txh, off+len, 1);
if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
return;
@@ -503,28 +500,27 @@ dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}
}
- dmu_tx_count_dnode(tx, dn);
- dmu_tx_count_free(tx, dn, off, len);
+ dmu_tx_count_dnode(txh);
+ dmu_tx_count_free(txh, off, len);
}
void
-dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
-{
- ASSERT(tx->tx_txg == 0);
-
- dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_FREE,
- dmu_tx_hold_free_impl, off, len);
-}
-
-/* ARGSUSED */
-static void
-dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
{
+ dmu_tx_hold_t *txh;
+ dnode_t *dn;
uint64_t nblocks;
int epbs, err;
- char *name = (char *)(uintptr_t)iname;
- dmu_tx_count_dnode(tx, dn);
+ ASSERT(tx->tx_txg == 0);
+
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_ZAP, add, (uintptr_t)name);
+ if (txh == NULL)
+ return;
+ dn = txh->txh_dnode;
+
+ dmu_tx_count_dnode(txh);
if (dn == NULL) {
/*
@@ -532,7 +528,7 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
* block. So there will be at most 2 blocks total,
* including the header block.
*/
- dmu_tx_count_write(tx, dn, 0, 2 << fzap_default_block_shift);
+ dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
return;
}
@@ -551,9 +547,9 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
dn->dn_phys->dn_blkptr[0].blk_birth))
- tx->tx_space_tooverwrite += dn->dn_datablksz;
+ txh->txh_space_tooverwrite += dn->dn_datablksz;
else
- tx->tx_space_towrite += dn->dn_datablksz;
+ txh->txh_space_towrite += dn->dn_datablksz;
return;
}
@@ -574,7 +570,7 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
* 3 blocks overwritten: target leaf, ptrtbl block, header block
* 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
*/
- dmu_tx_count_write(tx, dn, dn->dn_maxblkid * dn->dn_datablksz,
- (3 + add ? 3 : 0) << dn->dn_datablkshift);
+ dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
+ (3 + (add ? 3 : 0)) << dn->dn_datablkshift); /* ?: binds looser than +, so it must be parenthesized */
/*
@@ -583,49 +579,38 @@ dmu_tx_hold_zap_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t add, uint64_t iname)
*/
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
- tx->tx_space_towrite += 3 << dn->dn_indblkshift;
-}
-
-void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
-{
- ASSERT(tx->tx_txg == 0);
-
- dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_ZAP,
- dmu_tx_hold_zap_impl, add, (uintptr_t)name);
+ txh->txh_space_towrite += 3 << dn->dn_indblkshift;
}
void
dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
{
- ASSERT(tx->tx_txg == 0);
-
- dmu_tx_hold_object_impl(tx, tx->tx_objset, object, THT_BONUS,
- dmu_tx_hold_write_impl, 0, 0);
-}
+ dmu_tx_hold_t *txh;
+ ASSERT(tx->tx_txg == 0);
-/* ARGSUSED */
-static void
-dmu_tx_hold_space_impl(dmu_tx_t *tx, dnode_t *dn,
- uint64_t space, uint64_t unused)
-{
- tx->tx_space_towrite += space;
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_BONUS, 0, 0);
+ if (txh)
+ dmu_tx_count_dnode(txh);
}
+/*
+ * Add `space' bytes to the write-space estimate of this tx.  The hold is
+ * created with DMU_NEW_OBJECT rather than an existing object number, and
+ * must be taken before the tx is assigned to a txg.
+ */
void
dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
+ dmu_tx_hold_t *txh;
ASSERT(tx->tx_txg == 0);
- dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, THT_SPACE,
- dmu_tx_hold_space_impl, space, 0);
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ DMU_NEW_OBJECT, THT_SPACE, space, 0);
+ /* like every other dmu_tx_hold_*() caller, do not touch a hold that was not created */
+ if (txh == NULL)
+ return;
+
+ txh->txh_space_towrite += space;
}
int
dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
{
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
int holds = 0;
/*
@@ -639,9 +624,9 @@ dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
/* if (tx->tx_anyobj == TRUE) */
/* return (0); */
- for (dth = list_head(&tx->tx_holds); dth;
- dth = list_next(&tx->tx_holds, dth)) {
- if (dth->dth_dnode && dth->dth_dnode->dn_object == object)
+ for (txh = list_head(&tx->tx_holds); txh;
+ txh = list_next(&tx->tx_holds, txh)) {
+ if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
holds++;
}
@@ -652,7 +637,7 @@ dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
int match_object = FALSE, match_offset = FALSE;
dnode_t *dn = db->db_dnode;
@@ -667,28 +652,28 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
if (db->db.db_object == DMU_META_DNODE_OBJECT)
return;
- for (dth = list_head(&tx->tx_holds); dth;
- dth = list_next(&tx->tx_holds, dth)) {
+ for (txh = list_head(&tx->tx_holds); txh;
+ txh = list_next(&tx->tx_holds, txh)) {
ASSERT(dn == NULL || dn->dn_assigned_txg == tx->tx_txg);
- if (dth->dth_dnode == dn && dth->dth_type != THT_NEWOBJECT)
+ if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
match_object = TRUE;
- if (dth->dth_dnode == NULL || dth->dth_dnode == dn) {
+ if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
int datablkshift = dn->dn_datablkshift ?
dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
int shift = datablkshift + epbs * db->db_level;
uint64_t beginblk = shift >= 64 ? 0 :
- (dth->dth_arg1 >> shift);
+ (txh->txh_arg1 >> shift);
uint64_t endblk = shift >= 64 ? 0 :
- ((dth->dth_arg1 + dth->dth_arg2 - 1) >> shift);
+ ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
uint64_t blkid = db->db_blkid;
- /* XXX dth_arg2 better not be zero... */
+ /* XXX txh_arg2 better not be zero... */
- dprintf("found dth type %x beginblk=%llx endblk=%llx\n",
- dth->dth_type, beginblk, endblk);
+ dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
+ txh->txh_type, beginblk, endblk);
- switch (dth->dth_type) {
+ switch (txh->txh_type) {
case THT_WRITE:
if (blkid >= beginblk && blkid <= endblk)
match_offset = TRUE;
@@ -710,11 +695,11 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
break;
case THT_FREE:
if (blkid == beginblk &&
- (dth->dth_arg1 != 0 ||
+ (txh->txh_arg1 != 0 ||
dn->dn_maxblkid == 0))
match_offset = TRUE;
if (blkid == endblk &&
- dth->dth_arg2 != DMU_OBJECT_END)
+ txh->txh_arg2 != DMU_OBJECT_END)
match_offset = TRUE;
break;
case THT_BONUS:
@@ -728,7 +713,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
match_object = TRUE;
break;
default:
- ASSERT(!"bad dth_type");
+ ASSERT(!"bad txh_type");
}
}
if (match_object && match_offset)
@@ -741,104 +726,108 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
#endif
static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how, dmu_tx_hold_t **last_dth)
+dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
- dmu_tx_hold_t *dth;
- uint64_t lsize, asize, fsize, towrite;
+ dmu_tx_hold_t *txh;
+ uint64_t lsize, asize, fsize, towrite, tofree, tooverwrite;
- *last_dth = NULL;
+ ASSERT3U(tx->tx_txg, ==, 0);
+ if (tx->tx_err)
+ return (tx->tx_err);
tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
+ tx->tx_needassign_txh = NULL;
- if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
- return (ERESTART);
- if (tx->tx_err)
- return (tx->tx_err);
+ /*
+ * NB: No error returns are allowed after txg_hold_open, but
+ * before processing the dnode holds, due to the
+ * dmu_tx_unassign() logic.
+ */
- for (dth = list_head(&tx->tx_holds); dth;
- dth = list_next(&tx->tx_holds, dth)) {
- dnode_t *dn = dth->dth_dnode;
+ towrite = tofree = tooverwrite = 0;
+ for (txh = list_head(&tx->tx_holds); txh;
+ txh = list_next(&tx->tx_holds, txh)) {
+ dnode_t *dn = txh->txh_dnode;
if (dn != NULL) {
mutex_enter(&dn->dn_mtx);
- while (dn->dn_assigned_txg == tx->tx_txg - 1) {
- if (txg_how != TXG_WAIT) {
- mutex_exit(&dn->dn_mtx);
- return (ERESTART);
- }
- cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
+ if (dn->dn_assigned_txg == tx->tx_txg - 1) {
+ mutex_exit(&dn->dn_mtx);
+ tx->tx_needassign_txh = txh;
+ return (ERESTART);
}
- if (dn->dn_assigned_txg == 0) {
- ASSERT(dn->dn_assigned_tx == NULL);
+ if (dn->dn_assigned_txg == 0)
dn->dn_assigned_txg = tx->tx_txg;
- dn->dn_assigned_tx = tx;
- } else {
- ASSERT(dn->dn_assigned_txg == tx->tx_txg);
- if (dn->dn_assigned_tx != tx)
- dn->dn_assigned_tx = NULL;
- }
+ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
(void) refcount_add(&dn->dn_tx_holds, tx);
mutex_exit(&dn->dn_mtx);
}
- *last_dth = dth;
- if (tx->tx_err)
- return (tx->tx_err);
+ towrite += txh->txh_space_towrite;
+ tofree += txh->txh_space_tofree;
+ tooverwrite += txh->txh_space_tooverwrite;
}
/*
+ * NB: This check must be after we've held the dnodes, so that
+ * the dmu_tx_unassign() logic will work properly
+ */
+ if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
+ return (ERESTART);
+
+ /*
* If a snapshot has been taken since we made our estimates,
* assume that we won't be able to free or overwrite anything.
*/
if (tx->tx_objset &&
dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
tx->tx_lastsnap_txg) {
- tx->tx_space_towrite += tx->tx_space_tooverwrite;
- tx->tx_space_tooverwrite = 0;
- tx->tx_space_tofree = 0;
+ towrite += tooverwrite;
+ tooverwrite = tofree = 0;
}
/*
* Convert logical size to worst-case allocated size.
*/
- fsize = spa_get_asize(tx->tx_pool->dp_spa, tx->tx_space_tooverwrite) +
- tx->tx_space_tofree;
- lsize = tx->tx_space_towrite + tx->tx_space_tooverwrite;
+ fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
+ lsize = towrite + tooverwrite;
asize = spa_get_asize(tx->tx_pool->dp_spa, lsize);
- towrite = tx->tx_space_towrite;
+
+#ifdef ZFS_DEBUG
tx->tx_space_towrite = asize;
+ tx->tx_space_tofree = tofree;
+ tx->tx_space_tooverwrite = tooverwrite;
+#endif
if (tx->tx_dir && asize != 0) {
int err = dsl_dir_tempreserve_space(tx->tx_dir,
lsize, asize, fsize, &tx->tx_tempreserve_cookie, tx);
- if (err) {
- tx->tx_space_towrite = towrite;
+ if (err)
return (err);
- }
}
return (0);
}
-static uint64_t
-dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
+static void
+dmu_tx_unassign(dmu_tx_t *tx)
{
- uint64_t txg = tx->tx_txg;
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
- ASSERT(txg != 0);
+ if (tx->tx_txg == 0)
+ return;
txg_rele_to_quiesce(&tx->tx_txgh);
- for (dth = last_dth; dth; dth = list_prev(&tx->tx_holds, dth)) {
- dnode_t *dn = dth->dth_dnode;
+ for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
+ txh = list_next(&tx->tx_holds, txh)) {
+ dnode_t *dn = txh->txh_dnode;
if (dn == NULL)
continue;
mutex_enter(&dn->dn_mtx);
- ASSERT3U(dn->dn_assigned_txg, ==, txg);
+ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
dn->dn_assigned_txg = 0;
- dn->dn_assigned_tx = NULL;
cv_broadcast(&dn->dn_notxholds);
}
mutex_exit(&dn->dn_mtx);
@@ -846,8 +835,8 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
txg_rele_to_sync(&tx->tx_txgh);
+ tx->tx_lasttried_txg = tx->tx_txg;
tx->tx_txg = 0;
- return (txg);
}
/*
@@ -860,7 +849,7 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
* (2) TXG_NOWAIT. If we can't assign into the current open txg without
* blocking, returns immediately with ERESTART. This should be used
* whenever you're holding locks. On an ERESTART error, the caller
- * should drop locks, do a txg_wait_open(dp, 0), and try again.
+ * should drop locks, do a dmu_tx_wait(tx), and try again.
*
* (3) A specific txg. Use this if you need to ensure that multiple
* transactions all sync in the same txg. Like TXG_NOWAIT, it
@@ -869,20 +858,19 @@ dmu_tx_unassign(dmu_tx_t *tx, dmu_tx_hold_t *last_dth)
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
{
- dmu_tx_hold_t *last_dth;
int err;
ASSERT(tx->tx_txg == 0);
ASSERT(txg_how != 0);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
- while ((err = dmu_tx_try_assign(tx, txg_how, &last_dth)) != 0) {
- uint64_t txg = dmu_tx_unassign(tx, last_dth);
+ while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
+ dmu_tx_unassign(tx);
if (err != ERESTART || txg_how != TXG_WAIT)
return (err);
- txg_wait_open(tx->tx_pool, txg + 1);
+ dmu_tx_wait(tx);
}
txg_rele_to_quiesce(&tx->tx_txgh);
@@ -891,8 +879,28 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
}
void
+dmu_tx_wait(dmu_tx_t *tx)
+{ /*
+ * Wait for the condition that made the previous assignment attempt
+ * fail to clear, so the caller can retry.  The tx must be unassigned
+ * (tx_txg == 0) and must have recorded the txg it last tried in
+ * tx_lasttried_txg (dmu_tx_unassign() stores it).
+ *
+ * If dmu_tx_try_assign() stopped on a particular hold because its
+ * dnode was still assigned to the previous txg (tx_needassign_txh),
+ * sleep on that dnode's dn_notxholds until dn_assigned_txg no longer
+ * equals tx_lasttried_txg - 1, then forget the hold.  Otherwise call
+ * txg_wait_open() for the txg after the one last tried.
+ */
+ ASSERT(tx->tx_txg == 0);
+ ASSERT(tx->tx_lasttried_txg != 0);
+
+ if (tx->tx_needassign_txh) {
+ dnode_t *dn = tx->tx_needassign_txh->txh_dnode;
+
+ mutex_enter(&dn->dn_mtx);
+ while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
+ cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
+ mutex_exit(&dn->dn_mtx);
+ tx->tx_needassign_txh = NULL;
+ } else {
+ txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
+ }
+}
+
+void
dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
{
+#ifdef ZFS_DEBUG
if (tx->tx_dir == NULL || delta == 0)
return;
@@ -903,20 +911,21 @@ dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
} else {
(void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
}
+#endif
}
void
dmu_tx_commit(dmu_tx_t *tx)
{
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
ASSERT(tx->tx_txg != 0);
- while (dth = list_head(&tx->tx_holds)) {
- dnode_t *dn = dth->dth_dnode;
+ while (txh = list_head(&tx->tx_holds)) {
+ dnode_t *dn = txh->txh_dnode;
- list_remove(&tx->tx_holds, dth);
- kmem_free(dth, sizeof (dmu_tx_hold_t));
+ list_remove(&tx->tx_holds, txh);
+ kmem_free(txh, sizeof (dmu_tx_hold_t));
if (dn == NULL)
continue;
mutex_enter(&dn->dn_mtx);
@@ -924,19 +933,18 @@ dmu_tx_commit(dmu_tx_t *tx)
if (refcount_remove(&dn->dn_tx_holds, tx) == 0) {
dn->dn_assigned_txg = 0;
- dn->dn_assigned_tx = NULL;
cv_broadcast(&dn->dn_notxholds);
}
mutex_exit(&dn->dn_mtx);
dnode_rele(dn, tx);
}
- if (tx->tx_dir && tx->tx_space_towrite > 0) {
+ if (tx->tx_tempreserve_cookie)
dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
- }
if (tx->tx_anyobj == FALSE)
txg_rele_to_sync(&tx->tx_txgh);
+#ifdef ZFS_DEBUG
dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
@@ -944,9 +952,6 @@ dmu_tx_commit(dmu_tx_t *tx)
refcount_count(&tx->tx_space_written));
refcount_destroy_many(&tx->tx_space_freed,
refcount_count(&tx->tx_space_freed));
-#ifdef ZFS_DEBUG
- if (tx->tx_debug_buf)
- kmem_free(tx->tx_debug_buf, 4096);
#endif
kmem_free(tx, sizeof (dmu_tx_t));
}
@@ -954,25 +959,23 @@ dmu_tx_commit(dmu_tx_t *tx)
void
dmu_tx_abort(dmu_tx_t *tx)
{
- dmu_tx_hold_t *dth;
+ dmu_tx_hold_t *txh;
ASSERT(tx->tx_txg == 0);
- while (dth = list_head(&tx->tx_holds)) {
- dnode_t *dn = dth->dth_dnode;
+ while (txh = list_head(&tx->tx_holds)) {
+ dnode_t *dn = txh->txh_dnode;
- list_remove(&tx->tx_holds, dth);
- kmem_free(dth, sizeof (dmu_tx_hold_t));
+ list_remove(&tx->tx_holds, txh);
+ kmem_free(txh, sizeof (dmu_tx_hold_t));
if (dn != NULL)
dnode_rele(dn, tx);
}
+#ifdef ZFS_DEBUG
refcount_destroy_many(&tx->tx_space_written,
refcount_count(&tx->tx_space_written));
refcount_destroy_many(&tx->tx_space_freed,
refcount_count(&tx->tx_space_freed));
-#ifdef ZFS_DEBUG
- if (tx->tx_debug_buf)
- kmem_free(tx->tx_debug_buf, 4096);
#endif
kmem_free(tx, sizeof (dmu_tx_t));
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 88b59a1618..b24c7132e2 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -400,6 +400,7 @@ void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
/*
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
index 9b55c56bc9..422d9d3ffb 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
@@ -38,6 +38,7 @@ extern "C" {
#endif
struct dmu_buf_impl;
+struct dmu_tx_hold;
struct dnode_link;
struct dsl_pool;
struct dnode;
@@ -54,18 +55,18 @@ struct dmu_tx {
struct dsl_pool *tx_pool;
uint64_t tx_txg;
uint64_t tx_lastsnap_txg;
+ uint64_t tx_lasttried_txg;
txg_handle_t tx_txgh;
- uint64_t tx_space_towrite;
- refcount_t tx_space_written;
- uint64_t tx_space_tofree;
- refcount_t tx_space_freed;
- uint64_t tx_space_tooverwrite;
void *tx_tempreserve_cookie;
+ struct dmu_tx_hold *tx_needassign_txh;
uint8_t tx_anyobj;
int tx_err;
#ifdef ZFS_DEBUG
- char *tx_debug_buf;
- int tx_debug_len;
+ uint64_t tx_space_towrite;
+ uint64_t tx_space_tofree;
+ uint64_t tx_space_tooverwrite;
+ refcount_t tx_space_written;
+ refcount_t tx_space_freed;
#endif
};
@@ -80,12 +81,17 @@ enum dmu_tx_hold_type {
};
typedef struct dmu_tx_hold {
- list_node_t dth_node;
- struct dnode *dth_dnode;
- enum dmu_tx_hold_type dth_type;
- uint64_t dth_arg1;
- uint64_t dth_arg2;
- /* XXX track what the actual estimates were for this hold */
+ dmu_tx_t *txh_tx;
+ list_node_t txh_node;
+ struct dnode *txh_dnode;
+ uint64_t txh_space_towrite;
+ uint64_t txh_space_tofree;
+ uint64_t txh_space_tooverwrite;
+#ifdef ZFS_DEBUG
+ enum dmu_tx_hold_type txh_type;
+ uint64_t txh_arg1;
+ uint64_t txh_arg2;
+#endif
} dmu_tx_hold_t;
@@ -97,6 +103,7 @@ int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
+void dmu_tx_wait(dmu_tx_t *tx);
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
@@ -116,33 +123,9 @@ int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
#ifdef ZFS_DEBUG
-
-extern int dmu_use_tx_debug_bufs;
-
-#define dprintf_tx(tx, fmt, ...) \
- if (dmu_use_tx_debug_bufs) \
- do { \
- char *__bufp; \
- int __len; \
- if (tx->tx_debug_buf == NULL) { \
- __bufp = kmem_zalloc(4096, KM_SLEEP); \
- tx->tx_debug_buf = __bufp; \
- tx->tx_debug_len = __len = 4096; \
- } else { \
- __len = tx->tx_debug_len; \
- __bufp = &tx->tx_debug_buf[4096-__len]; \
- } \
- tx->tx_debug_len -= snprintf(__bufp, __len, fmt, __VA_ARGS__); \
-_NOTE(CONSTCOND) } while (0); \
- else dprintf(fmt, __VA_ARGS__)
-
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
-
#else
-
-#define dprintf_tx(tx, fmt, ...)
#define DMU_TX_DIRTY_BUF(tx, db)
-
#endif
#ifdef __cplusplus
diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h
index 48b06a6749..e7158bc4c1 100644
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h
@@ -167,7 +167,6 @@ typedef struct dnode {
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
- struct dmu_tx *dn_assigned_tx; /* if only one tx cares */
kcondvar_t dn_notxholds;
enum dnode_dirtycontext dn_dirtyctx;
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
diff --git a/usr/src/uts/common/fs/zfs/zfs_acl.c b/usr/src/uts/common/fs/zfs/zfs_acl.c
index 8a3f63d037..42fabdda49 100644
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c
@@ -1192,15 +1192,15 @@ top:
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
-
mutex_exit(&zp->z_acl_lock);
mutex_exit(&zp->z_lock);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
return (error);
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c
index 8262e9d882..f51372f521 100644
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c
@@ -783,6 +783,8 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
+ if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+ dmu_tx_wait(tx);
dmu_tx_abort(tx);
return (error);
}
@@ -858,7 +860,7 @@ top:
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ /* NB: we already did dmu_tx_wait() if necessary */
goto top;
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index e5562396fc..bc2ba44350 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -108,7 +108,7 @@
* forever, because the previous txg can't quiesce until B's tx commits.
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
- * then drop all locks, call txg_wait_open(), and try again.
+ * then drop all locks, call dmu_tx_wait(), and try again.
*
* (5) If the operation succeeded, generate the intent log entry for it
* before dropping locks. This ensures that the ordering of events
@@ -130,14 +130,15 @@
* dmu_tx_hold_*(); // hold each object you might modify
* error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign
* if (error) {
- * dmu_tx_abort(tx); // abort DMU tx
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
* if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- * txg_wait_open(dmu_objset_pool(os), 0);
+ * dmu_tx_wait(tx);
+ * dmu_tx_abort(tx);
* goto top;
* }
+ * dmu_tx_abort(tx); // abort DMU tx
* ZFS_EXIT(zfsvfs); // finished in zfs
* return (error); // really out of space
* }
@@ -668,11 +669,12 @@ top:
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
goto no_tx_done;
}
@@ -776,12 +778,13 @@ top:
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
if (error == ERESTART &&
zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
goto no_tx_done;
}
}
@@ -1109,13 +1112,14 @@ top:
0, SPA_MAXBLOCKSIZE);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
zfs_dirent_unlock(dl);
if (error == ERESTART &&
zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1162,8 +1166,8 @@ top:
error = zfs_freesp(zp, 0, 0, mode, TRUE);
if (error == ERESTART &&
zfsvfs->z_assign == TXG_NOWAIT) {
+ /* NB: zfs_freesp() did dmu_tx_wait() */
zfs_dirent_unlock(dl);
- txg_wait_open(dmu_objset_pool(os), 0);
goto top;
}
}
@@ -1296,13 +1300,14 @@ top:
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1437,12 +1442,13 @@ top:
0, SPA_MAXBLOCKSIZE);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1542,14 +1548,15 @@ top:
dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
rw_exit(&zp->z_parent_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1962,9 +1969,8 @@ top:
* should be addressed in openat().
*/
do {
- if (err == ERESTART)
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
+ /* NB: zfs_freesp() did dmu_tx_wait() if necessary */
} while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
if (err) {
ZFS_EXIT(zfsvfs);
@@ -2088,11 +2094,12 @@ top:
if (err) {
if (attrzp)
VN_RELE(ZTOV(attrzp));
- dmu_tx_abort(tx);
if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (err);
}
@@ -2411,7 +2418,6 @@ top:
dmu_tx_hold_zap(tx, zfsvfs->z_dqueue, FALSE, NULL);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
if (zl != NULL)
zfs_rename_unlock(&zl);
zfs_dirent_unlock(sdl);
@@ -2420,9 +2426,11 @@ top:
if (tzp)
VN_RELE(ZTOV(tzp));
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -2516,12 +2524,13 @@ top:
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -2715,12 +2724,13 @@ top:
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
- dmu_tx_abort(tx);
zfs_dirent_unlock(dl);
if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -2785,12 +2795,13 @@ top:
dmu_tx_hold_bonus(tx, zp->z_id);
err = dmu_tx_assign(tx, zfsvfs->z_assign);
if (err != 0) {
- dmu_tx_abort(tx);
zfs_range_unlock(zp, rl);
if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
goto top;
}
+ dmu_tx_abort(tx);
goto out;
}
@@ -3412,9 +3423,8 @@ top:
len = bfp->l_len; /* 0 means from off to end of file */
do {
- if (error == ERESTART)
- txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
error = zfs_freesp(zp, off, len, flag, TRUE);
+ /* NB: zfs_freesp() did dmu_tx_wait() if necessary */
} while (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
ZFS_EXIT(zfsvfs);
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c
index 3000fc8db3..bb113ca1af 100644
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c
@@ -993,6 +993,8 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
error = dmu_tx_assign(tx, zfsvfs->z_assign);
if (error) {
+ if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
+ dmu_tx_wait(tx);
dmu_tx_abort(tx);
zfs_range_unlock(zp, rl);
return (error);
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 87810d10f7..c153d25cec 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -239,16 +239,11 @@ zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
-restart:
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
error = dmu_tx_assign(tx, zv->zv_txg_assign);
if (error) {
dmu_tx_abort(tx);
- if (error == ERESTART && zv->zv_txg_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(os), 0);
- goto restart;
- }
} else {
dmu_write(os, ZVOL_OBJ, off, len, data, tx);
dmu_tx_commit(tx);