summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matthew Ahrens <mahrens@delphix.com> 2013-02-28 12:44:05 -0800
committer: Christopher Siden <chris.siden@delphix.com> 2013-02-28 12:44:05 -0800
commit: 3b2aab18808792cbd248a12f1edf139b89833c13 (patch)
tree: e39983fcf021dd4318db022a25376018a2f21e2c
parent: 584d084a45d320c86a541cf9072cccd91b4da17b (diff)
download: illumos-gate-3b2aab18808792cbd248a12f1edf139b89833c13.tar.gz
3464 zfs synctask code needs restructuring
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Approved by: Garrett D'Amore <garrett@damore.org>
-rw-r--r--usr/src/cmd/mdb/common/modules/zfs/zfs.c2
-rw-r--r--usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c4
-rw-r--r--usr/src/cmd/truss/expound.c5
-rw-r--r--usr/src/cmd/zdb/zdb.c9
-rw-r--r--usr/src/cmd/zfs/zfs_main.c116
-rw-r--r--usr/src/cmd/zhack/zhack.c27
-rw-r--r--usr/src/cmd/ztest/ztest.c190
-rw-r--r--usr/src/common/nvpair/fnvpair.c13
-rw-r--r--usr/src/lib/libnvpair/mapfile-vers1
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h6
-rw-r--r--usr/src/lib/libzfs/common/libzfs_dataset.c316
-rw-r--r--usr/src/lib/libzfs/common/libzfs_sendrecv.c12
-rw-r--r--usr/src/lib/libzfs_core/common/libzfs_core.c102
-rw-r--r--usr/src/lib/libzfs_core/common/libzfs_core.h4
-rw-r--r--usr/src/lib/libzfs_core/common/mapfile-vers3
-rw-r--r--usr/src/lib/libzpool/common/kernel.c5
-rw-r--r--usr/src/lib/libzpool/common/llib-lzpool3
-rw-r--r--usr/src/lib/libzpool/common/sys/zfs_context.h15
-rw-r--r--usr/src/man/man1m/zfs.1m6
-rw-r--r--usr/src/uts/common/Makefile.files4
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c10
-rw-r--r--usr/src/uts/common/fs/zfs/bplist.c8
-rw-r--r--usr/src/uts/common/fs/zfs/bpobj.c4
-rw-r--r--usr/src/uts/common/fs/zfs/dbuf.c85
-rw-r--r--usr/src/uts/common/fs/zfs/dmu.c2
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_diff.c80
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_objset.c855
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_send.c911
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_traverse.c42
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c31
-rw-r--r--usr/src/uts/common/fs/zfs/dnode.c6
-rw-r--r--usr/src/uts/common/fs/zfs/dnode_sync.c1
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dataset.c3578
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_deleg.c138
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_destroy.c926
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dir.c587
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_pool.c216
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_prop.c475
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_scan.c118
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_synctask.c243
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_userhold.c536
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c38
-rw-r--r--usr/src/uts/common/fs/zfs/refcount.c21
-rw-r--r--usr/src/uts/common/fs/zfs/rrwlock.c41
-rw-r--r--usr/src/uts/common/fs/zfs/sa.c6
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c81
-rw-r--r--usr/src/uts/common/fs/zfs/spa_history.c26
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c23
-rw-r--r--usr/src/uts/common/fs/zfs/space_map.c43
-rw-r--r--usr/src/uts/common/fs/zfs/sys/arc.h2
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dbuf.h9
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu.h52
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu_objset.h11
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu_send.h66
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu_tx.h6
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_dataset.h112
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_destroy.h52
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_dir.h17
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_pool.h16
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_prop.h53
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_synctask.h46
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_userhold.h57
-rw-r--r--usr/src/uts/common/fs/zfs/sys/metaslab.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/refcount.h5
-rw-r--r--usr/src/uts/common/fs/zfs/sys/rrwlock.h7
-rw-r--r--usr/src/uts/common/fs/zfs/sys/space_map.h2
-rw-r--r--usr/src/uts/common/fs/zfs/sys/txg.h9
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfeature.h17
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_debug.h12
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h4
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_znode.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zil.h4
-rw-r--r--usr/src/uts/common/fs/zfs/txg.c37
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ctldir.c33
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c1083
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c74
-rw-r--r--usr/src/uts/common/fs/zfs/zil.c137
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c8
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c85
-rw-r--r--usr/src/uts/common/sys/nvpair.h1
80 files changed, 6227 insertions, 5770 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 83cfa66dcb..17b905d81c 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -859,7 +859,7 @@ dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
NULL) != argc)
return (DCMD_USAGE);
- if (mdb_lookup_by_name("zfs_dbgmsgs", &sym)) {
+ if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
mdb_warn("can't find zfs_dbgmsgs");
return (DCMD_ERR);
}
diff --git a/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c b/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c
index c9849e184f..d607a4b178 100644
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
/*
@@ -202,8 +203,7 @@ snapshot_hold(char *volname, char *snapname, char *jname, boolean_t recursive)
}
p = strchr(snapname, '@') + 1;
- if (zfs_hold(zhp, p, jname, recursive, B_TRUE, B_FALSE,
- cleanup_fd, 0, 0) != 0) {
+ if (zfs_hold(zhp, p, jname, recursive, B_FALSE, cleanup_fd) != 0) {
NDMP_LOG(LOG_ERR, "Cannot hold snapshot %s", p);
zfs_close(zhp);
return (-1);
diff --git a/usr/src/cmd/truss/expound.c b/usr/src/cmd/truss/expound.c
index 9a14502a95..422ead7df9 100644
--- a/usr/src/cmd/truss/expound.c
+++ b/usr/src/cmd/truss/expound.c
@@ -22,6 +22,7 @@
/*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -4876,9 +4877,7 @@ show_zfs_ioc(private_t *pri, long addr)
if (zc.zc_value[0])
(void) printf(" zc_value=%s\n", zc.zc_value);
if (zc.zc_string[0])
- (void) printf(" zc_strign=%s\n", zc.zc_string);
- if (zc.zc_top_ds[0])
- (void) printf(" zc_top_ds=%s\n", zc.zc_top_ds);
+ (void) printf(" zc_string=%s\n", zc.zc_string);
if (zc.zc_guid != 0) {
(void) printf(" zc_guid=%llu\n",
(u_longlong_t)zc.zc_guid);
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 2234a71528..46a630fb42 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -1658,7 +1658,9 @@ dump_dir(objset_t *os)
int print_header = 1;
int i, error;
+ dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
+ dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (dds.dds_type < DMU_OST_NUMTYPES)
type = objset_types[dds.dds_type];
@@ -2109,7 +2111,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zio_nowait(zio_read(NULL, spa, bp, data, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
-
}
zcb->zcb_readfails = 0;
@@ -2297,8 +2298,10 @@ dump_block_stats(spa_t *spa)
*/
(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
count_block_cb, &zcb, NULL);
- (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
- count_block_cb, &zcb, NULL);
+ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
+ count_block_cb, &zcb, NULL);
+ }
if (spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index e5e35e6f16..5e95aed88b 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -898,6 +898,7 @@ typedef struct destroy_cbdata {
boolean_t cb_parsable;
boolean_t cb_dryrun;
nvlist_t *cb_nvl;
+ nvlist_t *cb_batchedsnaps;
/* first snap in contiguous run */
char *cb_firstsnap;
@@ -994,9 +995,27 @@ destroy_callback(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
+ if (cb->cb_dryrun) {
+ zfs_close(zhp);
+ return (0);
+ }
+
+ /*
+ * We batch up all contiguous snapshots (even of different
+ * filesystems) and destroy them with one ioctl. We can't
+ * simply do all snap deletions and then all fs deletions,
+ * because we must delete a clone before its origin.
+ */
+ if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
+ fnvlist_add_boolean(cb->cb_batchedsnaps, name);
+ } else {
+ int error = zfs_destroy_snaps_nvl(g_zfs,
+ cb->cb_batchedsnaps, B_FALSE);
+ fnvlist_free(cb->cb_batchedsnaps);
+ cb->cb_batchedsnaps = fnvlist_alloc();
- if (!cb->cb_dryrun) {
- if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
+ if (error != 0 ||
+ zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
zfs_close(zhp);
return (-1);
@@ -1152,8 +1171,10 @@ static int
zfs_do_destroy(int argc, char **argv)
{
destroy_cbdata_t cb = { 0 };
+ int rv = 0;
+ int err = 0;
int c;
- zfs_handle_t *zhp;
+ zfs_handle_t *zhp = NULL;
char *at;
zfs_type_t type = ZFS_TYPE_DATASET;
@@ -1207,11 +1228,9 @@ zfs_do_destroy(int argc, char **argv)
at = strchr(argv[0], '@');
if (at != NULL) {
- int err = 0;
/* Build the list of snaps to destroy in cb_nvl. */
- if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0)
- nomem();
+ cb.cb_nvl = fnvlist_alloc();
*at = '\0';
zhp = zfs_open(g_zfs, argv[0],
@@ -1222,17 +1241,15 @@ zfs_do_destroy(int argc, char **argv)
cb.cb_snapspec = at + 1;
if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 ||
cb.cb_error) {
- zfs_close(zhp);
- nvlist_free(cb.cb_nvl);
- return (1);
+ rv = 1;
+ goto out;
}
if (nvlist_empty(cb.cb_nvl)) {
(void) fprintf(stderr, gettext("could not find any "
"snapshots to destroy; check snapshot names.\n"));
- zfs_close(zhp);
- nvlist_free(cb.cb_nvl);
- return (1);
+ rv = 1;
+ goto out;
}
if (cb.cb_verbose) {
@@ -1251,18 +1268,26 @@ zfs_do_destroy(int argc, char **argv)
}
if (!cb.cb_dryrun) {
- if (cb.cb_doclones)
+ if (cb.cb_doclones) {
+ cb.cb_batchedsnaps = fnvlist_alloc();
err = destroy_clones(&cb);
+ if (err == 0) {
+ err = zfs_destroy_snaps_nvl(g_zfs,
+ cb.cb_batchedsnaps, B_FALSE);
+ }
+ if (err != 0) {
+ rv = 1;
+ goto out;
+ }
+ }
if (err == 0) {
- err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl,
+ err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl,
cb.cb_defer_destroy);
}
}
- zfs_close(zhp);
- nvlist_free(cb.cb_nvl);
if (err != 0)
- return (1);
+ rv = 1;
} else {
/* Open the given dataset */
if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
@@ -1283,8 +1308,8 @@ zfs_do_destroy(int argc, char **argv)
zfs_get_name(zhp));
(void) fprintf(stderr, gettext("use 'zpool destroy %s' "
"to destroy the pool itself\n"), zfs_get_name(zhp));
- zfs_close(zhp);
- return (1);
+ rv = 1;
+ goto out;
}
/*
@@ -1294,30 +1319,42 @@ zfs_do_destroy(int argc, char **argv)
if (!cb.cb_doclones &&
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
&cb) != 0) {
- zfs_close(zhp);
- return (1);
+ rv = 1;
+ goto out;
}
if (cb.cb_error) {
- zfs_close(zhp);
- return (1);
+ rv = 1;
+ goto out;
}
+ cb.cb_batchedsnaps = fnvlist_alloc();
if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback,
&cb) != 0) {
- zfs_close(zhp);
- return (1);
+ rv = 1;
+ goto out;
}
/*
* Do the real thing. The callback will close the
* handle regardless of whether it succeeds or not.
*/
- if (destroy_callback(zhp, &cb) != 0)
- return (1);
+ err = destroy_callback(zhp, &cb);
+ zhp = NULL;
+ if (err == 0) {
+ err = zfs_destroy_snaps_nvl(g_zfs,
+ cb.cb_batchedsnaps, cb.cb_defer_destroy);
+ }
+ if (err != 0)
+ rv = 1;
}
- return (0);
+out:
+ fnvlist_free(cb.cb_batchedsnaps);
+ fnvlist_free(cb.cb_nvl);
+ if (zhp != NULL)
+ zfs_close(zhp);
+ return (rv);
}
static boolean_t
@@ -5052,28 +5089,12 @@ cleanup2:
return (error);
}
-/*
- * zfs allow [-r] [-t] <tag> <snap> ...
- *
- * -r Recursively hold
- * -t Temporary hold (hidden option)
- *
- * Apply a user-hold with the given tag to the list of snapshots.
- */
static int
zfs_do_allow(int argc, char **argv)
{
return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
}
-/*
- * zfs unallow [-r] [-t] <tag> <snap> ...
- *
- * -r Recursively hold
- * -t Temporary hold (hidden option)
- *
- * Apply a user-hold with the given tag to the list of snapshots.
- */
static int
zfs_do_unallow(int argc, char **argv)
{
@@ -5087,7 +5108,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
int i;
const char *tag;
boolean_t recursive = B_FALSE;
- boolean_t temphold = B_FALSE;
const char *opts = holding ? "rt" : "r";
int c;
@@ -5097,9 +5117,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
case 'r':
recursive = B_TRUE;
break;
- case 't':
- temphold = B_TRUE;
- break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -5148,7 +5165,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
}
if (holding) {
if (zfs_hold(zhp, delim+1, tag, recursive,
- temphold, B_FALSE, -1, 0, 0) != 0)
+ B_FALSE, -1) != 0)
++errors;
} else {
if (zfs_release(zhp, delim+1, tag, recursive) != 0)
@@ -5164,7 +5181,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
* zfs hold [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
- * -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/
diff --git a/usr/src/cmd/zhack/zhack.c b/usr/src/cmd/zhack/zhack.c
index 7cc83d2dc0..d9ad36cccb 100644
--- a/usr/src/cmd/zhack/zhack.c
+++ b/usr/src/cmd/zhack/zhack.c
@@ -46,6 +46,7 @@
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfeature.h>
+#include <sys/dmu_tx.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@@ -273,10 +274,10 @@ zhack_do_feature_stat(int argc, char **argv)
}
static void
-feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_enable_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- zfeature_info_t *feature = arg2;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ zfeature_info_t *feature = arg;
spa_feature_enable(spa, feature, tx);
spa_history_log_internal(spa, "zhack enable feature", tx,
@@ -344,8 +345,8 @@ zhack_do_feature_enable(int argc, char **argv)
if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
fatal("feature already enabled: %s", feature.fi_guid);
- VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
- feature_enable_sync, spa, &feature, 5));
+ VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+ feature_enable_sync, &feature, 5));
spa_close(spa, FTAG);
@@ -353,10 +354,10 @@ zhack_do_feature_enable(int argc, char **argv)
}
static void
-feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_incr_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- zfeature_info_t *feature = arg2;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ zfeature_info_t *feature = arg;
spa_feature_incr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
@@ -364,10 +365,10 @@ feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
-feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+feature_decr_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- zfeature_info_t *feature = arg2;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ zfeature_info_t *feature = arg;
spa_feature_decr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
@@ -442,8 +443,8 @@ zhack_do_feature_ref(int argc, char **argv)
if (decr && !spa_feature_is_active(spa, &feature))
fatal("feature refcount already 0: %s", feature.fi_guid);
- VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
- decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));
+ VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+ decr ? feature_decr_sync : feature_incr_sync, &feature, 5));
spa_close(spa, FTAG);
}
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index b51ab84bfc..ed460551c6 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -103,10 +103,12 @@
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
+#include <sys/dsl_destroy.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
+#include <sys/dsl_userhold.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@@ -365,7 +367,7 @@ ztest_info_t ztest_info[] = {
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
- { ztest_vdev_attach_detach, 1, &zopt_rarely },
+ { ztest_vdev_attach_detach, 1, &zopt_sometimes },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1,
&ztest_opts.zo_vdevtime },
@@ -1006,9 +1008,8 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
uint64_t curval;
int error;
- error = dsl_prop_set(osname, propname,
- (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
- sizeof (value), 1, &value);
+ error = dsl_prop_set_int(osname, propname,
+ (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1016,8 +1017,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
}
ASSERT0(error);
- VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
- 1, &curval, setpoint), ==, 0);
+ VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
if (ztest_opts.zo_verbose >= 6) {
VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
@@ -2479,8 +2479,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
int error;
VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
- leaves =
- MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
+ leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -3180,7 +3179,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/*
* Verify that the dataset contains a directory object.
*/
- VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
@@ -3188,12 +3187,16 @@ ztest_objset_destroy_cb(const char *name, void *arg)
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
- dmu_objset_rele(os, FTAG);
+ dmu_objset_disown(os, FTAG);
/*
* Destroy the dataset.
*/
- VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
+ if (strchr(name, '@') != NULL) {
+ VERIFY0(dsl_destroy_snapshot(name, B_FALSE));
+ } else {
+ VERIFY0(dsl_destroy_head(name));
+ }
return (0);
}
@@ -3203,16 +3206,17 @@ ztest_snapshot_create(char *osname, uint64_t id)
char snapname[MAXNAMELEN];
int error;
- (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
- (u_longlong_t)id);
+ (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
- error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1);
+ error = dmu_objset_snapshot_one(osname, snapname);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
return (B_FALSE);
}
- if (error != 0 && error != EEXIST)
- fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
+ if (error != 0 && error != EEXIST) {
+ fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname,
+ snapname, error);
+ }
return (B_TRUE);
}
@@ -3225,7 +3229,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id)
(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
(u_longlong_t)id);
- error = dmu_objset_destroy(snapname, B_FALSE);
+ error = dsl_destroy_snapshot(snapname, B_FALSE);
if (error != 0 && error != ENOENT)
fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
return (B_TRUE);
@@ -3271,7 +3275,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
- VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
+ VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
+ FTAG, &os));
/*
* Verify that we can create a new dataset.
@@ -3286,8 +3291,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
- VERIFY3U(0, ==,
- dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
ztest_zd_init(&zdtmp, NULL, os);
@@ -3363,21 +3367,21 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
- error = dmu_objset_destroy(clone2name, B_FALSE);
+ error = dsl_destroy_head(clone2name);
if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
- error = dmu_objset_destroy(snap3name, B_FALSE);
+ fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error);
+ error = dsl_destroy_snapshot(snap3name, B_FALSE);
if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
- error = dmu_objset_destroy(snap2name, B_FALSE);
+ fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error);
+ error = dsl_destroy_snapshot(snap2name, B_FALSE);
if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
- error = dmu_objset_destroy(clone1name, B_FALSE);
+ fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error);
+ error = dsl_destroy_head(clone1name);
if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
- error = dmu_objset_destroy(snap1name, B_FALSE);
+ fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error);
+ error = dsl_destroy_snapshot(snap1name, B_FALSE);
if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
+ fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
}
/*
@@ -3386,8 +3390,7 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
void
ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
{
- objset_t *clone;
- dsl_dataset_t *ds;
+ objset_t *os;
char snap1name[MAXNAMELEN];
char clone1name[MAXNAMELEN];
char snap2name[MAXNAMELEN];
@@ -3415,12 +3418,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
}
- error = dmu_objset_hold(snap1name, FTAG, &clone);
- if (error)
- fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
-
- error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
- dmu_objset_rele(clone, FTAG);
+ error = dmu_objset_clone(clone1name, snap1name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -3447,12 +3445,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
}
- error = dmu_objset_hold(snap3name, FTAG, &clone);
- if (error)
- fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
-
- error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
- dmu_objset_rele(clone, FTAG);
+ error = dmu_objset_clone(clone2name, snap3name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -3461,14 +3454,14 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
- error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
+ error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error)
- fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
+ fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
- dsl_dataset_disown(ds, FTAG);
+ dmu_objset_disown(os, FTAG);
out:
ztest_dsl_dataset_cleanup(osname, id);
@@ -4280,7 +4273,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
}
count = -1ULL;
- VERIFY(zap_count(os, object, &count) == 0);
+ VERIFY0(zap_count(os, object, &count));
ASSERT(count != -1ULL);
/*
@@ -4591,6 +4584,22 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
(void) rw_unlock(&ztest_name_lock);
}
+static int
+user_release_one(const char *snapname, const char *holdname)
+{
+ nvlist_t *snaps, *holds;
+ int error;
+
+ snaps = fnvlist_alloc();
+ holds = fnvlist_alloc();
+ fnvlist_add_boolean(holds, holdname);
+ fnvlist_add_nvlist(snaps, snapname, holds);
+ fnvlist_free(holds);
+ error = dsl_dataset_user_release(snaps, NULL);
+ fnvlist_free(snaps);
+ return (error);
+}
+
/*
* Test snapshot hold/release and deferred destroy.
*/
@@ -4605,22 +4614,30 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
char clonename[100];
char tag[100];
char osname[MAXNAMELEN];
+ nvlist_t *holds;
(void) rw_rdlock(&ztest_name_lock);
dmu_objset_name(os, osname);
- (void) snprintf(snapname, 100, "sh1_%llu", id);
- (void) snprintf(fullname, 100, "%s@%s", osname, snapname);
- (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id);
- (void) snprintf(tag, 100, "%tag_%llu", id);
+ (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
+ (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
+ (void) snprintf(clonename, sizeof (clonename),
+ "%s/ch1_%llu", osname, id);
+ (void) snprintf(tag, sizeof (tag), "tag_%llu", id);
/*
* Clean up from any previous run.
*/
- (void) dmu_objset_destroy(clonename, B_FALSE);
- (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
- (void) dmu_objset_destroy(fullname, B_FALSE);
+ error = dsl_destroy_head(clonename);
+ if (error != ENOENT)
+ ASSERT0(error);
+ error = user_release_one(fullname, tag);
+ if (error != ESRCH && error != ENOENT)
+ ASSERT0(error);
+ error = dsl_destroy_snapshot(fullname, B_FALSE);
+ if (error != ENOENT)
+ ASSERT0(error);
/*
* Create snapshot, clone it, mark snap for deferred destroy,
@@ -4635,12 +4652,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
- error = dmu_objset_hold(fullname, FTAG, &origin);
- if (error)
- fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
-
- error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0);
- dmu_objset_rele(origin, FTAG);
+ error = dmu_objset_clone(clonename, fullname);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_clone");
@@ -4649,15 +4661,15 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
}
- error = dmu_objset_destroy(fullname, B_TRUE);
+ error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
- fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+ fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
- error = dmu_objset_destroy(clonename, B_FALSE);
+ error = dsl_destroy_head(clonename);
if (error)
- fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error);
+ fatal(0, "dsl_destroy_head(%s) = %d", clonename, error);
error = dmu_objset_hold(fullname, FTAG, &origin);
if (error != ENOENT)
@@ -4677,28 +4689,31 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
- error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
- B_TRUE, -1);
+ holds = fnvlist_alloc();
+ fnvlist_add_string(holds, fullname, tag);
+ error = dsl_dataset_user_hold(holds, 0, NULL);
+ fnvlist_free(holds);
+
if (error)
fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
- error = dmu_objset_destroy(fullname, B_FALSE);
+ error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != EBUSY) {
- fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d",
+ fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
fullname, error);
}
- error = dmu_objset_destroy(fullname, B_TRUE);
+ error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
- fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+ fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
- error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
+ error = user_release_one(fullname, tag);
if (error)
- fatal(0, "dsl_dataset_user_release(%s)", fullname, tag);
+ fatal(0, "user_release_one(%s)", fullname, tag);
- VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
+ VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
out:
(void) rw_unlock(&ztest_name_lock);
@@ -4952,8 +4967,12 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
*/
for (int i = 0; i < copies; i++) {
uint64_t offset = i * blocksize;
- VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db,
- DMU_READ_NO_PREFETCH));
+ int error = dmu_buf_hold(os, object, offset, FTAG, &db,
+ DMU_READ_NO_PREFETCH);
+ if (error != 0) {
+ fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
+ os, (long long)object, (long long) offset, error);
+ }
ASSERT(db->db_offset == offset);
ASSERT(db->db_size == blocksize);
ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
@@ -5163,6 +5182,7 @@ ztest_spa_import_export(char *oldname, char *newname)
nvlist_t *config, *newconfig;
uint64_t pool_guid;
spa_t *spa;
+ int error;
if (ztest_opts.zo_verbose >= 4) {
(void) printf("import/export: old = %s, new = %s\n",
@@ -5207,7 +5227,12 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Import it under the new name.
*/
- VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
+ error = spa_import(newname, config, NULL, 0);
+ if (error != 0) {
+ dump_nvlist(config, 0);
+ fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
+ oldname, newname, error);
+ }
ztest_walk_pool_directory("pools after import");
@@ -5414,7 +5439,7 @@ ztest_dataset_open(int d)
}
ASSERT(error == 0 || error == EEXIST);
- VERIFY0(dmu_objset_hold(name, zd, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
(void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@@ -5455,7 +5480,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d];
zil_close(zd->zd_zilog);
- dmu_objset_rele(zd->zd_os, zd);
+ dmu_objset_disown(zd->zd_os, zd);
ztest_zd_fini(zd);
}
@@ -5499,13 +5524,14 @@ ztest_run(ztest_shared_t *zs)
* Open our pool.
*/
kernel_init(FREAD | FWRITE);
- VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0);
+ VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
spa->spa_debug = B_TRUE;
ztest_spa = spa;
- VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os));
+ VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
+ DMU_OST_ANY, B_TRUE, FTAG, &os));
zs->zs_guid = dmu_objset_fsid_guid(os);
- dmu_objset_rele(os, FTAG);
+ dmu_objset_disown(os, FTAG);
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
diff --git a/usr/src/common/nvpair/fnvpair.c b/usr/src/common/nvpair/fnvpair.c
index 8c5591c0b2..b3cf173f64 100644
--- a/usr/src/common/nvpair/fnvpair.c
+++ b/usr/src/common/nvpair/fnvpair.c
@@ -26,6 +26,7 @@
#include <sys/nvpair.h>
#include <sys/kmem.h>
#include <sys/debug.h>
+#include <sys/param.h>
#ifndef _KERNEL
#include <stdlib.h>
#endif
@@ -114,6 +115,18 @@ fnvlist_merge(nvlist_t *dst, nvlist_t *src)
VERIFY0(nvlist_merge(dst, src, KM_SLEEP));
}
+size_t
+fnvlist_num_pairs(nvlist_t *nvl)
+{
+ size_t count = 0;
+ nvpair_t *pair;
+
+ for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL;
+ pair = nvlist_next_nvpair(nvl, pair))
+ count++;
+ return (count);
+}
+
void
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
{
diff --git a/usr/src/lib/libnvpair/mapfile-vers b/usr/src/lib/libnvpair/mapfile-vers
index 0a1e88b341..a014835447 100644
--- a/usr/src/lib/libnvpair/mapfile-vers
+++ b/usr/src/lib/libnvpair/mapfile-vers
@@ -49,6 +49,7 @@ SYMBOL_VERSION ILLUMOS_0.1 { # Illumos additions
fnvlist_unpack;
fnvlist_dup;
fnvlist_merge;
+ fnvlist_num_pairs;
fnvlist_add_boolean;
fnvlist_add_boolean_value;
fnvlist_add_byte;
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 49c86dd18f..4d1e8186d2 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -550,7 +550,7 @@ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
extern int zfs_destroy(zfs_handle_t *, boolean_t);
extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
-extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t);
+extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t);
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
@@ -593,8 +593,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_promote(zfs_handle_t *);
-extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
- boolean_t, boolean_t, int, uint64_t, uint64_t);
+extern int zfs_hold(zfs_handle_t *, const char *, const char *,
+ boolean_t, boolean_t, int);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index 3ad1ed69d5..6121a0f161 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -1973,10 +1973,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg)
NULL, NULL, 0, B_TRUE) != 0)
goto out;
if (strcmp(gca->buf, gca->origin) == 0) {
- if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) {
- zfs_close(zhp);
- return (no_memory(zhp->zfs_hdl));
- }
+ fnvlist_add_boolean(gca->value, zfs_get_name(zhp));
gca->numclones--;
}
@@ -3142,45 +3139,49 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
} else {
- ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer);
+ ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer);
}
nvlist_free(dd.nvl);
return (ret);
}
/*
- * Destroys all the snapshots named in the nvlist. They must be underneath
- * the zhp (either snapshots of it, or snapshots of its descendants).
+ * Destroys all the snapshots named in the nvlist.
*/
int
-zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)
+zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
{
int ret;
nvlist_t *errlist;
ret = lzc_destroy_snaps(snaps, defer, &errlist);
- if (ret != 0) {
- for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
- pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
- char errbuf[1024];
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
- nvpair_name(pair));
+ if (ret == 0)
+ return (0);
- switch (fnvpair_value_int32(pair)) {
- case EEXIST:
- zfs_error_aux(zhp->zfs_hdl,
- dgettext(TEXT_DOMAIN,
- "snapshot is cloned"));
- ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS,
- errbuf);
- break;
- default:
- ret = zfs_standard_error(zhp->zfs_hdl, errno,
- errbuf);
- break;
- }
+ if (nvlist_next_nvpair(errlist, NULL) == NULL) {
+ char errbuf[1024];
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
+
+ ret = zfs_standard_error(hdl, ret, errbuf);
+ }
+ for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
+ char errbuf[1024];
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
+ nvpair_name(pair));
+
+ switch (fnvpair_value_int32(pair)) {
+ case EEXIST:
+ zfs_error_aux(hdl,
+ dgettext(TEXT_DOMAIN, "snapshot is cloned"));
+ ret = zfs_error(hdl, EZFS_EXISTS, errbuf);
+ break;
+ default:
+ ret = zfs_standard_error(hdl, errno, errbuf);
+ break;
}
}
@@ -4047,7 +4048,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
zc.zc_nvlist_dst_size = sizeof (buf);
if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
- char errbuf[ZFS_MAXNAMELEN + 32];
+ char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
@@ -4069,37 +4070,83 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
return (0);
}
+struct holdarg {
+ nvlist_t *nvl;
+ const char *snapname;
+ const char *tag;
+ boolean_t recursive;
+};
+
+static int
+zfs_hold_one(zfs_handle_t *zhp, void *arg)
+{
+ struct holdarg *ha = arg;
+ zfs_handle_t *szhp;
+ char name[ZFS_MAXNAMELEN];
+ int rv = 0;
+
+ (void) snprintf(name, sizeof (name),
+ "%s@%s", zhp->zfs_name, ha->snapname);
+
+ szhp = make_dataset_handle(zhp->zfs_hdl, name);
+ if (szhp) {
+ fnvlist_add_string(ha->nvl, name, ha->tag);
+ zfs_close(szhp);
+ }
+
+ if (ha->recursive)
+ rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
+ zfs_close(zhp);
+ return (rv);
+}
+
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
- boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
- int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
+ boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
{
- zfs_cmd_t zc = { 0 };
+ int ret;
+ struct holdarg ha;
+ nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
+ char errbuf[1024];
+ nvpair_t *elem;
- ASSERT(!recursive || dsobj == 0);
+ ha.nvl = fnvlist_alloc();
+ ha.snapname = snapname;
+ ha.tag = tag;
+ ha.recursive = recursive;
+ (void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
+ ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
+ fnvlist_free(ha.nvl);
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
- if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
- >= sizeof (zc.zc_string))
- return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
- zc.zc_cookie = recursive;
- zc.zc_temphold = temphold;
- zc.zc_cleanup_fd = cleanup_fd;
- zc.zc_sendobj = dsobj;
- zc.zc_createtxg = createtxg;
+ if (ret == 0)
+ return (0);
- if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
- char errbuf[ZFS_MAXNAMELEN+32];
+ if (nvlist_next_nvpair(errors, NULL) == NULL) {
+ /* no hold-specific errors */
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot hold"));
+ switch (ret) {
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EINVAL:
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
+ default:
+ (void) zfs_standard_error(hdl, ret, errbuf);
+ }
+ }
- /*
- * if it was recursive, the one that actually failed will be in
- * zc.zc_name.
- */
- (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
- "cannot hold '%s@%s'"), zc.zc_name, snapname);
- switch (errno) {
+ for (elem = nvlist_next_nvpair(errors, NULL);
+ elem != NULL;
+ elem = nvlist_next_nvpair(errors, elem)) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN,
+ "cannot hold snapshot '%s'"), nvpair_name(elem));
+ switch (fnvpair_value_int32(elem)) {
case E2BIG:
/*
* Temporary tags wind up having the ds object id
@@ -4107,66 +4154,122 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
* above, it's still possible for the tag to wind
* up being slightly too long.
*/
- return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
- case ENOTSUP:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool must be upgraded"));
- return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+ (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf);
+ break;
case EINVAL:
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
case EEXIST:
- return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
+ (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
+ break;
case ENOENT:
if (enoent_ok)
return (ENOENT);
/* FALLTHROUGH */
default:
- return (zfs_standard_error_fmt(hdl, errno, errbuf));
+ (void) zfs_standard_error(hdl,
+ fnvpair_value_int32(elem), errbuf);
}
}
- return (0);
+ fnvlist_free(errors);
+ return (ret);
+}
+
+struct releasearg {
+ nvlist_t *nvl;
+ const char *snapname;
+ const char *tag;
+ boolean_t recursive;
+};
+
+static int
+zfs_release_one(zfs_handle_t *zhp, void *arg)
+{
+ struct holdarg *ha = arg;
+ zfs_handle_t *szhp;
+ char name[ZFS_MAXNAMELEN];
+ int rv = 0;
+
+ (void) snprintf(name, sizeof (name),
+ "%s@%s", zhp->zfs_name, ha->snapname);
+
+ szhp = make_dataset_handle(zhp->zfs_hdl, name);
+ if (szhp) {
+ nvlist_t *holds = fnvlist_alloc();
+ fnvlist_add_boolean(holds, ha->tag);
+ fnvlist_add_nvlist(ha->nvl, name, holds);
+ zfs_close(szhp);
+ }
+
+ if (ha->recursive)
+ rv = zfs_iter_filesystems(zhp, zfs_release_one, ha);
+ zfs_close(zhp);
+ return (rv);
}
int
zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive)
{
- zfs_cmd_t zc = { 0 };
+ int ret;
+ struct holdarg ha;
+ nvlist_t *errors;
+ nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
- (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
- if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
- >= sizeof (zc.zc_string))
- return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
- zc.zc_cookie = recursive;
+ ha.nvl = fnvlist_alloc();
+ ha.snapname = snapname;
+ ha.tag = tag;
+ ha.recursive = recursive;
+ (void) zfs_release_one(zfs_handle_dup(zhp), &ha);
+ ret = lzc_release(ha.nvl, &errors);
+ fnvlist_free(ha.nvl);
- if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
- char errbuf[ZFS_MAXNAMELEN+32];
+ if (ret == 0)
+ return (0);
+
+ if (nvlist_next_nvpair(errors, NULL) == NULL) {
+ /* no hold-specific errors */
+ char errbuf[1024];
- /*
- * if it was recursive, the one that actually failed will be in
- * zc.zc_name.
- */
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
- "cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
- snapname);
+ "cannot release"));
switch (errno) {
- case ESRCH:
- return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
- return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ default:
+ (void) zfs_standard_error_fmt(hdl, errno, errbuf);
+ }
+ }
+
+ for (elem = nvlist_next_nvpair(errors, NULL);
+ elem != NULL;
+ elem = nvlist_next_nvpair(errors, elem)) {
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN,
+ "cannot release hold from snapshot '%s'"),
+ nvpair_name(elem));
+ switch (fnvpair_value_int32(elem)) {
+ case ESRCH:
+ (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf);
+ break;
case EINVAL:
- return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
default:
- return (zfs_standard_error_fmt(hdl, errno, errbuf));
+ (void) zfs_standard_error_fmt(hdl,
+ fnvpair_value_int32(elem), errbuf);
}
}
- return (0);
+ fnvlist_free(errors);
+ return (ret);
}
int
@@ -4177,7 +4280,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
int nvsz = 2048;
void *nvbuf;
int err = 0;
- char errbuf[ZFS_MAXNAMELEN+32];
+ char errbuf[1024];
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@@ -4242,7 +4345,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
char *nvbuf;
- char errbuf[ZFS_MAXNAMELEN+32];
+ char errbuf[1024];
size_t nvsz;
int err;
@@ -4293,38 +4396,18 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
- zfs_cmd_t zc = { 0 };
- libzfs_handle_t *hdl = zhp->zfs_hdl;
- int nvsz = 2048;
- void *nvbuf;
- int err = 0;
- char errbuf[ZFS_MAXNAMELEN+32];
-
- assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
-
-tryagain:
-
- nvbuf = malloc(nvsz);
- if (nvbuf == NULL) {
- err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
- goto out;
- }
+ int err;
+ char errbuf[1024];
- zc.zc_nvlist_dst_size = nvsz;
- zc.zc_nvlist_dst = (uintptr_t)nvbuf;
+ err = lzc_get_holds(zhp->zfs_name, nvl);
- (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
+ if (err != 0) {
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
- if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
- zc.zc_name);
- switch (errno) {
- case ENOMEM:
- free(nvbuf);
- nvsz = zc.zc_nvlist_dst_size;
- goto tryagain;
-
+ zhp->zfs_name);
+ switch (err) {
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
@@ -4340,19 +4423,8 @@ tryagain:
err = zfs_standard_error_fmt(hdl, errno, errbuf);
break;
}
- } else {
- /* success */
- int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
- if (rc) {
- (void) snprintf(errbuf, sizeof (errbuf),
- dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
- zc.zc_name);
- err = zfs_standard_error_fmt(hdl, rc, errbuf);
- }
}
- free(nvbuf);
-out:
return (err);
}
diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
index ee6e64319e..ea0634ee70 100644
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
@@ -972,9 +972,7 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
*/
if (pzhp) {
error = zfs_hold(pzhp, thissnap, sdd->holdtag,
- B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
- zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
- zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
+ B_FALSE, B_TRUE, sdd->cleanup_fd);
zfs_close(pzhp);
}
@@ -1713,12 +1711,11 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
err = ENOENT;
}
- if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
+ if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
seq++;
- (void) strncpy(newname, name, baselen);
- (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
- "recv-%u-%u", getpid(), seq);
+ (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
+ baselen, name, getpid(), seq);
(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
if (flags->verbose) {
@@ -2643,7 +2640,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
/*
* Determine name of destination snapshot, store in zc_value.
*/
- (void) strcpy(zc.zc_top_ds, tosnap);
(void) strcpy(zc.zc_value, tosnap);
(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
free(cp);
diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.c b/usr/src/lib/libzfs_core/common/libzfs_core.c
index 73afd50b8d..5e8ee07796 100644
--- a/usr/src/lib/libzfs_core/common/libzfs_core.c
+++ b/usr/src/lib/libzfs_core/common/libzfs_core.c
@@ -132,6 +132,7 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
zc.zc_nvlist_src_size = size;
if (resultp != NULL) {
+ *resultp = NULL;
zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
malloc(zc.zc_nvlist_dst_size);
@@ -159,8 +160,6 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
if (zc.zc_nvlist_dst_filled) {
*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
zc.zc_nvlist_dst_size);
- } else if (resultp != NULL) {
- *resultp = NULL;
}
out:
@@ -209,7 +208,7 @@ lzc_clone(const char *fsname, const char *origin,
* The value will be the (int32) error code.
*
* The return value will be 0 if all snapshots were created, otherwise it will
- * be the errno of a (undetermined) snapshot that failed.
+ * be the errno of an (unspecified) snapshot that failed.
*/
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
@@ -258,7 +257,7 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
* The return value will be 0 if all snapshots were destroyed (or marked for
* later destruction if 'defer' is set) or didn't exist to begin with.
*
- * Otherwise the return value will be the errno of a (undetermined) snapshot
+ * Otherwise the return value will be the errno of an (unspecified) snapshot
* that failed, no snapshots will be destroyed, and the errlist will have an
* entry for each snapshot that failed. The value in the errlist will be
* the (int32) error code.
@@ -333,6 +332,101 @@ lzc_exists(const char *dataset)
}
/*
+ * Create "user holds" on snapshots. If there is a hold on a snapshot,
+ * the snapshot can not be destroyed. (However, it can be marked for deletion
+ * by lzc_destroy_snaps(defer=B_TRUE).)
+ *
+ * The keys in the nvlist are snapshot names.
+ * The snapshots must all be in the same pool.
+ * The value is the name of the hold (string type).
+ *
+ * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
+ * In this case, when the cleanup_fd is closed (including on process
+ * termination), the holds will be released. If the system is shut down
+ * uncleanly, the holds will be released when the pool is next opened
+ * or imported.
+ *
+ * The return value will be 0 if all holds were created. Otherwise the return
+ * value will be the errno of an (unspecified) hold that failed, no holds will
+ * be created, and the errlist will have an entry for each hold that
+ * failed (name = snapshot). The value in the errlist will be the error
+ * code (int32).
+ */
+int
+lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
+{
+ char pool[MAXNAMELEN];
+ nvlist_t *args;
+ nvpair_t *elem;
+ int error;
+
+ /* determine the pool name */
+ elem = nvlist_next_nvpair(holds, NULL);
+ if (elem == NULL)
+ return (0);
+ (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+ pool[strcspn(pool, "/@")] = '\0';
+
+ args = fnvlist_alloc();
+ fnvlist_add_nvlist(args, "holds", holds);
+ if (cleanup_fd != -1)
+ fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
+
+ error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
+ nvlist_free(args);
+ return (error);
+}
+
+/*
+ * Release "user holds" on snapshots. If the snapshot has been marked for
+ * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
+ * any clones, and all the user holds are removed, then the snapshot will be
+ * destroyed.
+ *
+ * The keys in the nvlist are snapshot names.
+ * The snapshots must all be in the same pool.
+ * The value is a nvlist whose keys are the holds to remove.
+ *
+ * The return value will be 0 if all holds were removed.
+ * Otherwise the return value will be the errno of an (unspecified) release
+ * that failed, no holds will be released, and the errlist will have an
+ * entry for each snapshot that has failed releases (name = snapshot).
+ * The value in the errlist will be the error code (int32) of a failed release.
+ */
+int
+lzc_release(nvlist_t *holds, nvlist_t **errlist)
+{
+ char pool[MAXNAMELEN];
+ nvpair_t *elem;
+
+ /* determine the pool name */
+ elem = nvlist_next_nvpair(holds, NULL);
+ if (elem == NULL)
+ return (0);
+ (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+ pool[strcspn(pool, "/@")] = '\0';
+
+ return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
+}
+
+/*
+ * Retrieve list of user holds on the specified snapshot.
+ *
+ * On success, *holdsp will be set to a nvlist which the caller must free.
+ * The keys are the names of the holds, and the value is the creation time
+ * of the hold (uint64) in seconds since the epoch.
+ */
+int
+lzc_get_holds(const char *snapname, nvlist_t **holdsp)
+{
+ int error;
+ nvlist_t *innvl = fnvlist_alloc();
+ error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
+ fnvlist_free(innvl);
+ return (error);
+}
+
+/*
* If fromsnap is NULL, a full (non-incremental) stream will be sent.
*/
int
diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.h b/usr/src/lib/libzfs_core/common/libzfs_core.h
index 9edc884a14..f5fd6cda9f 100644
--- a/usr/src/lib/libzfs_core/common/libzfs_core.h
+++ b/usr/src/lib/libzfs_core/common/libzfs_core.h
@@ -46,6 +46,10 @@ int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist);
int lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
uint64_t *usedp);
+int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist);
+int lzc_release(nvlist_t *holds, nvlist_t **errlist);
+int lzc_get_holds(const char *snapname, nvlist_t **holdsp);
+
int lzc_send(const char *snapname, const char *fromsnap, int fd);
int lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd);
diff --git a/usr/src/lib/libzfs_core/common/mapfile-vers b/usr/src/lib/libzfs_core/common/mapfile-vers
index 612688334f..1c9930a9cd 100644
--- a/usr/src/lib/libzfs_core/common/mapfile-vers
+++ b/usr/src/lib/libzfs_core/common/mapfile-vers
@@ -45,7 +45,10 @@ SYMBOL_VERSION ILLUMOS_0.1 {
lzc_create;
lzc_destroy_snaps;
lzc_exists;
+ lzc_get_holds;
+ lzc_hold;
lzc_receive;
+ lzc_release;
lzc_send;
lzc_send_space;
lzc_snaprange_space;
diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c
index cc0d5428be..96280941a6 100644
--- a/usr/src/lib/libzpool/common/kernel.c
+++ b/usr/src/lib/libzpool/common/kernel.c
@@ -34,6 +34,7 @@
#include <sys/stat.h>
#include <sys/processor.h>
#include <sys/zfs_context.h>
+#include <sys/rrwlock.h>
#include <sys/zmod.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
@@ -859,6 +860,8 @@ umem_out_of_memory(void)
void
kernel_init(int mode)
{
+ extern uint_t rrw_tsd_key;
+
umem_nofail_callback(umem_out_of_memory);
physmem = sysconf(_SC_PHYS_PAGES);
@@ -877,6 +880,8 @@ kernel_init(int mode)
mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
spa_init(mode);
+
+ tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
void
diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool
index 934a7409b7..d3864d2a9a 100644
--- a/usr/src/lib/libzpool/common/llib-lzpool
+++ b/usr/src/lib/libzpool/common/llib-lzpool
@@ -57,6 +57,9 @@
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/zfeature.h>
+#include <sys/dmu_tx.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h
index bcb27cf1a5..7802da4e2b 100644
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h
@@ -61,6 +61,8 @@ extern "C" {
#include <dirent.h>
#include <time.h>
#include <procfs.h>
+#include <pthread.h>
+#include <sys/debug.h>
#include <libsysevent.h>
#include <sys/note.h>
#include <sys/types.h>
@@ -224,6 +226,9 @@ typedef int krw_t;
#undef RW_WRITE_HELD
#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock)
+#undef RW_LOCK_HELD
+#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x))
+
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
extern void rw_destroy(krwlock_t *rwlp);
extern void rw_enter(krwlock_t *rwlp, krw_t rw);
@@ -253,6 +258,14 @@ extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
/*
+ * Thread-specific data
+ */
+#define tsd_get(k) pthread_getspecific(k)
+#define tsd_set(k, v) pthread_setspecific(k, v)
+#define tsd_create(kp, d) pthread_key_create(kp, d)
+#define tsd_destroy(kp) /* nothing */
+
+/*
* kstat creation, installation and deletion
*/
extern kstat_t *kstat_create(const char *, int,
@@ -519,7 +532,7 @@ typedef struct callb_cpr {
#define INGLOBALZONE(z) (1)
extern char *kmem_asprintf(const char *fmt, ...);
-#define strfree(str) kmem_free((str), strlen(str)+1)
+#define strfree(str) kmem_free((str), strlen(str) + 1)
/*
* Hostname information
diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m
index 18441dad6e..c4d579d7cb 100644
--- a/usr/src/man/man1m/zfs.1m
+++ b/usr/src/man/man1m/zfs.1m
@@ -1868,7 +1868,9 @@ descendent file systems.
.ad
.sp .6
.RS 4n
-Recursively destroy all dependents.
+Recursively destroy all clones of these snapshots, including the clones,
+snapshots, and children. If this flag is specified, the \fB-d\fR flag will
+have no effect.
.RE
.sp
@@ -1904,7 +1906,7 @@ Print verbose information about the deleted data.
.RE
.sp
-Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR
+Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR
options, as they can destroy large portions of a pool and cause unexpected
behavior for mounted file systems in use.
.RE
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 1eb0e3158a..e0fa959279 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1346,8 +1346,10 @@ ZFS_COMMON_OBJS += \
dsl_dir.o \
dsl_dataset.o \
dsl_deadlist.o \
+ dsl_destroy.o \
dsl_pool.o \
dsl_synctask.o \
+ dsl_userhold.o \
dmu_zfetch.o \
dsl_deleg.o \
dsl_prop.o \
@@ -1358,6 +1360,7 @@ ZFS_COMMON_OBJS += \
lzjb.o \
metaslab.o \
refcount.o \
+ rrwlock.o \
sa.o \
sha256.o \
spa.o \
@@ -1417,7 +1420,6 @@ ZFS_OBJS += \
zfs_onexit.o \
zfs_replay.o \
zfs_rlock.o \
- rrwlock.o \
zfs_vfsops.o \
zfs_vnops.o \
zvol.o
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index ca3baeaeeb..9588e40433 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -1633,12 +1633,12 @@ arc_buf_free(arc_buf_t *buf, void *tag)
}
}
-int
+boolean_t
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = HDR_LOCK(hdr);
- int no_callback = (buf->b_efunc == NULL);
+ boolean_t no_callback = (buf->b_efunc == NULL);
if (hdr->b_state == arc_anon) {
ASSERT(hdr->b_datacnt == 1);
@@ -1843,7 +1843,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
ARCSTAT_INCR(arcstat_mutex_miss, missed);
/*
- * We have just evicted some date into the ghost state, make
+ * We have just evicted some data into the ghost state, make
* sure we also adjust the ghost state size if necessary.
*/
if (arc_no_grow &&
@@ -2622,7 +2622,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
if (zio == NULL || zio->io_error == 0)
bcopy(buf->b_data, arg, buf->b_hdr->b_size);
- VERIFY(arc_buf_remove_ref(buf, arg) == 1);
+ VERIFY(arc_buf_remove_ref(buf, arg));
}
/* a generic arc_done_func_t */
@@ -2631,7 +2631,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
if (zio && zio->io_error) {
- VERIFY(arc_buf_remove_ref(buf, arg) == 1);
+ VERIFY(arc_buf_remove_ref(buf, arg));
*bufp = NULL;
} else {
*bufp = buf;
diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c
index 066ccc6b1e..ee12db3a26 100644
--- a/usr/src/uts/common/fs/zfs/bplist.c
+++ b/usr/src/uts/common/fs/zfs/bplist.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/bplist.h>
@@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp)
mutex_exit(&bpl->bpl_lock);
}
+/*
+ * To aid debugging, we keep the most recently removed entry. This way if
+ * we are in the callback, we can easily locate the entry.
+ */
+static bplist_entry_t *bplist_iterate_last_removed;
+
void
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
{
@@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
mutex_enter(&bpl->bpl_lock);
while (bpe = list_head(&bpl->bpl_list)) {
+ bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);
diff --git a/usr/src/uts/common/fs/zfs/bpobj.c b/usr/src/uts/common/fs/zfs/bpobj.c
index 1920da4408..bcb5f331f0 100644
--- a/usr/src/uts/common/fs/zfs/bpobj.c
+++ b/usr/src/uts/common/fs/zfs/bpobj.c
@@ -392,6 +392,10 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
}
+ dmu_object_info_t doi;
+ ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
+ ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
+
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index 8bf3d099ef..57abfa1003 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -39,7 +39,7 @@
#include <sys/sa_impl.h>
static void dbuf_destroy(dmu_buf_impl_t *db);
-static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
+static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
/*
@@ -499,7 +499,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(buf, db));
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
@@ -828,10 +828,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
continue;
/* found a level 0 buffer in the range */
- if (dbuf_undirty(db, tx))
+ mutex_enter(&db->db_mtx);
+ if (dbuf_undirty(db, tx)) {
+ /* mutex has been dropped and dbuf destroyed */
continue;
+ }
- mutex_enter(&db->db_mtx);
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
@@ -958,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
mutex_enter(&db->db_mtx);
dbuf_set_data(db, buf);
- VERIFY(arc_buf_remove_ref(obuf, db) == 1);
+ VERIFY(arc_buf_remove_ref(obuf, db));
db->db.db_size = size;
if (db->db_level == 0) {
@@ -1258,7 +1260,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr);
}
-static int
+/*
+ * Return TRUE if this evicted the dbuf.
+ */
+static boolean_t
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
dnode_t *dn;
@@ -1267,18 +1272,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(txg != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
+ ASSERT0(db->db_level);
+ ASSERT(MUTEX_HELD(&db->db_mtx));
- mutex_enter(&db->db_mtx);
/*
* If this buffer is not dirty, we're done.
*/
for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
if (dr->dr_txg <= txg)
break;
- if (dr == NULL || dr->dr_txg < txg) {
- mutex_exit(&db->db_mtx);
- return (0);
- }
+ if (dr == NULL || dr->dr_txg < txg)
+ return (B_FALSE);
ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
@@ -1286,24 +1290,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn = DB_DNODE(db);
/*
- * If this buffer is currently held, we cannot undirty
- * it, since one of the current holders may be in the
- * middle of an update. Note that users of dbuf_undirty()
- * should not place a hold on the dbuf before the call.
- * Also note: we can get here with a spill block, so
- * test for that similar to how dbuf_dirty does.
+ * Note: This code will probably work even if there are concurrent
+ * holders, but it is untested in that scenario, as the ZPL and
+ * ztest have additional locking (the range locks) that prevents
+ * that type of concurrent access.
*/
- if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
- mutex_exit(&db->db_mtx);
- /* Make sure we don't toss this buffer at sync phase */
- if (db->db_blkid != DMU_SPILL_BLKID) {
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
- mutex_exit(&dn->dn_mtx);
- }
- DB_DNODE_EXIT(db);
- return (0);
- }
+ ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@@ -1332,21 +1324,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
DB_DNODE_EXIT(db);
- if (db->db_level == 0) {
- if (db->db_state != DB_NOFILL) {
- dbuf_unoverride(dr);
+ if (db->db_state != DB_NOFILL) {
+ dbuf_unoverride(dr);
- ASSERT(db->db_buf != NULL);
- ASSERT(dr->dt.dl.dr_data != NULL);
- if (dr->dt.dl.dr_data != db->db_buf)
- VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
- db) == 1);
- }
- } else {
ASSERT(db->db_buf != NULL);
- ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
- mutex_destroy(&dr->dt.di.dr_mtx);
- list_destroy(&dr->dt.di.dr_children);
+ ASSERT(dr->dt.dl.dr_data != NULL);
+ if (dr->dt.dl.dr_data != db->db_buf)
+ VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
@@ -1358,13 +1342,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
dbuf_set_data(db, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
- return (1);
+ return (B_TRUE);
}
- mutex_exit(&db->db_mtx);
- return (0);
+ return (B_FALSE);
}
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
@@ -1463,7 +1446,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(buf, db));
xuio_stat_wbuf_copied();
return;
}
@@ -1481,10 +1464,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_release(db->db_buf, db);
}
dr->dt.dl.dr_data = buf;
- VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(db->db_buf, db));
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
arc_release(db->db_buf, db);
- VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(db->db_buf, db));
}
db->db_buf = NULL;
}
@@ -2067,10 +2050,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* This dbuf has anonymous data associated with it.
*/
dbuf_set_data(db, NULL);
- VERIFY(arc_buf_remove_ref(buf, db) == 1);
+ VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
- VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
+ VERIFY(!arc_buf_remove_ref(db->db_buf, db));
/*
* A dbuf will be eligible for eviction if either the
@@ -2567,7 +2550,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (db->db_state != DB_NOFILL) {
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
- db) == 1);
+ db));
else if (!arc_released(db->db_buf))
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 21cdd7c1ed..6ee37ac8b7 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -1194,7 +1194,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
- VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
+ VERIFY(arc_buf_remove_ref(buf, FTAG));
}
/*
diff --git a/usr/src/uts/common/fs/zfs/dmu_diff.c b/usr/src/uts/common/fs/zfs/dmu_diff.c
index dc237780c0..2d1aaa4c44 100644
--- a/usr/src/uts/common/fs/zfs/dmu_diff.c
+++ b/usr/src/uts/common/fs/zfs/dmu_diff.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -155,51 +156,49 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
int
-dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp)
+dmu_diff(const char *tosnap_name, const char *fromsnap_name,
+ struct vnode *vp, offset_t *offp)
{
struct diffarg da;
- dsl_dataset_t *ds = tosnap->os_dsl_dataset;
- dsl_dataset_t *fromds = fromsnap->os_dsl_dataset;
- dsl_dataset_t *findds;
- dsl_dataset_t *relds;
- int err = 0;
-
- /* make certain we are looking at snapshots */
- if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds))
+ dsl_dataset_t *fromsnap;
+ dsl_dataset_t *tosnap;
+ dsl_pool_t *dp;
+ int error;
+ uint64_t fromtxg;
+
+ if (strchr(tosnap_name, '@') == NULL ||
+ strchr(fromsnap_name, '@') == NULL)
return (EINVAL);
- /* fromsnap must be earlier and from the same lineage as tosnap */
- if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)
- return (EXDEV);
-
- relds = NULL;
- findds = ds;
-
- while (fromds->ds_dir != findds->ds_dir) {
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
- if (!dsl_dir_is_clone(findds->ds_dir)) {
- if (relds)
- dsl_dataset_rele(relds, FTAG);
- return (EXDEV);
- }
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_dataset_hold_obj(dp,
- findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds);
- rw_exit(&dp->dp_config_rwlock);
+ error = dsl_pool_hold(tosnap_name, FTAG, &dp);
+ if (error != 0)
+ return (error);
- if (relds)
- dsl_dataset_rele(relds, FTAG);
+ error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
- if (err)
- return (EXDEV);
+ error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap);
+ if (error != 0) {
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
- relds = findds;
+ if (!dsl_dataset_is_before(tosnap, fromsnap)) {
+ dsl_dataset_rele(fromsnap, FTAG);
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (EXDEV);
}
- if (relds)
- dsl_dataset_rele(relds, FTAG);
+ fromtxg = fromsnap->ds_phys->ds_creation_txg;
+ dsl_dataset_rele(fromsnap, FTAG);
+
+ dsl_dataset_long_hold(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
da.da_vp = vp;
da.da_offp = offp;
@@ -207,15 +206,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp)
da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0;
da.da_err = 0;
- err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg,
+ error = traverse_dataset(tosnap, fromtxg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
- if (err) {
- da.da_err = err;
+ if (error != 0) {
+ da.da_err = error;
} else {
/* we set the da.da_err we return as side-effect */
(void) write_record(&da);
}
+ dsl_dataset_long_rele(tosnap, FTAG);
+ dsl_dataset_rele(tosnap, FTAG);
+
return (da.da_err);
}
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 74c1192017..a646f40450 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -44,6 +44,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/sa.h>
#include <sys/zfs_onexit.h>
+#include <sys/dsl_destroy.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -280,7 +281,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
err = arc_read(NULL, spa, os->os_rootbp,
arc_getbuf_func, &os->os_phys_buf,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
- if (err) {
+ if (err != 0) {
kmem_free(os, sizeof (objset_t));
/* convert checksum errors into IO errors */
if (err == ECKSUM)
@@ -320,34 +321,49 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* checksum/compression/copies.
*/
if (ds) {
- err = dsl_prop_register(ds, "primarycache",
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "secondarycache",
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
secondary_cache_changed_cb, os);
+ }
if (!dsl_dataset_is_snapshot(ds)) {
- if (err == 0)
- err = dsl_prop_register(ds, "checksum",
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_CHECKSUM),
checksum_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "compression",
+ }
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_COMPRESSION),
compression_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "copies",
+ }
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_COPIES),
copies_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "dedup",
+ }
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_DEDUP),
dedup_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "logbias",
+ }
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_LOGBIAS),
logbias_changed_cb, os);
- if (err == 0)
- err = dsl_prop_register(ds, "sync",
+ }
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_SYNC),
sync_changed_cb, os);
+ }
}
- if (err) {
+ if (err != 0) {
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
- &os->os_phys_buf) == 1);
+ &os->os_phys_buf));
kmem_free(os, sizeof (objset_t));
return (err);
}
@@ -425,44 +441,66 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
return (err);
}
-/* called from zpl */
+/*
+ * Holds the pool while the objset is held. Therefore only one objset
+ * can be held at a time.
+ */
int
dmu_objset_hold(const char *name, void *tag, objset_t **osp)
{
+ dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
- err = dsl_dataset_hold(name, tag, &ds);
- if (err)
+ err = dsl_pool_hold(name, tag, &dp);
+ if (err != 0)
+ return (err);
+ err = dsl_dataset_hold(dp, name, tag, &ds);
+ if (err != 0) {
+ dsl_pool_rele(dp, tag);
return (err);
+ }
err = dmu_objset_from_ds(ds, osp);
- if (err)
+ if (err != 0) {
dsl_dataset_rele(ds, tag);
+ dsl_pool_rele(dp, tag);
+ }
return (err);
}
-/* called from zpl */
+/*
+ * dsl_pool must not be held when this is called.
+ * Upon successful return, there will be a longhold on the dataset,
+ * and the dsl_pool will not be held.
+ */
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp)
{
+ dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
- err = dsl_dataset_own(name, B_FALSE, tag, &ds);
- if (err)
+ err = dsl_pool_hold(name, FTAG, &dp);
+ if (err != 0)
+ return (err);
+ err = dsl_dataset_own(dp, name, tag, &ds);
+ if (err != 0) {
+ dsl_pool_rele(dp, FTAG);
return (err);
+ }
err = dmu_objset_from_ds(ds, osp);
- if (err) {
+ dsl_pool_rele(dp, FTAG);
+ if (err != 0) {
dsl_dataset_disown(ds, tag);
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
- dmu_objset_disown(*osp, tag);
+ dsl_dataset_disown(ds, tag);
return (EINVAL);
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
- dmu_objset_disown(*osp, tag);
+ dsl_dataset_disown(ds, tag);
return (EROFS);
}
return (err);
@@ -471,7 +509,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
void
dmu_objset_rele(objset_t *os, void *tag)
{
+ dsl_pool_t *dp = dmu_objset_pool(os);
dsl_dataset_rele(os->os_dsl_dataset, tag);
+ dsl_pool_rele(dp, tag);
}
void
@@ -480,7 +520,7 @@ dmu_objset_disown(objset_t *os, void *tag)
dsl_dataset_disown(os->os_dsl_dataset, tag);
}
-int
+void
dmu_objset_evict_dbufs(objset_t *os)
{
dnode_t *dn;
@@ -515,9 +555,7 @@ dmu_objset_evict_dbufs(objset_t *os)
mutex_enter(&os->os_lock);
dn = next_dn;
}
- dn = list_head(&os->os_dnodes);
mutex_exit(&os->os_lock);
- return (dn != DMU_META_DNODE(os));
}
void
@@ -530,33 +568,37 @@ dmu_objset_evict(objset_t *os)
if (ds) {
if (!dsl_dataset_is_snapshot(ds)) {
- VERIFY(0 == dsl_prop_unregister(ds, "checksum",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_CHECKSUM),
checksum_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "compression",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_COMPRESSION),
compression_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "copies",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_COPIES),
copies_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "dedup",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_DEDUP),
dedup_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "logbias",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_LOGBIAS),
logbias_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "sync",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_SYNC),
sync_changed_cb, os));
}
- VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os));
- VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
+ VERIFY0(dsl_prop_unregister(ds,
+ zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
secondary_cache_changed_cb, os));
}
if (os->os_sa)
sa_tear_down(os);
- /*
- * We should need only a single pass over the dnode list, since
- * nothing can be added to the list at this point.
- */
- (void) dmu_objset_evict_dbufs(os);
+ dmu_objset_evict_dbufs(os);
dnode_special_close(&os->os_meta_dnode);
if (DMU_USERUSED_DNODE(os)) {
@@ -567,7 +609,7 @@ dmu_objset_evict(objset_t *os)
ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
- VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
+ VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
/*
* This is a barrier to prevent the objset from going away in
@@ -599,10 +641,11 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
dnode_t *mdn;
ASSERT(dmu_tx_is_syncing(tx));
+
if (ds != NULL)
- VERIFY(0 == dmu_objset_from_ds(ds, &os));
+ VERIFY0(dmu_objset_from_ds(ds, &os));
else
- VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os));
+ VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
mdn = DMU_META_DNODE(os);
@@ -650,359 +693,181 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
return (os);
}
-struct oscarg {
- void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
- void *userarg;
- dsl_dataset_t *clone_origin;
- const char *lastname;
- dmu_objset_type_t type;
- uint64_t flags;
- cred_t *cr;
-};
+typedef struct dmu_objset_create_arg {
+ const char *doca_name;
+ cred_t *doca_cred;
+ void (*doca_userfunc)(objset_t *os, void *arg,
+ cred_t *cr, dmu_tx_t *tx);
+ void *doca_userarg;
+ dmu_objset_type_t doca_type;
+ uint64_t doca_flags;
+} dmu_objset_create_arg_t;
/*ARGSUSED*/
static int
-dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_objset_create_check(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
- struct oscarg *oa = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- int err;
- uint64_t ddobj;
-
- err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
- oa->lastname, sizeof (uint64_t), 1, &ddobj);
- if (err != ENOENT)
- return (err ? err : EEXIST);
+ dmu_objset_create_arg_t *doca = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *pdd;
+ const char *tail;
+ int error;
- if (oa->clone_origin != NULL) {
- /* You can't clone across pools. */
- if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
- return (EXDEV);
+ if (strchr(doca->doca_name, '@') != NULL)
+ return (EINVAL);
- /* You can only clone snapshots, not the head datasets. */
- if (!dsl_dataset_is_snapshot(oa->clone_origin))
- return (EINVAL);
+ error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
+ if (error != 0)
+ return (error);
+ if (tail == NULL) {
+ dsl_dir_rele(pdd, FTAG);
+ return (EEXIST);
}
+ dsl_dir_rele(pdd, FTAG);
return (0);
}
static void
-dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
- spa_t *spa = dd->dd_pool->dp_spa;
- struct oscarg *oa = arg2;
- uint64_t obj;
+ dmu_objset_create_arg_t *doca = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *pdd;
+ const char *tail;
dsl_dataset_t *ds;
+ uint64_t obj;
blkptr_t *bp;
+ objset_t *os;
- ASSERT(dmu_tx_is_syncing(tx));
+ VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
- obj = dsl_dataset_create_sync(dd, oa->lastname,
- oa->clone_origin, oa->flags, oa->cr, tx);
+ obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
+ doca->doca_cred, tx);
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
bp = dsl_dataset_get_blkptr(ds);
- if (BP_IS_HOLE(bp)) {
- objset_t *os =
- dmu_objset_create_impl(spa, ds, bp, oa->type, tx);
+ os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
+ ds, bp, doca->doca_type, tx);
- if (oa->userfunc)
- oa->userfunc(os, oa->userarg, oa->cr, tx);
+ if (doca->doca_userfunc != NULL) {
+ doca->doca_userfunc(os, doca->doca_userarg,
+ doca->doca_cred, tx);
}
- if (oa->clone_origin == NULL) {
- spa_history_log_internal_ds(ds, "create", tx, "");
- } else {
- char namebuf[MAXNAMELEN];
- dsl_dataset_name(oa->clone_origin, namebuf);
- spa_history_log_internal_ds(ds, "clone", tx,
- "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object);
- }
+ spa_history_log_internal_ds(ds, "create", tx, "");
dsl_dataset_rele(ds, FTAG);
+ dsl_dir_rele(pdd, FTAG);
}
int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
- dsl_dir_t *pdd;
- const char *tail;
- int err = 0;
- struct oscarg oa = { 0 };
-
- ASSERT(strchr(name, '@') == NULL);
- err = dsl_dir_open(name, FTAG, &pdd, &tail);
- if (err)
- return (err);
- if (tail == NULL) {
- dsl_dir_close(pdd, FTAG);
- return (EEXIST);
- }
+ dmu_objset_create_arg_t doca;
- oa.userfunc = func;
- oa.userarg = arg;
- oa.lastname = tail;
- oa.type = type;
- oa.flags = flags;
- oa.cr = CRED();
+ doca.doca_name = name;
+ doca.doca_cred = CRED();
+ doca.doca_flags = flags;
+ doca.doca_userfunc = func;
+ doca.doca_userarg = arg;
+ doca.doca_type = type;
- err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
- dmu_objset_create_sync, pdd, &oa, 5);
- dsl_dir_close(pdd, FTAG);
- return (err);
+ return (dsl_sync_task(name,
+ dmu_objset_create_check, dmu_objset_create_sync, &doca, 5));
}
-int
-dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
+typedef struct dmu_objset_clone_arg {
+ const char *doca_clone;
+ const char *doca_origin;
+ cred_t *doca_cred;
+} dmu_objset_clone_arg_t;
+
+/*ARGSUSED*/
+static int
+dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
{
+ dmu_objset_clone_arg_t *doca = arg;
dsl_dir_t *pdd;
const char *tail;
- int err = 0;
- struct oscarg oa = { 0 };
+ int error;
+ dsl_dataset_t *origin;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
- ASSERT(strchr(name, '@') == NULL);
- err = dsl_dir_open(name, FTAG, &pdd, &tail);
- if (err)
- return (err);
+ if (strchr(doca->doca_clone, '@') != NULL)
+ return (EINVAL);
+
+ error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
+ if (error != 0)
+ return (error);
if (tail == NULL) {
- dsl_dir_close(pdd, FTAG);
+ dsl_dir_rele(pdd, FTAG);
return (EEXIST);
}
-
- oa.lastname = tail;
- oa.clone_origin = clone_origin;
- oa.flags = flags;
- oa.cr = CRED();
-
- err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
- dmu_objset_create_sync, pdd, &oa, 5);
- dsl_dir_close(pdd, FTAG);
- return (err);
-}
-
-int
-dmu_objset_destroy(const char *name, boolean_t defer)
-{
- dsl_dataset_t *ds;
- int error;
-
- error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
- if (error == 0) {
- error = dsl_dataset_destroy(ds, FTAG, defer);
- /* dsl_dataset_destroy() closes the ds. */
+ /* You can't clone across pools. */
+ if (pdd->dd_pool != dp) {
+ dsl_dir_rele(pdd, FTAG);
+ return (EXDEV);
}
+ dsl_dir_rele(pdd, FTAG);
- return (error);
-}
-
-typedef struct snapallarg {
- dsl_sync_task_group_t *saa_dstg;
- boolean_t saa_needsuspend;
- nvlist_t *saa_props;
-
- /* the following are used only if 'temporary' is set: */
- boolean_t saa_temporary;
- const char *saa_htag;
- struct dsl_ds_holdarg *saa_ha;
- dsl_dataset_t *saa_newds;
-} snapallarg_t;
-
-typedef struct snaponearg {
- const char *soa_longname; /* long snap name */
- const char *soa_snapname; /* short snap name */
- snapallarg_t *soa_saa;
-} snaponearg_t;
-
-static int
-snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- snaponearg_t *soa = arg2;
- snapallarg_t *saa = soa->soa_saa;
- int error;
-
- /* The props have already been checked by zfs_check_userprops(). */
-
- error = dsl_dataset_snapshot_check(os->os_dsl_dataset,
- soa->soa_snapname, tx);
- if (error)
+ error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
+ if (error != 0)
return (error);
- if (saa->saa_temporary) {
- /*
- * Ideally we would just call
- * dsl_dataset_user_hold_check() and
- * dsl_dataset_destroy_check() here. However the
- * dataset we want to hold and destroy is the snapshot
- * that we just confirmed we can create, but it won't
- * exist until after these checks are run. Do any
- * checks we can here and if more checks are added to
- * those routines in the future, similar checks may be
- * necessary here.
- */
- if (spa_version(os->os_spa) < SPA_VERSION_USERREFS)
- return (ENOTSUP);
- /*
- * Not checking number of tags because the tag will be
- * unique, as it will be the only tag.
- */
- if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
- return (E2BIG);
-
- saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg),
- KM_SLEEP);
- saa->saa_ha->temphold = B_TRUE;
- saa->saa_ha->htag = saa->saa_htag;
+ /* You can't clone across pools. */
+ if (origin->ds_dir->dd_pool != dp) {
+ dsl_dataset_rele(origin, FTAG);
+ return (EXDEV);
}
- return (error);
-}
-static void
-snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- objset_t *os = arg1;
- dsl_dataset_t *ds = os->os_dsl_dataset;
- snaponearg_t *soa = arg2;
- snapallarg_t *saa = soa->soa_saa;
-
- dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx);
-
- if (saa->saa_props != NULL) {
- dsl_props_arg_t pa;
- pa.pa_props = saa->saa_props;
- pa.pa_source = ZPROP_SRC_LOCAL;
- dsl_props_set_sync(ds->ds_prev, &pa, tx);
+ /* You can only clone snapshots, not the head datasets. */
+ if (!dsl_dataset_is_snapshot(origin)) {
+ dsl_dataset_rele(origin, FTAG);
+ return (EINVAL);
}
+ dsl_dataset_rele(origin, FTAG);
- if (saa->saa_temporary) {
- struct dsl_ds_destroyarg da;
-
- dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx);
- kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg));
- saa->saa_ha = NULL;
- saa->saa_newds = ds->ds_prev;
-
- da.ds = ds->ds_prev;
- da.defer = B_TRUE;
- dsl_dataset_destroy_sync(&da, FTAG, tx);
- }
+ return (0);
}
-static int
-snapshot_one_impl(const char *snapname, void *arg)
+static void
+dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
{
- char fsname[MAXPATHLEN];
- snapallarg_t *saa = arg;
- snaponearg_t *soa;
- objset_t *os;
- int err;
-
- (void) strlcpy(fsname, snapname, sizeof (fsname));
- strchr(fsname, '@')[0] = '\0';
-
- err = dmu_objset_hold(fsname, saa, &os);
- if (err != 0)
- return (err);
-
- /*
- * If the objset is in an inconsistent state (eg, in the process
- * of being destroyed), don't snapshot it.
- */
- if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
- dmu_objset_rele(os, saa);
- return (EBUSY);
- }
-
- if (saa->saa_needsuspend) {
- err = zil_suspend(dmu_objset_zil(os));
- if (err) {
- dmu_objset_rele(os, saa);
- return (err);
- }
- }
+ dmu_objset_clone_arg_t *doca = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *pdd;
+ const char *tail;
+ dsl_dataset_t *origin, *ds;
+ uint64_t obj;
+ char namebuf[MAXNAMELEN];
- soa = kmem_zalloc(sizeof (*soa), KM_SLEEP);
- soa->soa_saa = saa;
- soa->soa_longname = snapname;
- soa->soa_snapname = strchr(snapname, '@') + 1;
+ VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
+ VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
- dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync,
- os, soa, 3);
+ obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
+ doca->doca_cred, tx);
- return (0);
+ VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+ dsl_dataset_name(origin, namebuf);
+ spa_history_log_internal_ds(ds, "clone", tx,
+ "origin=%s (%llu)", namebuf, origin->ds_object);
+ dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
}
-/*
- * The snapshots must all be in the same pool.
- */
int
-dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
+dmu_objset_clone(const char *clone, const char *origin)
{
- dsl_sync_task_t *dst;
- snapallarg_t saa = { 0 };
- spa_t *spa;
- int rv = 0;
- int err;
- nvpair_t *pair;
-
- pair = nvlist_next_nvpair(snaps, NULL);
- if (pair == NULL)
- return (0);
-
- err = spa_open(nvpair_name(pair), &spa, FTAG);
- if (err)
- return (err);
- saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- saa.saa_props = props;
- saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
-
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- err = snapshot_one_impl(nvpair_name(pair), &saa);
- if (err != 0) {
- if (errors != NULL) {
- fnvlist_add_int32(errors,
- nvpair_name(pair), err);
- }
- rv = err;
- }
- }
+ dmu_objset_clone_arg_t doca;
- /*
- * If any call to snapshot_one_impl() failed, don't execute the
- * sync task. The error handling code below will clean up the
- * snaponearg_t from any successful calls to
- * snapshot_one_impl().
- */
- if (rv == 0)
- err = dsl_sync_task_group_wait(saa.saa_dstg);
- if (err != 0)
- rv = err;
-
- for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
- dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
- objset_t *os = dst->dst_arg1;
- snaponearg_t *soa = dst->dst_arg2;
- if (dst->dst_err != 0) {
- if (errors != NULL) {
- fnvlist_add_int32(errors,
- soa->soa_longname, dst->dst_err);
- }
- rv = dst->dst_err;
- }
-
- if (saa.saa_needsuspend)
- zil_resume(dmu_objset_zil(os));
- dmu_objset_rele(os, &saa);
- kmem_free(soa, sizeof (*soa));
- }
+ doca.doca_clone = clone;
+ doca.doca_origin = origin;
+ doca.doca_cred = CRED();
- dsl_sync_task_group_destroy(saa.saa_dstg);
- spa_close(spa, FTAG);
- return (rv);
+ return (dsl_sync_task(clone,
+ dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 5));
}
int
@@ -1013,59 +878,12 @@ dmu_objset_snapshot_one(const char *fsname, const char *snapname)
nvlist_t *snaps = fnvlist_alloc();
fnvlist_add_boolean(snaps, longsnap);
- err = dmu_objset_snapshot(snaps, NULL, NULL);
- fnvlist_free(snaps);
strfree(longsnap);
+ err = dsl_dataset_snapshot(snaps, NULL, NULL);
+ fnvlist_free(snaps);
return (err);
}
-int
-dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd)
-{
- dsl_sync_task_t *dst;
- snapallarg_t saa = { 0 };
- spa_t *spa;
- minor_t minor;
- int err;
-
- err = spa_open(snapname, &spa, FTAG);
- if (err)
- return (err);
- saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- saa.saa_htag = tag;
- saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
- saa.saa_temporary = B_TRUE;
-
- if (cleanup_fd < 0) {
- spa_close(spa, FTAG);
- return (EINVAL);
- }
- if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) {
- spa_close(spa, FTAG);
- return (err);
- }
-
- err = snapshot_one_impl(snapname, &saa);
-
- if (err == 0)
- err = dsl_sync_task_group_wait(saa.saa_dstg);
-
- for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst;
- dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) {
- objset_t *os = dst->dst_arg1;
- dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor);
- if (saa.saa_needsuspend)
- zil_resume(dmu_objset_zil(os));
- dmu_objset_rele(os, &saa);
- }
-
- zfs_onexit_fd_rele(cleanup_fd);
- dsl_sync_task_group_destroy(saa.saa_dstg);
- spa_close(spa, FTAG);
- return (err);
-}
-
-
static void
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
{
@@ -1101,9 +919,9 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
objset_t *os = arg;
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
- ASSERT(bp == os->os_rootbp);
- ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
- ASSERT(BP_GET_LEVEL(bp) == 0);
+ ASSERT3P(bp, ==, os->os_rootbp);
+ ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
+ ASSERT0(BP_GET_LEVEL(bp));
/*
* Update rootbp fill count: it should be the number of objects
@@ -1210,7 +1028,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
while (dr = list_head(list)) {
- ASSERT(dr->dr_dbuf->db_level == 0);
+ ASSERT0(dr->dr_dbuf->db_level);
list_remove(list, dr);
if (dr->dr_zio)
zio_nowait(dr->dr_zio);
@@ -1505,12 +1323,12 @@ dmu_objset_userspace_upgrade(objset_t *os)
return (EINTR);
objerr = dmu_bonus_hold(os, obj, FTAG, &db);
- if (objerr)
+ if (objerr != 0)
continue;
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, obj);
objerr = dmu_tx_assign(tx, TXG_WAIT);
- if (objerr) {
+ if (objerr != 0) {
dmu_tx_abort(tx);
continue;
}
@@ -1593,6 +1411,8 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
zap_cursor_t cursor;
zap_attribute_t attr;
+ ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
+
if (ds->ds_phys->ds_snapnames_zapobj == 0)
return (ENOENT);
@@ -1659,42 +1479,122 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name,
return (0);
}
-struct findarg {
- int (*func)(const char *, void *);
- void *arg;
-};
-
-/* ARGSUSED */
-static int
-findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
-{
- struct findarg *fa = arg;
- return (fa->func(dsname, fa->arg));
-}
-
/*
- * Find all objsets under name, and for each, call 'func(child_name, arg)'.
- * Perhaps change all callers to use dmu_objset_find_spa()?
+ * Find objsets under and including ddobj, call func(ds) on each.
*/
int
-dmu_objset_find(char *name, int func(const char *, void *), void *arg,
- int flags)
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+ int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
{
- struct findarg fa;
- fa.func = func;
- fa.arg = arg;
- return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags));
+ dsl_dir_t *dd;
+ dsl_dataset_t *ds;
+ zap_cursor_t zc;
+ zap_attribute_t *attr;
+ uint64_t thisobj;
+ int err;
+
+ ASSERT(dsl_pool_config_held(dp));
+
+ err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+ if (err != 0)
+ return (err);
+
+ /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
+ if (dd->dd_myname[0] == '$') {
+ dsl_dir_rele(dd, FTAG);
+ return (0);
+ }
+
+ thisobj = dd->dd_phys->dd_head_dataset_obj;
+ attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+
+ /*
+ * Iterate over all children.
+ */
+ if (flags & DS_FIND_CHILDREN) {
+ for (zap_cursor_init(&zc, dp->dp_meta_objset,
+ dd->dd_phys->dd_child_dir_zapobj);
+ zap_cursor_retrieve(&zc, attr) == 0;
+ (void) zap_cursor_advance(&zc)) {
+ ASSERT3U(attr->za_integer_length, ==,
+ sizeof (uint64_t));
+ ASSERT3U(attr->za_num_integers, ==, 1);
+
+ err = dmu_objset_find_dp(dp, attr->za_first_integer,
+ func, arg, flags);
+ if (err != 0)
+ break;
+ }
+ zap_cursor_fini(&zc);
+
+ if (err != 0) {
+ dsl_dir_rele(dd, FTAG);
+ kmem_free(attr, sizeof (zap_attribute_t));
+ return (err);
+ }
+ }
+
+ /*
+ * Iterate over all snapshots.
+ */
+ if (flags & DS_FIND_SNAPSHOTS) {
+ dsl_dataset_t *ds;
+ err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
+
+ if (err == 0) {
+ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
+ dsl_dataset_rele(ds, FTAG);
+
+ for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
+ zap_cursor_retrieve(&zc, attr) == 0;
+ (void) zap_cursor_advance(&zc)) {
+ ASSERT3U(attr->za_integer_length, ==,
+ sizeof (uint64_t));
+ ASSERT3U(attr->za_num_integers, ==, 1);
+
+ err = dsl_dataset_hold_obj(dp,
+ attr->za_first_integer, FTAG, &ds);
+ if (err != 0)
+ break;
+ err = func(dp, ds, arg);
+ dsl_dataset_rele(ds, FTAG);
+ if (err != 0)
+ break;
+ }
+ zap_cursor_fini(&zc);
+ }
+ }
+
+ dsl_dir_rele(dd, FTAG);
+ kmem_free(attr, sizeof (zap_attribute_t));
+
+ if (err != 0)
+ return (err);
+
+ /*
+ * Apply to self.
+ */
+ err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
+ if (err != 0)
+ return (err);
+ err = func(dp, ds, arg);
+ dsl_dataset_rele(ds, FTAG);
+ return (err);
}
/*
- * Find all objsets under name, call func on each
+ * Find all objsets under name, and for each, call 'func(child_name, arg)'.
+ * The dp_config_rwlock must not be held when this is called, and it
+ * will not be held when the callback is called.
+ * Therefore this function should only be used when the pool is not changing
+ * (e.g. in syncing context), or the callback can deal with the possible races.
*/
-int
-dmu_objset_find_spa(spa_t *spa, const char *name,
- int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
+static int
+dmu_objset_find_impl(spa_t *spa, const char *name,
+ int func(const char *, void *), void *arg, int flags)
{
dsl_dir_t *dd;
- dsl_pool_t *dp;
+ dsl_pool_t *dp = spa_get_dsl(spa);
dsl_dataset_t *ds;
zap_cursor_t zc;
zap_attribute_t *attr;
@@ -1702,21 +1602,23 @@ dmu_objset_find_spa(spa_t *spa, const char *name,
uint64_t thisobj;
int err;
- if (name == NULL)
- name = spa_name(spa);
- err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
- if (err)
+ dsl_pool_config_enter(dp, FTAG);
+
+ err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
+ if (err != 0) {
+ dsl_pool_config_exit(dp, FTAG);
return (err);
+ }
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
if (dd->dd_myname[0] == '$') {
- dsl_dir_close(dd, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ dsl_pool_config_exit(dp, FTAG);
return (0);
}
thisobj = dd->dd_phys->dd_head_dataset_obj;
attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
- dp = dd->dd_pool;
/*
* Iterate over all children.
@@ -1726,19 +1628,24 @@ dmu_objset_find_spa(spa_t *spa, const char *name,
dd->dd_phys->dd_child_dir_zapobj);
zap_cursor_retrieve(&zc, attr) == 0;
(void) zap_cursor_advance(&zc)) {
- ASSERT(attr->za_integer_length == sizeof (uint64_t));
- ASSERT(attr->za_num_integers == 1);
+ ASSERT3U(attr->za_integer_length, ==,
+ sizeof (uint64_t));
+ ASSERT3U(attr->za_num_integers, ==, 1);
child = kmem_asprintf("%s/%s", name, attr->za_name);
- err = dmu_objset_find_spa(spa, child, func, arg, flags);
+ dsl_pool_config_exit(dp, FTAG);
+ err = dmu_objset_find_impl(spa, child,
+ func, arg, flags);
+ dsl_pool_config_enter(dp, FTAG);
strfree(child);
- if (err)
+ if (err != 0)
break;
}
zap_cursor_fini(&zc);
- if (err) {
- dsl_dir_close(dd, FTAG);
+ if (err != 0) {
+ dsl_dir_rele(dd, FTAG);
+ dsl_pool_config_exit(dp, FTAG);
kmem_free(attr, sizeof (zap_attribute_t));
return (err);
}
@@ -1748,11 +1655,7 @@ dmu_objset_find_spa(spa_t *spa, const char *name,
* Iterate over all snapshots.
*/
if (flags & DS_FIND_SNAPSHOTS) {
- if (!dsl_pool_sync_context(dp))
- rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
- if (!dsl_pool_sync_context(dp))
- rw_exit(&dp->dp_config_rwlock);
if (err == 0) {
uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -1761,64 +1664,50 @@ dmu_objset_find_spa(spa_t *spa, const char *name,
for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
zap_cursor_retrieve(&zc, attr) == 0;
(void) zap_cursor_advance(&zc)) {
- ASSERT(attr->za_integer_length ==
+ ASSERT3U(attr->za_integer_length, ==,
sizeof (uint64_t));
- ASSERT(attr->za_num_integers == 1);
+ ASSERT3U(attr->za_num_integers, ==, 1);
child = kmem_asprintf("%s@%s",
name, attr->za_name);
- err = func(spa, attr->za_first_integer,
- child, arg);
+ dsl_pool_config_exit(dp, FTAG);
+ err = func(child, arg);
+ dsl_pool_config_enter(dp, FTAG);
strfree(child);
- if (err)
+ if (err != 0)
break;
}
zap_cursor_fini(&zc);
}
}
- dsl_dir_close(dd, FTAG);
+ dsl_dir_rele(dd, FTAG);
kmem_free(attr, sizeof (zap_attribute_t));
+ dsl_pool_config_exit(dp, FTAG);
- if (err)
+ if (err != 0)
return (err);
- /*
- * Apply to self if appropriate.
- */
- err = func(spa, thisobj, name, arg);
- return (err);
+ /* Apply to self. */
+ return (func(name, arg));
}
-/* ARGSUSED */
+/*
+ * See comment above dmu_objset_find_impl().
+ */
int
-dmu_objset_prefetch(const char *name, void *arg)
+dmu_objset_find(char *name, int func(const char *, void *), void *arg,
+ int flags)
{
- dsl_dataset_t *ds;
-
- if (dsl_dataset_hold(name, FTAG, &ds))
- return (0);
-
- if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
- mutex_enter(&ds->ds_opening_lock);
- if (ds->ds_objset == NULL) {
- uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
- zbookmark_t zb;
-
- SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
- ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
-
- (void) arc_read(NULL, dsl_dataset_get_spa(ds),
- &ds->ds_phys->ds_bp, NULL, NULL,
- ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
- &aflags, &zb);
- }
- mutex_exit(&ds->ds_opening_lock);
- }
+ spa_t *spa;
+ int error;
- dsl_dataset_rele(ds, FTAG);
- return (0);
+ error = spa_open(name, &spa, FTAG);
+ if (error != 0)
+ return (error);
+ error = dmu_objset_find_impl(spa, name, func, arg, flags);
+ spa_close(spa, FTAG);
+ return (error);
}
void
@@ -1834,3 +1723,19 @@ dmu_objset_get_user(objset_t *os)
ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
return (os->os_user_ptr);
}
+
+/*
+ * Determine name of filesystem, given name of snapshot.
+ * buf must be at least MAXNAMELEN bytes
+ */
+int
+dmu_fsname(const char *snapname, char *buf)
+{
+ char *atp = strchr(snapname, '@');
+ if (atp == NULL)
+ return (EINVAL);
+ if (atp - snapname >= MAXNAMELEN)
+ return (ENAMETOOLONG);
+ (void) strlcpy(buf, snapname, atp - snapname + 1);
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index c2493357d6..a2a3647b4b 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -46,11 +46,14 @@
#include <sys/avl.h>
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
static char *dmu_recv_tag = "dmu_recv_tag";
+static const char *recv_clone_name = "%recv";
static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
@@ -290,7 +293,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
return (EINTR);
- if (dsp->dsa_err)
+ if (dsp->dsa_err != 0)
return (EINTR);
return (0);
}
@@ -340,7 +343,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
uint64_t dnobj = (zb->zb_blkid <<
(DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
err = dump_dnode(dsp, dnobj, blk+i);
- if (err)
+ if (err != 0)
break;
}
(void) arc_buf_remove_ref(abuf, &abuf);
@@ -388,65 +391,33 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
/*
- * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
- * For example, they could both be snapshots of the same filesystem, and
- * 'earlier' is before 'later'. Or 'earlier' could be the origin of
- * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
- * filesystem. Or 'earlier' could be the origin's origin.
+ * Releases dp, ds, and fromds, using the specified tag.
*/
-static boolean_t
-is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
-{
- dsl_pool_t *dp = later->ds_dir->dd_pool;
- int error;
- boolean_t ret;
- dsl_dataset_t *origin;
-
- if (earlier->ds_phys->ds_creation_txg >=
- later->ds_phys->ds_creation_txg)
- return (B_FALSE);
-
- if (later->ds_dir == earlier->ds_dir)
- return (B_TRUE);
- if (!dsl_dir_is_clone(later->ds_dir))
- return (B_FALSE);
-
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) {
- rw_exit(&dp->dp_config_rwlock);
- return (B_TRUE);
- }
- error = dsl_dataset_hold_obj(dp,
- later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
- rw_exit(&dp->dp_config_rwlock);
- if (error != 0)
- return (B_FALSE);
- ret = is_before(origin, earlier);
- dsl_dataset_rele(origin, FTAG);
- return (ret);
-}
-
-int
-dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
- offset_t *off)
+static int
+dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
+ dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off)
{
- dsl_dataset_t *ds = tosnap->os_dsl_dataset;
- dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
+ objset_t *os;
dmu_replay_record_t *drr;
dmu_sendarg_t *dsp;
int err;
uint64_t fromtxg = 0;
- /* tosnap must be a snapshot */
- if (ds->ds_phys->ds_next_snap_obj == 0)
- return (EINVAL);
-
- /*
- * fromsnap must be an earlier snapshot from the same fs as tosnap,
- * or the origin's fs.
- */
- if (fromds != NULL && !is_before(ds, fromds))
+ if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) {
+ dsl_dataset_rele(fromds, tag);
+ dsl_dataset_rele(ds, tag);
+ dsl_pool_rele(dp, tag);
return (EXDEV);
+ }
+
+ err = dmu_objset_from_ds(ds, &os);
+ if (err != 0) {
+ if (fromds != NULL)
+ dsl_dataset_rele(fromds, tag);
+ dsl_dataset_rele(ds, tag);
+ dsl_pool_rele(dp, tag);
+ return (err);
+ }
drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
drr->drr_type = DRR_BEGIN;
@@ -455,13 +426,17 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
DMU_SUBSTREAM);
#ifdef _KERNEL
- if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
+ if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
- if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
+ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
kmem_free(drr, sizeof (dmu_replay_record_t));
+ if (fromds != NULL)
+ dsl_dataset_rele(fromds, tag);
+ dsl_dataset_rele(ds, tag);
+ dsl_pool_rele(dp, tag);
return (EINVAL);
}
- if (version == ZPL_VERSION_SA) {
+ if (version >= ZPL_VERSION_SA) {
DMU_SET_FEATUREFLAGS(
drr->drr_u.drr_begin.drr_versioninfo,
DMU_BACKUP_FEATURE_SA_SPILL);
@@ -471,19 +446,22 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
drr->drr_u.drr_begin.drr_creation_time =
ds->ds_phys->ds_creation_time;
- drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
+ drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
if (fromds != NULL && ds->ds_dir != fromds->ds_dir)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
- if (fromds)
+ if (fromds != NULL)
drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
- if (fromds)
+ if (fromds != NULL) {
fromtxg = fromds->ds_phys->ds_creation_txg;
+ dsl_dataset_rele(fromds, tag);
+ fromds = NULL;
+ }
dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
@@ -491,7 +469,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
dsp->dsa_vp = vp;
dsp->dsa_outfd = outfd;
dsp->dsa_proc = curproc;
- dsp->dsa_os = tosnap;
+ dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = ds->ds_phys->ds_guid;
ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
@@ -506,6 +484,9 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
goto out;
}
+ dsl_dataset_long_hold(ds, FTAG);
+ dsl_pool_rele(dp, tag);
+
err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
backup_cb, dsp);
@@ -513,8 +494,8 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
err = EINTR;
- if (err) {
- if (err == EINTR && dsp->dsa_err)
+ if (err != 0) {
+ if (err == EINTR && dsp->dsa_err != 0)
err = dsp->dsa_err;
goto out;
}
@@ -537,27 +518,96 @@ out:
kmem_free(drr, sizeof (dmu_replay_record_t));
kmem_free(dsp, sizeof (dmu_sendarg_t));
+ dsl_dataset_long_rele(ds, FTAG);
+ dsl_dataset_rele(ds, tag);
+
return (err);
}
int
-dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
+dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
+ int outfd, vnode_t *vp, offset_t *off)
+{
+ dsl_pool_t *dp;
+ dsl_dataset_t *ds;
+ dsl_dataset_t *fromds = NULL;
+ int err;
+
+ err = dsl_pool_hold(pool, FTAG, &dp);
+ if (err != 0)
+ return (err);
+
+ err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
+ if (err != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+
+ if (fromsnap != 0) {
+ err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
+ if (err != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+ }
+
+ return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off));
+}
+
+int
+dmu_send(const char *tosnap, const char *fromsnap,
+ int outfd, vnode_t *vp, offset_t *off)
+{
+ dsl_pool_t *dp;
+ dsl_dataset_t *ds;
+ dsl_dataset_t *fromds = NULL;
+ int err;
+
+ if (strchr(tosnap, '@') == NULL)
+ return (EINVAL);
+ if (fromsnap != NULL && strchr(fromsnap, '@') == NULL)
+ return (EINVAL);
+
+ err = dsl_pool_hold(tosnap, FTAG, &dp);
+ if (err != 0)
+ return (err);
+
+ err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
+ if (err != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+
+ if (fromsnap != NULL) {
+ err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
+ if (err != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+ }
+ return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off));
+}
+
+int
+dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
- dsl_dataset_t *ds = tosnap->os_dsl_dataset;
- dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
int err;
uint64_t size;
+ ASSERT(dsl_pool_config_held(dp));
+
/* tosnap must be a snapshot */
- if (ds->ds_phys->ds_next_snap_obj == 0)
+ if (!dsl_dataset_is_snapshot(ds))
return (EINVAL);
/*
* fromsnap must be an earlier snapshot from the same fs as tosnap,
* or the origin's fs.
*/
- if (fromds != NULL && !is_before(ds, fromds))
+ if (fromds != NULL && !dsl_dataset_is_before(ds, fromds))
return (EXDEV);
/* Get uncompressed size estimate of changed data. */
@@ -567,7 +617,7 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
uint64_t used, comp;
err = dsl_dataset_space_written(fromds, ds,
&used, &comp, &size);
- if (err)
+ if (err != 0)
return (err);
}
@@ -587,11 +637,8 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
* block, which we observe in practice.
*/
uint64_t recordsize;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_ds(ds, "recordsize",
- sizeof (recordsize), 1, &recordsize, NULL);
- rw_exit(&dp->dp_config_rwlock);
- if (err)
+ err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
+ if (err != 0)
return (err);
size -= size / recordsize * sizeof (blkptr_t);
@@ -603,93 +650,40 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep)
return (0);
}
-struct recvbeginsyncarg {
- const char *tofs;
- const char *tosnap;
- dsl_dataset_t *origin;
- uint64_t fromguid;
- dmu_objset_type_t type;
- void *tag;
- boolean_t force;
- uint64_t dsflags;
- char clonelastname[MAXNAMELEN];
- dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
- cred_t *cr;
-};
-
-/* ARGSUSED */
-static int
-recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- uint64_t val;
- int err;
-
- err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
- strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
-
- if (err != ENOENT)
- return (err ? err : EEXIST);
-
- if (rbsa->origin) {
- /* make sure it's a snap in the same pool */
- if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
- return (EXDEV);
- if (!dsl_dataset_is_snapshot(rbsa->origin))
- return (EINVAL);
- if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
- return (ENODEV);
- }
-
- return (0);
-}
-
-static void
-recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
- uint64_t dsobj;
-
- /* Create and open new dataset. */
- dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
- rbsa->origin, flags, rbsa->cr, tx);
- VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
- B_TRUE, dmu_recv_tag, &rbsa->ds));
-
- if (rbsa->origin == NULL) {
- (void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
- rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
- }
-
- spa_history_log_internal_ds(rbsa->ds, "receive new", tx, "");
-}
+typedef struct dmu_recv_begin_arg {
+ const char *drba_origin;
+ dmu_recv_cookie_t *drba_cookie;
+ cred_t *drba_cred;
+} dmu_recv_begin_arg_t;
-/* ARGSUSED */
static int
-recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
+ uint64_t fromguid)
{
- dsl_dataset_t *ds = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- int err;
uint64_t val;
+ int error;
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
/* must not have any changes since most recent snapshot */
- if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
+ if (!drba->drba_cookie->drc_force &&
+ dsl_dataset_modified_since_lastsnap(ds))
return (ETXTBSY);
+ /* temporary clone name must not exist */
+ error = zap_lookup(dp->dp_meta_objset,
+ ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name,
+ 8, 1, &val);
+ if (error != ENOENT)
+ return (error == 0 ? EBUSY : error);
+
/* new snapshot name must not exist */
- err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
- if (err == 0)
- return (EEXIST);
- if (err != ENOENT)
- return (err);
+ error = zap_lookup(dp->dp_meta_objset,
+ ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap,
+ 8, 1, &val);
+ if (error != ENOENT)
+ return (error == 0 ? EEXIST : error);
- if (rbsa->fromguid) {
+ if (fromguid != 0) {
/* if incremental, most recent snapshot must match fromguid */
if (ds->ds_prev == NULL)
return (ENODEV);
@@ -698,20 +692,20 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
* most recent snapshot must match fromguid, or there are no
* changes since the fromguid one
*/
- if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) {
+ if (ds->ds_prev->ds_phys->ds_guid != fromguid) {
uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth;
uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj;
while (obj != 0) {
dsl_dataset_t *snap;
- err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- obj, FTAG, &snap);
- if (err)
+ error = dsl_dataset_hold_obj(dp, obj, FTAG,
+ &snap);
+ if (error != 0)
return (ENODEV);
if (snap->ds_phys->ds_creation_txg < birth) {
dsl_dataset_rele(snap, FTAG);
return (ENODEV);
}
- if (snap->ds_phys->ds_guid == rbsa->fromguid) {
+ if (snap->ds_phys->ds_guid == fromguid) {
dsl_dataset_rele(snap, FTAG);
break; /* it's ok */
}
@@ -727,58 +721,153 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (ENODEV);
}
- /* temporary clone name must not exist */
- err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_dir->dd_phys->dd_child_dir_zapobj,
- rbsa->clonelastname, 8, 1, &val);
- if (err == 0)
- return (EEXIST);
- if (err != ENOENT)
- return (err);
-
return (0);
+
+}
+
+static int
+dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ uint64_t fromguid = drrb->drr_fromguid;
+ int flags = drrb->drr_flags;
+ int error;
+ dsl_dataset_t *ds;
+ const char *tofs = drba->drba_cookie->drc_tofs;
+
+ /* already checked */
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
+ if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM ||
+ drrb->drr_type >= DMU_OST_NUMTYPES ||
+ ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
+ return (EINVAL);
+
+ /* Verify pool version supports SA if SA_SPILL feature set */
+ if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_SA_SPILL) &&
+ spa_version(dp->dp_spa) < SPA_VERSION_SA) {
+ return (ENOTSUP);
+ }
+
+ error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if (error == 0) {
+ /* target fs already exists; recv into temp clone */
+
+ /* Can't recv a clone into an existing fs */
+ if (flags & DRR_FLAG_CLONE) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
+
+ error = recv_begin_check_existing_impl(drba, ds, fromguid);
+ dsl_dataset_rele(ds, FTAG);
+ } else if (error == ENOENT) {
+ /* target fs does not exist; must be a full backup or clone */
+ char buf[MAXNAMELEN];
+
+ /*
+ * If it's a non-clone incremental, we are missing the
+ * target fs, so fail the recv.
+ */
+ if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
+ return (ENOENT);
+
+ /* Open the parent of tofs */
+ ASSERT3U(strlen(tofs), <, MAXNAMELEN);
+ (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
+ error = dsl_dataset_hold(dp, buf, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ if (drba->drba_origin != NULL) {
+ dsl_dataset_t *origin;
+ error = dsl_dataset_hold(dp, drba->drba_origin,
+ FTAG, &origin);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
+ if (!dsl_dataset_is_snapshot(origin)) {
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
+ if (origin->ds_phys->ds_guid != fromguid) {
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dataset_rele(ds, FTAG);
+ return (ENODEV);
+ }
+ dsl_dataset_rele(origin, FTAG);
+ }
+ dsl_dataset_rele(ds, FTAG);
+ error = 0;
+ }
+ return (error);
}
-/* ARGSUSED */
static void
-recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ohds = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- dsl_pool_t *dp = ohds->ds_dir->dd_pool;
- dsl_dataset_t *cds;
- uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ dsl_dataset_t *ds, *newds;
uint64_t dsobj;
+ int error;
+ uint64_t crflags;
+
+ crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
+ DS_FLAG_CI_DATASET : 0;
- /* create and open the temporary clone */
- dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
- ohds->ds_prev, flags, rbsa->cr, tx);
- VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
+ error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if (error == 0) {
+ /* create temporary clone */
+ dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
+ ds->ds_prev, crflags, drba->drba_cred, tx);
+ dsl_dataset_rele(ds, FTAG);
+ } else {
+ dsl_dir_t *dd;
+ const char *tail;
+ dsl_dataset_t *origin = NULL;
+
+ VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail));
+
+ if (drba->drba_origin != NULL) {
+ VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
+ FTAG, &origin));
+ }
+
+ /* Create new dataset. */
+ dsobj = dsl_dataset_create_sync(dd,
+ strrchr(tofs, '/') + 1,
+ origin, crflags, drba->drba_cred, tx);
+ if (origin != NULL)
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ drba->drba_cookie->drc_newfs = B_TRUE;
+ }
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+
+ dmu_buf_will_dirty(newds->ds_dbuf, tx);
+ newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
/*
* If we actually created a non-clone, we need to create the
* objset in our new dataset.
*/
- if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) {
+ if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
(void) dmu_objset_create_impl(dp->dp_spa,
- cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx);
+ newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
}
- rbsa->ds = cds;
-
- spa_history_log_internal_ds(cds, "receive over existing", tx, "");
-}
-
-static boolean_t
-dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb)
-{
- int featureflags;
-
- featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+ drba->drba_cookie->drc_ds = newds;
- /* Verify pool version supports SA if SA_SPILL feature set */
- return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
- (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA));
+ spa_history_log_internal_ds(newds, "receive", tx, "");
}
/*
@@ -786,132 +875,55 @@ dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb)
* succeeds; otherwise we will leak the holds on the datasets.
*/
int
-dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb,
- boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc)
+dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
+ boolean_t force, char *origin, dmu_recv_cookie_t *drc)
{
- int err = 0;
- boolean_t byteswap;
- struct recvbeginsyncarg rbsa = { 0 };
- uint64_t versioninfo;
- int flags;
- dsl_dataset_t *ds;
-
- if (drrb->drr_magic == DMU_BACKUP_MAGIC)
- byteswap = FALSE;
- else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
- byteswap = TRUE;
- else
- return (EINVAL);
-
- rbsa.tofs = tofs;
- rbsa.tosnap = tosnap;
- rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
- rbsa.fromguid = drrb->drr_fromguid;
- rbsa.type = drrb->drr_type;
- rbsa.tag = FTAG;
- rbsa.dsflags = 0;
- rbsa.cr = CRED();
- versioninfo = drrb->drr_versioninfo;
- flags = drrb->drr_flags;
-
- if (byteswap) {
- rbsa.type = BSWAP_32(rbsa.type);
- rbsa.fromguid = BSWAP_64(rbsa.fromguid);
- versioninfo = BSWAP_64(versioninfo);
- flags = BSWAP_32(flags);
- }
-
- if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM ||
- rbsa.type >= DMU_OST_NUMTYPES ||
- ((flags & DRR_FLAG_CLONE) && origin == NULL))
- return (EINVAL);
-
- if (flags & DRR_FLAG_CI_DATA)
- rbsa.dsflags = DS_FLAG_CI_DATASET;
+ dmu_recv_begin_arg_t drba = { 0 };
+ dmu_replay_record_t *drr;
bzero(drc, sizeof (dmu_recv_cookie_t));
drc->drc_drrb = drrb;
drc->drc_tosnap = tosnap;
- drc->drc_top_ds = top_ds;
+ drc->drc_tofs = tofs;
drc->drc_force = force;
- /*
- * Process the begin in syncing context.
- */
-
- /* open the dataset we are logically receiving into */
- err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
- if (err == 0) {
- if (dmu_recv_verify_features(ds, drrb)) {
- dsl_dataset_rele(ds, dmu_recv_tag);
- return (ENOTSUP);
- }
- /* target fs already exists; recv into temp clone */
-
- /* Can't recv a clone into an existing fs */
- if (flags & DRR_FLAG_CLONE) {
- dsl_dataset_rele(ds, dmu_recv_tag);
- return (EINVAL);
- }
-
- /* must not have an incremental recv already in progress */
- if (!mutex_tryenter(&ds->ds_recvlock)) {
- dsl_dataset_rele(ds, dmu_recv_tag);
- return (EBUSY);
- }
-
- /* tmp clone name is: tofs/%tosnap" */
- (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
- "%%%s", tosnap);
- rbsa.force = force;
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- recv_existing_check, recv_existing_sync, ds, &rbsa, 5);
- if (err) {
- mutex_exit(&ds->ds_recvlock);
- dsl_dataset_rele(ds, dmu_recv_tag);
- return (err);
- }
- drc->drc_logical_ds = ds;
- drc->drc_real_ds = rbsa.ds;
- } else if (err == ENOENT) {
- /* target fs does not exist; must be a full backup or clone */
- char *cp;
-
- /*
- * If it's a non-clone incremental, we are missing the
- * target fs, so fail the recv.
- */
- if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE))
- return (ENOENT);
-
- /* Open the parent of tofs */
- cp = strrchr(tofs, '/');
- *cp = '\0';
- err = dsl_dataset_hold(tofs, FTAG, &ds);
- *cp = '/';
- if (err)
- return (err);
+ if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
+ drc->drc_byteswap = B_TRUE;
+ else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
+ return (EINVAL);
- if (dmu_recv_verify_features(ds, drrb)) {
- dsl_dataset_rele(ds, FTAG);
- return (ENOTSUP);
- }
+ drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
+ drr->drr_type = DRR_BEGIN;
+ drr->drr_u.drr_begin = *drc->drc_drrb;
+ if (drc->drc_byteswap) {
+ fletcher_4_incremental_byteswap(drr,
+ sizeof (dmu_replay_record_t), &drc->drc_cksum);
+ } else {
+ fletcher_4_incremental_native(drr,
+ sizeof (dmu_replay_record_t), &drc->drc_cksum);
+ }
+ kmem_free(drr, sizeof (dmu_replay_record_t));
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5);
- dsl_dataset_rele(ds, FTAG);
- if (err)
- return (err);
- drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
- drc->drc_newfs = B_TRUE;
+ if (drc->drc_byteswap) {
+ drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+ drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
+ drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
+ drrb->drr_type = BSWAP_32(drrb->drr_type);
+ drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+ drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
}
- return (err);
+ drba.drba_origin = origin;
+ drba.drba_cookie = drc;
+ drba.drba_cred = CRED();
+
+ return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
+ &drba, 5));
}
struct restorearg {
int err;
- int byteswap;
+ boolean_t byteswap;
vnode_t *vp;
char *buf;
uint64_t voff;
@@ -947,7 +959,7 @@ free_guid_map_onexit(void *arg)
guid_map_entry_t *gmep;
while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
- dsl_dataset_rele(gmep->gme_ds, ca);
+ dsl_dataset_long_rele(gmep->gme_ds, gmep);
kmem_free(gmep, sizeof (guid_map_entry_t));
}
avl_destroy(ca);
@@ -975,7 +987,7 @@ restore_read(struct restorearg *ra, int len)
ra->err = EINVAL;
ra->voff += len - done - resid;
done = len - resid;
- if (ra->err)
+ if (ra->err != 0)
return (NULL);
}
@@ -1094,7 +1106,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
if (drro->drr_bonuslen) {
data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
- if (ra->err)
+ if (ra->err != 0)
return (ra->err);
}
@@ -1103,7 +1115,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
+ if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
@@ -1117,14 +1129,14 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen);
}
- if (err) {
+ if (err != 0) {
return (EINVAL);
}
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, drro->drr_object);
err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
+ if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
@@ -1172,7 +1184,7 @@ restore_freeobjects(struct restorearg *ra, objset_t *os,
continue;
err = dmu_free_object(os, obj);
- if (err)
+ if (err != 0)
return (err);
}
return (0);
@@ -1202,7 +1214,7 @@ restore_write(struct restorearg *ra, objset_t *os,
dmu_tx_hold_write(tx, drrw->drr_object,
drrw->drr_offset, drrw->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
+ if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
@@ -1264,7 +1276,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
dmu_tx_hold_write(tx, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
+ if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
@@ -1305,7 +1317,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
dmu_tx_hold_spill(tx, db->db_object);
err = dmu_tx_assign(tx, TXG_WAIT);
- if (err) {
+ if (err != 0) {
dmu_buf_rele(db, FTAG);
dmu_buf_rele(db_spill, FTAG);
dmu_tx_abort(tx);
@@ -1344,6 +1356,16 @@ restore_free(struct restorearg *ra, objset_t *os,
return (err);
}
+/* used to destroy the drc_ds on error */
+static void
+dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
+{
+ char name[MAXNAMELEN];
+ dsl_dataset_name(drc->drc_ds, name);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ (void) dsl_destroy_head(name);
+}
+
/*
* NB: callers *must* call dmu_recv_end() if this succeeds.
*/
@@ -1357,52 +1379,24 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
zio_cksum_t pcksum;
int featureflags;
- if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
- ra.byteswap = TRUE;
-
- {
- /* compute checksum of drr_begin record */
- dmu_replay_record_t *drr;
- drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
-
- drr->drr_type = DRR_BEGIN;
- drr->drr_u.drr_begin = *drc->drc_drrb;
- if (ra.byteswap) {
- fletcher_4_incremental_byteswap(drr,
- sizeof (dmu_replay_record_t), &ra.cksum);
- } else {
- fletcher_4_incremental_native(drr,
- sizeof (dmu_replay_record_t), &ra.cksum);
- }
- kmem_free(drr, sizeof (dmu_replay_record_t));
- }
-
- if (ra.byteswap) {
- struct drr_begin *drrb = drc->drc_drrb;
- drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
- drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
- drrb->drr_type = BSWAP_32(drrb->drr_type);
- drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
- drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
- }
-
+ ra.byteswap = drc->drc_byteswap;
+ ra.cksum = drc->drc_cksum;
ra.vp = vp;
ra.voff = *voffp;
ra.bufsize = 1<<20;
ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
/* these were verified in dmu_recv_begin */
- ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) ==
+ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
DMU_SUBSTREAM);
- ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);
+ ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);
/*
* Open the objset we are modifying.
*/
- VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
+ VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
- ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
+ ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
@@ -1415,7 +1409,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
goto out;
}
ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
- if (ra.err) {
+ if (ra.err != 0) {
cleanup_fd = -1;
goto out;
}
@@ -1429,12 +1423,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
ra.err = zfs_onexit_add_cb(minor,
free_guid_map_onexit, ra.guid_to_ds_map,
action_handlep);
- if (ra.err)
+ if (ra.err != 0)
goto out;
} else {
ra.err = zfs_onexit_cb_data(minor, *action_handlep,
(void **)&ra.guid_to_ds_map);
- if (ra.err)
+ if (ra.err != 0)
goto out;
}
@@ -1528,14 +1522,7 @@ out:
* destroy what we created, so we don't leave it in the
* inconsistent restoring state.
*/
- txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
-
- (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
- B_FALSE);
- if (drc->drc_real_ds != drc->drc_logical_ds) {
- mutex_exit(&drc->drc_logical_ds->ds_recvlock);
- dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
- }
+ dmu_recv_cleanup_ds(drc);
}
kmem_free(ra.buf, ra.bufsize);
@@ -1543,142 +1530,176 @@ out:
return (ra.err);
}
-struct recvendsyncarg {
- char *tosnap;
- uint64_t creation_time;
- uint64_t toguid;
-};
-
static int
-recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_end_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- struct recvendsyncarg *resa = arg2;
+ dmu_recv_cookie_t *drc = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ int error;
+
+ ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);
- return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
+ if (!drc->drc_newfs) {
+ dsl_dataset_t *origin_head;
+
+ error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
+ if (error != 0)
+ return (error);
+ error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
+ origin_head, drc->drc_force);
+ if (error != 0) {
+ dsl_dataset_rele(origin_head, FTAG);
+ return (error);
+ }
+ error = dsl_dataset_snapshot_check_impl(origin_head,
+ drc->drc_tosnap, tx);
+ dsl_dataset_rele(origin_head, FTAG);
+ if (error != 0)
+ return (error);
+
+ error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
+ } else {
+ error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
+ drc->drc_tosnap, tx);
+ }
+ return (error);
}
static void
-recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- struct recvendsyncarg *resa = arg2;
+ dmu_recv_cookie_t *drc = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+
+ spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
+ tx, "snap=%s", drc->drc_tosnap);
+
+ if (!drc->drc_newfs) {
+ dsl_dataset_t *origin_head;
+
+ VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
+ &origin_head));
+ dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
+ origin_head, tx);
+ dsl_dataset_snapshot_sync_impl(origin_head,
+ drc->drc_tosnap, tx);
+
+ /* set snapshot's creation time and guid */
+ dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx);
+ origin_head->ds_prev->ds_phys->ds_creation_time =
+ drc->drc_drrb->drr_creation_time;
+ origin_head->ds_prev->ds_phys->ds_guid =
+ drc->drc_drrb->drr_toguid;
+ origin_head->ds_prev->ds_phys->ds_flags &=
+ ~DS_FLAG_INCONSISTENT;
+
+ dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
+ origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+
+ dsl_dataset_rele(origin_head, FTAG);
+ dsl_destroy_head_sync_impl(drc->drc_ds, tx);
+ } else {
+ dsl_dataset_t *ds = drc->drc_ds;
- dsl_dataset_snapshot_sync(ds, resa->tosnap, tx);
+ dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx);
- /* set snapshot's creation time and guid */
- dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
- ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
- ds->ds_prev->ds_phys->ds_guid = resa->toguid;
- ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ /* set snapshot's creation time and guid */
+ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
+ ds->ds_prev->ds_phys->ds_creation_time =
+ drc->drc_drrb->drr_creation_time;
+ ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid;
+ ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
- spa_history_log_internal_ds(ds, "finished receiving", tx, "");
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ }
+ drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj;
+ /*
+ * Release the hold from dmu_recv_begin. This must be done before
+ * we return to open context, so that when we free the dataset's dnode,
+ * we can evict its bonus buffer.
+ */
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ drc->drc_ds = NULL;
}
static int
-add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
+add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
+ dsl_pool_t *dp;
dsl_dataset_t *snapds;
guid_map_entry_t *gmep;
int err;
ASSERT(guid_map != NULL);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
+ err = dsl_pool_hold(name, FTAG, &dp);
+ if (err != 0)
+ return (err);
+ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snapds);
if (err == 0) {
gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
gmep->guid = snapds->ds_phys->ds_guid;
gmep->gme_ds = snapds;
avl_add(guid_map, gmep);
+ dsl_dataset_long_hold(snapds, gmep);
+ dsl_dataset_rele(snapds, FTAG);
}
- rw_exit(&dp->dp_config_rwlock);
+ dsl_pool_rele(dp, FTAG);
return (err);
}
+static int dmu_recv_end_modified_blocks = 3;
+
static int
dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
- struct recvendsyncarg resa;
- dsl_dataset_t *ds = drc->drc_logical_ds;
- int err, myerr;
-
- if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
- err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
- drc->drc_force);
- if (err)
- goto out;
- } else {
- mutex_exit(&ds->ds_recvlock);
- dsl_dataset_rele(ds, dmu_recv_tag);
- (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
- B_FALSE);
- return (EBUSY);
- }
+ int error;
+ char name[MAXNAMELEN];
- resa.creation_time = drc->drc_drrb->drr_creation_time;
- resa.toguid = drc->drc_drrb->drr_toguid;
- resa.tosnap = drc->drc_tosnap;
+#ifdef _KERNEL
+ /*
+ * We will be destroying the ds; make sure its origin is unmounted if
+ * necessary.
+ */
+ dsl_dataset_name(drc->drc_ds, name);
+ zfs_destroy_unmount_origin(name);
+#endif
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- recv_end_check, recv_end_sync, ds, &resa, 3);
- if (err) {
- /* swap back */
- (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE);
- }
+ error = dsl_sync_task(drc->drc_tofs,
+ dmu_recv_end_check, dmu_recv_end_sync, drc,
+ dmu_recv_end_modified_blocks);
-out:
- mutex_exit(&ds->ds_recvlock);
- if (err == 0 && drc->drc_guid_to_ds_map != NULL)
- (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
- dsl_dataset_disown(ds, dmu_recv_tag);
- myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
- ASSERT0(myerr);
- return (err);
+ if (error != 0)
+ dmu_recv_cleanup_ds(drc);
+ return (error);
}
static int
dmu_recv_new_end(dmu_recv_cookie_t *drc)
{
- struct recvendsyncarg resa;
- dsl_dataset_t *ds = drc->drc_logical_ds;
- int err;
-
- /*
- * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
- * expects it to have a ds_user_ptr (and zil), but clone_swap()
- * can close it.
- */
- txg_wait_synced(ds->ds_dir->dd_pool, 0);
+ int error;
- resa.creation_time = drc->drc_drrb->drr_creation_time;
- resa.toguid = drc->drc_drrb->drr_toguid;
- resa.tosnap = drc->drc_tosnap;
+ error = dsl_sync_task(drc->drc_tofs,
+ dmu_recv_end_check, dmu_recv_end_sync, drc,
+ dmu_recv_end_modified_blocks);
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- recv_end_check, recv_end_sync, ds, &resa, 3);
- if (err) {
- /* clean up the fs we just recv'd into */
- (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
- } else {
- if (drc->drc_guid_to_ds_map != NULL)
- (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
- /* release the hold from dmu_recv_begin */
- dsl_dataset_disown(ds, dmu_recv_tag);
+ if (error != 0) {
+ dmu_recv_cleanup_ds(drc);
+ } else if (drc->drc_guid_to_ds_map != NULL) {
+ (void) add_ds_to_guidmap(drc->drc_tofs,
+ drc->drc_guid_to_ds_map,
+ drc->drc_newsnapobj);
}
- return (err);
+ return (error);
}
int
dmu_recv_end(dmu_recv_cookie_t *drc)
{
- if (drc->drc_logical_ds != drc->drc_real_ds)
- return (dmu_recv_existing_end(drc));
- else
+ if (drc->drc_newfs)
return (dmu_recv_new_end(drc));
+ else
+ return (dmu_recv_existing_end(drc));
}
diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c
index f3d5069d47..e976517997 100644
--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c
@@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
- if (err)
+ if (err != 0)
return (err);
cbp = buf->b_data;
@@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
err = traverse_visitbp(td, dnp, &cbp[i], &czb);
- if (err) {
+ if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
- if (err)
+ if (err != 0)
return (err);
dnp = buf->b_data;
@@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
for (i = 0; i < epb; i++) {
err = traverse_dnode(td, &dnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
- if (err) {
+ if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
- if (err)
+ if (err != 0)
return (err);
osp = buf->b_data;
@@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
- if (err) {
+ if (err != 0) {
if (!hard)
break;
lasterr = err;
@@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
- if (err) {
+ if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -514,14 +514,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
- if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
- objset_t *os;
+ if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
+ uint32_t flags = ARC_WAIT;
+ objset_phys_t *osp;
+ arc_buf_t *buf;
- err = dmu_objset_from_ds(ds, &os);
- if (err)
+ err = arc_read(NULL, td.td_spa, rootbp,
+ arc_getbuf_func, &buf,
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
+ if (err != 0)
return (err);
- traverse_zil(&td, &os->os_zil_header);
+ osp = buf->b_data;
+ traverse_zil(&td, &osp->os_zil_header);
+ (void) arc_buf_remove_ref(buf, &buf);
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@@ -583,7 +589,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit the MOS */
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
txg_start, NULL, flags, func, arg);
- if (err)
+ if (err != 0)
return (err);
/* visit each dataset */
@@ -592,7 +598,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
- if (err) {
+ if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -603,10 +609,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dsl_dataset_t *ds;
uint64_t txg = txg_start;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
+ dsl_pool_config_enter(dp, FTAG);
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- if (err) {
+ dsl_pool_config_exit(dp, FTAG);
+ if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@@ -616,7 +622,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
txg = ds->ds_phys->ds_prev_snap_txg;
err = traverse_dataset(ds, txg, flags, func, arg);
dsl_dataset_rele(ds, FTAG);
- if (err) {
+ if (err != 0) {
if (!hard)
return (err);
lasterr = err;
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 556ae6a83a..8e6beec4f1 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -898,7 +898,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
#endif
static int
-dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
@@ -962,13 +962,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
}
/*
- * NB: This check must be after we've held the dnodes, so that
- * the dmu_tx_unassign() logic will work properly
- */
- if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
- return (ERESTART);
-
- /*
* If a snapshot has been taken since we made our estimates,
* assume that we won't be able to free or overwrite anything.
*/
@@ -1048,26 +1041,25 @@ dmu_tx_unassign(dmu_tx_t *tx)
*
* (1) TXG_WAIT. If the current open txg is full, waits until there's
* a new one. This should be used when you're not holding locks.
- * If will only fail if we're truly out of space (or over quota).
+ * It will only fail if we're truly out of space (or over quota).
*
* (2) TXG_NOWAIT. If we can't assign into the current open txg without
* blocking, returns immediately with ERESTART. This should be used
* whenever you're holding locks. On an ERESTART error, the caller
* should drop locks, do a dmu_tx_wait(tx), and try again.
- *
- * (3) A specific txg. Use this if you need to ensure that multiple
- * transactions all sync in the same txg. Like TXG_NOWAIT, it
- * returns ERESTART if it can't assign you into the requested txg.
*/
int
-dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
+dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
int err;
ASSERT(tx->tx_txg == 0);
- ASSERT(txg_how != 0);
+ ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
+ /* If we might wait, we must not hold the config lock. */
+ ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
+
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
dmu_tx_unassign(tx);
@@ -1088,6 +1080,7 @@ dmu_tx_wait(dmu_tx_t *tx)
spa_t *spa = tx->tx_pool->dp_spa;
ASSERT(tx->tx_txg == 0);
+ ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
* It's possible that the pool has become active after this thread
@@ -1214,6 +1207,14 @@ dmu_tx_get_txg(dmu_tx_t *tx)
return (tx->tx_txg);
}
+dsl_pool_t *
+dmu_tx_pool(dmu_tx_t *tx)
+{
+ ASSERT(tx->tx_pool != NULL);
+ return (tx->tx_pool);
+}
+
+
void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index 6838576dcf..5b5ece424a 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -72,7 +72,11 @@ dnode_cons(void *arg, void *unused, int kmflag)
mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
- refcount_create(&dn->dn_holds);
+ /*
+ * Every dbuf has a reference, and dropping a tracked reference is
+ * O(number of references), so don't track dn_holds.
+ */
+ refcount_create_untracked(&dn->dn_holds);
refcount_create(&dn->dn_tx_holds);
list_link_init(&dn->dn_link);
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index 38dab665fc..7d47ce02b4 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -477,6 +477,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
+ ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index 0a5ef837cc..5e0446d351 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@@ -45,12 +45,8 @@
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
-
-static char *dsl_reaper = "the grim reaper";
-
-static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
-static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
-static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
#define SWITCH64(x, y) \
{ \
@@ -63,9 +59,6 @@ static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
-#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper)
-
-
/*
* Figure out how much of this delta should be propagated to the dsl_dir
* layer. If there's a refreservation, that space has already been
@@ -252,7 +245,7 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
dsl_dataset_t *ds = dsv;
- ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
+ ASSERT(ds->ds_owner == NULL);
unique_remove(ds->ds_fsid_guid);
@@ -260,32 +253,26 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
dmu_objset_evict(ds->ds_objset);
if (ds->ds_prev) {
- dsl_dataset_drop_ref(ds->ds_prev, ds);
+ dsl_dataset_rele(ds->ds_prev, ds);
ds->ds_prev = NULL;
}
bplist_destroy(&ds->ds_pending_deadlist);
- if (db != NULL) {
+ if (ds->ds_phys->ds_deadlist_obj != 0)
dsl_deadlist_close(&ds->ds_deadlist);
- } else {
- ASSERT(ds->ds_deadlist.dl_dbuf == NULL);
- ASSERT(!ds->ds_deadlist.dl_oldfmt);
- }
if (ds->ds_dir)
- dsl_dir_close(ds->ds_dir, ds);
+ dsl_dir_rele(ds->ds_dir, ds);
ASSERT(!list_link_active(&ds->ds_synced_link));
mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_recvlock);
mutex_destroy(&ds->ds_opening_lock);
- rw_destroy(&ds->ds_rwlock);
- cv_destroy(&ds->ds_exclusive_cv);
+ refcount_destroy(&ds->ds_longholds);
kmem_free(ds, sizeof (dsl_dataset_t));
}
-static int
+int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
dsl_dataset_phys_t *headphys;
@@ -301,7 +288,7 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds)
err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
FTAG, &headdbuf);
- if (err)
+ if (err != 0)
return (err);
headphys = headdbuf->db_data;
err = zap_value_search(dp->dp_meta_objset,
@@ -310,7 +297,7 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds)
return (err);
}
-static int
+int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
@@ -330,8 +317,8 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
return (err);
}
-static int
-dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
+int
+dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -351,8 +338,8 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
return (err);
}
-static int
-dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+int
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **dsp)
{
objset_t *mos = dp->dp_meta_objset;
@@ -361,11 +348,10 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
int err;
dmu_object_info_t doi;
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
- dsl_pool_sync_context(dp));
+ ASSERT(dsl_pool_config_held(dp));
err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
- if (err)
+ if (err != 0)
return (err);
/* Make sure dsobj has the correct object type. */
@@ -383,12 +369,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds->ds_phys = dbuf->db_data;
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
-
- rw_init(&ds->ds_rwlock, 0, 0, 0);
- cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
+ refcount_create(&ds->ds_longholds);
bplist_create(&ds->ds_pending_deadlist);
dsl_deadlist_open(&ds->ds_deadlist,
@@ -398,15 +381,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
offsetof(dmu_sendarg_t, dsa_link));
if (err == 0) {
- err = dsl_dir_open_obj(dp,
+ err = dsl_dir_hold_obj(dp,
ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
}
- if (err) {
+ if (err != 0) {
mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_recvlock);
mutex_destroy(&ds->ds_opening_lock);
- rw_destroy(&ds->ds_rwlock);
- cv_destroy(&ds->ds_exclusive_cv);
+ refcount_destroy(&ds->ds_longholds);
bplist_destroy(&ds->ds_pending_deadlist);
dsl_deadlist_close(&ds->ds_deadlist);
kmem_free(ds, sizeof (dsl_dataset_t));
@@ -416,8 +397,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
if (!dsl_dataset_is_snapshot(ds)) {
ds->ds_snapname[0] = '\0';
- if (ds->ds_phys->ds_prev_snap_obj) {
- err = dsl_dataset_get_ref(dp,
+ if (ds->ds_phys->ds_prev_snap_obj != 0) {
+ err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj,
ds, &ds->ds_prev);
}
@@ -433,29 +414,14 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
}
if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
- /*
- * In sync context, we're called with either no lock
- * or with the write lock. If we're not syncing,
- * we're always called with the read lock held.
- */
- boolean_t need_lock =
- !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
- dsl_pool_sync_context(dp);
-
- if (need_lock)
- rw_enter(&dp->dp_config_rwlock, RW_READER);
-
- err = dsl_prop_get_ds(ds,
- "refreservation", sizeof (uint64_t), 1,
- &ds->ds_reserved, NULL);
+ err = dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+ &ds->ds_reserved);
if (err == 0) {
- err = dsl_prop_get_ds(ds,
- "refquota", sizeof (uint64_t), 1,
- &ds->ds_quota, NULL);
+ err = dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+ &ds->ds_quota);
}
-
- if (need_lock)
- rw_exit(&dp->dp_config_rwlock);
} else {
ds->ds_reserved = ds->ds_quota = 0;
}
@@ -465,15 +431,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
bplist_destroy(&ds->ds_pending_deadlist);
dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_prev)
- dsl_dataset_drop_ref(ds->ds_prev, ds);
- dsl_dir_close(ds->ds_dir, ds);
+ dsl_dataset_rele(ds->ds_prev, ds);
+ dsl_dir_rele(ds->ds_dir, ds);
mutex_destroy(&ds->ds_lock);
- mutex_destroy(&ds->ds_recvlock);
mutex_destroy(&ds->ds_opening_lock);
- rw_destroy(&ds->ds_rwlock);
- cv_destroy(&ds->ds_exclusive_cv);
+ refcount_destroy(&ds->ds_longholds);
kmem_free(ds, sizeof (dsl_dataset_t));
- if (err) {
+ if (err != 0) {
dmu_buf_rele(dbuf, tag);
return (err);
}
@@ -488,170 +452,118 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
- mutex_enter(&ds->ds_lock);
- if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
- mutex_exit(&ds->ds_lock);
- dmu_buf_rele(ds->ds_dbuf, tag);
- return (ENOENT);
- }
- mutex_exit(&ds->ds_lock);
*dsp = ds;
return (0);
}
-static int
-dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
-{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
- /*
- * In syncing context we don't want the rwlock lock: there
- * may be an existing writer waiting for sync phase to
- * finish. We don't need to worry about such writers, since
- * sync phase is single-threaded, so the writer can't be
- * doing anything while we are active.
- */
- if (dsl_pool_sync_context(dp)) {
- ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
- return (0);
- }
-
- /*
- * Normal users will hold the ds_rwlock as a READER until they
- * are finished (i.e., call dsl_dataset_rele()). "Owners" will
- * drop their READER lock after they set the ds_owner field.
- *
- * If the dataset is being destroyed, the destroy thread will
- * obtain a WRITER lock for exclusive access after it's done its
- * open-context work and then change the ds_owner to
- * dsl_reaper once destruction is assured. So threads
- * may block here temporarily, until the "destructability" of
- * the dataset is determined.
- */
- ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
- mutex_enter(&ds->ds_lock);
- while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
- rw_exit(&dp->dp_config_rwlock);
- cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
- if (DSL_DATASET_IS_DESTROYED(ds)) {
- mutex_exit(&ds->ds_lock);
- dsl_dataset_drop_ref(ds, tag);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- return (ENOENT);
- }
- /*
- * The dp_config_rwlock lives above the ds_lock. And
- * we need to check DSL_DATASET_IS_DESTROYED() while
- * holding the ds_lock, so we have to drop and reacquire
- * the ds_lock here.
- */
- mutex_exit(&ds->ds_lock);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- mutex_enter(&ds->ds_lock);
- }
- mutex_exit(&ds->ds_lock);
- return (0);
-}
-
-int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
- dsl_dataset_t **dsp)
-{
- int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
-
- if (err)
- return (err);
- return (dsl_dataset_hold_ref(*dsp, tag));
-}
-
int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
+dsl_dataset_hold(dsl_pool_t *dp, const char *name,
void *tag, dsl_dataset_t **dsp)
{
- int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
- if (err)
- return (err);
- if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
- dsl_dataset_rele(*dsp, tag);
- *dsp = NULL;
- return (EBUSY);
- }
- return (0);
-}
-
-int
-dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
-{
dsl_dir_t *dd;
- dsl_pool_t *dp;
const char *snapname;
uint64_t obj;
int err = 0;
- err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
- if (err)
+ err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
+ if (err != 0)
return (err);
- dp = dd->dd_pool;
+ ASSERT(dsl_pool_config_held(dp));
obj = dd->dd_phys->dd_head_dataset_obj;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if (obj)
- err = dsl_dataset_get_ref(dp, obj, tag, dsp);
+ if (obj != 0)
+ err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
else
err = ENOENT;
- if (err)
- goto out;
-
- err = dsl_dataset_hold_ref(*dsp, tag);
/* we may be looking for a snapshot */
if (err == 0 && snapname != NULL) {
- dsl_dataset_t *ds = NULL;
+ dsl_dataset_t *ds;
if (*snapname++ != '@') {
dsl_dataset_rele(*dsp, tag);
- err = ENOENT;
- goto out;
+ dsl_dir_rele(dd, FTAG);
+ return (ENOENT);
}
dprintf("looking for snapshot '%s'\n", snapname);
err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
if (err == 0)
- err = dsl_dataset_get_ref(dp, obj, tag, &ds);
+ err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
dsl_dataset_rele(*dsp, tag);
- ASSERT3U((err == 0), ==, (ds != NULL));
-
- if (ds) {
+ if (err == 0) {
mutex_enter(&ds->ds_lock);
if (ds->ds_snapname[0] == 0)
(void) strlcpy(ds->ds_snapname, snapname,
sizeof (ds->ds_snapname));
mutex_exit(&ds->ds_lock);
- err = dsl_dataset_hold_ref(ds, tag);
- *dsp = err ? NULL : ds;
+ *dsp = ds;
}
}
-out:
- rw_exit(&dp->dp_config_rwlock);
- dsl_dir_close(dd, FTAG);
+
+ dsl_dir_rele(dd, FTAG);
return (err);
}
int
-dsl_dataset_own(const char *name, boolean_t inconsistentok,
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
+ void *tag, dsl_dataset_t **dsp)
+{
+ int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+ if (err != 0)
+ return (err);
+ if (!dsl_dataset_tryown(*dsp, tag)) {
+ dsl_dataset_rele(*dsp, tag);
+ *dsp = NULL;
+ return (EBUSY);
+ }
+ return (0);
+}
+
+int
+dsl_dataset_own(dsl_pool_t *dp, const char *name,
void *tag, dsl_dataset_t **dsp)
{
- int err = dsl_dataset_hold(name, tag, dsp);
- if (err)
+ int err = dsl_dataset_hold(dp, name, tag, dsp);
+ if (err != 0)
return (err);
- if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+ if (!dsl_dataset_tryown(*dsp, tag)) {
dsl_dataset_rele(*dsp, tag);
return (EBUSY);
}
return (0);
}
+/*
+ * See the comment above dsl_pool_hold() for details. In summary, a long
+ * hold is used to prevent destruction of a dataset while the pool hold
+ * is dropped, allowing other concurrent operations (e.g. spa_sync()).
+ *
+ * The dataset and pool must be held when this function is called. After it
+ * is called, the pool hold may be released while the dataset is still held
+ * and accessed.
+ */
+void
+dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
+{
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
+ (void) refcount_add(&ds->ds_longholds, tag);
+}
+
+void
+dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
+{
+ (void) refcount_remove(&ds->ds_longholds, tag);
+}
+
+/* Return B_TRUE if there are any long holds on this dataset. */
+boolean_t
+dsl_dataset_long_held(dsl_dataset_t *ds)
+{
+ return (!refcount_is_zero(&ds->ds_longholds));
+}
+
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
@@ -659,7 +571,7 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name)
(void) strcpy(name, "mos");
} else {
dsl_dir_name(ds->ds_dir, name);
- VERIFY(0 == dsl_dataset_get_snapname(ds));
+ VERIFY0(dsl_dataset_get_snapname(ds));
if (ds->ds_snapname[0]) {
(void) strcat(name, "@");
/*
@@ -686,7 +598,7 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
result = 3; /* "mos" */
} else {
result = dsl_dir_namelen(ds->ds_dir);
- VERIFY(0 == dsl_dataset_get_snapname(ds));
+ VERIFY0(dsl_dataset_get_snapname(ds));
if (ds->ds_snapname[0]) {
++result; /* adding one for the @-sign */
if (!MUTEX_HELD(&ds->ds_lock)) {
@@ -703,64 +615,41 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
}
void
-dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
-{
- dmu_buf_rele(ds->ds_dbuf, tag);
-}
-
-void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
- if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
- rw_exit(&ds->ds_rwlock);
- }
- dsl_dataset_drop_ref(ds, tag);
+ dmu_buf_rele(ds->ds_dbuf, tag);
}
void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
{
- ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
- (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
+ ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL);
mutex_enter(&ds->ds_lock);
ds->ds_owner = NULL;
- if (RW_WRITE_HELD(&ds->ds_rwlock)) {
- rw_exit(&ds->ds_rwlock);
- cv_broadcast(&ds->ds_exclusive_cv);
- }
mutex_exit(&ds->ds_lock);
- if (ds->ds_dbuf)
- dsl_dataset_drop_ref(ds, tag);
+ dsl_dataset_long_rele(ds, tag);
+ if (ds->ds_dbuf != NULL)
+ dsl_dataset_rele(ds, tag);
else
dsl_dataset_evict(NULL, ds);
}
boolean_t
-dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
+dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
{
boolean_t gotit = FALSE;
mutex_enter(&ds->ds_lock);
- if (ds->ds_owner == NULL &&
- (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
+ if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
ds->ds_owner = tag;
- if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
- rw_exit(&ds->ds_rwlock);
+ dsl_dataset_long_hold(ds, tag);
gotit = TRUE;
}
mutex_exit(&ds->ds_lock);
return (gotit);
}
-void
-dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
-{
- ASSERT3P(owner, ==, ds->ds_owner);
- if (!RW_WRITE_HELD(&ds->ds_rwlock))
- rw_enter(&ds->ds_rwlock, RW_WRITER);
-}
-
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx)
@@ -781,7 +670,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
+ VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
bzero(dsphys, sizeof (dsl_dataset_phys_t));
@@ -799,7 +688,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
if (origin == NULL) {
dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
} else {
- dsl_dataset_t *ohds;
+ dsl_dataset_t *ohds; /* head of the origin snapshot */
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
@@ -816,7 +705,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dmu_buf_will_dirty(origin->ds_dbuf, tx);
origin->ds_phys->ds_num_children++;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+ VERIFY0(dsl_dataset_hold_obj(dp,
origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
@@ -828,9 +717,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
zap_create(mos,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
}
- VERIFY(0 == zap_add_int(mos,
- origin->ds_phys->ds_next_clones_obj,
- dsobj, tx));
+ VERIFY0(zap_add_int(mos,
+ origin->ds_phys->ds_next_clones_obj, dsobj, tx));
}
dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -842,7 +730,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
zap_create(mos,
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
- VERIFY3U(0, ==, zap_add_int(mos,
+ VERIFY0(zap_add_int(mos,
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
}
}
@@ -858,6 +746,16 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
return (dsobj);
}
+static void
+dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ objset_t *os;
+
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+ bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+ dsl_dataset_dirty(ds, tx);
+}
+
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
@@ -866,29 +764,28 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
uint64_t dsobj, ddobj;
dsl_dir_t *dd;
+ ASSERT(dmu_tx_is_syncing(tx));
ASSERT(lastname[0] != '@');
ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
+ VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
- dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
+ dsobj = dsl_dataset_create_sync_dd(dd, origin,
+ flags & ~DS_CREATE_FLAG_NODIRTY, tx);
dsl_deleg_set_create_perms(dd, tx, cr);
- dsl_dir_close(dd, FTAG);
+ dsl_dir_rele(dd, FTAG);
/*
* If we are creating a clone, make sure we zero out any stale
* data from the origin snapshots zil header.
*/
- if (origin != NULL) {
+ if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
dsl_dataset_t *ds;
- objset_t *os;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
- VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
- bzero(&os->os_zil_header, sizeof (os->os_zil_header));
- dsl_dataset_dirty(ds, tx);
+ VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+ dsl_dataset_zero_zil(ds, tx);
dsl_dataset_rele(ds, FTAG);
}
@@ -896,329 +793,6 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
}
/*
- * The snapshots must all be in the same pool.
- */
-int
-dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer,
- nvlist_t *errlist)
-{
- int err;
- dsl_sync_task_t *dst;
- spa_t *spa;
- nvpair_t *pair;
- dsl_sync_task_group_t *dstg;
-
- pair = nvlist_next_nvpair(snaps, NULL);
- if (pair == NULL)
- return (0);
-
- err = spa_open(nvpair_name(pair), &spa, FTAG);
- if (err)
- return (err);
- dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-
- for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
- pair = nvlist_next_nvpair(snaps, pair)) {
- dsl_dataset_t *ds;
-
- err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
- if (err == 0) {
- struct dsl_ds_destroyarg *dsda;
-
- dsl_dataset_make_exclusive(ds, dstg);
- dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
- KM_SLEEP);
- dsda->ds = ds;
- dsda->defer = defer;
- dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, dsda, dstg, 0);
- } else if (err == ENOENT) {
- err = 0;
- } else {
- fnvlist_add_int32(errlist, nvpair_name(pair), err);
- break;
- }
- }
-
- if (err == 0)
- err = dsl_sync_task_group_wait(dstg);
-
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
- dsl_dataset_t *ds = dsda->ds;
-
- /*
- * Return the snapshots that triggered the error.
- */
- if (dst->dst_err != 0) {
- char name[ZFS_MAXNAMELEN];
- dsl_dataset_name(ds, name);
- fnvlist_add_int32(errlist, name, dst->dst_err);
- }
- ASSERT3P(dsda->rm_origin, ==, NULL);
- dsl_dataset_disown(ds, dstg);
- kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
- }
-
- dsl_sync_task_group_destroy(dstg);
- spa_close(spa, FTAG);
- return (err);
-
-}
-
-static boolean_t
-dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
-{
- boolean_t might_destroy = B_FALSE;
-
- mutex_enter(&ds->ds_lock);
- if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
- DS_IS_DEFER_DESTROY(ds))
- might_destroy = B_TRUE;
- mutex_exit(&ds->ds_lock);
-
- return (might_destroy);
-}
-
-/*
- * If we're removing a clone, and these three conditions are true:
- * 1) the clone's origin has no other children
- * 2) the clone's origin has no user references
- * 3) the clone's origin has been marked for deferred destruction
- * Then, prepare to remove the origin as part of this sync task group.
- */
-static int
-dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
-{
- dsl_dataset_t *ds = dsda->ds;
- dsl_dataset_t *origin = ds->ds_prev;
-
- if (dsl_dataset_might_destroy_origin(origin)) {
- char *name;
- int namelen;
- int error;
-
- namelen = dsl_dataset_namelen(origin) + 1;
- name = kmem_alloc(namelen, KM_SLEEP);
- dsl_dataset_name(origin, name);
-#ifdef _KERNEL
- error = zfs_unmount_snap(name, NULL);
- if (error) {
- kmem_free(name, namelen);
- return (error);
- }
-#endif
- error = dsl_dataset_own(name, B_TRUE, tag, &origin);
- kmem_free(name, namelen);
- if (error)
- return (error);
- dsda->rm_origin = origin;
- dsl_dataset_make_exclusive(origin, tag);
- }
-
- return (0);
-}
-
-/*
- * ds must be opened as OWNER. On return (whether successful or not),
- * ds will be closed and caller can no longer dereference it.
- */
-int
-dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
-{
- int err;
- dsl_sync_task_group_t *dstg;
- objset_t *os;
- dsl_dir_t *dd;
- uint64_t obj;
- struct dsl_ds_destroyarg dsda = { 0 };
-
- dsda.ds = ds;
-
- if (dsl_dataset_is_snapshot(ds)) {
- /* Destroying a snapshot is simpler */
- dsl_dataset_make_exclusive(ds, tag);
-
- dsda.defer = defer;
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
- &dsda, tag, 0);
- ASSERT3P(dsda.rm_origin, ==, NULL);
- goto out;
- } else if (defer) {
- err = EINVAL;
- goto out;
- }
-
- dd = ds->ds_dir;
-
- if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- /*
- * Check for errors and mark this ds as inconsistent, in
- * case we crash while freeing the objects.
- */
- err = dsl_sync_task_do(dd->dd_pool,
- dsl_dataset_destroy_begin_check,
- dsl_dataset_destroy_begin_sync, ds, NULL, 0);
- if (err)
- goto out;
-
- err = dmu_objset_from_ds(ds, &os);
- if (err)
- goto out;
-
- /*
- * Remove all objects while in the open context so that
- * there is less work to do in the syncing context.
- */
- for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
- ds->ds_phys->ds_prev_snap_txg)) {
- /*
- * Ignore errors, if there is not enough disk space
- * we will deal with it in dsl_dataset_destroy_sync().
- */
- (void) dmu_free_object(os, obj);
- }
- if (err != ESRCH)
- goto out;
-
- /*
- * Sync out all in-flight IO.
- */
- txg_wait_synced(dd->dd_pool, 0);
-
- /*
- * If we managed to free all the objects in open
- * context, the user space accounting should be zero.
- */
- if (ds->ds_phys->ds_bp.blk_fill == 0 &&
- dmu_objset_userused_enabled(os)) {
- uint64_t count;
-
- ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
- &count) != 0 || count == 0);
- ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
- &count) != 0 || count == 0);
- }
- }
-
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
-
- if (err)
- goto out;
-
- /*
- * Blow away the dsl_dir + head dataset.
- */
- dsl_dataset_make_exclusive(ds, tag);
- /*
- * If we're removing a clone, we might also need to remove its
- * origin.
- */
- do {
- dsda.need_prep = B_FALSE;
- if (dsl_dir_is_clone(dd)) {
- err = dsl_dataset_origin_rm_prep(&dsda, tag);
- if (err) {
- dsl_dir_close(dd, FTAG);
- goto out;
- }
- }
-
- dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
- dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
- dsl_dataset_destroy_sync, &dsda, tag, 0);
- dsl_sync_task_create(dstg, dsl_dir_destroy_check,
- dsl_dir_destroy_sync, dd, FTAG, 0);
- err = dsl_sync_task_group_wait(dstg);
- dsl_sync_task_group_destroy(dstg);
-
- /*
- * We could be racing against 'zfs release' or 'zfs destroy -d'
- * on the origin snap, in which case we can get EBUSY if we
- * needed to destroy the origin snap but were not ready to
- * do so.
- */
- if (dsda.need_prep) {
- ASSERT(err == EBUSY);
- ASSERT(dsl_dir_is_clone(dd));
- ASSERT(dsda.rm_origin == NULL);
- }
- } while (dsda.need_prep);
-
- if (dsda.rm_origin != NULL)
- dsl_dataset_disown(dsda.rm_origin, tag);
-
- /* if it is successful, dsl_dir_destroy_sync will close the dd */
- if (err)
- dsl_dir_close(dd, FTAG);
-out:
- dsl_dataset_disown(ds, tag);
- return (err);
-}
-
-blkptr_t *
-dsl_dataset_get_blkptr(dsl_dataset_t *ds)
-{
- return (&ds->ds_phys->ds_bp);
-}
-
-void
-dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
- /* If it's the meta-objset, set dp_meta_rootbp */
- if (ds == NULL) {
- tx->tx_pool->dp_meta_rootbp = *bp;
- } else {
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_bp = *bp;
- }
-}
-
-spa_t *
-dsl_dataset_get_spa(dsl_dataset_t *ds)
-{
- return (ds->ds_dir->dd_pool->dp_spa);
-}
-
-void
-dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
- dsl_pool_t *dp;
-
- if (ds == NULL) /* this is the meta-objset */
- return;
-
- ASSERT(ds->ds_objset != NULL);
-
- if (ds->ds_phys->ds_next_snap_obj != 0)
- panic("dirtying snapshot!");
-
- dp = ds->ds_dir->dd_pool;
-
- if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
- /* up the hold count until we can be written out */
- dmu_buf_add_ref(ds->ds_dbuf, ds);
- }
-}
-
-boolean_t
-dsl_dataset_is_dirty(dsl_dataset_t *ds)
-{
- for (int t = 0; t < TXG_SIZE; t++) {
- if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
- ds, t))
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-/*
* The unique space in the head dataset can be calculated by subtracting
* the space used in the most recent snapshot, that is still being used
* in this file system, from the space currently in use. To figure out
@@ -1226,7 +800,7 @@ dsl_dataset_is_dirty(dsl_dataset_t *ds)
* the total space used in the snapshot and subtract out the space that
* has been freed up since the snapshot was taken.
*/
-static void
+void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
uint64_t mrs_used;
@@ -1250,234 +824,10 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}
-struct killarg {
- dsl_dataset_t *ds;
- dmu_tx_t *tx;
-};
-
-/* ARGSUSED */
-static int
-kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
- const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
-{
- struct killarg *ka = arg;
- dmu_tx_t *tx = ka->tx;
-
- if (bp == NULL)
- return (0);
-
- if (zb->zb_level == ZB_ZIL_LEVEL) {
- ASSERT(zilog != NULL);
- /*
- * It's a block in the intent log. It has no
- * accounting, so just free it.
- */
- dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
- } else {
- ASSERT(zilog == NULL);
- ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
- (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
- }
-
- return (0);
-}
-
-/* ARGSUSED */
-static int
-dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- uint64_t count;
- int err;
-
- /*
- * Can't delete a head dataset if there are snapshots of it.
- * (Except if the only snapshots are from the branch we cloned
- * from.)
- */
- if (ds->ds_prev != NULL &&
- ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
- return (EBUSY);
-
- /*
- * This is really a dsl_dir thing, but check it here so that
- * we'll be less likely to leave this dataset inconsistent &
- * nearly destroyed.
- */
- err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
- if (err)
- return (err);
- if (count != 0)
- return (EEXIST);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
-
- /* Mark it as inconsistent on-disk, in case we crash */
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
-
- spa_history_log_internal_ds(ds, "destroy begin", tx, "");
-}
-
-static int
-dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
+void
+dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
dmu_tx_t *tx)
{
- dsl_dataset_t *ds = dsda->ds;
- dsl_dataset_t *ds_prev = ds->ds_prev;
-
- if (dsl_dataset_might_destroy_origin(ds_prev)) {
- struct dsl_ds_destroyarg ndsda = {0};
-
- /*
- * If we're not prepared to remove the origin, don't remove
- * the clone either.
- */
- if (dsda->rm_origin == NULL) {
- dsda->need_prep = B_TRUE;
- return (EBUSY);
- }
-
- ndsda.ds = ds_prev;
- ndsda.is_origin_rm = B_TRUE;
- return (dsl_dataset_destroy_check(&ndsda, tag, tx));
- }
-
- /*
- * If we're not going to remove the origin after all,
- * undo the open context setup.
- */
- if (dsda->rm_origin != NULL) {
- dsl_dataset_disown(dsda->rm_origin, tag);
- dsda->rm_origin = NULL;
- }
-
- return (0);
-}
-
-/*
- * If you add new checks here, you may need to add
- * additional checks to the "temporary" case in
- * snapshot_check() in dmu_objset.c.
- */
-/* ARGSUSED */
-int
-dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- struct dsl_ds_destroyarg *dsda = arg1;
- dsl_dataset_t *ds = dsda->ds;
-
- /* we have an owner hold, so noone else can destroy us */
- ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
-
- /*
- * Only allow deferred destroy on pools that support it.
- * NOTE: deferred destroy is only supported on snapshots.
- */
- if (dsda->defer) {
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
- SPA_VERSION_USERREFS)
- return (ENOTSUP);
- ASSERT(dsl_dataset_is_snapshot(ds));
- return (0);
- }
-
- /*
- * Can't delete a head dataset if there are snapshots of it.
- * (Except if the only snapshots are from the branch we cloned
- * from.)
- */
- if (ds->ds_prev != NULL &&
- ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
- return (EBUSY);
-
- /*
- * If we made changes this txg, traverse_dsl_dataset won't find
- * them. Try again.
- */
- if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
- return (EAGAIN);
-
- if (dsl_dataset_is_snapshot(ds)) {
- /*
- * If this snapshot has an elevated user reference count,
- * we can't destroy it yet.
- */
- if (ds->ds_userrefs > 0 && !dsda->releasing)
- return (EBUSY);
-
- mutex_enter(&ds->ds_lock);
- /*
- * Can't delete a branch point. However, if we're destroying
- * a clone and removing its origin due to it having a user
- * hold count of 0 and having been marked for deferred destroy,
- * it's OK for the origin to have a single clone.
- */
- if (ds->ds_phys->ds_num_children >
- (dsda->is_origin_rm ? 2 : 1)) {
- mutex_exit(&ds->ds_lock);
- return (EEXIST);
- }
- mutex_exit(&ds->ds_lock);
- } else if (dsl_dir_is_clone(ds->ds_dir)) {
- return (dsl_dataset_origin_check(dsda, arg2, tx));
- }
-
- /* XXX we should do some i/o error checking... */
- return (0);
-}
-
-struct refsarg {
- kmutex_t lock;
- boolean_t gone;
- kcondvar_t cv;
-};
-
-/* ARGSUSED */
-static void
-dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
-{
- struct refsarg *arg = argv;
-
- mutex_enter(&arg->lock);
- arg->gone = TRUE;
- cv_signal(&arg->cv);
- mutex_exit(&arg->lock);
-}
-
-static void
-dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
-{
- struct refsarg arg;
-
- mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
- arg.gone = FALSE;
- (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
- dsl_dataset_refs_gone);
- dmu_buf_rele(ds->ds_dbuf, tag);
- mutex_enter(&arg.lock);
- while (!arg.gone)
- cv_wait(&arg.cv, &arg.lock);
- ASSERT(arg.gone);
- mutex_exit(&arg.lock);
- ds->ds_dbuf = NULL;
- ds->ds_phys = NULL;
- mutex_destroy(&arg.lock);
- cv_destroy(&arg.cv);
-}
-
-static void
-remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
-{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t count;
int err;
@@ -1494,491 +844,69 @@ remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
* too many entries in the next_clones_obj even after failing to
* remove this one.
*/
- if (err != ENOENT) {
+ if (err != ENOENT)
VERIFY0(err);
- }
- ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+ ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
&count));
ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}
-static void
-dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
-{
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- zap_cursor_t zc;
- zap_attribute_t za;
-
- /*
- * If it is the old version, dd_clones doesn't exist so we can't
- * find the clones, but deadlist_remove_key() is a no-op so it
- * doesn't matter.
- */
- if (ds->ds_dir->dd_phys->dd_clones == 0)
- return;
-
- for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
- dsl_dataset_t *clone;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone));
- if (clone->ds_dir->dd_origin_txg > mintxg) {
- dsl_deadlist_remove_key(&clone->ds_deadlist,
- mintxg, tx);
- dsl_dataset_remove_clones_key(clone, mintxg, tx);
- }
- dsl_dataset_rele(clone, FTAG);
- }
- zap_cursor_fini(&zc);
+blkptr_t *
+dsl_dataset_get_blkptr(dsl_dataset_t *ds)
+{
+ return (&ds->ds_phys->ds_bp);
}
-struct process_old_arg {
- dsl_dataset_t *ds;
- dsl_dataset_t *ds_prev;
- boolean_t after_branch_point;
- zio_t *pio;
- uint64_t used, comp, uncomp;
-};
-
-static int
-process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+void
+dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
- struct process_old_arg *poa = arg;
- dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
-
- if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
- dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
- if (poa->ds_prev && !poa->after_branch_point &&
- bp->blk_birth >
- poa->ds_prev->ds_phys->ds_prev_snap_txg) {
- poa->ds_prev->ds_phys->ds_unique_bytes +=
- bp_get_dsize_sync(dp->dp_spa, bp);
- }
+ ASSERT(dmu_tx_is_syncing(tx));
+ /* If it's the meta-objset, set dp_meta_rootbp */
+ if (ds == NULL) {
+ tx->tx_pool->dp_meta_rootbp = *bp;
} else {
- poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
- poa->comp += BP_GET_PSIZE(bp);
- poa->uncomp += BP_GET_UCSIZE(bp);
- dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ ds->ds_phys->ds_bp = *bp;
}
- return (0);
-}
-
-static void
-process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
- dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
-{
- struct process_old_arg poa = { 0 };
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
-
- ASSERT(ds->ds_deadlist.dl_oldfmt);
- ASSERT(ds_next->ds_deadlist.dl_oldfmt);
-
- poa.ds = ds;
- poa.ds_prev = ds_prev;
- poa.after_branch_point = after_branch_point;
- poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
- VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
- process_old_cb, &poa, tx));
- VERIFY0(zio_wait(poa.pio));
- ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
-
- /* change snapused */
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
- -poa.used, -poa.comp, -poa.uncomp, tx);
-
- /* swap next's deadlist to our deadlist */
- dsl_deadlist_close(&ds->ds_deadlist);
- dsl_deadlist_close(&ds_next->ds_deadlist);
- SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
- ds->ds_phys->ds_deadlist_obj);
- dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
- dsl_deadlist_open(&ds_next->ds_deadlist, mos,
- ds_next->ds_phys->ds_deadlist_obj);
}
-static int
-old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+spa_t *
+dsl_dataset_get_spa(dsl_dataset_t *ds)
{
- int err;
- struct killarg ka;
-
- /*
- * Free everything that we point to (that's born after
- * the previous snapshot, if we are a clone)
- *
- * NB: this should be very quick, because we already
- * freed all the objects in open context.
- */
- ka.ds = ds;
- ka.tx = tx;
- err = traverse_dataset(ds,
- ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
- kill_blkptr, &ka);
- ASSERT0(err);
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
-
- return (err);
+ return (ds->ds_dir->dd_pool->dp_spa);
}
void
-dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
+dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
- struct dsl_ds_destroyarg *dsda = arg1;
- dsl_dataset_t *ds = dsda->ds;
- int err;
- int after_branch_point = FALSE;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- dsl_dataset_t *ds_prev = NULL;
- boolean_t wont_destroy;
- uint64_t obj;
-
- wont_destroy = (dsda->defer &&
- (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
-
- ASSERT(ds->ds_owner || wont_destroy);
- ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
- ASSERT(ds->ds_prev == NULL ||
- ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
- ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
+ dsl_pool_t *dp;
- if (wont_destroy) {
- ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
- spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
+ if (ds == NULL) /* this is the meta-objset */
return;
- }
-
- /* We need to log before removing it from the namespace. */
- spa_history_log_internal_ds(ds, "destroy", tx, "");
-
- /* signal any waiters that this dataset is going away */
- mutex_enter(&ds->ds_lock);
- ds->ds_owner = dsl_reaper;
- cv_broadcast(&ds->ds_exclusive_cv);
- mutex_exit(&ds->ds_lock);
-
- /* Remove our reservation */
- if (ds->ds_reserved != 0) {
- dsl_prop_setarg_t psa;
- uint64_t value = 0;
-
- dsl_prop_setarg_init_uint64(&psa, "refreservation",
- (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
- &value);
- psa.psa_effective_value = 0; /* predict default value */
-
- dsl_dataset_set_reservation_sync(ds, &psa, tx);
- ASSERT0(ds->ds_reserved);
- }
-
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
-
- dsl_scan_ds_destroyed(ds, tx);
-
- obj = ds->ds_object;
-
- if (ds->ds_phys->ds_prev_snap_obj != 0) {
- if (ds->ds_prev) {
- ds_prev = ds->ds_prev;
- } else {
- VERIFY(0 == dsl_dataset_hold_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
- }
- after_branch_point =
- (ds_prev->ds_phys->ds_next_snap_obj != obj);
-
- dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
- if (after_branch_point &&
- ds_prev->ds_phys->ds_next_clones_obj != 0) {
- remove_from_next_clones(ds_prev, obj, tx);
- if (ds->ds_phys->ds_next_snap_obj != 0) {
- VERIFY(0 == zap_add_int(mos,
- ds_prev->ds_phys->ds_next_clones_obj,
- ds->ds_phys->ds_next_snap_obj, tx));
- }
- }
- if (after_branch_point &&
- ds->ds_phys->ds_next_snap_obj == 0) {
- /* This clone is toast. */
- ASSERT(ds_prev->ds_phys->ds_num_children > 1);
- ds_prev->ds_phys->ds_num_children--;
-
- /*
- * If the clone's origin has no other clones, no
- * user holds, and has been marked for deferred
- * deletion, then we should have done the necessary
- * destroy setup for it.
- */
- if (ds_prev->ds_phys->ds_num_children == 1 &&
- ds_prev->ds_userrefs == 0 &&
- DS_IS_DEFER_DESTROY(ds_prev)) {
- ASSERT3P(dsda->rm_origin, !=, NULL);
- } else {
- ASSERT3P(dsda->rm_origin, ==, NULL);
- }
- } else if (!after_branch_point) {
- ds_prev->ds_phys->ds_next_snap_obj =
- ds->ds_phys->ds_next_snap_obj;
- }
- }
-
- if (dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_t *ds_next;
- uint64_t old_unique;
- uint64_t used = 0, comp = 0, uncomp = 0;
-
- VERIFY(0 == dsl_dataset_hold_obj(dp,
- ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
- ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
-
- old_unique = ds_next->ds_phys->ds_unique_bytes;
-
- dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
- ds_next->ds_phys->ds_prev_snap_obj =
- ds->ds_phys->ds_prev_snap_obj;
- ds_next->ds_phys->ds_prev_snap_txg =
- ds->ds_phys->ds_prev_snap_txg;
- ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
- ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
-
-
- if (ds_next->ds_deadlist.dl_oldfmt) {
- process_old_deadlist(ds, ds_prev, ds_next,
- after_branch_point, tx);
- } else {
- /* Adjust prev's unique space. */
- if (ds_prev && !after_branch_point) {
- dsl_deadlist_space_range(&ds_next->ds_deadlist,
- ds_prev->ds_phys->ds_prev_snap_txg,
- ds->ds_phys->ds_prev_snap_txg,
- &used, &comp, &uncomp);
- ds_prev->ds_phys->ds_unique_bytes += used;
- }
-
- /* Adjust snapused. */
- dsl_deadlist_space_range(&ds_next->ds_deadlist,
- ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
- &used, &comp, &uncomp);
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
- -used, -comp, -uncomp, tx);
-
- /* Move blocks to be freed to pool's free list. */
- dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
- &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
- tx);
- dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
- DD_USED_HEAD, used, comp, uncomp, tx);
-
- /* Merge our deadlist into next's and free it. */
- dsl_deadlist_merge(&ds_next->ds_deadlist,
- ds->ds_phys->ds_deadlist_obj, tx);
- }
- dsl_deadlist_close(&ds->ds_deadlist);
- dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
-
- /* Collapse range in clone heads */
- dsl_dataset_remove_clones_key(ds,
- ds->ds_phys->ds_creation_txg, tx);
-
- if (dsl_dataset_is_snapshot(ds_next)) {
- dsl_dataset_t *ds_nextnext;
-
- /*
- * Update next's unique to include blocks which
- * were previously shared by only this snapshot
- * and it. Those blocks will be born after the
- * prev snap and before this snap, and will have
- * died after the next snap and before the one
- * after that (ie. be on the snap after next's
- * deadlist).
- */
- VERIFY(0 == dsl_dataset_hold_obj(dp,
- ds_next->ds_phys->ds_next_snap_obj,
- FTAG, &ds_nextnext));
- dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
- ds->ds_phys->ds_prev_snap_txg,
- ds->ds_phys->ds_creation_txg,
- &used, &comp, &uncomp);
- ds_next->ds_phys->ds_unique_bytes += used;
- dsl_dataset_rele(ds_nextnext, FTAG);
- ASSERT3P(ds_next->ds_prev, ==, NULL);
-
- /* Collapse range in this head. */
- dsl_dataset_t *hds;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
- ds->ds_dir->dd_phys->dd_head_dataset_obj,
- FTAG, &hds));
- dsl_deadlist_remove_key(&hds->ds_deadlist,
- ds->ds_phys->ds_creation_txg, tx);
- dsl_dataset_rele(hds, FTAG);
-
- } else {
- ASSERT3P(ds_next->ds_prev, ==, ds);
- dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
- ds_next->ds_prev = NULL;
- if (ds_prev) {
- VERIFY(0 == dsl_dataset_get_ref(dp,
- ds->ds_phys->ds_prev_snap_obj,
- ds_next, &ds_next->ds_prev));
- }
-
- dsl_dataset_recalc_head_uniq(ds_next);
-
- /*
- * Reduce the amount of our unconsmed refreservation
- * being charged to our parent by the amount of
- * new unique data we have gained.
- */
- if (old_unique < ds_next->ds_reserved) {
- int64_t mrsdelta;
- uint64_t new_unique =
- ds_next->ds_phys->ds_unique_bytes;
-
- ASSERT(old_unique <= new_unique);
- mrsdelta = MIN(new_unique - old_unique,
- ds_next->ds_reserved - old_unique);
- dsl_dir_diduse_space(ds->ds_dir,
- DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
- }
- }
- dsl_dataset_rele(ds_next, FTAG);
- } else {
- zfeature_info_t *async_destroy =
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
- objset_t *os;
-
- /*
- * There's no next snapshot, so this is a head dataset.
- * Destroy the deadlist. Unless it's a clone, the
- * deadlist should be empty. (If it's a clone, it's
- * safe to ignore the deadlist contents.)
- */
- dsl_deadlist_close(&ds->ds_deadlist);
- dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
- ds->ds_phys->ds_deadlist_obj = 0;
-
- VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
-
- if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
- err = old_synchronous_dataset_destroy(ds, tx);
- } else {
- /*
- * Move the bptree into the pool's list of trees to
- * clean up and update space accounting information.
- */
- uint64_t used, comp, uncomp;
-
- zil_destroy_sync(dmu_objset_zil(os), tx);
-
- if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
- spa_feature_incr(dp->dp_spa, async_destroy, tx);
- dp->dp_bptree_obj = bptree_alloc(mos, tx);
- VERIFY(zap_add(mos,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
- &dp->dp_bptree_obj, tx) == 0);
- }
-
- used = ds->ds_dir->dd_phys->dd_used_bytes;
- comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
- uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
-
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
- ds->ds_phys->ds_unique_bytes == used);
-
- bptree_add(mos, dp->dp_bptree_obj,
- &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
- used, comp, uncomp, tx);
- dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
- -used, -comp, -uncomp, tx);
- dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
- used, comp, uncomp, tx);
- }
-
- if (ds->ds_prev != NULL) {
- if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
- VERIFY3U(0, ==, zap_remove_int(mos,
- ds->ds_prev->ds_dir->dd_phys->dd_clones,
- ds->ds_object, tx));
- }
- dsl_dataset_rele(ds->ds_prev, ds);
- ds->ds_prev = ds_prev = NULL;
- }
- }
-
- /*
- * This must be done after the dsl_traverse(), because it will
- * re-open the objset.
- */
- if (ds->ds_objset) {
- dmu_objset_evict(ds->ds_objset);
- ds->ds_objset = NULL;
- }
- if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
- /* Erase the link in the dir */
- dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
- ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
- ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
- err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
- ASSERT(err == 0);
- } else {
- /* remove from snapshot namespace */
- dsl_dataset_t *ds_head;
- ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
- VERIFY(0 == dsl_dataset_hold_obj(dp,
- ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
- VERIFY(0 == dsl_dataset_get_snapname(ds));
-#ifdef ZFS_DEBUG
- {
- uint64_t val;
-
- err = dsl_dataset_snap_lookup(ds_head,
- ds->ds_snapname, &val);
- ASSERT0(err);
- ASSERT3U(val, ==, obj);
- }
-#endif
- err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
- ASSERT(err == 0);
- dsl_dataset_rele(ds_head, FTAG);
- }
+ ASSERT(ds->ds_objset != NULL);
- if (ds_prev && ds->ds_prev != ds_prev)
- dsl_dataset_rele(ds_prev, FTAG);
+ if (ds->ds_phys->ds_next_snap_obj != 0)
+ panic("dirtying snapshot!");
- spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
+ dp = ds->ds_dir->dd_pool;
- if (ds->ds_phys->ds_next_clones_obj != 0) {
- uint64_t count;
- ASSERT(0 == zap_count(mos,
- ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
- VERIFY(0 == dmu_object_free(mos,
- ds->ds_phys->ds_next_clones_obj, tx));
+ if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+ /* up the hold count until we can be written out */
+ dmu_buf_add_ref(ds->ds_dbuf, ds);
}
- if (ds->ds_phys->ds_props_obj != 0)
- VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
- if (ds->ds_phys->ds_userrefs_obj != 0)
- VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
- dsl_dir_close(ds->ds_dir, ds);
- ds->ds_dir = NULL;
- dsl_dataset_drain_refs(ds, tag);
- VERIFY(0 == dmu_object_free(mos, obj, tx));
-
- if (dsda->rm_origin) {
- /*
- * Remove the origin of the clone we just destroyed.
- */
- struct dsl_ds_destroyarg ndsda = {0};
+}
- ndsda.ds = dsda->rm_origin;
- dsl_dataset_destroy_sync(&ndsda, tag, tx);
+boolean_t
+dsl_dataset_is_dirty(dsl_dataset_t *ds)
+{
+ for (int t = 0; t < TXG_SIZE; t++) {
+ if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
+ ds, t))
+ return (B_TRUE);
}
+ return (B_FALSE);
}
static int
@@ -2009,13 +937,24 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
return (0);
}
+typedef struct dsl_dataset_snapshot_arg {
+ nvlist_t *ddsa_snaps;
+ nvlist_t *ddsa_props;
+ nvlist_t *ddsa_errors;
+} dsl_dataset_snapshot_arg_t;
+
int
-dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
+dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx)
{
- int err;
+ int error;
uint64_t value;
+ ds->ds_trysnap_txg = tx->tx_txg;
+
+ if (!dmu_tx_is_syncing(tx))
+ return (0);
+
/*
* We don't allow multiple snapshots of the same txg. If there
* is already one, try again.
@@ -2026,39 +965,87 @@ dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
/*
* Check for conflicting snapshot name.
*/
- err = dsl_dataset_snap_lookup(ds, snapname, &value);
- if (err == 0)
+ error = dsl_dataset_snap_lookup(ds, snapname, &value);
+ if (error == 0)
return (EEXIST);
- if (err != ENOENT)
- return (err);
-
- /*
- * Check that the dataset's name is not too long. Name consists
- * of the dataset's length + 1 for the @-sign + snapshot name's length
- */
- if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
- return (ENAMETOOLONG);
+ if (error != ENOENT)
+ return (error);
- err = dsl_dataset_snapshot_reserve_space(ds, tx);
- if (err)
- return (err);
+ error = dsl_dataset_snapshot_reserve_space(ds, tx);
+ if (error != 0)
+ return (error);
- ds->ds_trysnap_txg = tx->tx_txg;
return (0);
}
+static int
+dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_snapshot_arg_t *ddsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ nvpair_t *pair;
+ int rv = 0;
+
+ for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
+ int error = 0;
+ dsl_dataset_t *ds;
+ char *name, *atp;
+ char dsname[MAXNAMELEN];
+
+ name = nvpair_name(pair);
+ if (strlen(name) >= MAXNAMELEN)
+ error = ENAMETOOLONG;
+ if (error == 0) {
+ atp = strchr(name, '@');
+ if (atp == NULL)
+ error = EINVAL;
+ if (error == 0)
+ (void) strlcpy(dsname, name, atp - name + 1);
+ }
+ if (error == 0)
+ error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+ if (error == 0) {
+ error = dsl_dataset_snapshot_check_impl(ds,
+ atp + 1, tx);
+ dsl_dataset_rele(ds, FTAG);
+ }
+
+ if (error != 0) {
+ if (ddsa->ddsa_errors != NULL) {
+ fnvlist_add_int32(ddsa->ddsa_errors,
+ name, error);
+ }
+ rv = error;
+ }
+ }
+ return (rv);
+}
+
void
-dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
+dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx)
{
+ static zil_header_t zero_zil;
+
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dmu_buf_t *dbuf;
dsl_dataset_phys_t *dsphys;
uint64_t dsobj, crtxg;
objset_t *mos = dp->dp_meta_objset;
- int err;
+ objset_t *os;
+
+ ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
+ /*
+ * If we are on an old pool, the zil must not be active, in which
+ * case it will be zeroed. Usually zil_suspend() accomplishes this.
+ */
+ ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
+ dmu_objset_from_ds(ds, &os) != 0 ||
+ bcmp(&os->os_phys->os_zil_header, &zero_zil,
+ sizeof (zero_zil)) == 0);
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
/*
* The origin's ds_creation_txg has to be < TXG_INITIAL
@@ -2070,7 +1057,7 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
- VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
+ VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
bzero(dsphys, sizeof (dsl_dataset_phys_t));
@@ -2105,9 +1092,9 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
ds->ds_prev->ds_phys->ds_creation_txg);
ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
} else if (next_clones_obj != 0) {
- remove_from_next_clones(ds->ds_prev,
+ dsl_dataset_remove_from_next_clones(ds->ds_prev,
dsphys->ds_next_snap_obj, tx);
- VERIFY3U(0, ==, zap_add_int(mos,
+ VERIFY0(zap_add_int(mos,
next_clones_obj, dsobj, tx));
}
}
@@ -2126,9 +1113,6 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu",
- ds->ds_dir->dd_myname, snapname, dsobj,
- ds->ds_phys->ds_prev_snap_txg);
ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
dsl_deadlist_close(&ds->ds_deadlist);
@@ -2143,13 +1127,12 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
- err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
- snapname, 8, 1, &dsobj, tx);
- ASSERT(err == 0);
+ VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
+ snapname, 8, 1, &dsobj, tx));
if (ds->ds_prev)
- dsl_dataset_drop_ref(ds->ds_prev, ds);
- VERIFY(0 == dsl_dataset_get_ref(dp,
+ dsl_dataset_rele(ds->ds_prev, ds);
+ VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
dsl_scan_ds_snapshotted(ds, tx);
@@ -2159,6 +1142,198 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
}
+static void
+dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_snapshot_arg_t *ddsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ nvpair_t *pair;
+
+ for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
+ pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
+ dsl_dataset_t *ds;
+ char *name, *atp;
+ char dsname[MAXNAMELEN];
+
+ name = nvpair_name(pair);
+ atp = strchr(name, '@');
+ (void) strlcpy(dsname, name, atp - name + 1);
+ VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));
+
+ dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
+ if (ddsa->ddsa_props != NULL) {
+ dsl_props_set_sync_impl(ds->ds_prev,
+ ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
+ }
+ dsl_dataset_rele(ds, FTAG);
+ }
+}
+
+/*
+ * The snapshots must all be in the same pool.
+ * All-or-nothing: if there are any failures, nothing will be modified.
+ */
+int
+dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
+{
+ dsl_dataset_snapshot_arg_t ddsa;
+ nvpair_t *pair;
+ boolean_t needsuspend;
+ int error;
+ spa_t *spa;
+ char *firstname;
+ nvlist_t *suspended = NULL;
+
+ pair = nvlist_next_nvpair(snaps, NULL);
+ if (pair == NULL)
+ return (0);
+ firstname = nvpair_name(pair);
+
+ error = spa_open(firstname, &spa, FTAG);
+ if (error != 0)
+ return (error);
+ needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
+ spa_close(spa, FTAG);
+
+ if (needsuspend) {
+ suspended = fnvlist_alloc();
+ for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(snaps, pair)) {
+ char fsname[MAXNAMELEN];
+ char *snapname = nvpair_name(pair);
+ char *atp;
+ void *cookie;
+
+ atp = strchr(snapname, '@');
+ if (atp == NULL) {
+ error = EINVAL;
+ break;
+ }
+ (void) strlcpy(fsname, snapname, atp - snapname + 1);
+
+ error = zil_suspend(fsname, &cookie);
+ if (error != 0)
+ break;
+ fnvlist_add_uint64(suspended, fsname,
+ (uintptr_t)cookie);
+ }
+ }
+
+ ddsa.ddsa_snaps = snaps;
+ ddsa.ddsa_props = props;
+ ddsa.ddsa_errors = errors;
+
+ if (error == 0) {
+ error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
+ dsl_dataset_snapshot_sync, &ddsa,
+ fnvlist_num_pairs(snaps) * 3);
+ }
+
+ if (suspended != NULL) {
+ for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(suspended, pair)) {
+ zil_resume((void *)(uintptr_t)
+ fnvpair_value_uint64(pair));
+ }
+ fnvlist_free(suspended);
+ }
+
+ return (error);
+}
+
+typedef struct dsl_dataset_snapshot_tmp_arg {
+ const char *ddsta_fsname;
+ const char *ddsta_snapname;
+ minor_t ddsta_cleanup_minor;
+ const char *ddsta_htag;
+} dsl_dataset_snapshot_tmp_arg_t;
+
+static int
+dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ int error;
+
+ error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, tx);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
+
+ if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
+ dsl_dataset_rele(ds, FTAG);
+ return (ENOTSUP);
+ }
+ error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
+ B_TRUE, tx);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+static void
+dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+
+ VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));
+
+ dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
+ dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
+ ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
+ dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);
+
+ dsl_dataset_rele(ds, FTAG);
+}
+
+int
+dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
+ minor_t cleanup_minor, const char *htag)
+{
+ dsl_dataset_snapshot_tmp_arg_t ddsta;
+ int error;
+ spa_t *spa;
+ boolean_t needsuspend;
+ void *cookie;
+
+ ddsta.ddsta_fsname = fsname;
+ ddsta.ddsta_snapname = snapname;
+ ddsta.ddsta_cleanup_minor = cleanup_minor;
+ ddsta.ddsta_htag = htag;
+
+ error = spa_open(fsname, &spa, FTAG);
+ if (error != 0)
+ return (error);
+ needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
+ spa_close(spa, FTAG);
+
+ if (needsuspend) {
+ error = zil_suspend(fsname, &cookie);
+ if (error != 0)
+ return (error);
+ }
+
+ error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
+ dsl_dataset_snapshot_tmp_sync, &ddsta, 3);
+
+ if (needsuspend)
+ zil_resume(cookie);
+ return (error);
+}
+
+
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
@@ -2183,65 +1358,49 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
zap_cursor_t zc;
zap_attribute_t za;
- nvlist_t *propval;
- nvlist_t *val;
+ nvlist_t *propval = fnvlist_alloc();
+ nvlist_t *val = fnvlist_alloc();
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
/*
- * There may me missing entries in ds_next_clones_obj
+ * There may be missing entries in ds_next_clones_obj
* due to a bug in a previous version of the code.
* Only trust it if it has the right number of entries.
*/
if (ds->ds_phys->ds_next_clones_obj != 0) {
- ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
+ ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
&count));
}
- if (count != ds->ds_phys->ds_num_children - 1) {
+ if (count != ds->ds_phys->ds_num_children - 1)
goto fail;
- }
for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
char buf[ZFS_MAXNAMELEN];
- /*
- * Even though we hold the dp_config_rwlock, the dataset
- * may fail to open, returning ENOENT. If there is a
- * thread concurrently attempting to destroy this
- * dataset, it will have the ds_rwlock held for
- * RW_WRITER. Our call to dsl_dataset_hold_obj() ->
- * dsl_dataset_hold_ref() will fail its
- * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
- * dp_config_rwlock, and wait for the destroy progress
- * and signal ds_exclusive_cv. If the destroy was
- * successful, we will see that
- * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
- */
- if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone) != 0)
- continue;
+ VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
+ za.za_first_integer, FTAG, &clone));
dsl_dir_name(clone->ds_dir, buf);
- VERIFY(nvlist_add_boolean(val, buf) == 0);
+ fnvlist_add_boolean(val, buf);
dsl_dataset_rele(clone, FTAG);
}
zap_cursor_fini(&zc);
- VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
- VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
- propval) == 0);
+ fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
+ fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
fail:
nvlist_free(val);
nvlist_free(propval);
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t refd, avail, uobjs, aobjs, ratio;
+ ASSERT(dsl_pool_config_held(dp));
+
ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
(ds->ds_phys->ds_uncompressed_bytes * 100 /
ds->ds_phys->ds_compressed_bytes);
@@ -2287,10 +1446,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dsl_dataset_t *prev;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
int err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
- rw_exit(&dp->dp_config_rwlock);
if (err == 0) {
err = dsl_dataset_space_written(prev, ds, &written,
&comp, &uncomp);
@@ -2306,6 +1463,9 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ ASSERT(dsl_pool_config_held(dp));
+
stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
stat->dds_guid = ds->ds_phys->ds_guid;
@@ -2317,16 +1477,14 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
stat->dds_is_snapshot = B_FALSE;
stat->dds_num_clones = 0;
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
if (dsl_dir_is_clone(ds->ds_dir)) {
dsl_dataset_t *ods;
- VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
+ VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
dsl_dataset_name(ods, stat->dds_origin);
- dsl_dataset_drop_ref(ods, FTAG);
+ dsl_dataset_rele(ods, FTAG);
}
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
}
@@ -2364,8 +1522,7 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
- dsl_pool_sync_context(dp));
+ ASSERT(dsl_pool_config_held(dp));
if (ds->ds_prev == NULL)
return (B_FALSE);
if (ds->ds_phys->ds_bp.blk_birth >
@@ -2387,237 +1544,225 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
return (B_FALSE);
}
+typedef struct dsl_dataset_rename_snapshot_arg {
+ const char *ddrsa_fsname;
+ const char *ddrsa_oldsnapname;
+ const char *ddrsa_newsnapname;
+ boolean_t ddrsa_recursive;
+ dmu_tx_t *ddrsa_tx;
+} dsl_dataset_rename_snapshot_arg_t;
+
/* ARGSUSED */
static int
-dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
+ dsl_dataset_t *hds, void *arg)
{
- dsl_dataset_t *ds = arg1;
- char *newsnapname = arg2;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_dataset_t *hds;
+ dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+ int error;
uint64_t val;
- int err;
- err = dsl_dataset_hold_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
- if (err)
- return (err);
-
- /* new name better not be in use */
- err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
- dsl_dataset_rele(hds, FTAG);
+ error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
+ if (error != 0) {
+ /* ignore nonexistent snapshots */
+ return (error == ENOENT ? 0 : error);
+ }
- if (err == 0)
- err = EEXIST;
- else if (err == ENOENT)
- err = 0;
+ /* new name should not exist */
+ error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
+ if (error == 0)
+ error = EEXIST;
+ else if (error == ENOENT)
+ error = 0;
/* dataset name + 1 for the "@" + the new snapshot name must fit */
- if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
- err = ENAMETOOLONG;
+ if (dsl_dir_namelen(hds->ds_dir) + 1 +
+ strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
+ error = ENAMETOOLONG;
- return (err);
+ return (error);
}
-static void
-dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+static int
+dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- const char *newsnapname = arg2;
- dsl_dir_t *dd = ds->ds_dir;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
+ dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *hds;
- int err;
-
- ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
-
- VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
+ int error;
- VERIFY(0 == dsl_dataset_get_snapname(ds));
- err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
- ASSERT0(err);
- mutex_enter(&ds->ds_lock);
- (void) strcpy(ds->ds_snapname, newsnapname);
- mutex_exit(&ds->ds_lock);
- err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
- ds->ds_snapname, 8, 1, &ds->ds_object, tx);
- ASSERT0(err);
+ error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
+ if (error != 0)
+ return (error);
- spa_history_log_internal_ds(ds, "rename", tx,
- "-> @%s", newsnapname);
+ if (ddrsa->ddrsa_recursive) {
+ error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
+ dsl_dataset_rename_snapshot_check_impl, ddrsa,
+ DS_FIND_CHILDREN);
+ } else {
+ error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
+ }
dsl_dataset_rele(hds, FTAG);
+ return (error);
}
-struct renamesnaparg {
- dsl_sync_task_group_t *dstg;
- char failed[MAXPATHLEN];
- char *oldsnap;
- char *newsnap;
-};
-
static int
-dsl_snapshot_rename_one(const char *name, void *arg)
+dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
+ dsl_dataset_t *hds, void *arg)
{
- struct renamesnaparg *ra = arg;
- dsl_dataset_t *ds = NULL;
- char *snapname;
- int err;
-
- snapname = kmem_asprintf("%s@%s", name, ra->oldsnap);
- (void) strlcpy(ra->failed, snapname, sizeof (ra->failed));
+ dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+ dsl_dataset_t *ds;
+ uint64_t val;
+ dmu_tx_t *tx = ddrsa->ddrsa_tx;
+ int error;
- /*
- * For recursive snapshot renames the parent won't be changing
- * so we just pass name for both the to/from argument.
- */
- err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
- if (err != 0) {
- strfree(snapname);
- return (err == ENOENT ? 0 : err);
+ error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
+ ASSERT(error == 0 || error == ENOENT);
+ if (error == ENOENT) {
+ /* ignore nonexistent snapshots */
+ return (0);
}
-#ifdef _KERNEL
- /*
- * For all filesystems undergoing rename, we'll need to unmount it.
- */
- (void) zfs_unmount_snap(snapname, NULL);
-#endif
- err = dsl_dataset_hold(snapname, ra->dstg, &ds);
- strfree(snapname);
- if (err != 0)
- return (err == ENOENT ? 0 : err);
+ VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));
+
+ /* log before we change the name */
+ spa_history_log_internal_ds(ds, "rename", tx,
+ "-> @%s", ddrsa->ddrsa_newsnapname);
- dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
- dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
+ VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx));
+ mutex_enter(&ds->ds_lock);
+ (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
+ mutex_exit(&ds->ds_lock);
+ VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
+ ds->ds_snapname, 8, 1, &ds->ds_object, tx));
+ dsl_dataset_rele(ds, FTAG);
return (0);
}
-static int
-dsl_recursive_rename(char *oldname, const char *newname)
+static void
+dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
{
- int err;
- struct renamesnaparg *ra;
- dsl_sync_task_t *dst;
- spa_t *spa;
- char *cp, *fsname = spa_strdup(oldname);
- int len = strlen(oldname) + 1;
-
- /* truncate the snapshot name to get the fsname */
- cp = strchr(fsname, '@');
- *cp = '\0';
-
- err = spa_open(fsname, &spa, FTAG);
- if (err) {
- kmem_free(fsname, len);
- return (err);
- }
- ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
- ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
-
- ra->oldsnap = strchr(oldname, '@') + 1;
- ra->newsnap = strchr(newname, '@') + 1;
- *ra->failed = '\0';
-
- err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
- DS_FIND_CHILDREN);
- kmem_free(fsname, len);
-
- if (err == 0) {
- err = dsl_sync_task_group_wait(ra->dstg);
- }
+ dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *hds;
- for (dst = list_head(&ra->dstg->dstg_tasks); dst;
- dst = list_next(&ra->dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
- if (dst->dst_err) {
- dsl_dir_name(ds->ds_dir, ra->failed);
- (void) strlcat(ra->failed, "@", sizeof (ra->failed));
- (void) strlcat(ra->failed, ra->newsnap,
- sizeof (ra->failed));
- }
- dsl_dataset_rele(ds, ra->dstg);
+ VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
+ ddrsa->ddrsa_tx = tx;
+ if (ddrsa->ddrsa_recursive) {
+ VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
+ dsl_dataset_rename_snapshot_sync_impl, ddrsa,
+ DS_FIND_CHILDREN));
+ } else {
+ VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
}
-
- if (err)
- (void) strlcpy(oldname, ra->failed, sizeof (ra->failed));
-
- dsl_sync_task_group_destroy(ra->dstg);
- kmem_free(ra, sizeof (struct renamesnaparg));
- spa_close(spa, FTAG);
- return (err);
+ dsl_dataset_rele(hds, FTAG);
}
-static int
-dsl_valid_rename(const char *oldname, void *arg)
+int
+dsl_dataset_rename_snapshot(const char *fsname,
+ const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
- int delta = *(int *)arg;
+ dsl_dataset_rename_snapshot_arg_t ddrsa;
- if (strlen(oldname) + delta >= MAXNAMELEN)
- return (ENAMETOOLONG);
+ ddrsa.ddrsa_fsname = fsname;
+ ddrsa.ddrsa_oldsnapname = oldsnapname;
+ ddrsa.ddrsa_newsnapname = newsnapname;
+ ddrsa.ddrsa_recursive = recursive;
- return (0);
+ return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
+ dsl_dataset_rename_snapshot_sync, &ddrsa, 1));
}
-#pragma weak dmu_objset_rename = dsl_dataset_rename
-int
-dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
+static int
+dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd;
+ const char *fsname = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
- const char *tail;
- int err;
+ int64_t unused_refres_delta;
+ int error;
- err = dsl_dir_open(oldname, FTAG, &dd, &tail);
- if (err)
- return (err);
+ error = dsl_dataset_hold(dp, fsname, FTAG, &ds);
+ if (error != 0)
+ return (error);
- if (tail == NULL) {
- int delta = strlen(newname) - strlen(oldname);
+ /* must not be a snapshot */
+ if (dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
- /* if we're growing, validate child name lengths */
- if (delta > 0)
- err = dmu_objset_find(oldname, dsl_valid_rename,
- &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+ /* must have a most recent snapshot */
+ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
- if (err == 0)
- err = dsl_dir_rename(dd, newname);
- dsl_dir_close(dd, FTAG);
- return (err);
+ if (dsl_dataset_long_held(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EBUSY);
}
- if (tail[0] != '@') {
- /* the name ended in a nonexistent component */
- dsl_dir_close(dd, FTAG);
- return (ENOENT);
+ /*
+ * Check if the snap we are rolling back to uses more than
+ * the refquota.
+ */
+ if (ds->ds_quota != 0 &&
+ ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EDQUOT);
}
- dsl_dir_close(dd, FTAG);
+ /*
+ * When we do the clone swap, we will temporarily use more space
+ * due to the refreservation (the head will no longer have any
+ * unique space, so the entire amount of the refreservation will need
+ * to be free). We will immediately destroy the clone, freeing
+ * this space, but the freeing happens over many txg's.
+ */
+ unused_refres_delta = (int64_t)MIN(ds->ds_reserved,
+ ds->ds_phys->ds_unique_bytes);
- /* new name must be snapshot in same filesystem */
- tail = strchr(newname, '@');
- if (tail == NULL)
- return (EINVAL);
- tail++;
- if (strncmp(oldname, newname, tail - newname) != 0)
- return (EXDEV);
+ if (unused_refres_delta > 0 &&
+ unused_refres_delta >
+ dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (ENOSPC);
+ }
- if (recursive) {
- err = dsl_recursive_rename(oldname, newname);
- } else {
- err = dsl_dataset_hold(oldname, FTAG, &ds);
- if (err)
- return (err);
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_snapshot_rename_check,
- dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
+static void
+dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
+{
+ const char *fsname = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds, *clone;
+ uint64_t cloneobj;
- dsl_dataset_rele(ds, FTAG);
- }
+ VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds));
- return (err);
+ cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
+ ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
+
+ VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
+
+ dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
+ dsl_dataset_zero_zil(ds, tx);
+
+ dsl_destroy_head_sync_impl(clone, tx);
+
+ dsl_dataset_rele(clone, FTAG);
+ dsl_dataset_rele(ds, FTAG);
+}
+
+int
+dsl_dataset_rollback(const char *fsname)
+{
+ return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
+ dsl_dataset_rollback_sync, (void *)fsname, 1));
}
struct promotenode {
@@ -2625,49 +1770,66 @@ struct promotenode {
dsl_dataset_t *ds;
};
-struct promotearg {
+typedef struct dsl_dataset_promote_arg {
+ const char *ddpa_clonename;
+ dsl_dataset_t *ddpa_clone;
list_t shared_snaps, origin_snaps, clone_snaps;
- dsl_dataset_t *origin_origin;
+ dsl_dataset_t *origin_origin; /* origin of the origin */
uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
char *err_ds;
-};
+} dsl_dataset_promote_arg_t;
static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
-static boolean_t snaplist_unstable(list_t *l);
+static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
+ void *tag);
+static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);
static int
-dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *hds = arg1;
- struct promotearg *pa = arg2;
- struct promotenode *snap = list_head(&pa->shared_snaps);
- dsl_dataset_t *origin_ds = snap->ds;
+ dsl_dataset_promote_arg_t *ddpa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *hds;
+ struct promotenode *snap;
+ dsl_dataset_t *origin_ds;
int err;
uint64_t unused;
- /* Check that it is a real clone */
- if (!dsl_dir_is_clone(hds->ds_dir))
- return (EINVAL);
+ err = promote_hold(ddpa, dp, FTAG);
+ if (err != 0)
+ return (err);
- /* Since this is so expensive, don't do the preliminary check */
- if (!dmu_tx_is_syncing(tx))
- return (0);
+ hds = ddpa->ddpa_clone;
- if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
+ if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
+ promote_rele(ddpa, FTAG);
return (EXDEV);
+ }
+
+ /*
+ * Compute and check the amount of space to transfer. Since this is
+ * so expensive, don't do the preliminary check.
+ */
+ if (!dmu_tx_is_syncing(tx)) {
+ promote_rele(ddpa, FTAG);
+ return (0);
+ }
+
+ snap = list_head(&ddpa->shared_snaps);
+ origin_ds = snap->ds;
/* compute origin's new unique space */
- snap = list_tail(&pa->clone_snaps);
+ snap = list_tail(&ddpa->clone_snaps);
ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
dsl_deadlist_space_range(&snap->ds->ds_deadlist,
origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
- &pa->unique, &unused, &unused);
+ &ddpa->unique, &unused, &unused);
/*
* Walk the snapshots that we are moving
*
* Compute space to transfer. Consider the incremental changes
- * to used for each snapshot:
+ * to used by each snapshot:
* (my used) = (prev's used) + (blocks born) - (blocks killed)
* So each snapshot gave birth to:
* (blocks born) = (my used) - (prev's used) + (blocks killed)
@@ -2678,18 +1840,28 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
- pa->used = origin_ds->ds_phys->ds_referenced_bytes;
- pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
- pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
- for (snap = list_head(&pa->shared_snaps); snap;
- snap = list_next(&pa->shared_snaps, snap)) {
+ ddpa->used = origin_ds->ds_phys->ds_referenced_bytes;
+ ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes;
+ ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
+ for (snap = list_head(&ddpa->shared_snaps); snap;
+ snap = list_next(&ddpa->shared_snaps, snap)) {
uint64_t val, dlused, dlcomp, dluncomp;
dsl_dataset_t *ds = snap->ds;
+ /*
+ * If there are long holds, we won't be able to evict
+ * the objset.
+ */
+ if (dsl_dataset_long_held(ds)) {
+ err = EBUSY;
+ goto out;
+ }
+
/* Check that the snapshot name does not conflict */
- VERIFY(0 == dsl_dataset_get_snapname(ds));
+ VERIFY0(dsl_dataset_get_snapname(ds));
err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
if (err == 0) {
+ (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
err = EEXIST;
goto out;
}
@@ -2702,26 +1874,27 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deadlist_space(&ds->ds_deadlist,
&dlused, &dlcomp, &dluncomp);
- pa->used += dlused;
- pa->comp += dlcomp;
- pa->uncomp += dluncomp;
+ ddpa->used += dlused;
+ ddpa->comp += dlcomp;
+ ddpa->uncomp += dluncomp;
}
/*
* If we are a clone of a clone then we never reached ORIGIN,
* so we need to subtract out the clone origin's used space.
*/
- if (pa->origin_origin) {
- pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
- pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
- pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
+ if (ddpa->origin_origin) {
+ ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes;
+ ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes;
+ ddpa->uncomp -=
+ ddpa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
/* Check that there is enough space here */
err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
- pa->used);
- if (err)
- return (err);
+ ddpa->used);
+ if (err != 0)
+ goto out;
/*
* Compute the amounts of space that will be used by snapshots
@@ -2739,68 +1912,75 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* calls will be fast because they do not have to
* iterate over all bps.
*/
- snap = list_head(&pa->origin_snaps);
- err = snaplist_space(&pa->shared_snaps,
- snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap);
- if (err)
- return (err);
+ snap = list_head(&ddpa->origin_snaps);
+ err = snaplist_space(&ddpa->shared_snaps,
+ snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
+ if (err != 0)
+ goto out;
- err = snaplist_space(&pa->clone_snaps,
+ err = snaplist_space(&ddpa->clone_snaps,
snap->ds->ds_dir->dd_origin_txg, &space);
- if (err)
- return (err);
- pa->cloneusedsnap += space;
+ if (err != 0)
+ goto out;
+ ddpa->cloneusedsnap += space;
}
if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
- err = snaplist_space(&pa->origin_snaps,
- origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
- if (err)
- return (err);
+ err = snaplist_space(&ddpa->origin_snaps,
+ origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap);
+ if (err != 0)
+ goto out;
}
- return (0);
out:
- pa->err_ds = snap->ds->ds_snapname;
+ promote_rele(ddpa, FTAG);
return (err);
}
static void
-dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *hds = arg1;
- struct promotearg *pa = arg2;
- struct promotenode *snap = list_head(&pa->shared_snaps);
- dsl_dataset_t *origin_ds = snap->ds;
+ dsl_dataset_promote_arg_t *ddpa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *hds;
+ struct promotenode *snap;
+ dsl_dataset_t *origin_ds;
dsl_dataset_t *origin_head;
- dsl_dir_t *dd = hds->ds_dir;
- dsl_pool_t *dp = hds->ds_dir->dd_pool;
+ dsl_dir_t *dd;
dsl_dir_t *odd = NULL;
uint64_t oldnext_obj;
int64_t delta;
- ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
+ VERIFY0(promote_hold(ddpa, dp, FTAG));
+ hds = ddpa->ddpa_clone;
+
+ ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE);
- snap = list_head(&pa->origin_snaps);
+ snap = list_head(&ddpa->shared_snaps);
+ origin_ds = snap->ds;
+ dd = hds->ds_dir;
+
+ snap = list_head(&ddpa->origin_snaps);
origin_head = snap->ds;
/*
* We need to explicitly open odd, since origin_ds's dd will be
* changing.
*/
- VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
+ VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
NULL, FTAG, &odd));
/* change origin's next snap */
dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
- snap = list_tail(&pa->clone_snaps);
+ snap = list_tail(&ddpa->clone_snaps);
ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
/* change the origin's next clone */
if (origin_ds->ds_phys->ds_next_clones_obj) {
- remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
- VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
+ dsl_dataset_remove_from_next_clones(origin_ds,
+ snap->ds->ds_object, tx);
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
origin_ds->ds_phys->ds_next_clones_obj,
oldnext_obj, tx));
}
@@ -2817,39 +1997,43 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
/* change dd_clone entries */
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
- VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+ VERIFY0(zap_remove_int(dp->dp_meta_objset,
odd->dd_phys->dd_clones, hds->ds_object, tx));
- VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
- pa->origin_origin->ds_dir->dd_phys->dd_clones,
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
+ ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
hds->ds_object, tx));
- VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
- pa->origin_origin->ds_dir->dd_phys->dd_clones,
+ VERIFY0(zap_remove_int(dp->dp_meta_objset,
+ ddpa->origin_origin->ds_dir->dd_phys->dd_clones,
origin_head->ds_object, tx));
if (dd->dd_phys->dd_clones == 0) {
dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
- VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
dd->dd_phys->dd_clones, origin_head->ds_object, tx));
-
}
/* move snapshots to this dir */
- for (snap = list_head(&pa->shared_snaps); snap;
- snap = list_next(&pa->shared_snaps, snap)) {
+ for (snap = list_head(&ddpa->shared_snaps); snap;
+ snap = list_next(&ddpa->shared_snaps, snap)) {
dsl_dataset_t *ds = snap->ds;
- /* unregister props as dsl_dir is changing */
+ /*
+ * Property callbacks are registered to a particular
+ * dsl_dir. Since ours is changing, evict the objset
+ * so that they will be unregistered from the old dsl_dir.
+ */
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
+
/* move snap name entry */
- VERIFY(0 == dsl_dataset_get_snapname(ds));
- VERIFY(0 == dsl_dataset_snap_remove(origin_head,
+ VERIFY0(dsl_dataset_get_snapname(ds));
+ VERIFY0(dsl_dataset_snap_remove(origin_head,
ds->ds_snapname, tx));
- VERIFY(0 == zap_add(dp->dp_meta_objset,
+ VERIFY0(zap_add(dp->dp_meta_objset,
hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
8, 1, &ds->ds_object, tx));
@@ -2858,8 +2042,8 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
ds->ds_phys->ds_dir_obj = dd->dd_object;
ASSERT3P(ds->ds_dir, ==, odd);
- dsl_dir_close(ds->ds_dir, ds);
- VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
+ dsl_dir_rele(ds->ds_dir, ds);
+ VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
NULL, ds, &ds->ds_dir));
/* move any clone references */
@@ -2883,20 +2067,20 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
continue;
}
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+ VERIFY0(dsl_dataset_hold_obj(dp,
za.za_first_integer, FTAG, &cnds));
o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;
- VERIFY3U(zap_remove_int(dp->dp_meta_objset,
- odd->dd_phys->dd_clones, o, tx), ==, 0);
- VERIFY3U(zap_add_int(dp->dp_meta_objset,
- dd->dd_phys->dd_clones, o, tx), ==, 0);
+ VERIFY0(zap_remove_int(dp->dp_meta_objset,
+ odd->dd_phys->dd_clones, o, tx));
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
+ dd->dd_phys->dd_clones, o, tx));
dsl_dataset_rele(cnds, FTAG);
}
zap_cursor_fini(&zc);
}
- ASSERT0(dsl_prop_numcb(ds));
+ ASSERT(!dsl_prop_hascb(ds));
}
/*
@@ -2906,31 +2090,31 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* is true for each of {clone,origin} independently.
*/
- delta = pa->cloneusedsnap -
+ delta = ddpa->cloneusedsnap -
dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
ASSERT3S(delta, >=, 0);
- ASSERT3U(pa->used, >=, delta);
+ ASSERT3U(ddpa->used, >=, delta);
dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
dsl_dir_diduse_space(dd, DD_USED_HEAD,
- pa->used - delta, pa->comp, pa->uncomp, tx);
+ ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);
- delta = pa->originusedsnap -
+ delta = ddpa->originusedsnap -
odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
ASSERT3S(delta, <=, 0);
- ASSERT3U(pa->used, >=, -delta);
+ ASSERT3U(ddpa->used, >=, -delta);
dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
dsl_dir_diduse_space(odd, DD_USED_HEAD,
- -pa->used - delta, -pa->comp, -pa->uncomp, tx);
+ -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);
- origin_ds->ds_phys->ds_unique_bytes = pa->unique;
+ origin_ds->ds_phys->ds_unique_bytes = ddpa->unique;
/* log history record */
spa_history_log_internal_ds(hds, "promote", tx, "");
- dsl_dir_close(odd, FTAG);
+ dsl_dir_rele(odd, FTAG);
+ promote_rele(ddpa, FTAG);
}
-static char *snaplist_tag = "snaplist";
/*
* Make a list of dsl_dataset_t's for the snapshots between first_obj
* (exclusive) and last_obj (inclusive). The list will be in reverse
@@ -2938,13 +2122,11 @@ static char *snaplist_tag = "snaplist";
* snapshots back to this dataset's origin.
*/
static int
-snaplist_make(dsl_pool_t *dp, boolean_t own,
- uint64_t first_obj, uint64_t last_obj, list_t *l)
+snaplist_make(dsl_pool_t *dp,
+ uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag)
{
uint64_t obj = last_obj;
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
-
list_create(l, sizeof (struct promotenode),
offsetof(struct promotenode, link));
@@ -2953,28 +2135,15 @@ snaplist_make(dsl_pool_t *dp, boolean_t own,
struct promotenode *snap;
int err;
- if (own) {
- err = dsl_dataset_own_obj(dp, obj,
- 0, snaplist_tag, &ds);
- if (err == 0)
- dsl_dataset_make_exclusive(ds, snaplist_tag);
- } else {
- err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
- }
- if (err == ENOENT) {
- /* lost race with snapshot destroy */
- struct promotenode *last = list_tail(l);
- ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
- obj = last->ds->ds_phys->ds_prev_snap_obj;
- continue;
- } else if (err) {
+ err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
+ ASSERT(err != ENOENT);
+ if (err != 0)
return (err);
- }
if (first_obj == 0)
first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
- snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
+ snap = kmem_alloc(sizeof (*snap), KM_SLEEP);
snap->ds = ds;
list_insert_tail(l, snap);
obj = ds->ds_phys->ds_prev_snap_obj;
@@ -2999,208 +2168,209 @@ snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
}
+/*
+ * Tear down a snapshot list such as the ones snaplist_make() builds: for
+ * each node, release its dataset hold using 'tag' and free the node, then
+ * destroy the list itself. Returns without doing anything when 'l' is NULL
+ * or its head link is not active (presumably a list that was never created
+ * or was already destroyed -- confirm against list_create/list_destroy).
+ */
static void
-snaplist_destroy(list_t *l, boolean_t own)
+snaplist_destroy(list_t *l, void *tag)
{
struct promotenode *snap;
- if (!l || !list_link_active(&l->list_head))
+ if (l == NULL || !list_link_active(&l->list_head))
return;
while ((snap = list_tail(l)) != NULL) {
list_remove(l, snap);
- if (own)
- dsl_dataset_disown(snap->ds, snaplist_tag);
- else
- dsl_dataset_rele(snap->ds, snaplist_tag);
- kmem_free(snap, sizeof (struct promotenode));
+ dsl_dataset_rele(snap->ds, tag);
+ kmem_free(snap, sizeof (*snap));
}
list_destroy(l);
}
-/*
- * Promote a clone. Nomenclature note:
- * "clone" or "cds": the original clone which is being promoted
- * "origin" or "ods": the snapshot which is originally clone's origin
- * "origin head" or "ohds": the dataset which is the head
- * (filesystem/volume) for the origin
- * "origin origin": the origin of the origin's filesystem (typically
- * NULL, indicating that the clone is not a clone of a clone).
- */
-int
-dsl_dataset_promote(const char *name, char *conflsnap)
+/*
+ * Take every hold needed to promote the clone named ddpa->ddpa_clonename,
+ * all with 'tag': the clone itself (ddpa_clone), the shared_snaps,
+ * clone_snaps and origin_snaps lists and, when the origin's dsl_dir has an
+ * origin of its own, origin_origin.
+ *
+ * Returns 0 on success, EINVAL if the dataset is a snapshot or its dsl_dir
+ * is not a clone, or the error from the hold that failed. On failure the
+ * holds taken so far are dropped before returning.
+ */
+static int
+promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
{
- dsl_dataset_t *ds;
+ int error;
dsl_dir_t *dd;
- dsl_pool_t *dp;
- dmu_object_info_t doi;
- struct promotearg pa = { 0 };
struct promotenode *snap;
- int err;
- err = dsl_dataset_hold(name, FTAG, &ds);
- if (err)
- return (err);
- dd = ds->ds_dir;
- dp = dd->dd_pool;
-
- err = dmu_object_info(dp->dp_meta_objset,
- ds->ds_phys->ds_snapnames_zapobj, &doi);
- if (err) {
- dsl_dataset_rele(ds, FTAG);
- return (err);
- }
+ error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
+ &ddpa->ddpa_clone);
+ if (error != 0)
+ return (error);
+ dd = ddpa->ddpa_clone->ds_dir;
- if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
- dsl_dataset_rele(ds, FTAG);
+ if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
+ !dsl_dir_is_clone(dd)) {
+ dsl_dataset_rele(ddpa->ddpa_clone, tag);
return (EINVAL);
}
- /*
- * We are going to inherit all the snapshots taken before our
- * origin (i.e., our new origin will be our parent's origin).
- * Take ownership of them so that we can rename them into our
- * namespace.
- */
- rw_enter(&dp->dp_config_rwlock, RW_READER);
-
- err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
- &pa.shared_snaps);
- if (err != 0)
+ /*
+ * shared_snaps: the clone's origin snapshot and the snapshots
+ * before it, which the promoted clone will inherit.
+ */
+ error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj,
+ &ddpa->shared_snaps, tag);
+ if (error != 0)
goto out;
- err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
- if (err != 0)
+ error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
+ &ddpa->clone_snaps, tag);
+ if (error != 0)
goto out;
- snap = list_head(&pa.shared_snaps);
+ snap = list_head(&ddpa->shared_snaps);
ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
- err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
- snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
- if (err != 0)
+ error = snaplist_make(dp, dd->dd_phys->dd_origin_obj,
+ snap->ds->ds_dir->dd_phys->dd_head_dataset_obj,
+ &ddpa->origin_snaps, tag);
+ if (error != 0)
goto out;
+ /* The origin's dsl_dir is itself a clone: hold its origin too. */
if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
- err = dsl_dataset_hold_obj(dp,
+ error = dsl_dataset_hold_obj(dp,
snap->ds->ds_dir->dd_phys->dd_origin_obj,
- FTAG, &pa.origin_origin);
- if (err != 0)
+ tag, &ddpa->origin_origin);
+ if (error != 0)
goto out;
}
-
out:
- rw_exit(&dp->dp_config_rwlock);
+ /* Reached on success as well; only undo the holds on failure. */
+ if (error != 0)
+ promote_rele(ddpa, tag);
+ return (error);
+}
+
+/*
+ * Drop every hold taken by promote_hold() with 'tag': the three snapshot
+ * lists, origin_origin (if one was held) and the clone itself.
+ *
+ * The same ddpa is held and released more than once over the life of a
+ * promote sync task, so origin_origin is reset to NULL after it is
+ * released. Otherwise a later promote_hold() that fails before re-holding
+ * origin_origin would come back here with a stale pointer and release a
+ * hold it never took.
+ */
+static void
+promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
+{
+ snaplist_destroy(&ddpa->shared_snaps, tag);
+ snaplist_destroy(&ddpa->clone_snaps, tag);
+ snaplist_destroy(&ddpa->origin_snaps, tag);
+ if (ddpa->origin_origin != NULL) {
+ dsl_dataset_rele(ddpa->origin_origin, tag);
+ ddpa->origin_origin = NULL;
+ }
+ dsl_dataset_rele(ddpa->ddpa_clone, tag);
+}
+
+/*
+ * Promote a clone.
+ *
+ * Counts the entries in the named dataset's snapnames ZAP object, then runs
+ * dsl_dataset_promote_check/dsl_dataset_promote_sync through dsl_sync_task(),
+ * passing 2 + numsnaps as the final argument (presumably the amount of
+ * space the task may modify -- confirm against dsl_sync_task).
+ *
+ * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
+ * in with the name. (It must be at least MAXNAMELEN bytes long.)
+ *
+ * Returns the error from dmu_objset_hold() or zap_count() if either fails,
+ * otherwise the result of dsl_sync_task().
+ */
+int
+dsl_dataset_promote(const char *name, char *conflsnap)
+{
+ dsl_dataset_promote_arg_t ddpa = { 0 };
+ uint64_t numsnaps;
+ int error;
+ objset_t *os;
/*
- * Add in 128x the snapnames zapobj size, since we will be moving
- * a bunch of snapnames to the promoted ds, and dirtying their
- * bonus buffers.
+ * We will modify space proportional to the number of
+ * snapshots. Compute numsnaps.
*/
- if (err == 0) {
- err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
- dsl_dataset_promote_sync, ds, &pa,
- 2 + 2 * doi.doi_physical_blocks_512);
- if (err && pa.err_ds && conflsnap)
- (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
- }
+ error = dmu_objset_hold(name, FTAG, &os);
+ if (error != 0)
+ return (error);
+ error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
+ dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps);
+ /* The objset hold is only needed for the count; drop it either way. */
+ dmu_objset_rele(os, FTAG);
+ if (error != 0)
+ return (error);
- snaplist_destroy(&pa.shared_snaps, B_TRUE);
- snaplist_destroy(&pa.clone_snaps, B_FALSE);
- snaplist_destroy(&pa.origin_snaps, B_FALSE);
- if (pa.origin_origin)
- dsl_dataset_rele(pa.origin_origin, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (err);
-}
+ ddpa.ddpa_clonename = name;
+ /* The caller's buffer is handed to the task through err_ds. */
+ ddpa.err_ds = conflsnap;
-struct cloneswaparg {
- dsl_dataset_t *cds; /* clone dataset */
- dsl_dataset_t *ohds; /* origin's head dataset */
- boolean_t force;
- int64_t unused_refres_delta; /* change in unconsumed refreservation */
-};
+ return (dsl_sync_task(name, dsl_dataset_promote_check,
+ dsl_dataset_promote_sync, &ddpa, 2 + numsnaps));
+}
-/* ARGSUSED */
-static int
-dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
+/*
+ * Decide whether 'clone' may be swapped with 'origin_head'. Nothing is
+ * modified here.
+ *
+ * Returns 0 if the swap is allowed, otherwise:
+ * EINVAL - either dataset is a snapshot, their ds_prev differ, the
+ * clone/origin relationship checks fail, or clone's dsl_dir is not a
+ * child of origin_head's dsl_dir
+ * ETXTBSY - origin_head was modified since its last snapshot and 'force'
+ * is not set
+ * EBUSY - origin_head has long holds
+ * ENOSPC - the swap would raise the unconsumed refreservation by more
+ * than the space available to origin_head's dsl_dir
+ * EDQUOT - origin_head has a nonzero ds_quota and clone's referenced
+ * bytes exceed it
+ */
+int
+dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
+ dsl_dataset_t *origin_head, boolean_t force)
{
- struct cloneswaparg *csa = arg1;
+ int64_t unused_refres_delta;
/* they should both be heads */
- if (dsl_dataset_is_snapshot(csa->cds) ||
- dsl_dataset_is_snapshot(csa->ohds))
+ if (dsl_dataset_is_snapshot(clone) ||
+ dsl_dataset_is_snapshot(origin_head))
return (EINVAL);
/* the branch point should be just before them */
- if (csa->cds->ds_prev != csa->ohds->ds_prev)
+ if (clone->ds_prev != origin_head->ds_prev)
return (EINVAL);
- /* cds should be the clone (unless they are unrelated) */
- if (csa->cds->ds_prev != NULL &&
- csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
- csa->ohds->ds_object !=
- csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
+ /* clone should be the clone (unless they are unrelated) */
+ if (clone->ds_prev != NULL &&
+ clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
+ origin_head->ds_object !=
+ clone->ds_prev->ds_phys->ds_next_snap_obj)
return (EINVAL);
/* the clone should be a child of the origin */
- if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
+ if (clone->ds_dir->dd_parent != origin_head->ds_dir)
return (EINVAL);
- /* ohds shouldn't be modified unless 'force' */
- if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
+ /* origin_head shouldn't be modified unless 'force' */
+ if (!force && dsl_dataset_modified_since_lastsnap(origin_head))
return (ETXTBSY);
- /* adjust amount of any unconsumed refreservation */
- csa->unused_refres_delta =
- (int64_t)MIN(csa->ohds->ds_reserved,
- csa->ohds->ds_phys->ds_unique_bytes) -
- (int64_t)MIN(csa->ohds->ds_reserved,
- csa->cds->ds_phys->ds_unique_bytes);
+ /* origin_head should have no long holds (e.g. is not mounted) */
+ if (dsl_dataset_long_held(origin_head))
+ return (EBUSY);
+
+ /* check amount of any unconsumed refreservation */
+ unused_refres_delta =
+ (int64_t)MIN(origin_head->ds_reserved,
+ origin_head->ds_phys->ds_unique_bytes) -
+ (int64_t)MIN(origin_head->ds_reserved,
+ clone->ds_phys->ds_unique_bytes);
- if (csa->unused_refres_delta > 0 &&
- csa->unused_refres_delta >
- dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
+ if (unused_refres_delta > 0 &&
+ unused_refres_delta >
+ dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
return (ENOSPC);
- if (csa->ohds->ds_quota != 0 &&
- csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota)
+ /* clone can't be over the head's refquota */
+ if (origin_head->ds_quota != 0 &&
+ clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota)
return (EDQUOT);
return (0);
}
-/* ARGSUSED */
-static void
-dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+void
+dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
+ dsl_dataset_t *origin_head, dmu_tx_t *tx)
{
- struct cloneswaparg *csa = arg1;
- dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ int64_t unused_refres_delta;
- ASSERT(csa->cds->ds_reserved == 0);
- ASSERT(csa->ohds->ds_quota == 0 ||
- csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);
+ ASSERT(clone->ds_reserved == 0);
+ ASSERT(origin_head->ds_quota == 0 ||
+ clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota);
- dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
- dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
+ dmu_buf_will_dirty(clone->ds_dbuf, tx);
+ dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
- if (csa->cds->ds_objset != NULL) {
- dmu_objset_evict(csa->cds->ds_objset);
- csa->cds->ds_objset = NULL;
+ if (clone->ds_objset != NULL) {
+ dmu_objset_evict(clone->ds_objset);
+ clone->ds_objset = NULL;
}
- if (csa->ohds->ds_objset != NULL) {
- dmu_objset_evict(csa->ohds->ds_objset);
- csa->ohds->ds_objset = NULL;
+ if (origin_head->ds_objset != NULL) {
+ dmu_objset_evict(origin_head->ds_objset);
+ origin_head->ds_objset = NULL;
}
+ unused_refres_delta =
+ (int64_t)MIN(origin_head->ds_reserved,
+ origin_head->ds_phys->ds_unique_bytes) -
+ (int64_t)MIN(origin_head->ds_reserved,
+ clone->ds_phys->ds_unique_bytes);
+
/*
* Reset origin's unique bytes, if it exists.
*/
- if (csa->cds->ds_prev) {
- dsl_dataset_t *origin = csa->cds->ds_prev;
+ if (clone->ds_prev) {
+ dsl_dataset_t *origin = clone->ds_prev;
uint64_t comp, uncomp;
dmu_buf_will_dirty(origin->ds_dbuf, tx);
- dsl_deadlist_space_range(&csa->cds->ds_deadlist,
+ dsl_deadlist_space_range(&clone->ds_deadlist,
origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
&origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
}
@@ -3208,9 +2378,9 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
/* swap blkptrs */
{
blkptr_t tmp;
- tmp = csa->ohds->ds_phys->ds_bp;
- csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
- csa->cds->ds_phys->ds_bp = tmp;
+ tmp = origin_head->ds_phys->ds_bp;
+ origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp;
+ clone->ds_phys->ds_bp = tmp;
}
/* set dd_*_bytes */
@@ -3219,25 +2389,25 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
uint64_t cdl_used, cdl_comp, cdl_uncomp;
uint64_t odl_used, odl_comp, odl_uncomp;
- ASSERT3U(csa->cds->ds_dir->dd_phys->
+ ASSERT3U(clone->ds_dir->dd_phys->
dd_used_breakdown[DD_USED_SNAP], ==, 0);
- dsl_deadlist_space(&csa->cds->ds_deadlist,
+ dsl_deadlist_space(&clone->ds_deadlist,
&cdl_used, &cdl_comp, &cdl_uncomp);
- dsl_deadlist_space(&csa->ohds->ds_deadlist,
+ dsl_deadlist_space(&origin_head->ds_deadlist,
&odl_used, &odl_comp, &odl_uncomp);
- dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
- (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
- dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
- (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
- duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
+ dused = clone->ds_phys->ds_referenced_bytes + cdl_used -
+ (origin_head->ds_phys->ds_referenced_bytes + odl_used);
+ dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp -
+ (origin_head->ds_phys->ds_compressed_bytes + odl_comp);
+ duncomp = clone->ds_phys->ds_uncompressed_bytes +
cdl_uncomp -
- (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
+ (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp);
- dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
+ dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
dused, dcomp, duncomp, tx);
- dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
+ dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
-dused, -dcomp, -duncomp, tx);
/*
@@ -3246,86 +2416,46 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* deadlist (since that's the only thing that's
* changing that affects the snapused).
*/
- dsl_deadlist_space_range(&csa->cds->ds_deadlist,
- csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
+ dsl_deadlist_space_range(&clone->ds_deadlist,
+ origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
&cdl_used, &cdl_comp, &cdl_uncomp);
- dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
- csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
+ dsl_deadlist_space_range(&origin_head->ds_deadlist,
+ origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
&odl_used, &odl_comp, &odl_uncomp);
- dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
+ dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
DD_USED_HEAD, DD_USED_SNAP, tx);
}
/* swap ds_*_bytes */
- SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
- csa->cds->ds_phys->ds_referenced_bytes);
- SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
- csa->cds->ds_phys->ds_compressed_bytes);
- SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
- csa->cds->ds_phys->ds_uncompressed_bytes);
- SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
- csa->cds->ds_phys->ds_unique_bytes);
+ SWITCH64(origin_head->ds_phys->ds_referenced_bytes,
+ clone->ds_phys->ds_referenced_bytes);
+ SWITCH64(origin_head->ds_phys->ds_compressed_bytes,
+ clone->ds_phys->ds_compressed_bytes);
+ SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes,
+ clone->ds_phys->ds_uncompressed_bytes);
+ SWITCH64(origin_head->ds_phys->ds_unique_bytes,
+ clone->ds_phys->ds_unique_bytes);
/* apply any parent delta for change in unconsumed refreservation */
- dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
- csa->unused_refres_delta, 0, 0, tx);
+ dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
+ unused_refres_delta, 0, 0, tx);
/*
* Swap deadlists.
*/
- dsl_deadlist_close(&csa->cds->ds_deadlist);
- dsl_deadlist_close(&csa->ohds->ds_deadlist);
- SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
- csa->cds->ds_phys->ds_deadlist_obj);
- dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
- csa->cds->ds_phys->ds_deadlist_obj);
- dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
- csa->ohds->ds_phys->ds_deadlist_obj);
-
- dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
-
- spa_history_log_internal_ds(csa->cds, "clone swap", tx,
- "parent=%s", csa->ohds->ds_dir->dd_myname);
-}
+ dsl_deadlist_close(&clone->ds_deadlist);
+ dsl_deadlist_close(&origin_head->ds_deadlist);
+ SWITCH64(origin_head->ds_phys->ds_deadlist_obj,
+ clone->ds_phys->ds_deadlist_obj);
+ dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
+ clone->ds_phys->ds_deadlist_obj);
+ dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
+ origin_head->ds_phys->ds_deadlist_obj);
-/*
- * Swap 'clone' with its origin head datasets. Used at the end of "zfs
- * recv" into an existing fs to swizzle the file system to the new
- * version, and by "zfs rollback". Can also be used to swap two
- * independent head datasets if neither has any snapshots.
- */
-int
-dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
- boolean_t force)
-{
- struct cloneswaparg csa;
- int error;
+ dsl_scan_ds_clone_swapped(origin_head, clone, tx);
- ASSERT(clone->ds_owner);
- ASSERT(origin_head->ds_owner);
-retry:
- /*
- * Need exclusive access for the swap. If we're swapping these
- * datasets back after an error, we already hold the locks.
- */
- if (!RW_WRITE_HELD(&clone->ds_rwlock))
- rw_enter(&clone->ds_rwlock, RW_WRITER);
- if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
- !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
- rw_exit(&clone->ds_rwlock);
- rw_enter(&origin_head->ds_rwlock, RW_WRITER);
- if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
- rw_exit(&origin_head->ds_rwlock);
- goto retry;
- }
- }
- csa.cds = clone;
- csa.ohds = origin_head;
- csa.force = force;
- error = dsl_sync_task_do(clone->ds_dir->dd_pool,
- dsl_dataset_clone_swap_check,
- dsl_dataset_clone_swap_sync, &csa, NULL, 9);
- return (error);
+ spa_history_log_internal_ds(clone, "clone swap", tx,
+ "parent=%s", origin_head->ds_dir->dd_myname);
}
/*
@@ -3335,21 +2465,20 @@ retry:
int
dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
{
- spa_t *spa;
dsl_pool_t *dp;
dsl_dataset_t *ds;
int error;
- if ((error = spa_open(pname, &spa, FTAG)) != 0)
+ /* Hold the pool named by pname for the duration of the lookup. */
+ error = dsl_pool_hold(pname, FTAG, &dp);
+ if (error != 0)
return (error);
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
+
+ /*
+ * buf is only written when the dataset hold succeeds; presumably
+ * dsl_dataset_name() stores the dataset's name there -- confirm.
+ */
+ error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
+ if (error == 0) {
dsl_dataset_name(ds, buf);
dsl_dataset_rele(ds, FTAG);
}
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
+ /* The pool hold is dropped on both the success and failure paths. */
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -3402,102 +2531,134 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
return (error);
}
+/*
+ * Argument bundle used by the refquota and refreservation check/sync
+ * functions.
+ */
+typedef struct dsl_dataset_set_qr_arg {
+ const char *ddsqra_name; /* dataset name given to dsl_dataset_hold() */
+ zprop_source_t ddsqra_source; /* source passed on to the dsl_prop_* calls */
+ uint64_t ddsqra_value; /* requested property value */
+} dsl_dataset_set_qr_arg_t;
+
+
+/*
+ * Check function for setting refquota on the dataset named by
+ * ddsqra->ddsqra_name. The dataset hold taken here is released on every
+ * return path.
+ *
+ * Returns ENOTSUP if the pool version is below SPA_VERSION_REFQUOTA, the
+ * error from dsl_dataset_hold() or dsl_prop_predict() if either fails,
+ * EINVAL if the dataset is a snapshot, ENOSPC if the predicted nonzero
+ * value is below the dataset's referenced bytes or its ds_reserved, and 0
+ * otherwise (including when the predicted value is 0).
+ */
/* ARGSUSED */
static int
-dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_prop_setarg_t *psa = arg2;
- int err;
+ dsl_dataset_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ int error;
+ uint64_t newval;
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
+ if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
return (ENOTSUP);
- if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
- return (err);
+ error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ if (dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
+
+ /* newval receives the predicted value used by the checks below. */
+ error = dsl_prop_predict(ds->ds_dir,
+ zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+ ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
- if (psa->psa_effective_value == 0)
+ if (newval == 0) {
+ dsl_dataset_rele(ds, FTAG);
return (0);
+ }
- if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
- psa->psa_effective_value < ds->ds_reserved)
+ if (newval < ds->ds_phys->ds_referenced_bytes ||
+ newval < ds->ds_reserved) {
+ dsl_dataset_rele(ds, FTAG);
return (ENOSPC);
+ }
+ dsl_dataset_rele(ds, FTAG);
return (0);
}
-extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
-
-void
-dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+/*
+ * Sync function for setting refquota: stores the requested value as the
+ * refquota property of the dataset named by ddsqra->ddsqra_name, reads the
+ * property back, and updates ds->ds_quota if the value differs. A failed
+ * dataset hold or property read trips VERIFY0.
+ */
+static void
+dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t effective_value = psa->psa_effective_value;
+ dsl_dataset_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ uint64_t newval;
+
+ VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
- dsl_prop_set_sync(ds, psa, tx);
- DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
+ dsl_prop_set_sync_impl(ds,
+ zfs_prop_to_name(ZFS_PROP_REFQUOTA),
+ ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+ &ddsqra->ddsqra_value, tx);
- if (ds->ds_quota != effective_value) {
+ /* Use the value read back from the property, not the raw request. */
+ VERIFY0(dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));
+
+ if (ds->ds_quota != newval) {
dmu_buf_will_dirty(ds->ds_dbuf, tx);
- ds->ds_quota = effective_value;
+ ds->ds_quota = newval;
}
+ dsl_dataset_rele(ds, FTAG);
}
+/*
+ * Set the refquota of the dataset named 'dsname' to 'refquota' with the
+ * given property 'source'. Packs the arguments into a
+ * dsl_dataset_set_qr_arg_t and runs dsl_dataset_set_refquota_check and
+ * dsl_dataset_set_refquota_sync through dsl_sync_task(), whose result is
+ * returned.
+ */
int
-dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota)
+dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
+ uint64_t refquota)
{
- dsl_dataset_t *ds;
- dsl_prop_setarg_t psa;
- int err;
-
- dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota);
-
- err = dsl_dataset_hold(dsname, FTAG, &ds);
- if (err)
- return (err);
+ dsl_dataset_set_qr_arg_t ddsqra;
- /*
- * If someone removes a file, then tries to set the quota, we
- * want to make sure the file freeing takes effect.
- */
- txg_wait_open(ds->ds_dir->dd_pool, 0);
-
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
- ds, &psa, 0);
+ ddsqra.ddsqra_name = dsname;
+ ddsqra.ddsqra_source = source;
+ ddsqra.ddsqra_value = refquota;
- dsl_dataset_rele(ds, FTAG);
- return (err);
+ return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
+ dsl_dataset_set_refquota_sync, &ddsqra, 0));
}
static int
-dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t effective_value;
- uint64_t unique;
- int err;
+ dsl_dataset_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ int error;
+ uint64_t newval, unique;
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
- SPA_VERSION_REFRESERVATION)
+ if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
return (ENOTSUP);
- if (dsl_dataset_is_snapshot(ds))
- return (EINVAL);
+ error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+ if (error != 0)
+ return (error);
- if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
- return (err);
+ if (dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
- effective_value = psa->psa_effective_value;
+ error = dsl_prop_predict(ds->ds_dir,
+ zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+ ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
- if (!dmu_tx_is_syncing(tx))
+ if (!dmu_tx_is_syncing(tx)) {
+ dsl_dataset_rele(ds, FTAG);
return (0);
+ }
mutex_enter(&ds->ds_lock);
if (!DS_UNIQUE_IS_ACCURATE(ds))
@@ -3505,637 +2666,75 @@ dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
unique = ds->ds_phys->ds_unique_bytes;
mutex_exit(&ds->ds_lock);
- if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
- uint64_t delta = MAX(unique, effective_value) -
+ if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {
+ uint64_t delta = MAX(unique, newval) -
MAX(unique, ds->ds_reserved);
- if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
- return (ENOSPC);
- if (ds->ds_quota > 0 &&
- effective_value > ds->ds_quota)
+ if (delta >
+ dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||
+ (ds->ds_quota > 0 && newval > ds->ds_quota)) {
+ dsl_dataset_rele(ds, FTAG);
return (ENOSPC);
+ }
}
+ dsl_dataset_rele(ds, FTAG);
return (0);
}
-static void
-dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+void
+dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
+ zprop_source_t source, uint64_t value, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t effective_value = psa->psa_effective_value;
+ uint64_t newval;
uint64_t unique;
int64_t delta;
- dsl_prop_set_sync(ds, psa, tx);
- DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
+ source, sizeof (value), 1, &value, tx);
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ VERIFY0(dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
unique = ds->ds_phys->ds_unique_bytes;
- delta = MAX(0, (int64_t)(effective_value - unique)) -
+ delta = MAX(0, (int64_t)(newval - unique)) -
MAX(0, (int64_t)(ds->ds_reserved - unique));
- ds->ds_reserved = effective_value;
+ ds->ds_reserved = newval;
mutex_exit(&ds->ds_lock);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
mutex_exit(&ds->ds_dir->dd_lock);
}
-int
-dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
- uint64_t reservation)
-{
- dsl_dataset_t *ds;
- dsl_prop_setarg_t psa;
- int err;
-
- dsl_prop_setarg_init_uint64(&psa, "refreservation", source,
- &reservation);
-
- err = dsl_dataset_hold(dsname, FTAG, &ds);
- if (err)
- return (err);
-
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_set_reservation_check,
- dsl_dataset_set_reservation_sync, ds, &psa, 0);
-
- dsl_dataset_rele(ds, FTAG);
- return (err);
-}
-
-typedef struct zfs_hold_cleanup_arg {
- dsl_pool_t *dp;
- uint64_t dsobj;
- char htag[MAXNAMELEN];
-} zfs_hold_cleanup_arg_t;
-
-static void
-dsl_dataset_user_release_onexit(void *arg)
-{
- zfs_hold_cleanup_arg_t *ca = arg;
-
- (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
- B_TRUE);
- kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
-}
-
-void
-dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
- minor_t minor)
-{
- zfs_hold_cleanup_arg_t *ca;
-
- ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
- ca->dp = ds->ds_dir->dd_pool;
- ca->dsobj = ds->ds_object;
- (void) strlcpy(ca->htag, htag, sizeof (ca->htag));
- VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
- dsl_dataset_user_release_onexit, ca, NULL));
-}
-
-/*
- * If you add new checks here, you may need to add
- * additional checks to the "temporary" case in
- * snapshot_check() in dmu_objset.c.
- */
-static int
-dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- struct dsl_ds_holdarg *ha = arg2;
- const char *htag = ha->htag;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- int error = 0;
-
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
- return (ENOTSUP);
-
- if (!dsl_dataset_is_snapshot(ds))
- return (EINVAL);
-
- /* tags must be unique */
- mutex_enter(&ds->ds_lock);
- if (ds->ds_phys->ds_userrefs_obj) {
- error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
- 8, 1, tx);
- if (error == 0)
- error = EEXIST;
- else if (error == ENOENT)
- error = 0;
- }
- mutex_exit(&ds->ds_lock);
-
- if (error == 0 && ha->temphold &&
- strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
- error = E2BIG;
-
- return (error);
-}
-
-void
-dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- struct dsl_ds_holdarg *ha = arg2;
- const char *htag = ha->htag;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- uint64_t now = gethrestime_sec();
- uint64_t zapobj;
-
- mutex_enter(&ds->ds_lock);
- if (ds->ds_phys->ds_userrefs_obj == 0) {
- /*
- * This is the first user hold for this dataset. Create
- * the userrefs zap object.
- */
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- zapobj = ds->ds_phys->ds_userrefs_obj =
- zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
- } else {
- zapobj = ds->ds_phys->ds_userrefs_obj;
- }
- ds->ds_userrefs++;
- mutex_exit(&ds->ds_lock);
-
- VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
-
- if (ha->temphold) {
- VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
- htag, &now, tx));
- }
-
- spa_history_log_internal_ds(ds, "hold", tx,
- "tag = %s temp = %d holds now = %llu",
- htag, (int)ha->temphold, ds->ds_userrefs);
-}
-
-static int
-dsl_dataset_user_hold_one(const char *dsname, void *arg)
-{
- struct dsl_ds_holdarg *ha = arg;
- dsl_dataset_t *ds;
- int error;
- char *name;
-
- /* alloc a buffer to hold dsname@snapname plus terminating NULL */
- name = kmem_asprintf("%s@%s", dsname, ha->snapname);
- error = dsl_dataset_hold(name, ha->dstg, &ds);
- strfree(name);
- if (error == 0) {
- ha->gotone = B_TRUE;
- dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
- dsl_dataset_user_hold_sync, ds, ha, 0);
- } else if (error == ENOENT && ha->recursive) {
- error = 0;
- } else {
- (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
- }
- return (error);
-}
-
-int
-dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
- boolean_t temphold)
-{
- struct dsl_ds_holdarg *ha;
- int error;
-
- ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
- ha->htag = htag;
- ha->temphold = temphold;
- error = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
- ds, ha, 0);
- kmem_free(ha, sizeof (struct dsl_ds_holdarg));
-
- return (error);
-}
-
-int
-dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
- boolean_t recursive, boolean_t temphold, int cleanup_fd)
-{
- struct dsl_ds_holdarg *ha;
- dsl_sync_task_t *dst;
- spa_t *spa;
- int error;
- minor_t minor = 0;
-
- if (cleanup_fd != -1) {
- /* Currently we only support cleanup-on-exit of tempholds. */
- if (!temphold)
- return (EINVAL);
- error = zfs_onexit_fd_hold(cleanup_fd, &minor);
- if (error)
- return (error);
- }
-
- ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-
- (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-
- error = spa_open(dsname, &spa, FTAG);
- if (error) {
- kmem_free(ha, sizeof (struct dsl_ds_holdarg));
- if (cleanup_fd != -1)
- zfs_onexit_fd_rele(cleanup_fd);
- return (error);
- }
-
- ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- ha->htag = htag;
- ha->snapname = snapname;
- ha->recursive = recursive;
- ha->temphold = temphold;
-
- if (recursive) {
- error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
- ha, DS_FIND_CHILDREN);
- } else {
- error = dsl_dataset_user_hold_one(dsname, ha);
- }
- if (error == 0)
- error = dsl_sync_task_group_wait(ha->dstg);
-
- for (dst = list_head(&ha->dstg->dstg_tasks); dst;
- dst = list_next(&ha->dstg->dstg_tasks, dst)) {
- dsl_dataset_t *ds = dst->dst_arg1;
-
- if (dst->dst_err) {
- dsl_dataset_name(ds, ha->failed);
- *strchr(ha->failed, '@') = '\0';
- } else if (error == 0 && minor != 0 && temphold) {
- /*
- * If this hold is to be released upon process exit,
- * register that action now.
- */
- dsl_register_onexit_hold_cleanup(ds, htag, minor);
- }
- dsl_dataset_rele(ds, ha->dstg);
- }
-
- if (error == 0 && recursive && !ha->gotone)
- error = ENOENT;
-
- if (error)
- (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
-
- dsl_sync_task_group_destroy(ha->dstg);
-
- kmem_free(ha, sizeof (struct dsl_ds_holdarg));
- spa_close(spa, FTAG);
- if (cleanup_fd != -1)
- zfs_onexit_fd_rele(cleanup_fd);
- return (error);
-}
-
-struct dsl_ds_releasearg {
- dsl_dataset_t *ds;
- const char *htag;
- boolean_t own; /* do we own or just hold ds? */
-};
-
-static int
-dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
- boolean_t *might_destroy)
-{
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- uint64_t zapobj;
- uint64_t tmp;
- int error;
-
- *might_destroy = B_FALSE;
-
- mutex_enter(&ds->ds_lock);
- zapobj = ds->ds_phys->ds_userrefs_obj;
- if (zapobj == 0) {
- /* The tag can't possibly exist */
- mutex_exit(&ds->ds_lock);
- return (ESRCH);
- }
-
- /* Make sure the tag exists */
- error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
- if (error) {
- mutex_exit(&ds->ds_lock);
- if (error == ENOENT)
- error = ESRCH;
- return (error);
- }
-
- if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
- DS_IS_DEFER_DESTROY(ds))
- *might_destroy = B_TRUE;
-
- mutex_exit(&ds->ds_lock);
- return (0);
-}
-
-static int
-dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
-{
- struct dsl_ds_releasearg *ra = arg1;
- dsl_dataset_t *ds = ra->ds;
- boolean_t might_destroy;
- int error;
-
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
- return (ENOTSUP);
-
- error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
- if (error)
- return (error);
-
- if (might_destroy) {
- struct dsl_ds_destroyarg dsda = {0};
-
- if (dmu_tx_is_syncing(tx)) {
- /*
- * If we're not prepared to remove the snapshot,
- * we can't allow the release to happen right now.
- */
- if (!ra->own)
- return (EBUSY);
- }
- dsda.ds = ds;
- dsda.releasing = B_TRUE;
- return (dsl_dataset_destroy_check(&dsda, tag, tx));
- }
-
- return (0);
-}
-
static void
-dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
- struct dsl_ds_releasearg *ra = arg1;
- dsl_dataset_t *ds = ra->ds;
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- uint64_t zapobj;
- uint64_t refs;
- int error;
-
- mutex_enter(&ds->ds_lock);
- ds->ds_userrefs--;
- refs = ds->ds_userrefs;
- mutex_exit(&ds->ds_lock);
- error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
- VERIFY(error == 0 || error == ENOENT);
- zapobj = ds->ds_phys->ds_userrefs_obj;
- VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
-
- spa_history_log_internal_ds(ds, "release", tx,
- "tag = %s refs now = %lld", ra->htag, (longlong_t)refs);
-
- if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
- DS_IS_DEFER_DESTROY(ds)) {
- struct dsl_ds_destroyarg dsda = {0};
-
- ASSERT(ra->own);
- dsda.ds = ds;
- dsda.releasing = B_TRUE;
- /* We already did the destroy_check */
- dsl_dataset_destroy_sync(&dsda, tag, tx);
- }
-}
-
-static int
-dsl_dataset_user_release_one(const char *dsname, void *arg)
-{
- struct dsl_ds_holdarg *ha = arg;
- struct dsl_ds_releasearg *ra;
- dsl_dataset_t *ds;
- int error;
- void *dtag = ha->dstg;
- char *name;
- boolean_t own = B_FALSE;
- boolean_t might_destroy;
-
- /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
- name = kmem_asprintf("%s@%s", dsname, ha->snapname);
- error = dsl_dataset_hold(name, dtag, &ds);
- strfree(name);
- if (error == ENOENT && ha->recursive)
- return (0);
- (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
- if (error)
- return (error);
-
- ha->gotone = B_TRUE;
-
- ASSERT(dsl_dataset_is_snapshot(ds));
-
- error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
- if (error) {
- dsl_dataset_rele(ds, dtag);
- return (error);
- }
-
- if (might_destroy) {
-#ifdef _KERNEL
- name = kmem_asprintf("%s@%s", dsname, ha->snapname);
- error = zfs_unmount_snap(name, NULL);
- strfree(name);
- if (error) {
- dsl_dataset_rele(ds, dtag);
- return (error);
- }
-#endif
- if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
- dsl_dataset_rele(ds, dtag);
- return (EBUSY);
- } else {
- own = B_TRUE;
- dsl_dataset_make_exclusive(ds, dtag);
- }
- }
-
- ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
- ra->ds = ds;
- ra->htag = ha->htag;
- ra->own = own;
- dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
- dsl_dataset_user_release_sync, ra, dtag, 0);
-
- return (0);
-}
-
-int
-dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
- boolean_t recursive)
-{
- struct dsl_ds_holdarg *ha;
- dsl_sync_task_t *dst;
- spa_t *spa;
- int error;
-
-top:
- ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
-
- (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
-
- error = spa_open(dsname, &spa, FTAG);
- if (error) {
- kmem_free(ha, sizeof (struct dsl_ds_holdarg));
- return (error);
- }
-
- ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
- ha->htag = htag;
- ha->snapname = snapname;
- ha->recursive = recursive;
- if (recursive) {
- error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
- ha, DS_FIND_CHILDREN);
- } else {
- error = dsl_dataset_user_release_one(dsname, ha);
- }
- if (error == 0)
- error = dsl_sync_task_group_wait(ha->dstg);
-
- for (dst = list_head(&ha->dstg->dstg_tasks); dst;
- dst = list_next(&ha->dstg->dstg_tasks, dst)) {
- struct dsl_ds_releasearg *ra = dst->dst_arg1;
- dsl_dataset_t *ds = ra->ds;
-
- if (dst->dst_err)
- dsl_dataset_name(ds, ha->failed);
-
- if (ra->own)
- dsl_dataset_disown(ds, ha->dstg);
- else
- dsl_dataset_rele(ds, ha->dstg);
-
- kmem_free(ra, sizeof (struct dsl_ds_releasearg));
- }
-
- if (error == 0 && recursive && !ha->gotone)
- error = ENOENT;
-
- if (error && error != EBUSY)
- (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
-
- dsl_sync_task_group_destroy(ha->dstg);
- kmem_free(ha, sizeof (struct dsl_ds_holdarg));
- spa_close(spa, FTAG);
-
- /*
- * We can get EBUSY if we were racing with deferred destroy and
- * dsl_dataset_user_release_check() hadn't done the necessary
- * open context setup. We can also get EBUSY if we're racing
- * with destroy and that thread is the ds_owner. Either way
- * the busy condition should be transient, and we should retry
- * the release operation.
- */
- if (error == EBUSY)
- goto top;
-
- return (error);
-}
-
-/*
- * Called at spa_load time (with retry == B_FALSE) to release a stale
- * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
- */
-int
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
- boolean_t retry)
-{
- dsl_dataset_t *ds;
- char *snap;
- char *name;
- int namelen;
- int error;
-
- do {
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- if (error)
- return (error);
- namelen = dsl_dataset_namelen(ds)+1;
- name = kmem_alloc(namelen, KM_SLEEP);
- dsl_dataset_name(ds, name);
- dsl_dataset_rele(ds, FTAG);
-
- snap = strchr(name, '@');
- *snap = '\0';
- ++snap;
- error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
- kmem_free(name, namelen);
-
- /*
- * The object can't have been destroyed because we have a hold,
- * but it might have been renamed, resulting in ENOENT. Retry
- * if we've been requested to do so.
- *
- * It would be nice if we could use the dsobj all the way
- * through and avoid ENOENT entirely. But we might need to
- * unmount the snapshot, and there's currently no way to lookup
- * a vfsp using a ZFS object id.
- */
- } while ((error == ENOENT) && retry);
-
- return (error);
-}
-
-int
-dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
+dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
{
+ dsl_dataset_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
- int err;
- err = dsl_dataset_hold(dsname, FTAG, &ds);
- if (err)
- return (err);
-
- VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
- if (ds->ds_phys->ds_userrefs_obj != 0) {
- zap_attribute_t *za;
- zap_cursor_t zc;
-
- za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
- for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
- ds->ds_phys->ds_userrefs_obj);
- zap_cursor_retrieve(&zc, za) == 0;
- zap_cursor_advance(&zc)) {
- VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
- za->za_first_integer));
- }
- zap_cursor_fini(&zc);
- kmem_free(za, sizeof (zap_attribute_t));
- }
+ VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
+ dsl_dataset_set_refreservation_sync_impl(ds,
+ ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);
dsl_dataset_rele(ds, FTAG);
- return (0);
}
-/*
- * Note, this function is used as the callback for dmu_objset_find(). We
- * always return 0 so that we will continue to find and process
- * inconsistent datasets, even if we encounter an error trying to
- * process one of them.
- */
-/* ARGSUSED */
int
-dsl_destroy_inconsistent(const char *dsname, void *arg)
+dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
+ uint64_t refreservation)
{
- dsl_dataset_t *ds;
+ dsl_dataset_set_qr_arg_t ddsqra;
- if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
- if (DS_IS_INCONSISTENT(ds))
- (void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
- else
- dsl_dataset_disown(ds, FTAG);
- }
- return (0);
+ ddsqra.ddsqra_name = dsname;
+ ddsqra.ddsqra_source = source;
+ ddsqra.ddsqra_value = refreservation;
+
+ return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
+ dsl_dataset_set_refreservation_sync, &ddsqra, 0));
}
/*
@@ -4163,6 +2762,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
uint64_t snapobj;
dsl_pool_t *dp = new->ds_dir->dd_pool;
+ ASSERT(dsl_pool_config_held(dp));
+
*usedp = 0;
*usedp += new->ds_phys->ds_referenced_bytes;
*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
@@ -4175,7 +2776,6 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
*uncompp += new->ds_phys->ds_uncompressed_bytes;
*uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
snapobj = new->ds_object;
while (snapobj != oldsnap->ds_object) {
dsl_dataset_t *snap;
@@ -4224,7 +2824,6 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
}
}
- rw_exit(&dp->dp_config_rwlock);
return (err);
}
@@ -4266,7 +2865,6 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
*usedp = *compp = *uncompp = 0;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
snapobj = lastsnap->ds_phys->ds_next_snap_obj;
while (snapobj != firstsnap->ds_object) {
dsl_dataset_t *ds;
@@ -4287,6 +2885,42 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
ASSERT3U(snapobj, !=, 0);
dsl_dataset_rele(ds, FTAG);
}
- rw_exit(&dp->dp_config_rwlock);
return (err);
}
+
+/*
+ * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
+ * For example, they could both be snapshots of the same filesystem, and
+ * 'earlier' is before 'later'. Or 'earlier' could be the origin of
+ * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
+ * filesystem. Or 'earlier' could be the origin's origin.
+ */
+boolean_t
+dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier)
+{
+ dsl_pool_t *dp = later->ds_dir->dd_pool;
+ int error;
+ boolean_t ret;
+
+ ASSERT(dsl_pool_config_held(dp));
+
+ if (earlier->ds_phys->ds_creation_txg >=
+ later->ds_phys->ds_creation_txg)
+ return (B_FALSE);
+
+ if (later->ds_dir == earlier->ds_dir)
+ return (B_TRUE);
+ if (!dsl_dir_is_clone(later->ds_dir))
+ return (B_FALSE);
+
+ if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object)
+ return (B_TRUE);
+ dsl_dataset_t *origin;
+ error = dsl_dataset_hold_obj(dp,
+ later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin);
+ if (error != 0)
+ return (B_FALSE);
+ ret = dsl_dataset_is_before(origin, earlier);
+ dsl_dataset_rele(origin, FTAG);
+ return (ret);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_deleg.c b/usr/src/uts/common/fs/zfs/dsl_deleg.c
index ba620bd6fb..f09cb2f2ac 100644
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c
@@ -147,28 +147,37 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
return (0);
}
+typedef struct dsl_deleg_arg {
+ const char *dda_name;
+ nvlist_t *dda_nvlist;
+} dsl_deleg_arg_t;
+
static void
-dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
- nvlist_t *nvp = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
+ dsl_deleg_arg_t *dda = arg;
+ dsl_dir_t *dd;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
- uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
+ uint64_t zapobj;
+
+ VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
+ zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dmu_buf_will_dirty(dd->dd_dbuf, tx);
zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
}
- while (whopair = nvlist_next_nvpair(nvp, whopair)) {
+ while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
uint64_t jumpobj;
- VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
+ perms = fnvpair_value_nvlist(whopair);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
@@ -185,21 +194,27 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
+ dsl_dir_rele(dd, FTAG);
}
static void
-dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
- nvlist_t *nvp = arg2;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
+ dsl_deleg_arg_t *dda = arg;
+ dsl_dir_t *dd;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
- uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
+ uint64_t zapobj;
- if (zapobj == 0)
+ VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
+ zapobj = dd->dd_phys->dd_deleg_zapobj;
+ if (zapobj == 0) {
+ dsl_dir_rele(dd, FTAG);
return;
+ }
- while (whopair = nvlist_next_nvpair(nvp, whopair)) {
+ while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
@@ -234,35 +249,40 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
+ dsl_dir_rele(dd, FTAG);
}
-int
-dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
+static int
+dsl_deleg_check(void *arg, dmu_tx_t *tx)
{
+ dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
int error;
- nvpair_t *whopair = NULL;
- int blocks_modified = 0;
- error = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (error)
- return (error);
-
- if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
+ if (spa_version(dmu_tx_pool(tx)->dp_spa) <
SPA_VERSION_DELEGATED_PERMS) {
- dsl_dir_close(dd, FTAG);
return (ENOTSUP);
}
- while (whopair = nvlist_next_nvpair(nvp, whopair))
- blocks_modified++;
+ error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL);
+ if (error == 0)
+ dsl_dir_rele(dd, FTAG);
+ return (error);
+}
- error = dsl_sync_task_do(dd->dd_pool, NULL,
- unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
- dd, nvp, blocks_modified);
- dsl_dir_close(dd, FTAG);
+int
+dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
+{
+ dsl_deleg_arg_t dda;
- return (error);
+ /* nvp must already have been verified to be valid */
+
+ dda.dda_name = ddname;
+ dda.dda_nvlist = nvp;
+
+ return (dsl_sync_task(ddname, dsl_deleg_check,
+ unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
+ &dda, fnvlist_num_pairs(nvp)));
}
/*
@@ -290,16 +310,21 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
int error;
objset_t *mos;
- error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
- if (error)
+ error = dsl_pool_hold(ddname, FTAG, &dp);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
return (error);
+ }
dp = startdd->dd_pool;
mos = dp->dp_meta_objset;
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
zap_cursor_t basezc;
zap_attribute_t baseza;
@@ -307,15 +332,12 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
uint64_t n;
char source[MAXNAMELEN];
- if (dd->dd_phys->dd_deleg_zapobj &&
- (zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
- &n) == 0) && n) {
- VERIFY(nvlist_alloc(&sp_nvp,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
- } else {
+ if (dd->dd_phys->dd_deleg_zapobj == 0 ||
+ zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 ||
+ n == 0)
continue;
- }
+ sp_nvp = fnvlist_alloc();
for (zap_cursor_init(&basezc, mos,
dd->dd_phys->dd_deleg_zapobj);
zap_cursor_retrieve(&basezc, &baseza) == 0;
@@ -327,29 +349,26 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
ASSERT(baseza.za_integer_length == 8);
ASSERT(baseza.za_num_integers == 1);
- VERIFY(nvlist_alloc(&perms_nvp,
- NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ perms_nvp = fnvlist_alloc();
for (zap_cursor_init(&zc, mos, baseza.za_first_integer);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
- VERIFY(nvlist_add_boolean(perms_nvp,
- za.za_name) == 0);
+ fnvlist_add_boolean(perms_nvp, za.za_name);
}
zap_cursor_fini(&zc);
- VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name,
- perms_nvp) == 0);
- nvlist_free(perms_nvp);
+ fnvlist_add_nvlist(sp_nvp, baseza.za_name, perms_nvp);
+ fnvlist_free(perms_nvp);
}
zap_cursor_fini(&basezc);
dsl_dir_name(dd, source);
- VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
+ fnvlist_add_nvlist(*nvp, source, sp_nvp);
nvlist_free(sp_nvp);
}
- rw_exit(&dp->dp_config_rwlock);
- dsl_dir_close(startdd, FTAG);
+ dsl_dir_rele(startdd, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (0);
}
@@ -555,7 +574,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
offsetof(perm_set_t, p_node));
- rw_enter(&dp->dp_config_rwlock, RW_READER);
+ ASSERT(dsl_pool_config_held(dp));
for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
checkflag = ZFS_DELEG_DESCENDENT) {
uint64_t zapobj;
@@ -616,7 +635,6 @@ again:
}
error = EPERM;
success:
- rw_exit(&dp->dp_config_rwlock);
cookie = NULL;
while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@@ -628,15 +646,19 @@ success:
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
+ dsl_pool_t *dp;
dsl_dataset_t *ds;
int error;
- error = dsl_dataset_hold(dsname, FTAG, &ds);
- if (error)
+ error = dsl_pool_hold(dsname, FTAG, &dp);
+ if (error != 0)
return (error);
-
- error = dsl_deleg_access_impl(ds, perm, cr);
- dsl_dataset_rele(ds, FTAG);
+ error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+ if (error == 0) {
+ error = dsl_deleg_access_impl(ds, perm, cr);
+ dsl_dataset_rele(ds, FTAG);
+ }
+ dsl_pool_rele(dp, FTAG);
return (error);
}
diff --git a/usr/src/uts/common/fs/zfs/dsl_destroy.c b/usr/src/uts/common/fs/zfs/dsl_destroy.c
new file mode 100644
index 0000000000..20d401f259
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c
@@ -0,0 +1,926 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dsl_userhold.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dmu_tx.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_dir.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_scan.h>
+#include <sys/dmu_objset.h>
+#include <sys/zap.h>
+#include <sys/zfeature.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/dsl_deleg.h>
+
+/*
+ * Argument bundle handed to dsl_destroy_snapshot_check() and
+ * dsl_destroy_snapshot_sync(); it is filled in by dsl_destroy_snapshots_nvl().
+ */
+typedef struct dmu_snapshots_destroy_arg {
+ /* names of the snapshots the caller asked to destroy */
+ nvlist_t *dsda_snaps;
+ /* names that passed the check; the sync function iterates these */
+ nvlist_t *dsda_successful_snaps;
+ /* passed through to the check and sync implementations */
+ boolean_t dsda_defer;
+ /* snapshot name -> int32 error, added by the check function */
+ nvlist_t *dsda_errlist;
+} dmu_snapshots_destroy_arg_t;
+
+/*
+ * Decide whether the snapshot "ds" may be destroyed or, when "defer" is
+ * set, marked for deferred destruction.  ds must be owned.
+ *
+ * Returns 0 if the operation is allowed, otherwise:
+ *	EINVAL	ds is not a snapshot
+ *	EBUSY	ds is long-held, or (non-deferred) ds_userrefs > 0
+ *	ENOTSUP	defer requested but the pool predates SPA_VERSION_USERREFS
+ *	EEXIST	(non-deferred) ds_num_children > 1
+ */
+static int
+dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
+{
+	if (!dsl_dataset_is_snapshot(ds))
+		return (EINVAL);
+
+	if (dsl_dataset_long_held(ds))
+		return (EBUSY);
+
+	if (defer) {
+		/*
+		 * Deferred destroy is only allowed on pools that support
+		 * it; when it is, no further checks are made.
+		 * NOTE: deferred destroy is only supported on snapshots.
+		 */
+		spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+
+		return (spa_version(spa) < SPA_VERSION_USERREFS ? ENOTSUP : 0);
+	}
+
+	/*
+	 * A snapshot with an elevated user reference count can't be
+	 * destroyed yet.
+	 */
+	if (ds->ds_userrefs > 0)
+		return (EBUSY);
+
+	/* A branch point can't be deleted. */
+	return (ds->ds_phys->ds_num_children > 1 ? EEXIST : 0);
+}
+
+/*
+ * Check function for the snapshot-destroy sync task.  "arg" is a
+ * dmu_snapshots_destroy_arg_t.
+ *
+ * When the tx is not syncing nothing is examined and 0 is returned.
+ * Otherwise every name in dsda_snaps is looked up: names that do not exist
+ * are skipped, names that pass dsl_destroy_snapshot_check_impl() are added
+ * to dsda_successful_snaps, and every other name is added to dsda_errlist
+ * together with its error.  Returns the value of the first entry in
+ * dsda_errlist, or 0 if that list is empty.
+ */
+static int
+dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
+{
+	dmu_snapshots_destroy_arg_t *dsda = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvpair_t *pair;
+
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	pair = NULL;
+	while ((pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) != NULL) {
+		char *snapname = nvpair_name(pair);
+		dsl_dataset_t *ds;
+		int error;
+
+		error = dsl_dataset_hold(dp, snapname, FTAG, &ds);
+
+		/*
+		 * A snapshot that does not exist is silently ignored
+		 * (it's "already destroyed").
+		 */
+		if (error == ENOENT)
+			continue;
+
+		if (error == 0) {
+			error = dsl_destroy_snapshot_check_impl(ds,
+			    dsda->dsda_defer);
+			dsl_dataset_rele(ds, FTAG);
+		}
+
+		if (error != 0) {
+			fnvlist_add_int32(dsda->dsda_errlist, snapname, error);
+		} else {
+			fnvlist_add_boolean(dsda->dsda_successful_snaps,
+			    snapname);
+		}
+	}
+
+	/* Report the first recorded error, if there is one. */
+	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
+	return (pair == NULL ? 0 : fnvpair_value_int32(pair));
+}
+
+/* State handed to process_old_cb() by process_old_deadlist(). */
+struct process_old_arg {
+ /* dataset whose ds_prev_snap_txg and deadlist the callback uses */
+ dsl_dataset_t *ds;
+ /* may be NULL; the callback can add to its ds_unique_bytes */
+ dsl_dataset_t *ds_prev;
+ boolean_t after_branch_point;
+ /* root zio passed to dsl_free_sync() for each freed block */
+ zio_t *pio;
+ /* running totals accumulated for the blocks the callback frees */
+ uint64_t used, comp, uncomp;
+};
+
+/*
+ * bpobj_iterate() callback used by process_old_deadlist(); "arg" is a
+ * struct process_old_arg.
+ *
+ * A block born after poa->ds's previous snapshot is freed and its sizes are
+ * added to the used/comp/uncomp totals.  Any other block is inserted into
+ * poa->ds's deadlist and, if there is a previous snapshot, we are not after
+ * a branch point, and the block was born after that snapshot's own previous
+ * snapshot, its size is added to the previous snapshot's unique bytes.
+ * Always returns 0.
+ */
+static int
+process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	struct process_old_arg *poa = arg;
+	dsl_dataset_t *ds = poa->ds;
+	dsl_dataset_t *prev = poa->ds_prev;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
+		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
+		poa->comp += BP_GET_PSIZE(bp);
+		poa->uncomp += BP_GET_UCSIZE(bp);
+		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
+		return (0);
+	}
+
+	dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
+	if (prev != NULL && !poa->after_branch_point &&
+	    bp->blk_birth > prev->ds_phys->ds_prev_snap_txg) {
+		prev->ds_phys->ds_unique_bytes +=
+		    bp_get_dsize_sync(dp->dp_spa, bp);
+	}
+	return (0);
+}
+
+/*
+ * Handle the deadlists of "ds" and "ds_next" when both are in the old
+ * format (dl_oldfmt is asserted for each): run process_old_cb() over every
+ * block pointer in ds_next's deadlist, subtract the space that was freed
+ * from the dir's DD_USED_SNAP accounting, and then exchange the deadlist
+ * objects of ds and ds_next.
+ */
+static void
+process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
+ dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
+{
+ struct process_old_arg poa = { 0 };
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t deadlist_obj;
+
+ ASSERT(ds->ds_deadlist.dl_oldfmt);
+ ASSERT(ds_next->ds_deadlist.dl_oldfmt);
+
+ poa.ds = ds;
+ poa.ds_prev = ds_prev;
+ poa.after_branch_point = after_branch_point;
+ /* frees issued by the callback hang off this root zio */
+ poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
+ process_old_cb, &poa, tx));
+ VERIFY0(zio_wait(poa.pio));
+ ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
+
+ /* change snapused */
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
+ -poa.used, -poa.comp, -poa.uncomp, tx);
+
+ /* swap next's deadlist to our deadlist */
+ /* both deadlists are closed first and reopened on the swapped objects */
+ dsl_deadlist_close(&ds->ds_deadlist);
+ dsl_deadlist_close(&ds_next->ds_deadlist);
+ deadlist_obj = ds->ds_phys->ds_deadlist_obj;
+ ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
+ ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
+ dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
+ dsl_deadlist_open(&ds_next->ds_deadlist, mos,
+ ds_next->ds_phys->ds_deadlist_obj);
+}
+
+/*
+ * Walk the clones recorded in ds's dsl_dir (dd_clones) and, for every clone
+ * whose dd_origin_txg is greater than "mintxg", remove the key "mintxg"
+ * from the clone's deadlist and then recurse into that clone.
+ */
+static void
+dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	uint64_t clones_obj = ds->ds_dir->dd_phys->dd_clones;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+
+	/*
+	 * On the old version dd_clones doesn't exist, so the clones can't
+	 * be found; dsl_deadlist_remove_key() is a no-op there anyway, so
+	 * nothing is lost by returning early.
+	 */
+	if (clones_obj == 0)
+		return;
+
+	zap_cursor_init(&zc, dp->dp_meta_objset, clones_obj);
+	while (zap_cursor_retrieve(&zc, &za) == 0) {
+		dsl_dataset_t *clone;
+
+		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
+		    za.za_first_integer, FTAG, &clone));
+		if (clone->ds_dir->dd_origin_txg > mintxg) {
+			dsl_deadlist_remove_key(&clone->ds_deadlist,
+			    mintxg, tx);
+			dsl_dataset_remove_clones_key(clone, mintxg, tx);
+		}
+		dsl_dataset_rele(clone, FTAG);
+
+		zap_cursor_advance(&zc);
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*
+ * Destroy the snapshot "ds" in the transaction "tx".  If "defer" is set and
+ * the snapshot still has user references or more than one child, it is only
+ * flagged DS_FLAG_DEFER_DESTROY and the function returns.
+ *
+ * The caller must hold dp_config_rwlock as writer and ds must have no long
+ * holds (both are asserted).  On the non-deferred path the snapshot is
+ * unlinked from its neighbours, its deadlist is folded into the next
+ * dataset's, its name is removed from the head's snapshot namespace, and
+ * its objects are freed; ds->ds_dir is released and set to NULL.
+ */
+void
+dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
+{
+	int err;
+	int after_branch_point = FALSE;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	dsl_dataset_t *ds_prev = NULL;
+	uint64_t obj;
+
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
+	ASSERT(refcount_is_zero(&ds->ds_longholds));
+
+	if (defer &&
+	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
+		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
+		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
+		return;
+	}
+
+	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
+
+	/* We need to log before removing it from the namespace. */
+	spa_history_log_internal_ds(ds, "destroy", tx, "");
+
+	dsl_scan_ds_destroyed(ds, tx);
+
+	obj = ds->ds_object;
+
+	/* Unlink ourselves from the previous snapshot, if there is one. */
+	if (ds->ds_phys->ds_prev_snap_obj != 0) {
+		ASSERT3P(ds->ds_prev, ==, NULL);
+		VERIFY0(dsl_dataset_hold_obj(dp,
+		    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
+		after_branch_point =
+		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
+
+		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
+		if (after_branch_point &&
+		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
+			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
+			if (ds->ds_phys->ds_next_snap_obj != 0) {
+				VERIFY0(zap_add_int(mos,
+				    ds_prev->ds_phys->ds_next_clones_obj,
+				    ds->ds_phys->ds_next_snap_obj, tx));
+			}
+		}
+		if (!after_branch_point) {
+			ds_prev->ds_phys->ds_next_snap_obj =
+			    ds->ds_phys->ds_next_snap_obj;
+		}
+	}
+
+	dsl_dataset_t *ds_next;
+	uint64_t old_unique;
+	uint64_t used = 0, comp = 0, uncomp = 0;
+
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
+	ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
+
+	old_unique = ds_next->ds_phys->ds_unique_bytes;
+
+	/* Point the next dataset past us, at our previous snapshot. */
+	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
+	ds_next->ds_phys->ds_prev_snap_obj =
+	    ds->ds_phys->ds_prev_snap_obj;
+	ds_next->ds_phys->ds_prev_snap_txg =
+	    ds->ds_phys->ds_prev_snap_txg;
+	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
+	    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
+
+	if (ds_next->ds_deadlist.dl_oldfmt) {
+		process_old_deadlist(ds, ds_prev, ds_next,
+		    after_branch_point, tx);
+	} else {
+		/* Adjust prev's unique space. */
+		if (ds_prev && !after_branch_point) {
+			dsl_deadlist_space_range(&ds_next->ds_deadlist,
+			    ds_prev->ds_phys->ds_prev_snap_txg,
+			    ds->ds_phys->ds_prev_snap_txg,
+			    &used, &comp, &uncomp);
+			ds_prev->ds_phys->ds_unique_bytes += used;
+		}
+
+		/* Adjust snapused. */
+		dsl_deadlist_space_range(&ds_next->ds_deadlist,
+		    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
+		    &used, &comp, &uncomp);
+		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
+		    -used, -comp, -uncomp, tx);
+
+		/* Move blocks to be freed to pool's free list. */
+		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
+		    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
+		    tx);
+		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
+		    DD_USED_HEAD, used, comp, uncomp, tx);
+
+		/* Merge our deadlist into next's and free it. */
+		dsl_deadlist_merge(&ds_next->ds_deadlist,
+		    ds->ds_phys->ds_deadlist_obj, tx);
+	}
+	dsl_deadlist_close(&ds->ds_deadlist);
+	dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
+	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+	ds->ds_phys->ds_deadlist_obj = 0;
+
+	/* Collapse range in clone heads */
+	dsl_dataset_remove_clones_key(ds,
+	    ds->ds_phys->ds_creation_txg, tx);
+
+	if (dsl_dataset_is_snapshot(ds_next)) {
+		dsl_dataset_t *ds_nextnext;
+
+		/*
+		 * Update next's unique to include blocks which
+		 * were previously shared by only this snapshot
+		 * and it.  Those blocks will be born after the
+		 * prev snap and before this snap, and will have
+		 * died after the next snap and before the one
+		 * after that (ie. be on the snap after next's
+		 * deadlist).
+		 */
+		VERIFY0(dsl_dataset_hold_obj(dp,
+		    ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
+		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
+		    ds->ds_phys->ds_prev_snap_txg,
+		    ds->ds_phys->ds_creation_txg,
+		    &used, &comp, &uncomp);
+		ds_next->ds_phys->ds_unique_bytes += used;
+		dsl_dataset_rele(ds_nextnext, FTAG);
+		ASSERT3P(ds_next->ds_prev, ==, NULL);
+
+		/* Collapse range in this head. */
+		dsl_dataset_t *hds;
+		VERIFY0(dsl_dataset_hold_obj(dp,
+		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
+		dsl_deadlist_remove_key(&hds->ds_deadlist,
+		    ds->ds_phys->ds_creation_txg, tx);
+		dsl_dataset_rele(hds, FTAG);
+
+	} else {
+		/*
+		 * ds_next is not a snapshot: replace its cached ds_prev
+		 * hold on us with a hold on our previous snapshot, if any.
+		 */
+		ASSERT3P(ds_next->ds_prev, ==, ds);
+		dsl_dataset_rele(ds_next->ds_prev, ds_next);
+		ds_next->ds_prev = NULL;
+		if (ds_prev) {
+			VERIFY0(dsl_dataset_hold_obj(dp,
+			    ds->ds_phys->ds_prev_snap_obj,
+			    ds_next, &ds_next->ds_prev));
+		}
+
+		dsl_dataset_recalc_head_uniq(ds_next);
+
+		/*
+		 * Reduce the amount of our unconsumed refreservation
+		 * being charged to our parent by the amount of
+		 * new unique data we have gained.
+		 */
+		if (old_unique < ds_next->ds_reserved) {
+			int64_t mrsdelta;
+			uint64_t new_unique =
+			    ds_next->ds_phys->ds_unique_bytes;
+
+			ASSERT(old_unique <= new_unique);
+			mrsdelta = MIN(new_unique - old_unique,
+			    ds_next->ds_reserved - old_unique);
+			dsl_dir_diduse_space(ds->ds_dir,
+			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
+		}
+	}
+	dsl_dataset_rele(ds_next, FTAG);
+
+	/*
+	 * This must be done after the dsl_traverse(), because it will
+	 * re-open the objset.
+	 */
+	if (ds->ds_objset) {
+		dmu_objset_evict(ds->ds_objset);
+		ds->ds_objset = NULL;
+	}
+
+	/* remove from snapshot namespace */
+	dsl_dataset_t *ds_head;
+	ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
+	VERIFY0(dsl_dataset_get_snapname(ds));
+#ifdef ZFS_DEBUG
+	{
+		uint64_t val;
+
+		err = dsl_dataset_snap_lookup(ds_head,
+		    ds->ds_snapname, &val);
+		ASSERT0(err);
+		ASSERT3U(val, ==, obj);
+	}
+#endif
+	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
+	dsl_dataset_rele(ds_head, FTAG);
+
+	if (ds_prev != NULL)
+		dsl_dataset_rele(ds_prev, FTAG);
+
+	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
+
+	if (ds->ds_phys->ds_next_clones_obj != 0) {
+		uint64_t count;
+
+		/*
+		 * The next-clones ZAP must be empty before it is freed.
+		 * The lookup and the count are asserted separately: a
+		 * combined ASSERT0(zap_count(...) && count == 0) would be
+		 * satisfied whenever zap_count() returns 0, whatever the
+		 * count is.
+		 */
+		ASSERT0(zap_count(mos,
+		    ds->ds_phys->ds_next_clones_obj, &count));
+		ASSERT0(count);
+		VERIFY0(dmu_object_free(mos,
+		    ds->ds_phys->ds_next_clones_obj, tx));
+	}
+	if (ds->ds_phys->ds_props_obj != 0)
+		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
+	if (ds->ds_phys->ds_userrefs_obj != 0)
+		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
+	dsl_dir_rele(ds->ds_dir, ds);
+	ds->ds_dir = NULL;
+	VERIFY0(dmu_object_free(mos, obj, tx));
+}
+
+/*
+ * Sync function for the snapshot-destroy sync task.  "arg" is a
+ * dmu_snapshots_destroy_arg_t; every name in dsda_successful_snaps is held,
+ * passed to dsl_destroy_snapshot_sync_impl(), and released.
+ */
+static void
+dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
+{
+	dmu_snapshots_destroy_arg_t *dsda = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvpair_t *pair = NULL;
+
+	while ((pair = nvlist_next_nvpair(dsda->dsda_successful_snaps,
+	    pair)) != NULL) {
+		dsl_dataset_t *ds;
+
+		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
+
+		dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
+		dsl_dataset_rele(ds, FTAG);
+	}
+}
+
+/*
+ * Destroy every snapshot named in "snaps".  The full semantics are
+ * described in the comment above lzc_destroy_snaps(); in short:
+ *
+ * All of the snapshots must be in the same pool.
+ *
+ * A snapshot that doesn't exist is silently ignored (it is considered to
+ * be "already deleted").
+ *
+ * On success every snapshot is destroyed and 0 is returned.  On failure
+ * no snapshot is destroyed, "errlist" is filled in, and an errno is
+ * returned.
+ *
+ * An empty "snaps" list returns 0 without starting a sync task.
+ */
+int
+dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
+    nvlist_t *errlist)
+{
+	nvpair_t *first = nvlist_next_nvpair(snaps, NULL);
+
+	if (first == NULL)
+		return (0);
+
+	dmu_snapshots_destroy_arg_t dsda = {
+		.dsda_snaps = snaps,
+		.dsda_successful_snaps = fnvlist_alloc(),
+		.dsda_defer = defer,
+		.dsda_errlist = errlist
+	};
+
+	/* The first snapshot's name is the name passed to the sync task. */
+	int error = dsl_sync_task(nvpair_name(first),
+	    dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
+	    &dsda, 0);
+
+	fnvlist_free(dsda.dsda_successful_snaps);
+	return (error);
+}
+
+/*
+ * Destroy the single snapshot "name" by wrapping it in a one-entry nvlist
+ * and calling dsl_destroy_snapshots_nvl().  The per-snapshot error list is
+ * discarded; only the overall error is returned.
+ */
+int
+dsl_destroy_snapshot(const char *name, boolean_t defer)
+{
+	nvlist_t *errlist = fnvlist_alloc();
+	nvlist_t *snaps = fnvlist_alloc();
+	int error;
+
+	fnvlist_add_boolean(snaps, name);
+	error = dsl_destroy_snapshots_nvl(snaps, defer, errlist);
+
+	fnvlist_free(snaps);
+	fnvlist_free(errlist);
+	return (error);
+}
+
+/* Argument passed to kill_blkptr() by old_synchronous_dataset_destroy(). */
+struct killarg {
+ /* dataset whose blocks are being killed */
+ dsl_dataset_t *ds;
+ /* transaction the frees are charged to */
+ dmu_tx_t *tx;
+};
+
+/*
+ * traverse_dataset() callback used by old_synchronous_dataset_destroy();
+ * "arg" is a struct killarg.  A NULL bp is ignored.  A block at
+ * ZB_ZIL_LEVEL is freed directly with dsl_free(); any other block is
+ * handed to dsl_dataset_block_kill().  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+	struct killarg *ka = arg;
+	dmu_tx_t *tx = ka->tx;
+
+	if (bp == NULL)
+		return (0);
+
+	if (zb->zb_level != ZB_ZIL_LEVEL) {
+		ASSERT(zilog == NULL);
+		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
+		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
+		return (0);
+	}
+
+	/*
+	 * It's a block in the intent log.  It has no accounting, so
+	 * just free it.
+	 */
+	ASSERT(zilog != NULL);
+	dsl_free(tx->tx_pool, tx->tx_txg, bp);
+	return (0);
+}
+
+/*
+ * Free, within "tx", every block of "ds" that traverse_dataset() visits
+ * when started from ds's ds_prev_snap_txg, using kill_blkptr() as the
+ * per-block callback.
+ */
+static void
+old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	/*
+	 * Free everything that we point to (that's born after
+	 * the previous snapshot, if we are a clone)
+	 *
+	 * NB: this should be very quick, because we already
+	 * freed all the objects in open context.
+	 */
+	struct killarg ka = { .ds = ds, .tx = tx };
+
+	VERIFY0(traverse_dataset(ds,
+	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
+	    kill_blkptr, &ka));
+	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
+}
+
+/*
+ * Argument passed to the dsl_destroy_head_*() check and sync functions by
+ * dsl_destroy_head().
+ */
+typedef struct dsl_destroy_head_arg {
+ /* name of the head dataset to destroy; points at the caller's string */
+ const char *ddha_name;
+} dsl_destroy_head_arg_t;
+
+/*
+ * Decide whether the head dataset "ds" may be destroyed.
+ * "expected_holds" is the number of long holds ds is allowed to have.
+ *
+ * Returns 0 if the destroy may proceed, otherwise:
+ *	EINVAL	ds is a snapshot
+ *	EBUSY	the long-hold count differs from expected_holds, ds has
+ *		snapshots of its own, or the origin snapshot that would
+ *		have to be destroyed along with ds is long-held
+ *	EEXIST	ds's dsl_dir has child dirs
+ * or the error returned by zap_count().
+ */
+int
+dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
+{
+	dsl_dataset_t *origin = ds->ds_prev;
+	uint64_t nchildren;
+	objset_t *mos;
+	int error;
+
+	if (dsl_dataset_is_snapshot(ds))
+		return (EINVAL);
+
+	if (refcount_count(&ds->ds_longholds) != expected_holds)
+		return (EBUSY);
+
+	mos = ds->ds_dir->dd_pool->dp_meta_objset;
+
+	/*
+	 * A head dataset that has snapshots can't be deleted, unless the
+	 * only snapshots are from the branch we cloned from.
+	 */
+	if (origin != NULL &&
+	    origin->ds_phys->ds_next_snap_obj == ds->ds_object)
+		return (EBUSY);
+
+	/* A fs that has children can't be deleted. */
+	error = zap_count(mos,
+	    ds->ds_dir->dd_phys->dd_child_dir_zapobj, &nchildren);
+	if (error != 0)
+		return (error);
+	if (nchildren != 0)
+		return (EEXIST);
+
+	/*
+	 * If destroying this clone means the origin snapshot has to be
+	 * removed as well, the origin must not be long-held.
+	 */
+	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(origin) &&
+	    origin->ds_phys->ds_num_children == 2 &&
+	    origin->ds_userrefs == 0 &&
+	    !refcount_is_zero(&origin->ds_longholds))
+		return (EBUSY);
+
+	return (0);
+}
+
+/*
+ * Check function for the head-destroy sync tasks.  "arg" is a
+ * dsl_destroy_head_arg_t.  Holds the named dataset and returns the result
+ * of dsl_destroy_head_check_impl() with no long holds expected, or the
+ * error from the hold if the dataset could not be held.
+ */
+static int
+dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_destroy_head_arg_t *ddha = arg;
+	dsl_dataset_t *ds;
+	int error;
+
+	error = dsl_dataset_hold(dmu_tx_pool(tx), ddha->ddha_name, FTAG, &ds);
+	if (error == 0) {
+		error = dsl_destroy_head_check_impl(ds, 0);
+		dsl_dataset_rele(ds, FTAG);
+	}
+	return (error);
+}
+
+/*
+ * Destroy the dsl_dir whose object number is "ddobj": drop its reservation,
+ * destroy its child-dir, props and delegation ZAP objects, remove its entry
+ * from the parent's child-dir ZAP, and free the object itself.
+ *
+ * dp_config_rwlock must be held as writer, and the dir's head dataset must
+ * already be gone (dd_head_dataset_obj is asserted to be zero).
+ */
+static void
+dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
+{
+ dsl_dir_t *dd;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
+ dd_used_t t;
+
+ ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
+
+ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
+
+ ASSERT0(dd->dd_phys->dd_head_dataset_obj);
+
+ /*
+ * Remove our reservation. The impl() routine avoids setting the
+ * actual property, which would require the (already destroyed) ds.
+ */
+ dsl_dir_set_reservation_sync_impl(dd, 0, tx);
+
+ /* by now no space may be accounted to this dir in any category */
+ ASSERT0(dd->dd_phys->dd_used_bytes);
+ ASSERT0(dd->dd_phys->dd_reserved);
+ for (t = 0; t < DD_USED_NUM; t++)
+ ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
+
+ VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
+ VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
+ VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
+ VERIFY0(zap_remove(mos,
+ dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
+
+ /* drop our hold before freeing the object that backs the dir */
+ dsl_dir_rele(dd, FTAG);
+ VERIFY0(dmu_object_free(mos, ddobj, tx));
+}
+
+/*
+ * Destroy the head dataset "ds" in the transaction "tx".
+ * dp_config_rwlock must be held as writer (asserted).
+ *
+ * The dataset's blocks are either freed synchronously (when the
+ * async_destroy feature is not enabled) or handed to the pool's bptree.
+ * After the dataset object is freed its dsl_dir is destroyed with
+ * dsl_dir_destroy_sync().  If ds was a clone of a defer-destroyed origin
+ * snapshot that had ds_num_children == 2 and no user references, that
+ * origin snapshot is destroyed as well.
+ */
+void
+dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t obj, ddobj, prevobj = 0;
+ boolean_t rmorigin;
+
+ ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
+ ASSERT(ds->ds_prev == NULL ||
+ ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
+ ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
+ ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
+ /* We need to log before removing it from the namespace. */
+ spa_history_log_internal_ds(ds, "destroy", tx, "");
+
+ /*
+ * Decide now, before ds_prev's child count is decremented and ds_prev
+ * is released below, whether the origin must be destroyed at the end.
+ */
+ rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
+ DS_IS_DEFER_DESTROY(ds->ds_prev) &&
+ ds->ds_prev->ds_phys->ds_num_children == 2 &&
+ ds->ds_prev->ds_userrefs == 0);
+
+ /* Remove our reservation */
+ if (ds->ds_reserved != 0) {
+ dsl_dataset_set_refreservation_sync_impl(ds,
+ (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
+ 0, tx);
+ ASSERT0(ds->ds_reserved);
+ }
+
+ dsl_scan_ds_destroyed(ds, tx);
+
+ obj = ds->ds_object;
+
+ if (ds->ds_phys->ds_prev_snap_obj != 0) {
+ /* This is a clone */
+ ASSERT(ds->ds_prev != NULL);
+ ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
+ ASSERT0(ds->ds_phys->ds_next_snap_obj);
+
+ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
+ if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
+ dsl_dataset_remove_from_next_clones(ds->ds_prev,
+ obj, tx);
+ }
+
+ ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
+ ds->ds_prev->ds_phys->ds_num_children--;
+ }
+
+ zfeature_info_t *async_destroy =
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+ objset_t *os;
+
+ /*
+ * Destroy the deadlist. Unless it's a clone, the
+ * deadlist should be empty. (If it's a clone, it's
+ * safe to ignore the deadlist contents.)
+ */
+ dsl_deadlist_close(&ds->ds_deadlist);
+ dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ ds->ds_phys->ds_deadlist_obj = 0;
+
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+
+ if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
+ old_synchronous_dataset_destroy(ds, tx);
+ } else {
+ /*
+ * Move the bptree into the pool's list of trees to
+ * clean up and update space accounting information.
+ */
+ uint64_t used, comp, uncomp;
+
+ zil_destroy_sync(dmu_objset_zil(os), tx);
+
+ /* first use of the feature: create the pool-wide bptree object */
+ if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+ spa_feature_incr(dp->dp_spa, async_destroy, tx);
+ dp->dp_bptree_obj = bptree_alloc(mos, tx);
+ VERIFY0(zap_add(mos,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+ &dp->dp_bptree_obj, tx));
+ }
+
+ used = ds->ds_dir->dd_phys->dd_used_bytes;
+ comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
+ uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
+
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+ ds->ds_phys->ds_unique_bytes == used);
+
+ /* the space moves from this dir's accounting to dp_free_dir's */
+ bptree_add(mos, dp->dp_bptree_obj,
+ &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
+ used, comp, uncomp, tx);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+ -used, -comp, -uncomp, tx);
+ dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+ used, comp, uncomp, tx);
+ }
+
+ if (ds->ds_prev != NULL) {
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
+ VERIFY0(zap_remove_int(mos,
+ ds->ds_prev->ds_dir->dd_phys->dd_clones,
+ ds->ds_object, tx));
+ }
+ /* remember the origin's object number for the rmorigin step */
+ prevobj = ds->ds_prev->ds_object;
+ dsl_dataset_rele(ds->ds_prev, ds);
+ ds->ds_prev = NULL;
+ }
+
+ /*
+ * This must be done after the dsl_traverse(), because it will
+ * re-open the objset.
+ */
+ if (ds->ds_objset) {
+ dmu_objset_evict(ds->ds_objset);
+ ds->ds_objset = NULL;
+ }
+
+ /* Erase the link in the dir */
+ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
+ ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
+ ddobj = ds->ds_dir->dd_object;
+ ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
+ VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));
+
+ spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
+
+ ASSERT0(ds->ds_phys->ds_next_clones_obj);
+ ASSERT0(ds->ds_phys->ds_props_obj);
+ ASSERT0(ds->ds_phys->ds_userrefs_obj);
+ dsl_dir_rele(ds->ds_dir, ds);
+ ds->ds_dir = NULL;
+ VERIFY0(dmu_object_free(mos, obj, tx));
+
+ dsl_dir_destroy_sync(ddobj, tx);
+
+ if (rmorigin) {
+ dsl_dataset_t *prev;
+ VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
+ dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
+ dsl_dataset_rele(prev, FTAG);
+ }
+}
+
+/*
+ * Sync function for the head-destroy sync task.  "arg" is a
+ * dsl_destroy_head_arg_t; the named dataset is held, destroyed with
+ * dsl_destroy_head_sync_impl(), and released.
+ */
+static void
+dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds;
+	dsl_pool_t *dp;
+	dsl_destroy_head_arg_t *ddha;
+
+	ddha = arg;
+	dp = dmu_tx_pool(tx);
+
+	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
+	dsl_destroy_head_sync_impl(ds, tx);
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Sync function for the first stage of a head destroy on pools without
+ * async destroy.  "arg" is a dsl_destroy_head_arg_t; the named dataset is
+ * flagged DS_FLAG_INCONSISTENT on disk and a "destroy begin" history
+ * record is logged.
+ */
+static void
+dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds;
+	dsl_pool_t *dp;
+	dsl_destroy_head_arg_t *ddha;
+
+	ddha = arg;
+	dp = dmu_tx_pool(tx);
+
+	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
+
+	/*
+	 * The dataset is marked inconsistent on-disk in case we crash
+	 * before the destroy completes.
+	 */
+	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
+
+	spa_history_log_internal_ds(ds, "destroy begin", tx, "");
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Destroy the head dataset called "name".
+ *
+ * If the pool has the async_destroy feature enabled, a single sync task
+ * does all the work.  Otherwise the dataset is first marked inconsistent
+ * by a preliminary sync task, its objects are freed from open context, and
+ * only then is the destroying sync task run.
+ *
+ * Returns 0 on success, or the error from spa_open() or from whichever
+ * sync task failed.  A failure to own the objset for the open-context
+ * frees is not reported; the final sync task is attempted regardless.
+ */
+int
+dsl_destroy_head(const char *name)
+{
+	dsl_destroy_head_arg_t ddha;
+	boolean_t async_destroy;
+	spa_t *spa;
+	int error;
+
+#ifdef _KERNEL
+	zfs_destroy_unmount_origin(name);
+#endif
+
+	error = spa_open(name, &spa, FTAG);
+	if (error != 0)
+		return (error);
+	async_destroy = spa_feature_is_enabled(spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
+	spa_close(spa, FTAG);
+
+	ddha.ddha_name = name;
+
+	if (!async_destroy) {
+		objset_t *os;
+
+		error = dsl_sync_task(name, dsl_destroy_head_check,
+		    dsl_destroy_head_begin_sync, &ddha, 0);
+		if (error != 0)
+			return (error);
+
+		/*
+		 * Head deletion is processed in one txg on old pools;
+		 * the objects are removed from open context so that the
+		 * txg sync is not too long.
+		 */
+		if (dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG,
+		    &os) == 0) {
+			uint64_t prev_snap_txg =
+			    dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
+			uint64_t obj = 0;
+
+			/* Free object 0 first, then each object found. */
+			do {
+				(void) dmu_free_object(os, obj);
+			} while (dmu_object_next(os, &obj, FALSE,
+			    prev_snap_txg) == 0);
+
+			/* sync out all frees */
+			txg_wait_synced(dmu_objset_pool(os), 0);
+			dmu_objset_disown(os, FTAG);
+		}
+	}
+
+	return (dsl_sync_task(name, dsl_destroy_head_check,
+	    dsl_destroy_head_sync, &ddha, 0));
+}
+
+/*
+ * Destroy the dataset "dsname" if it is marked inconsistent.
+ *
+ * Note, this function is used as the callback for dmu_objset_find().  It
+ * always returns 0, so that the search continues to find and process
+ * inconsistent datasets even if an error is encountered while trying to
+ * process one of them.
+ */
+/* ARGSUSED */
+int
+dsl_destroy_inconsistent(const char *dsname, void *arg)
+{
+	objset_t *os;
+	boolean_t inconsistent;
+
+	if (dmu_objset_hold(dsname, FTAG, &os) != 0)
+		return (0);
+
+	/* Sample the flag, then drop the hold before destroying. */
+	inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+	dmu_objset_rele(os, FTAG);
+
+	if (inconsistent)
+		(void) dsl_destroy_head(dsname);
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 5ccb6862e9..1e7ba6d6cb 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -40,8 +40,6 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
- uint64_t value, dmu_tx_t *tx);
/* ARGSUSED */
static void
@@ -58,7 +56,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
- dsl_dir_close(dd->dd_parent, dd);
+ dsl_dir_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd);
@@ -72,18 +70,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
int
-dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
+dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **ddp)
{
dmu_buf_t *dbuf;
dsl_dir_t *dd;
int err;
- ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
- dsl_pool_sync_context(dp));
+ ASSERT(dsl_pool_config_held(dp));
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
- if (err)
+ if (err != 0)
return (err);
dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
@@ -110,9 +107,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_snap_cmtime_update(dd);
if (dd->dd_phys->dd_parent_obj) {
- err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
+ err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
- if (err)
+ if (err != 0)
goto errout;
if (tail) {
#ifdef ZFS_DEBUG
@@ -129,7 +126,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_parent->dd_phys->dd_child_dir_zapobj,
ddobj, 0, dd->dd_myname);
}
- if (err)
+ if (err != 0)
goto errout;
} else {
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
@@ -146,7 +143,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
*/
err = dmu_bonus_hold(dp->dp_meta_objset,
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
- if (err)
+ if (err != 0)
goto errout;
origin_phys = origin_bonus->db_data;
dd->dd_origin_txg =
@@ -158,7 +155,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_evict);
if (winner) {
if (dd->dd_parent)
- dsl_dir_close(dd->dd_parent, dd);
+ dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -185,7 +182,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
errout:
if (dd->dd_parent)
- dsl_dir_close(dd->dd_parent, dd);
+ dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -193,7 +190,7 @@ errout:
}
void
-dsl_dir_close(dsl_dir_t *dd, void *tag)
+dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
spa_close(dd->dd_pool->dp_spa, tag);
@@ -250,6 +247,7 @@ static int
getcomponent(const char *path, char *component, const char **nextp)
{
char *p;
+
if ((path == NULL) || (path[0] == '\0'))
return (ENOENT);
/* This would be a good place to reserve some namespace... */
@@ -272,10 +270,10 @@ getcomponent(const char *path, char *component, const char **nextp)
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
- if (p-path >= MAXNAMELEN)
+ if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
- component[p-path] = '\0';
+ component[p - path] = '\0';
p++;
} else if (p[0] == '@') {
/*
@@ -284,65 +282,54 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (EINVAL);
- if (p-path >= MAXNAMELEN)
+ if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
- component[p-path] = '\0';
+ component[p - path] = '\0';
} else {
- ASSERT(!"invalid p");
+ panic("invalid p=%p", (void *)p);
}
*nextp = p;
return (0);
}
/*
- * same as dsl_open_dir, ignore the first component of name and use the
- * spa instead
+ * Return the dsl_dir_t, and possibly the last component which couldn't
+ * be found in *tail. The name must be in the specified dsl_pool_t. This
+ * thread must hold the dp_config_rwlock for the pool. Returns NULL if the
+ * path is bogus, or if tail==NULL and we couldn't parse the whole name.
+ * (*tail)[0] == '@' means that the last component is a snapshot.
*/
int
-dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
+dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **ddp, const char **tailp)
{
char buf[MAXNAMELEN];
- const char *next, *nextnext = NULL;
+ const char *spaname, *next, *nextnext = NULL;
int err;
dsl_dir_t *dd;
- dsl_pool_t *dp;
uint64_t ddobj;
- int openedspa = FALSE;
-
- dprintf("%s\n", name);
err = getcomponent(name, buf, &next);
- if (err)
+ if (err != 0)
return (err);
- if (spa == NULL) {
- err = spa_open(buf, &spa, FTAG);
- if (err) {
- dprintf("spa_open(%s) failed\n", buf);
- return (err);
- }
- openedspa = TRUE;
- /* XXX this assertion belongs in spa_open */
- ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
- }
+ /* Make sure the name is in the specified pool. */
+ spaname = spa_name(dp->dp_spa);
+ if (strcmp(buf, spaname) != 0)
+ return (EINVAL);
- dp = spa_get_dsl(spa);
+ ASSERT(dsl_pool_config_held(dp));
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
- if (err) {
- rw_exit(&dp->dp_config_rwlock);
- if (openedspa)
- spa_close(spa, FTAG);
+ err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
+ if (err != 0) {
return (err);
}
while (next != NULL) {
dsl_dir_t *child_ds;
err = getcomponent(next, buf, &nextnext);
- if (err)
+ if (err != 0)
break;
ASSERT(next[0] != '\0');
if (next[0] == '@')
@@ -353,25 +340,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
err = zap_lookup(dp->dp_meta_objset,
dd->dd_phys->dd_child_dir_zapobj,
buf, sizeof (ddobj), 1, &ddobj);
- if (err) {
+ if (err != 0) {
if (err == ENOENT)
err = 0;
break;
}
- err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
- if (err)
+ err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
+ if (err != 0)
break;
- dsl_dir_close(dd, tag);
+ dsl_dir_rele(dd, tag);
dd = child_ds;
next = nextnext;
}
- rw_exit(&dp->dp_config_rwlock);
- if (err) {
- dsl_dir_close(dd, tag);
- if (openedspa)
- spa_close(spa, FTAG);
+ if (err != 0) {
+ dsl_dir_rele(dd, tag);
return (err);
}
@@ -382,30 +366,16 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
if (next != NULL &&
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
/* bad path name */
- dsl_dir_close(dd, tag);
+ dsl_dir_rele(dd, tag);
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
err = ENOENT;
}
- if (tailp)
+ if (tailp != NULL)
*tailp = next;
- if (openedspa)
- spa_close(spa, FTAG);
*ddp = dd;
return (err);
}
-/*
- * Return the dsl_dir_t, and possibly the last component which couldn't
- * be found in *tail. Return NULL if the path is bogus, or if
- * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@'
- * means that the last component is a snapshot.
- */
-int
-dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
-{
- return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
-}
-
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
dmu_tx_t *tx)
@@ -443,71 +413,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
return (ddobj);
}
-/* ARGSUSED */
-int
-dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- dsl_pool_t *dp = dd->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- int err;
- uint64_t count;
-
- /*
- * There should be exactly two holds, both from
- * dsl_dataset_destroy: one on the dd directory, and one on its
- * head ds. If there are more holds, then a concurrent thread is
- * performing a lookup inside this dir while we're trying to destroy
- * it. To minimize this possibility, we perform this check only
- * in syncing context and fail the operation if we encounter
- * additional holds. The dp_config_rwlock ensures that nobody else
- * opens it after we check.
- */
- if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
- return (EBUSY);
-
- err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
- if (err)
- return (err);
- if (count != 0)
- return (EEXIST);
-
- return (0);
-}
-
-void
-dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
-{
- dsl_dir_t *dd = arg1;
- objset_t *mos = dd->dd_pool->dp_meta_objset;
- uint64_t obj;
- dd_used_t t;
-
- ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
- ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
-
- /*
- * Remove our reservation. The impl() routine avoids setting the
- * actual property, which would require the (already destroyed) ds.
- */
- dsl_dir_set_reservation_sync_impl(dd, 0, tx);
-
- ASSERT0(dd->dd_phys->dd_used_bytes);
- ASSERT0(dd->dd_phys->dd_reserved);
- for (t = 0; t < DD_USED_NUM; t++)
- ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
-
- VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
- VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
- VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
- VERIFY(0 == zap_remove(mos,
- dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
-
- obj = dd->dd_object;
- dsl_dir_close(dd, tag);
- VERIFY(0 == dmu_object_free(mos, obj, tx));
-}
-
boolean_t
dsl_dir_is_clone(dsl_dir_t *dd)
{
@@ -545,18 +450,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
}
mutex_exit(&dd->dd_lock);
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
char buf[MAXNAMELEN];
- VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
+ VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dd->dd_phys->dd_origin_obj, FTAG, &ds));
dsl_dataset_name(ds, buf);
dsl_dataset_rele(ds, FTAG);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
- rw_exit(&dd->dd_pool->dp_config_rwlock);
}
void
@@ -566,7 +469,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
ASSERT(dd->dd_phys);
- if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
+ if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(dd->dd_dbuf, dd);
}
@@ -853,7 +756,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
FALSE, asize > usize, tr_list, tx, TRUE);
}
- if (err)
+ if (err != 0)
dsl_dir_tempreserve_clear(tr_list, tx);
else
*tr_cookiep = tr_list;
@@ -1004,115 +907,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
mutex_exit(&dd->dd_lock);
}
+typedef struct dsl_dir_set_qr_arg {
+ const char *ddsqra_name;
+ zprop_source_t ddsqra_source;
+ uint64_t ddsqra_value;
+} dsl_dir_set_qr_arg_t;
+
static int
-dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_setarg_t *psa = arg2;
- int err;
- uint64_t towrite;
+ dsl_dir_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ int error;
+ uint64_t towrite, newval;
- if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
- return (err);
+ error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+ if (error != 0)
+ return (error);
+
+ error = dsl_prop_predict(ds->ds_dir, "quota",
+ ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
- if (psa->psa_effective_value == 0)
+ if (newval == 0) {
+ dsl_dataset_rele(ds, FTAG);
return (0);
+ }
- mutex_enter(&dd->dd_lock);
+ mutex_enter(&ds->ds_dir->dd_lock);
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
* pending changes could under-estimate the amount of space to be
* freed up.
*/
- towrite = dsl_dir_space_towrite(dd);
+ towrite = dsl_dir_space_towrite(ds->ds_dir);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
- (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
- psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
- err = ENOSPC;
+ (newval < ds->ds_dir->dd_phys->dd_reserved ||
+ newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
+ error = ENOSPC;
}
- mutex_exit(&dd->dd_lock);
- return (err);
+ mutex_exit(&ds->ds_dir->dd_lock);
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
}
-extern dsl_syncfunc_t dsl_prop_set_sync;
-
static void
-dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t effective_value = psa->psa_effective_value;
+ dsl_dir_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ uint64_t newval;
- dsl_prop_set_sync(ds, psa, tx);
- DSL_PROP_CHECK_PREDICTION(dd, psa);
+ VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
+ ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+ &ddsqra->ddsqra_value, tx);
- mutex_enter(&dd->dd_lock);
- dd->dd_phys->dd_quota = effective_value;
- mutex_exit(&dd->dd_lock);
+ VERIFY0(dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
+
+ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
+ mutex_enter(&ds->ds_dir->dd_lock);
+ ds->ds_dir->dd_phys->dd_quota = newval;
+ mutex_exit(&ds->ds_dir->dd_lock);
+ dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
{
- dsl_dir_t *dd;
- dsl_dataset_t *ds;
- dsl_prop_setarg_t psa;
- int err;
-
- dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
-
- err = dsl_dataset_hold(ddname, FTAG, &ds);
- if (err)
- return (err);
+ dsl_dir_set_qr_arg_t ddsqra;
- err = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (err) {
- dsl_dataset_rele(ds, FTAG);
- return (err);
- }
+ ddsqra.ddsqra_name = ddname;
+ ddsqra.ddsqra_source = source;
+ ddsqra.ddsqra_value = quota;
- ASSERT(ds->ds_dir == dd);
-
- /*
- * If someone removes a file, then tries to set the quota, we want to
- * make sure the file freeing takes effect.
- */
- txg_wait_open(dd->dd_pool, 0);
-
- err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
- dsl_dir_set_quota_sync, ds, &psa, 0);
-
- dsl_dir_close(dd, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (err);
+ return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
+ dsl_dir_set_quota_sync, &ddsqra, 0));
}
int
-dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t effective_value;
- uint64_t used, avail;
- int err;
-
- if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
- return (err);
+ dsl_dir_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ dsl_dir_t *dd;
+ uint64_t newval, used, avail;
+ int error;
- effective_value = psa->psa_effective_value;
+ error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
+ if (error != 0)
+ return (error);
+ dd = ds->ds_dir;
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
- if (!dmu_tx_is_syncing(tx))
+ if (!dmu_tx_is_syncing(tx)) {
+ dsl_dataset_rele(ds, FTAG);
return (0);
+ }
+
+ error = dsl_prop_predict(ds->ds_dir,
+ zfs_prop_to_name(ZFS_PROP_RESERVATION),
+ ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
+ }
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
@@ -1125,21 +1036,21 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
- if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
- uint64_t delta = MAX(used, effective_value) -
+ if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
+ uint64_t delta = MAX(used, newval) -
MAX(used, dd->dd_phys->dd_reserved);
- if (delta > avail)
- return (ENOSPC);
- if (dd->dd_phys->dd_quota > 0 &&
- effective_value > dd->dd_phys->dd_quota)
- return (ENOSPC);
+ if (delta > avail ||
+ (dd->dd_phys->dd_quota > 0 &&
+ newval > dd->dd_phys->dd_quota))
+ error = ENOSPC;
}
- return (0);
+ dsl_dataset_rele(ds, FTAG);
+ return (error);
}
-static void
+void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
uint64_t used;
@@ -1162,48 +1073,38 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_dir_t *dd = ds->ds_dir;
- dsl_prop_setarg_t *psa = arg2;
- uint64_t value = psa->psa_effective_value;
+ dsl_dir_set_qr_arg_t *ddsqra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+ uint64_t newval;
+
+ VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
+
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
+ ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
+ &ddsqra->ddsqra_value, tx);
- dsl_prop_set_sync(ds, psa, tx);
- DSL_PROP_CHECK_PREDICTION(dd, psa);
+ VERIFY0(dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
- dsl_dir_set_reservation_sync_impl(dd, value, tx);
+ dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
+ dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation)
{
- dsl_dir_t *dd;
- dsl_dataset_t *ds;
- dsl_prop_setarg_t psa;
- int err;
-
- dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
-
- err = dsl_dataset_hold(ddname, FTAG, &ds);
- if (err)
- return (err);
-
- err = dsl_dir_open(ddname, FTAG, &dd, NULL);
- if (err) {
- dsl_dataset_rele(ds, FTAG);
- return (err);
- }
+ dsl_dir_set_qr_arg_t ddsqra;
- ASSERT(ds->ds_dir == dd);
+ ddsqra.ddsqra_name = ddname;
+ ddsqra.ddsqra_source = source;
+ ddsqra.ddsqra_value = reservation;
- err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
- dsl_dir_set_reservation_sync, ds, &psa, 0);
-
- dsl_dir_close(dd, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (err);
+ return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
+ dsl_dir_set_reservation_sync, &ddsqra, 0));
}
static dsl_dir_t *
@@ -1235,79 +1136,123 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
return (would_change(dd->dd_parent, delta, ancestor));
}
-struct renamearg {
- dsl_dir_t *newparent;
- const char *mynewname;
-};
+typedef struct dsl_dir_rename_arg {
+ const char *ddra_oldname;
+ const char *ddra_newname;
+} dsl_dir_rename_arg_t;
+/* ARGSUSED */
static int
-dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
- dsl_dir_t *dd = arg1;
- struct renamearg *ra = arg2;
- dsl_pool_t *dp = dd->dd_pool;
- objset_t *mos = dp->dp_meta_objset;
- int err;
- uint64_t val;
+ int *deltap = arg;
+ char namebuf[MAXNAMELEN];
- /*
- * There should only be one reference, from dmu_objset_rename().
- * Fleeting holds are also possible (eg, from "zfs list" getting
- * stats), but any that are present in open context will likely
- * be gone by syncing context, so only fail from syncing
- * context.
- */
- if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
- return (EBUSY);
+ dsl_dataset_name(ds, namebuf);
+
+ if (strlen(namebuf) + *deltap >= MAXNAMELEN)
+ return (ENAMETOOLONG);
+ return (0);
+}
+
+static int
+dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
+{
+ dsl_dir_rename_arg_t *ddra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd, *newparent;
+ const char *mynewname;
+ int error;
+ int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
- /* check for existing name */
- err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
- ra->mynewname, 8, 1, &val);
- if (err == 0)
+ /* target dir should exist */
+ error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
+ if (error != 0)
+ return (error);
+
+ /* new parent should exist */
+ error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
+ &newparent, &mynewname);
+ if (error != 0) {
+ dsl_dir_rele(dd, FTAG);
+ return (error);
+ }
+
+ /* can't rename to different pool */
+ if (dd->dd_pool != newparent->dd_pool) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ return (ENXIO);
+ }
+
+ /* new name should not already exist */
+ if (mynewname == NULL) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
return (EEXIST);
- if (err != ENOENT)
- return (err);
+ }
+
+ /* if the name length is growing, validate child name lengths */
+ if (delta > 0) {
+ error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
+ &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
+ if (error != 0) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ return (error);
+ }
+ }
- if (ra->newparent != dd->dd_parent) {
+ if (newparent != dd->dd_parent) {
/* is there enough space? */
uint64_t myspace =
MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendant */
- if (closest_common_ancestor(dd, ra->newparent) == dd)
+ if (closest_common_ancestor(dd, newparent) == dd) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
return (EINVAL);
+ }
- if (err = dsl_dir_transfer_possible(dd->dd_parent,
- ra->newparent, myspace))
- return (err);
+ error = dsl_dir_transfer_possible(dd->dd_parent,
+ newparent, myspace);
+ if (error != 0) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ return (error);
+ }
}
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
return (0);
}
static void
-dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
- dsl_dir_t *dd = arg1;
- struct renamearg *ra = arg2;
- dsl_pool_t *dp = dd->dd_pool;
+ dsl_dir_rename_arg_t *ddra = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd, *newparent;
+ const char *mynewname;
+ int error;
objset_t *mos = dp->dp_meta_objset;
- int err;
- char namebuf[MAXNAMELEN];
- ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
+ VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
+ VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
+ &mynewname));
/* Log this before we change the name. */
- dsl_dir_name(ra->newparent, namebuf);
spa_history_log_internal_dd(dd, "rename", tx,
- "-> %s/%s", namebuf, ra->mynewname);
+ "-> %s", ddra->ddra_newname);
- if (ra->newparent != dd->dd_parent) {
+ if (newparent != dd->dd_parent) {
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-dd->dd_phys->dd_used_bytes,
-dd->dd_phys->dd_compressed_bytes,
-dd->dd_phys->dd_uncompressed_bytes, tx);
- dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
+ dsl_dir_diduse_space(newparent, DD_USED_CHILD,
dd->dd_phys->dd_used_bytes,
dd->dd_phys->dd_compressed_bytes,
dd->dd_phys->dd_uncompressed_bytes, tx);
@@ -1318,7 +1263,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
-unused_rsrv, 0, 0, tx);
- dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
+ dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
unused_rsrv, 0, 0, tx);
}
}
@@ -1326,52 +1271,36 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(dd->dd_dbuf, tx);
/* remove from old parent zapobj */
- err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
+ error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, tx);
- ASSERT0(err);
+ ASSERT0(error);
- (void) strcpy(dd->dd_myname, ra->mynewname);
- dsl_dir_close(dd->dd_parent, dd);
- dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
- VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
- ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
+ (void) strcpy(dd->dd_myname, mynewname);
+ dsl_dir_rele(dd->dd_parent, dd);
+ dd->dd_phys->dd_parent_obj = newparent->dd_object;
+ VERIFY0(dsl_dir_hold_obj(dp,
+ newparent->dd_object, NULL, dd, &dd->dd_parent));
/* add to new parent zapobj */
- err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
- dd->dd_myname, 8, 1, &dd->dd_object, tx);
- ASSERT0(err);
+ VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
+ dd->dd_myname, 8, 1, &dd->dd_object, tx));
+
+ dsl_prop_notify_all(dd);
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
}
int
-dsl_dir_rename(dsl_dir_t *dd, const char *newname)
+dsl_dir_rename(const char *oldname, const char *newname)
{
- struct renamearg ra;
- int err;
+ dsl_dir_rename_arg_t ddra;
- /* new parent should exist */
- err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
- if (err)
- return (err);
+ ddra.ddra_oldname = oldname;
+ ddra.ddra_newname = newname;
- /* can't rename to different pool */
- if (dd->dd_pool != ra.newparent->dd_pool) {
- err = ENXIO;
- goto out;
- }
-
- /* new name should not already exist */
- if (ra.mynewname == NULL) {
- err = EEXIST;
- goto out;
- }
-
- err = dsl_sync_task_do(dd->dd_pool,
- dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
-
-out:
- dsl_dir_close(ra.newparent, FTAG);
- return (err);
+ return (dsl_sync_task(oldname,
+ dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
}
int
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 38d656a43b..6af631679e 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -43,6 +43,7 @@
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
+#include <sys/dsl_userhold.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@@ -69,7 +70,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
if (err)
return (err);
- return (dsl_dir_open_obj(dp, obj, name, dp, ddp));
+ return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
}
static dsl_pool_t *
@@ -81,7 +82,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
dp->dp_spa = spa;
dp->dp_meta_rootbp = *bp;
- rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
+ rrw_init(&dp->dp_config_rwlock, B_TRUE);
dp->dp_write_limit = zfs_write_limit_min;
txg_init(dp, txg);
@@ -92,7 +93,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_dirty_dirs,
offsetof(dsl_dir_t, dd_dirty_link));
txg_list_create(&dp->dp_sync_tasks,
- offsetof(dsl_sync_task_group_t, dstg_node));
+ offsetof(dsl_sync_task_t, dst_node));
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -126,14 +127,14 @@ dsl_pool_open(dsl_pool_t *dp)
dsl_dataset_t *ds;
uint64_t obj;
- rw_enter(&dp->dp_config_rwlock, RW_WRITER);
+ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
if (err)
goto out;
- err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
+ err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir);
if (err)
goto out;
@@ -154,7 +155,7 @@ dsl_pool_open(dsl_pool_t *dp)
&dp->dp_origin_snap);
dsl_dataset_rele(ds, FTAG);
}
- dsl_dir_close(dd, dp);
+ dsl_dir_rele(dd, dp);
if (err)
goto out;
}
@@ -169,7 +170,7 @@ dsl_pool_open(dsl_pool_t *dp)
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
if (err)
goto out;
- VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
+ VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -202,7 +203,7 @@ dsl_pool_open(dsl_pool_t *dp)
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
- rw_exit(&dp->dp_config_rwlock);
+ rrw_exit(&dp->dp_config_rwlock, FTAG);
return (err);
}
@@ -217,13 +218,13 @@ dsl_pool_close(dsl_pool_t *dp)
* and not a hold, so just drop that here.
*/
if (dp->dp_origin_snap)
- dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
+ dsl_dataset_rele(dp->dp_origin_snap, dp);
if (dp->dp_mos_dir)
- dsl_dir_close(dp->dp_mos_dir, dp);
+ dsl_dir_rele(dp->dp_mos_dir, dp);
if (dp->dp_free_dir)
- dsl_dir_close(dp->dp_free_dir, dp);
+ dsl_dir_rele(dp->dp_free_dir, dp);
if (dp->dp_root_dir)
- dsl_dir_close(dp->dp_root_dir, dp);
+ dsl_dir_rele(dp->dp_root_dir, dp);
bpobj_close(&dp->dp_free_bpobj);
@@ -239,7 +240,7 @@ dsl_pool_close(dsl_pool_t *dp)
arc_flush(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
- rw_destroy(&dp->dp_config_rwlock);
+ rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_vnrele_taskq);
if (dp->dp_blkstats)
@@ -257,6 +258,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dsl_dataset_t *ds;
uint64_t obj;
+ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
@@ -267,30 +270,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
ASSERT0(err);
/* Initialize scan structures */
- VERIFY3U(0, ==, dsl_scan_init(dp, txg));
+ VERIFY0(dsl_scan_init(dp, txg));
/* create and open the root dir */
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
- VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
+ VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir));
/* create and open the meta-objset dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
- VERIFY(0 == dsl_pool_open_special_dir(dp,
+ VERIFY0(dsl_pool_open_special_dir(dp,
MOS_DIR_NAME, &dp->dp_mos_dir));
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
/* create and open the free dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
FREE_DIR_NAME, tx);
- VERIFY(0 == dsl_pool_open_special_dir(dp,
+ VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
- VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
+ VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@@ -301,7 +304,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
/* create the root objset */
- VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
#ifdef _KERNEL
@@ -311,6 +314,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dmu_tx_commit(tx);
+ rrw_exit(&dp->dp_config_rwlock, FTAG);
+
return (dp);
}
@@ -333,10 +338,7 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
- dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
- rw_exit(&dp->dp_config_rwlock);
return (0);
}
@@ -358,7 +360,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
/*
* We need to copy dp_space_towrite() before doing
- * dsl_sync_task_group_sync(), because
+ * dsl_sync_task_sync(), because
* dsl_dataset_snapshot_reserve_space() will increase
* dp_space_towrite but not actually write anything.
*/
@@ -472,14 +474,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
DTRACE_PROBE(pool_sync__3task);
if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
- dsl_sync_task_group_t *dstg;
+ dsl_sync_task_t *dst;
/*
* No more sync tasks should have been added while we
* were syncing.
*/
ASSERT(spa_sync_pass(dp->dp_spa) == 1);
- while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
- dsl_sync_task_group_sync(dstg, tx);
+ while (dst = txg_list_remove(&dp->dp_sync_tasks, txg))
+ dsl_sync_task_sync(dst, tx);
}
dmu_tx_commit(tx);
@@ -654,14 +656,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
/* ARGSUSED */
static int
-upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds, *prev = NULL;
int err;
- dsl_pool_t *dp = spa_get_dsl(spa);
- err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@@ -687,7 +688,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
- ASSERT(prev->ds_phys->ds_bp.blk_birth == 0);
+ ASSERT0(prev->ds_phys->ds_bp.blk_birth);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {
@@ -707,13 +708,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
if (ds->ds_phys->ds_next_snap_obj == 0) {
ASSERT(ds->ds_prev == NULL);
- VERIFY(0 == dsl_dataset_hold_obj(dp,
+ VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
}
}
- ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object);
- ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
+ ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object);
+ ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
dmu_buf_will_dirty(prev->ds_dbuf, tx);
@@ -721,7 +722,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
}
- VERIFY(0 == zap_add_int(dp->dp_meta_objset,
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx));
dsl_dataset_rele(ds, FTAG);
@@ -736,25 +737,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
- VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
tx, DS_FIND_CHILDREN));
}
/* ARGSUSED */
static int
-upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dmu_tx_t *tx = arg;
- dsl_dataset_t *ds;
- dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-
- if (ds->ds_dir->dd_phys->dd_origin_obj) {
+ if (ds->ds_dir->dd_phys->dd_origin_obj != 0) {
dsl_dataset_t *origin;
- VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
+ VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
if (origin->ds_dir->dd_phys->dd_clones == 0) {
@@ -763,13 +760,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
- VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
- origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
+ VERIFY0(zap_add_int(dp->dp_meta_objset,
+ origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx));
dsl_dataset_rele(origin, FTAG);
}
-
- dsl_dataset_rele(ds, FTAG);
return (0);
}
@@ -780,7 +775,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
uint64_t obj;
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
- VERIFY(0 == dsl_pool_open_special_dir(dp,
+ VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/*
@@ -790,12 +785,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
*/
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
- VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
- VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
- dp->dp_meta_objset, obj));
+ VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
- VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
}
@@ -807,17 +801,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap == NULL);
+ ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
- rw_enter(&dp->dp_config_rwlock, RW_WRITER);
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx);
- VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
- dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
- VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
+ VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+ dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
+ VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
dp, &dp->dp_origin_snap));
dsl_dataset_rele(ds, FTAG);
- rw_exit(&dp->dp_config_rwlock);
}
taskq_t *
@@ -852,7 +845,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
- (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
+ dsl_dataset_user_release_tmp(dp, dsobj, htag);
}
zap_cursor_fini(&zc);
}
@@ -874,7 +867,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
static int
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
- const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
+ const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
{
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
@@ -899,7 +892,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
if (holding)
- error = zap_add(mos, zapobj, name, 8, 1, now, tx);
+ error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
else
error = zap_remove(mos, zapobj, name, tx);
strfree(name);
@@ -912,7 +905,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
*/
int
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
- uint64_t *now, dmu_tx_t *tx)
+ uint64_t now, dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
}
@@ -927,3 +920,106 @@ dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
tx, B_FALSE));
}
+
+/*
+ * DSL Pool Configuration Lock
+ *
+ * The dp_config_rwlock protects against changes to DSL state (e.g. dataset
+ * creation / destruction / rename / property setting). It must be held for
+ * read to hold a dataset or dsl_dir. I.e. you must call
+ * dsl_pool_config_enter() or dsl_pool_hold() before calling
+ * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock
+ * must be held continuously until all datasets and dsl_dirs are released.
+ *
+ * The only exception to this rule is that if a "long hold" is placed on
+ * a dataset, then the dp_config_rwlock may be dropped while the dataset
+ * is still held. The long hold will prevent the dataset from being
+ * destroyed -- the destroy will fail with EBUSY. A long hold can be
+ * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
+ * (by calling dsl_{dataset,objset}_{try}own{_obj}).
+ *
+ * Legitimate long-holders (including owners) should be long-running, cancelable
+ * tasks that should cause "zfs destroy" to fail. This includes DMU
+ * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
+ * "zfs send", and "zfs diff". There are several other long-holders whose
+ * uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
+ *
+ * The usual formula for long-holding would be:
+ * dsl_pool_hold()
+ * dsl_dataset_hold()
+ * ... perform checks ...
+ * dsl_dataset_long_hold()
+ * dsl_pool_rele()
+ * ... perform long-running task ...
+ * dsl_dataset_long_rele()
+ * dsl_dataset_rele()
+ *
+ * Note that when the long hold is released, the dataset is still held but
+ * the pool is not held. The dataset may change arbitrarily during this time
+ * (e.g. it could be destroyed). Therefore you shouldn't do anything to the
+ * dataset except release it.
+ *
+ * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
+ * or modifying operations.
+ *
+ * Modifying operations should generally use dsl_sync_task(). The synctask
+ * infrastructure enforces proper locking strategy with respect to the
+ * dp_config_rwlock. See the comment above dsl_sync_task() for details.
+ *
+ * Read-only operations will manually hold the pool, then the dataset, obtain
+ * information from the dataset, then release the pool and dataset.
+ * dmu_objset_{hold,rele}() are convenience routines that also do the pool
+ * hold/rele.
+ */
+
+int
+dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
+{ /* Open the pool given by name and enter its config lock; returns spa_open()'s result. */
+ spa_t *spa;
+ int error; /* spa_open() status, handed back to the caller unchanged */
+
+ error = spa_open(name, &spa, tag);
+ if (error == 0) { /* *dp is written and the lock entered only on success */
+ *dp = spa_get_dsl(spa);
+ dsl_pool_config_enter(*dp, tag); /* RW_READER hold; dsl_pool_rele() undoes it */
+ }
+ return (error);
+}
+
+void
+dsl_pool_rele(dsl_pool_t *dp, void *tag)
+{ /* Counterpart of dsl_pool_hold(): undo its two steps in reverse order. */
+ dsl_pool_config_exit(dp, tag); /* drop the config lock first */
+ spa_close(dp->dp_spa, tag); /* then close the spa that dsl_pool_hold() opened */
+}
+
+void
+dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
+{
+ /*
+ * We use a "reentrant" reader-writer lock, but not reentrantly.
+ *
+ * The rrwlock can (with the track_all flag) track all reading threads,
+ * which is very useful for debugging which code path failed to release
+ * the lock, and for verifying that the *current* thread does hold
+ * the lock.
+ *
+ * (A plain rwlock, by contrast, knows only that N threads hold it for
+ * read, not *which* threads, so rw_held(RW_READER) returns TRUE
+ * if any thread holds it for read, even if this thread doesn't.)
+ */
+ ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); /* asserts there is no existing read hold, i.e. no nested entry */
+ rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); /* always entered as reader, recorded under tag */
+}
+
+void
+dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
+{ /* Counterpart of dsl_pool_config_enter(). */
+ rrw_exit(&dp->dp_config_rwlock, tag); /* NOTE(review): presumably tag must match the one passed on enter -- confirm in rrwlock */
+}
+
+boolean_t
+dsl_pool_config_held(dsl_pool_t *dp)
+{ /* Predicate used in ASSERTs by code that requires the pool config lock. */
+ return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); /* NOTE(review): presumably true for a read or a write hold -- confirm against RRW_LOCK_HELD's definition */
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c
index 5bbe14ff69..cd7c3ecb5f 100644
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c
@@ -82,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
char *inheritstr;
char *recvdstr;
- ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
+ ASSERT(dsl_pool_config_held(dd->dd_pool));
if (setpoint)
setpoint[0] = '\0';
@@ -97,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
* after this loop.
*/
for (; dd != NULL; dd = dd->dd_parent) {
- ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
-
if (dd != target || snapshot) {
if (!inheritable)
break;
@@ -167,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
boolean_t snapshot;
uint64_t zapobj;
- ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock));
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds));
zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj);
@@ -235,18 +233,12 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
uint64_t value;
dsl_prop_cb_record_t *cbr;
int err;
- int need_rwlock;
- need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock);
- if (need_rwlock)
- rw_enter(&dp->dp_config_rwlock, RW_READER);
+ ASSERT(dsl_pool_config_held(dp));
- err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL);
- if (err != 0) {
- if (need_rwlock)
- rw_exit(&dp->dp_config_rwlock);
+ err = dsl_prop_get_int_ds(ds, propname, &value);
+ if (err != 0)
return (err);
- }
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
cbr->cbr_ds = ds;
@@ -259,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
mutex_exit(&dd->dd_lock);
cbr->cbr_func(cbr->cbr_arg, value);
-
- if (need_rwlock)
- rw_exit(&dp->dp_config_rwlock);
return (0);
}
@@ -269,19 +258,18 @@ int
dsl_prop_get(const char *dsname, const char *propname,
int intsz, int numints, void *buf, char *setpoint)
{
- dsl_dataset_t *ds;
- int err;
+ objset_t *os;
+ int error;
- err = dsl_dataset_hold(dsname, FTAG, &ds);
- if (err)
- return (err);
+ error = dmu_objset_hold(dsname, FTAG, &os);
+ if (error != 0)
+ return (error);
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint);
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+ error = dsl_prop_get_ds(dmu_objset_ds(os), propname,
+ intsz, numints, buf, setpoint);
- dsl_dataset_rele(ds, FTAG);
- return (err);
+ dmu_objset_rele(os, FTAG);
+ return (error);
}
/*
@@ -299,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname,
return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
}
-void
-dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
- zprop_source_t source, uint64_t *value)
+int
+dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname,
+ uint64_t *valuep)
{
- psa->psa_name = propname;
- psa->psa_source = source;
- psa->psa_intsz = 8;
- psa->psa_numints = 1;
- psa->psa_value = value;
-
- psa->psa_effective_value = -1ULL;
+ return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL));
}
/*
@@ -323,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
* a property not handled by this function.
*/
int
-dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
+dsl_prop_predict(dsl_dir_t *dd, const char *propname,
+ zprop_source_t source, uint64_t value, uint64_t *newvalp)
{
- const char *propname = psa->psa_name;
zfs_prop_t prop = zfs_name_to_prop(propname);
- zprop_source_t source = psa->psa_source;
objset_t *mos;
uint64_t zapobj;
uint64_t version;
@@ -359,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
switch (source) {
case ZPROP_SRC_NONE:
/* Revert to the received value, if any. */
- err = zap_lookup(mos, zapobj, recvdstr, 8, 1,
- &psa->psa_effective_value);
+ err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
if (err == ENOENT)
- psa->psa_effective_value = 0;
+ *newvalp = 0;
break;
case ZPROP_SRC_LOCAL:
- psa->psa_effective_value = *(uint64_t *)psa->psa_value;
+ *newvalp = value;
break;
case ZPROP_SRC_RECEIVED:
/*
* If there's no local setting, then the new received value will
* be the effective value.
*/
- err = zap_lookup(mos, zapobj, propname, 8, 1,
- &psa->psa_effective_value);
+ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
- psa->psa_effective_value = *(uint64_t *)psa->psa_value;
+ *newvalp = value;
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
/*
* We're clearing the received value, so the local setting (if
* it exists) remains the effective value.
*/
- err = zap_lookup(mos, zapobj, propname, 8, 1,
- &psa->psa_effective_value);
+ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
- psa->psa_effective_value = 0;
+ *newvalp = 0;
break;
default:
- cmn_err(CE_PANIC, "unexpected property source: %d", source);
+ panic("unexpected property source: %d", source);
}
strfree(recvdstr);
@@ -399,37 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
return (err);
}
-#ifdef ZFS_DEBUG
-void
-dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
-{
- zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
- uint64_t intval;
- char setpoint[MAXNAMELEN];
- uint64_t version = spa_version(dd->dd_pool->dp_spa);
- int err;
-
- if (version < SPA_VERSION_RECVD_PROPS) {
- switch (prop) {
- case ZFS_PROP_QUOTA:
- case ZFS_PROP_RESERVATION:
- return;
- }
- }
-
- err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval,
- setpoint, B_FALSE);
- if (err == 0 && intval != psa->psa_effective_value) {
- cmn_err(CE_PANIC, "%s property, source: %x, "
- "predicted effective value: %llu, "
- "actual effective value: %llu (setpoint: %s)",
- psa->psa_name, psa->psa_source,
- (unsigned long long)psa->psa_effective_value,
- (unsigned long long)intval, setpoint);
- }
-}
-#endif
-
/*
* Unregister this callback. Return 0 on success, ENOENT if ddname is
* invalid, ENOMSG if no matching callback registered.
@@ -464,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (0);
}
-/*
- * Return the number of callbacks that are registered for this dataset.
- */
-int
-dsl_prop_numcb(dsl_dataset_t *ds)
+boolean_t
+dsl_prop_hascb(dsl_dataset_t *ds)
{
dsl_dir_t *dd = ds->ds_dir;
+ boolean_t rv = B_FALSE;
dsl_prop_cb_record_t *cbr;
- int num = 0;
mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs);
- cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (cbr->cbr_ds == ds)
- num++;
+ for (cbr = list_head(&dd->dd_prop_cbs); cbr;
+ cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+ if (cbr->cbr_ds == ds) {
+ rv = B_TRUE;
+ break;
+ }
}
mutex_exit(&dd->dd_lock);
+ return (rv);
+}
- return (num);
+/* ARGSUSED */
+static int
+dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+{ /* dmu_objset_find_dp() callback: re-invoke every property callback registered on ds's dsl_dir. */
+ dsl_dir_t *dd = ds->ds_dir;
+ dsl_prop_cb_record_t *cbr;
+
+ mutex_enter(&dd->dd_lock); /* dd_lock is held for the whole walk of dd_prop_cbs */
+ for (cbr = list_head(&dd->dd_prop_cbs); cbr;
+ cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+ uint64_t value;
+
+ if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname, /* looked up on the record's own dataset, not on ds */
+ sizeof (value), 1, &value, NULL) == 0) /* a record whose lookup fails is skipped */
+ cbr->cbr_func(cbr->cbr_arg, value);
+ }
+ mutex_exit(&dd->dd_lock);
+
+ return (0); /* always 0, even when some lookups failed */
+}
+
+/*
+ * Re-deliver current property values to the callbacks registered on dd and
+ * its descendants. This is used when renaming the dir.
+ */
+void
+dsl_prop_notify_all(dsl_dir_t *dd)
+{
+ dsl_pool_t *dp = dd->dd_pool;
+ ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); /* caller must already hold the config lock as writer */
+ (void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb, /* return value deliberately ignored; the callback always returns 0 */
+ NULL, DS_FIND_CHILDREN);
}
static void
@@ -496,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
zap_attribute_t *za;
int err;
- ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
- err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
+ ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+ err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
if (err)
return;
@@ -508,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
*/
err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname);
if (err == 0) {
- dsl_dir_close(dd, FTAG);
+ dsl_dir_rele(dd, FTAG);
return;
}
ASSERT3U(err, ==, ENOENT);
@@ -543,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
}
kmem_free(za, sizeof (zap_attribute_t));
zap_cursor_fini(&zc);
- dsl_dir_close(dd, FTAG);
+ dsl_dir_rele(dd, FTAG);
}
void
-dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
+ zprop_source_t source, int intsz, int numints, const void *value,
+ dmu_tx_t *tx)
{
- dsl_dataset_t *ds = arg1;
- dsl_prop_setarg_t *psa = arg2;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t zapobj, intval, dummy;
int isint;
char valbuf[32];
- char *valstr = NULL;
+ const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
- const char *propname = psa->psa_name;
- zprop_source_t source = psa->psa_source;
isint = (dodefault(propname, 8, 1, &intval) == 0);
@@ -612,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
- VERIFY(0 == zap_update(mos, zapobj, propname,
- psa->psa_intsz, psa->psa_numints, psa->psa_value, tx));
+ VERIFY0(zap_update(mos, zapobj, propname,
+ intsz, numints, value, tx));
break;
case ZPROP_SRC_INHERITED:
/*
@@ -624,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
- dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy,
- NULL) == 0) {
+ dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
- err = zap_update(mos, zapobj, inheritstr,
- 8, 1, &dummy, tx);
- ASSERT(err == 0);
+ VERIFY0(zap_update(mos, zapobj, inheritstr,
+ 8, 1, &dummy, tx));
}
break;
case ZPROP_SRC_RECEIVED:
@@ -637,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* set propname$recvd -> value
*/
err = zap_update(mos, zapobj, recvdstr,
- psa->psa_intsz, psa->psa_numints, psa->psa_value, tx);
+ intsz, numints, value, tx);
ASSERT(err == 0);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
@@ -667,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
strfree(recvdstr);
if (isint) {
- VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL));
+ VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) {
dsl_prop_cb_record_t *cbr;
@@ -694,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
valstr = valbuf;
} else {
if (source == ZPROP_SRC_LOCAL) {
- valstr = (char *)psa->psa_value;
+ valstr = value;
} else {
tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
if (dsl_prop_get_ds(ds, propname, 1,
@@ -711,118 +686,73 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
kmem_free(tbuf, ZAP_MAXVALUELEN);
}
-void
-dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+int
+dsl_prop_set_int(const char *dsname, const char *propname,
+ zprop_source_t source, uint64_t value)
{
- dsl_dataset_t *ds = arg1;
- dsl_props_arg_t *pa = arg2;
- nvlist_t *props = pa->pa_props;
- dsl_prop_setarg_t psa;
- nvpair_t *elem = NULL;
-
- psa.psa_source = pa->pa_source;
-
- while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
- nvpair_t *pair = elem;
-
- psa.psa_name = nvpair_name(pair);
+ nvlist_t *nvl = fnvlist_alloc();
+ int error;
- if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
- /*
- * dsl_prop_get_all_impl() returns properties in this
- * format.
- */
- nvlist_t *attrs;
- VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
- VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
- &pair) == 0);
- }
-
- if (nvpair_type(pair) == DATA_TYPE_STRING) {
- VERIFY(nvpair_value_string(pair,
- (char **)&psa.psa_value) == 0);
- psa.psa_intsz = 1;
- psa.psa_numints = strlen(psa.psa_value) + 1;
- } else {
- uint64_t intval;
- VERIFY(nvpair_value_uint64(pair, &intval) == 0);
- psa.psa_intsz = sizeof (intval);
- psa.psa_numints = 1;
- psa.psa_value = &intval;
- }
- dsl_prop_set_sync(ds, &psa, tx);
- }
+ fnvlist_add_uint64(nvl, propname, value);
+ error = dsl_props_set(dsname, source, nvl);
+ fnvlist_free(nvl);
+ return (error);
}
int
-dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
- int intsz, int numints, const void *buf)
+dsl_prop_set_string(const char *dsname, const char *propname,
+ zprop_source_t source, const char *value)
{
- dsl_dataset_t *ds;
- uint64_t version;
- int err;
- dsl_prop_setarg_t psa;
-
- /*
- * We must do these checks before we get to the syncfunc, since
- * it can't fail.
- */
- if (strlen(propname) >= ZAP_MAXNAMELEN)
- return (ENAMETOOLONG);
-
- err = dsl_dataset_hold(dsname, FTAG, &ds);
- if (err)
- return (err);
-
- version = spa_version(ds->ds_dir->dd_pool->dp_spa);
- if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
- ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
- dsl_dataset_rele(ds, FTAG);
- return (E2BIG);
- }
- if (dsl_dataset_is_snapshot(ds) &&
- version < SPA_VERSION_SNAP_PROPS) {
- dsl_dataset_rele(ds, FTAG);
- return (ENOTSUP);
- }
+ nvlist_t *nvl = fnvlist_alloc();
+ int error;
- psa.psa_name = propname;
- psa.psa_source = source;
- psa.psa_intsz = intsz;
- psa.psa_numints = numints;
- psa.psa_value = buf;
- psa.psa_effective_value = -1ULL;
+ fnvlist_add_string(nvl, propname, value);
+ error = dsl_props_set(dsname, source, nvl);
+ fnvlist_free(nvl);
+ return (error);
+}
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- NULL, dsl_prop_set_sync, ds, &psa, 2);
+int
+dsl_prop_inherit(const char *dsname, const char *propname,
+ zprop_source_t source)
+{ /* Single-property wrapper around dsl_props_set() that supplies no value. */
+ nvlist_t *nvl = fnvlist_alloc();
+ int error;
- dsl_dataset_rele(ds, FTAG);
- return (err);
+ fnvlist_add_boolean(nvl, propname); /* valueless pair; dsl_props_set_sync_impl() passes it on as (0, 0, NULL) */
+ error = dsl_props_set(dsname, source, nvl);
+ fnvlist_free(nvl); /* the temporary list is freed whether or not the set succeeded */
+ return (error);
}
-int
-dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
+typedef struct dsl_props_set_arg { /* argument bundle dsl_props_set() passes to its check and sync functions */
+ const char *dpsa_dsname; /* dataset name; held by name separately in the check and the sync function */
+ zprop_source_t dpsa_source; /* source applied to every property in the list */
+ nvlist_t *dpsa_props; /* property name -> value pairs to apply */
+} dsl_props_set_arg_t;
+
+static int
+dsl_props_set_check(void *arg, dmu_tx_t *tx)
{
+ dsl_props_set_arg_t *dpsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t version;
nvpair_t *elem = NULL;
- dsl_props_arg_t pa;
int err;
- if (err = dsl_dataset_hold(dsname, FTAG, &ds))
+ err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds);
+ if (err != 0)
return (err);
- /*
- * Do these checks before the syncfunc, since it can't fail.
- */
+
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
- while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+ while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) {
if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
dsl_dataset_rele(ds, FTAG);
return (ENAMETOOLONG);
}
if (nvpair_type(elem) == DATA_TYPE_STRING) {
- char *valstr;
- VERIFY(nvpair_value_string(elem, &valstr) == 0);
+ char *valstr = fnvpair_value_string(elem);
if (strlen(valstr) >= (version <
SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
@@ -832,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
}
}
- if (dsl_dataset_is_snapshot(ds) &&
- version < SPA_VERSION_SNAP_PROPS) {
+ if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
- pa.pa_props = props;
- pa.pa_source = source;
+void
+dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
+ nvlist_t *props, dmu_tx_t *tx)
+{ /* Apply each pair in props to ds with the given source via dsl_prop_set_sync_impl(). */
+ nvpair_t *elem = NULL;
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- NULL, dsl_props_set_sync, ds, &pa, 2);
+ while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+ nvpair_t *pair = elem; /* pair carries the value; elem keeps carrying the property name */
+ if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
+ /*
+ * dsl_prop_get_all_impl() returns properties in this
+ * format: the value is nested under ZPROP_VALUE.
+ */
+ nvlist_t *attrs = fnvpair_value_nvlist(pair);
+ pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
+ }
+
+ if (nvpair_type(pair) == DATA_TYPE_STRING) {
+ const char *value = fnvpair_value_string(pair);
+ dsl_prop_set_sync_impl(ds, nvpair_name(elem), /* name from elem: pair may be the inner ZPROP_VALUE entry */
+ source, 1, strlen(value) + 1, value, tx);
+ } else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
+ uint64_t intval = fnvpair_value_uint64(pair);
+ dsl_prop_set_sync_impl(ds, nvpair_name(elem),
+ source, sizeof (intval), 1, &intval, tx);
+ } else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) { /* valueless pair, as built by dsl_prop_inherit() */
+ dsl_prop_set_sync_impl(ds, nvpair_name(elem),
+ source, 0, 0, NULL, tx);
+ } else {
+ panic("invalid nvpair type"); /* any other pair type is treated as a programming error */
+ }
+ }
+}
+
+static void
+dsl_props_set_sync(void *arg, dmu_tx_t *tx)
+{ /* Sync function for dsl_props_set(): resolve the dataset by name and apply the property list. */
+ dsl_props_set_arg_t *dpsa = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+
+ VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds)); /* a failed hold here is fatal; dsl_props_set_check() performs the same hold and returns the error */
+ dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx);
dsl_dataset_rele(ds, FTAG);
- return (err);
+}
+
+/*
+ * All-or-nothing; if any prop can't be set, nothing will be modified.
+ */
+int
+dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
+{
+ dsl_props_set_arg_t dpsa; /* stack-allocated; passed by address to dsl_sync_task() below */
+ int nblks = 0; /* stays 0 when source includes ZPROP_SRC_NONE (see below) */
+
+ dpsa.dpsa_dsname = dsname;
+ dpsa.dpsa_source = source;
+ dpsa.dpsa_props = props;
+
+ /*
+ * If the source includes NONE, then we will only be removing entries
+ * from the ZAP object. In that case don't check for ENOSPC.
+ */
+ if ((source & ZPROP_SRC_NONE) == 0)
+ nblks = 2 * fnvlist_num_pairs(props); /* NOTE(review): presumably a blocks-modified estimate, 2 per pair -- confirm against dsl_sync_task() */
+
+ return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync,
+ &dpsa, nblks));
}
typedef enum dsl_prop_getflags {
@@ -995,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (dsl_dataset_is_snapshot(ds))
flags |= DSL_PROP_GET_SNAPSHOT;
- rw_enter(&dp->dp_config_rwlock, RW_READER);
+ ASSERT(dsl_pool_config_held(dp));
if (ds->ds_phys->ds_props_obj != 0) {
ASSERT(flags & DSL_PROP_GET_SNAPSHOT);
@@ -1020,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
break;
}
out:
- rw_exit(&dp->dp_config_rwlock);
return (err);
}
boolean_t
-dsl_prop_get_hasrecvd(objset_t *os)
+dsl_prop_get_hasrecvd(const char *dsname)
{
- dsl_dataset_t *ds = os->os_dsl_dataset;
- int rc;
uint64_t dummy;
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL);
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
- ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
- return (rc == 0);
+ return (0 ==
+ dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL));
}
-static void
-dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source)
+static int
+dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source)
{
- dsl_dataset_t *ds = os->os_dsl_dataset;
- uint64_t dummy = 0;
- dsl_prop_setarg_t psa;
-
- if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS)
- return;
+ uint64_t version;
+ spa_t *spa;
+ int error = 0;
- dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy);
+ VERIFY0(spa_open(dsname, &spa, FTAG));
+ version = spa_version(spa);
+ spa_close(spa, FTAG);
- (void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL,
- dsl_prop_set_sync, ds, &psa, 2);
+ if (version >= SPA_VERSION_RECVD_PROPS)
+ error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0);
+ return (error);
}
/*
* Call after successfully receiving properties to ensure that only the first
* receive on or after SPA_VERSION_RECVD_PROPS blows away local properties.
*/
-void
-dsl_prop_set_hasrecvd(objset_t *os)
+int
+dsl_prop_set_hasrecvd(const char *dsname)
{
- if (dsl_prop_get_hasrecvd(os)) {
- ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
- return;
- }
- dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL);
+ int error = 0;
+ if (!dsl_prop_get_hasrecvd(dsname))
+ error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL);
+ return (error);
}
void
-dsl_prop_unset_hasrecvd(objset_t *os)
+dsl_prop_unset_hasrecvd(const char *dsname)
{
- dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE);
+ VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE));
}
int
@@ -1081,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
}
int
-dsl_prop_get_received(objset_t *os, nvlist_t **nvp)
+dsl_prop_get_received(const char *dsname, nvlist_t **nvp)
{
+ objset_t *os;
+ int error;
+
/*
* Received properties are not distinguishable from local properties
* until the dataset has received properties on or after
* SPA_VERSION_RECVD_PROPS.
*/
- dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ?
+ dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ?
DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL);
- return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags));
+
+ error = dmu_objset_hold(dsname, FTAG, &os);
+ if (error != 0)
+ return (error);
+ error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags);
+ dmu_objset_rele(os, FTAG);
+ return (error);
}
void
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index e171725799..3de3c6e4d7 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -55,7 +55,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_defrag_cb;
static scan_cb_t dsl_scan_scrub_cb;
static scan_cb_t dsl_scan_remove_cb;
-static dsl_syncfunc_t dsl_scan_cancel_sync;
+static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
@@ -154,9 +154,9 @@ dsl_scan_fini(dsl_pool_t *dp)
/* ARGSUSED */
static int
-dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (EBUSY);
@@ -164,12 +164,11 @@ dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
}
-/* ARGSUSED */
static void
-dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
- pool_scan_func_t *funcp = arg2;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
+ pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
@@ -315,9 +314,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
/* ARGSUSED */
static int
-dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state != DSS_SCANNING)
return (ENOENT);
@@ -326,9 +325,9 @@ dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* ARGSUSED */
static void
-dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
dsl_scan_done(scn, B_FALSE, tx);
dsl_scan_sync_state(scn, tx);
@@ -337,12 +336,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_scan_cancel(dsl_pool_t *dp)
{
- boolean_t complete = B_FALSE;
- int err;
-
- err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
- dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
- return (err);
+ return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
+ dsl_scan_cancel_sync, NULL, 3));
}
static void dsl_scan_visitbp(blkptr_t *bp,
@@ -378,7 +373,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
static void
dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
{
- VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
+ VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
&scn->scn_phys, tx));
@@ -950,33 +945,33 @@ struct enqueue_clones_arg {
/* ARGSUSED */
static int
-enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
struct enqueue_clones_arg *eca = arg;
dsl_dataset_t *ds;
int err;
- dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
- err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
+ return (0);
+
+ err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
- if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
- while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
- dsl_dataset_t *prev;
- err = dsl_dataset_hold_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
+ while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
+ dsl_dataset_t *prev;
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
- dsl_dataset_rele(ds, FTAG);
- if (err)
- return (err);
- ds = prev;
- }
- VERIFY(zap_add_int_key(dp->dp_meta_objset,
- scn->scn_phys.scn_queue_obj, ds->ds_object,
- ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
+ dsl_dataset_rele(ds, FTAG);
+ if (err)
+ return (err);
+ ds = prev;
}
+ VERIFY(zap_add_int_key(dp->dp_meta_objset,
+ scn->scn_phys.scn_queue_obj, ds->ds_object,
+ ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
return (0);
}
@@ -1065,17 +1060,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
}
if (usenext) {
- VERIFY(zap_join_key(dp->dp_meta_objset,
+ VERIFY0(zap_join_key(dp->dp_meta_objset,
ds->ds_phys->ds_next_clones_obj,
scn->scn_phys.scn_queue_obj,
- ds->ds_phys->ds_creation_txg, tx) == 0);
+ ds->ds_phys->ds_creation_txg, tx));
} else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
- (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
- NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
}
}
@@ -1085,15 +1080,14 @@ out:
/* ARGSUSED */
static int
-enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
int err;
- dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
- err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@@ -1248,8 +1242,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
- VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
- NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
@@ -1384,7 +1378,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u txg=%llu",
func, tx->tx_txg);
- dsl_scan_setup_sync(scn, &func, tx);
+ dsl_scan_setup_sync(&func, tx);
}
if (!dsl_scan_active(scn) ||
@@ -1418,21 +1412,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
- VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
- if (err != 0)
- return;
-
- /* disable async destroy feature */
- spa_feature_decr(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
- ASSERT(!spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
- VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, tx));
- VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
- dp->dp_bptree_obj, tx));
- dp->dp_bptree_obj = 0;
+ VERIFY0(zio_wait(scn->scn_zio_root));
+
+ if (err == 0) {
+ zfeature_info_t *feat = &spa_feature_table
+ [SPA_FEATURE_ASYNC_DESTROY];
+ /* finished; deactivate async destroy feature */
+ spa_feature_decr(spa, feat, tx);
+ ASSERT(!spa_feature_is_active(spa, feat));
+ VERIFY0(zap_remove(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, tx));
+ VERIFY0(bptree_free(dp->dp_meta_objset,
+ dp->dp_bptree_obj, tx));
+ dp->dp_bptree_obj = 0;
+ }
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
@@ -1479,7 +1473,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
+ dsl_pool_config_enter(dp, FTAG);
dsl_scan_visit(scn, tx);
+ dsl_pool_config_exit(dp, FTAG);
(void) zio_wait(scn->scn_zio_root);
scn->scn_zio_root = NULL;
@@ -1714,6 +1710,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
- return (dsl_sync_task_do(dp, dsl_scan_setup_check,
- dsl_scan_setup_sync, dp->dp_scan, &func, 0));
+ return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
+ dsl_scan_setup_sync, &func, 0));
}
diff --git a/usr/src/uts/common/fs/zfs/dsl_synctask.c b/usr/src/uts/common/fs/zfs/dsl_synctask.c
index e2481281e0..ecb45fbc38 100644
--- a/usr/src/uts/common/fs/zfs/dsl_synctask.c
+++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c
@@ -34,136 +34,115 @@
/* ARGSUSED */
static int
-dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
{
return (0);
}
-dsl_sync_task_group_t *
-dsl_sync_task_group_create(dsl_pool_t *dp)
-{
- dsl_sync_task_group_t *dstg;
-
- dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
- list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
- offsetof(dsl_sync_task_t, dst_node));
- dstg->dstg_pool = dp;
-
- return (dstg);
-}
-
-void
-dsl_sync_task_create(dsl_sync_task_group_t *dstg,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified)
-{
- dsl_sync_task_t *dst;
-
- if (checkfunc == NULL)
- checkfunc = dsl_null_checkfunc;
- dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
- dst->dst_checkfunc = checkfunc;
- dst->dst_syncfunc = syncfunc;
- dst->dst_arg1 = arg1;
- dst->dst_arg2 = arg2;
- list_insert_tail(&dstg->dstg_tasks, dst);
-
- dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
-}
-
+/*
+ * Called from open context to perform a callback in syncing context. Waits
+ * for the operation to complete.
+ *
+ * The checkfunc will be called from open context as a preliminary check
+ * which can quickly fail. If it succeeds, it will be called again from
+ * syncing context. The checkfunc should generally be designed to work
+ * properly in either context, but if necessary it can check
+ * dmu_tx_is_syncing(tx).
+ *
+ * The synctask infrastructure enforces proper locking strategy with respect
+ * to the dp_config_rwlock -- the lock will always be held when the callbacks
+ * are called. It will be held for read during the open-context (preliminary)
+ * call to the checkfunc, and then held for write from syncing context during
+ * the calls to the check and sync funcs.
+ *
+ * A dataset or pool name can be passed as the first argument. Typically,
+ * the check func will hold, check the return value of the hold, and then
+ * release the dataset. The sync func will VERIFY0(hold()) the dataset.
+ * This is safe because no changes can be made between the check and sync funcs,
+ * and the sync func will only be called if the check func successfully opened
+ * the dataset.
+ */
int
-dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
+dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
+ dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified)
{
+ spa_t *spa;
dmu_tx_t *tx;
- uint64_t txg;
- dsl_sync_task_t *dst;
-
-top:
- tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
- VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
-
- txg = dmu_tx_get_txg(tx);
+ int err;
+ dsl_sync_task_t dst = { 0 };
+ dsl_pool_t *dp;
- /* Do a preliminary error check. */
- dstg->dstg_err = 0;
-#ifdef ZFS_DEBUG
- /*
- * Only check half the time, otherwise, the sync-context
- * check will almost never fail.
- */
- if (spa_get_random(2) == 0)
- goto skip;
-#endif
- rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- dst->dst_err =
- dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
- if (dst->dst_err)
- dstg->dstg_err = dst->dst_err;
- }
- rw_exit(&dstg->dstg_pool->dp_config_rwlock);
+ err = spa_open(pool, &spa, FTAG);
+ if (err != 0)
+ return (err);
+ dp = spa_get_dsl(spa);
- if (dstg->dstg_err) {
+top:
+ tx = dmu_tx_create_dd(dp->dp_mos_dir);
+ VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+
+ dst.dst_pool = dp;
+ dst.dst_txg = dmu_tx_get_txg(tx);
+ dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
+ dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
+ dst.dst_syncfunc = syncfunc;
+ dst.dst_arg = arg;
+ dst.dst_error = 0;
+ dst.dst_nowaiter = B_FALSE;
+
+ dsl_pool_config_enter(dp, FTAG);
+ err = dst.dst_checkfunc(arg, tx);
+ dsl_pool_config_exit(dp, FTAG);
+
+ if (err != 0) {
dmu_tx_commit(tx);
- return (dstg->dstg_err);
+ spa_close(spa, FTAG);
+ return (err);
}
-skip:
- /*
- * We don't generally have many sync tasks, so pay the price of
- * add_tail to get the tasks executed in the right order.
- */
- VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
- dstg, txg));
+ VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));
dmu_tx_commit(tx);
- txg_wait_synced(dstg->dstg_pool, txg);
+ txg_wait_synced(dp, dst.dst_txg);
- if (dstg->dstg_err == EAGAIN) {
- txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
+ if (dst.dst_error == EAGAIN) {
+ txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
goto top;
}
- return (dstg->dstg_err);
+ spa_close(spa, FTAG);
+ return (dst.dst_error);
}
void
-dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
+dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
+ int blocks_modified, dmu_tx_t *tx)
{
- uint64_t txg;
+ dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
- dstg->dstg_nowaiter = B_TRUE;
- txg = dmu_tx_get_txg(tx);
- /*
- * We don't generally have many sync tasks, so pay the price of
- * add_tail to get the tasks executed in the right order.
- */
- VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
- dstg, txg));
-}
-
-void
-dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
-{
- dsl_sync_task_t *dst;
+ dst->dst_pool = dp;
+ dst->dst_txg = dmu_tx_get_txg(tx);
+ dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
+ dst->dst_checkfunc = dsl_null_checkfunc;
+ dst->dst_syncfunc = syncfunc;
+ dst->dst_arg = arg;
+ dst->dst_error = 0;
+ dst->dst_nowaiter = B_TRUE;
- while (dst = list_head(&dstg->dstg_tasks)) {
- list_remove(&dstg->dstg_tasks, dst);
- kmem_free(dst, sizeof (dsl_sync_task_t));
- }
- kmem_free(dstg, sizeof (dsl_sync_task_group_t));
+ VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg));
}
+/*
+ * Called in syncing context to execute the synctask.
+ */
void
-dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
+dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
{
- dsl_sync_task_t *dst;
- dsl_pool_t *dp = dstg->dstg_pool;
+ dsl_pool_t *dp = dst->dst_pool;
uint64_t quota, used;
- ASSERT0(dstg->dstg_err);
+ ASSERT0(dst->dst_error);
/*
* Check for sufficient space. We just check against what's
@@ -175,63 +154,21 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
/* MOS space is triple-dittoed, so we multiply by 3. */
- if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
- dstg->dstg_err = ENOSPC;
+ if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) {
+ dst->dst_error = ENOSPC;
+ if (dst->dst_nowaiter)
+ kmem_free(dst, sizeof (*dst));
return;
}
/*
- * Check for errors by calling checkfuncs.
+ * Check for errors by calling checkfunc.
*/
- rw_enter(&dp->dp_config_rwlock, RW_WRITER);
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- dst->dst_err =
- dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
- if (dst->dst_err)
- dstg->dstg_err = dst->dst_err;
- }
-
- if (dstg->dstg_err == 0) {
- /*
- * Execute sync tasks.
- */
- for (dst = list_head(&dstg->dstg_tasks); dst;
- dst = list_next(&dstg->dstg_tasks, dst)) {
- dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
- }
- }
- rw_exit(&dp->dp_config_rwlock);
-
- if (dstg->dstg_nowaiter)
- dsl_sync_task_group_destroy(dstg);
-}
-
-int
-dsl_sync_task_do(dsl_pool_t *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified)
-{
- dsl_sync_task_group_t *dstg;
- int err;
-
- ASSERT(spa_writeable(dp->dp_spa));
-
- dstg = dsl_sync_task_group_create(dp);
- dsl_sync_task_create(dstg, checkfunc, syncfunc,
- arg1, arg2, blocks_modified);
- err = dsl_sync_task_group_wait(dstg);
- dsl_sync_task_group_destroy(dstg);
- return (err);
-}
-
-void
-dsl_sync_task_do_nowait(dsl_pool_t *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
-{
- dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
- dsl_sync_task_create(dstg, checkfunc, syncfunc,
- arg1, arg2, blocks_modified);
- dsl_sync_task_group_nowait(dstg, tx);
+ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+ dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
+ if (dst->dst_error == 0)
+ dst->dst_syncfunc(dst->dst_arg, tx);
+ rrw_exit(&dp->dp_config_rwlock, FTAG);
+ if (dst->dst_nowaiter)
+ kmem_free(dst, sizeof (*dst));
}
diff --git a/usr/src/uts/common/fs/zfs/dsl_userhold.c b/usr/src/uts/common/fs/zfs/dsl_userhold.c
new file mode 100644
index 0000000000..e30169eade
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/dsl_userhold.c
@@ -0,0 +1,536 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dsl_userhold.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dmu_tx.h>
+#include <sys/zfs_onexit.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_dir.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zap.h>
+
+/*
+ * Argument bundle passed to dsl_dataset_user_hold_check() and
+ * dsl_dataset_user_hold_sync() through dsl_sync_task().
+ */
+typedef struct dsl_dataset_user_hold_arg {
+ nvlist_t *dduha_holds; /* snapshot name -> hold tag (string) */
+ nvlist_t *dduha_errlist; /* receives snapshot name -> int32 error */
+ minor_t dduha_minor; /* nonzero: the holds are temporary */
+} dsl_dataset_user_hold_arg_t;
+
+/*
+ * Validate one prospective user hold.
+ *
+ * Returns E2BIG when the tag is too long (temporary holds get a tighter
+ * limit), EEXIST when ds is non-NULL and already carries this tag, any
+ * other zap_lookup() failure unchanged, and 0 otherwise.  When ds is NULL
+ * (the dataset does not exist yet) only the tag length is checked.
+ *
+ * If you add new checks here, you may need to add additional checks to the
+ * "temporary" case in snapshot_check() in dmu_objset.c.
+ */
+int
+dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
+    boolean_t temphold, dmu_tx_t *tx)
+{
+	objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+	size_t taglen = strlen(htag);
+	int error = 0;
+
+	if (taglen > MAXNAMELEN)
+		return (E2BIG);
+	/* Tempholds have a more restricted length */
+	if (temphold && taglen + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
+		return (E2BIG);
+
+	/* Uniqueness can only be checked against an existing dataset. */
+	if (ds == NULL)
+		return (0);
+
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_userrefs_obj != 0) {
+		uint64_t value;
+
+		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
+		    htag, 8, 1, &value);
+		switch (error) {
+		case 0:
+			/* the tag is already present */
+			error = EEXIST;
+			break;
+		case ENOENT:
+			/* the tag is free to use */
+			error = 0;
+			break;
+		default:
+			break;
+		}
+	}
+	mutex_exit(&ds->ds_lock);
+
+	return (error);
+}
+
+/*
+ * Check function for dsl_dataset_user_hold().  Every entry of dduha_holds
+ * is validated; each failure is recorded in dduha_errlist under the
+ * snapshot's name and the last error seen is returned (0 if none failed).
+ * Pools older than SPA_VERSION_USERREFS are rejected with ENOTSUP.
+ */
+static int
+dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_hold_arg_t *dduha = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvpair_t *pair = NULL;
+	int rv = 0;
+
+	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
+		return (ENOTSUP);
+
+	while ((pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) != NULL) {
+		const char *snapname = nvpair_name(pair);
+		dsl_dataset_t *ds;
+		char *htag;
+		int error;
+
+		if (strchr(snapname, '@') == NULL) {
+			/* must be a snapshot */
+			error = EINVAL;
+		} else if ((error = nvpair_value_string(pair, &htag)) == 0 &&
+		    (error = dsl_dataset_hold(dp, snapname, FTAG,
+		    &ds)) == 0) {
+			error = dsl_dataset_user_hold_check_one(ds, htag,
+			    dduha->dduha_minor != 0, tx);
+			dsl_dataset_rele(ds, FTAG);
+		}
+
+		if (error != 0) {
+			rv = error;
+			fnvlist_add_int32(dduha->dduha_errlist, snapname,
+			    error);
+		}
+	}
+	return (rv);
+}
+
+/*
+ * Record one user hold (htag) on ds: create the userrefs zap object if the
+ * dataset has none, bump the in-core hold count, add the tag to the zap with
+ * "now" as its value, and write a history log entry. When minor is nonzero
+ * the hold is also registered with the pool via dsl_pool_user_hold() and an
+ * onexit cleanup callback is registered for that minor.
+ */
+void
+dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
+ minor_t minor, uint64_t now, dmu_tx_t *tx)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t zapobj;
+
+ /* ds_lock covers the userrefs object lookup/creation and the count. */
+ mutex_enter(&ds->ds_lock);
+ if (ds->ds_phys->ds_userrefs_obj == 0) {
+ /*
+ * This is the first user hold for this dataset. Create
+ * the userrefs zap object.
+ */
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ zapobj = ds->ds_phys->ds_userrefs_obj =
+ zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
+ } else {
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ }
+ ds->ds_userrefs++;
+ mutex_exit(&ds->ds_lock);
+
+ /* Store the tag with the hold time as its 8-byte value. */
+ VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
+
+ /* A nonzero minor marks a temporary hold. */
+ if (minor != 0) {
+ VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
+ htag, now, tx));
+ dsl_register_onexit_hold_cleanup(ds, htag, minor);
+ }
+
+ spa_history_log_internal_ds(ds, "hold", tx,
+ "tag=%s temp=%d refs=%llu",
+ htag, minor != 0, ds->ds_userrefs);
+}
+
+/*
+ * Sync function for dsl_dataset_user_hold(): apply every requested hold.
+ * All holds created by one invocation share the same timestamp.
+ */
+static void
+dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_hold_arg_t *dduha = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	uint64_t now = gethrestime_sec();
+	nvpair_t *pair = NULL;
+
+	while ((pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) != NULL) {
+		dsl_dataset_t *ds;
+
+		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
+		dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
+		    dduha->dduha_minor, now, tx);
+		dsl_dataset_rele(ds, FTAG);
+	}
+}
+
+/*
+ * Place user holds on a set of snapshots with a single sync task.
+ *
+ * holds is an nvlist of snapname -> holdname.
+ * errlist will be filled in with snapname -> error.
+ * If cleanup_minor is not 0, the holds will be temporary, cleaned up
+ * when the process exits.
+ *
+ * If any hold fails, all will fail.  An empty holds list returns 0
+ * without dispatching a sync task.
+ */
+int
+dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
+{
+	nvpair_t *first = nvlist_next_nvpair(holds, NULL);
+	dsl_dataset_user_hold_arg_t dduha;
+
+	if (first == NULL)
+		return (0);
+
+	dduha.dduha_minor = cleanup_minor;
+	dduha.dduha_errlist = errlist;
+	dduha.dduha_holds = holds;
+
+	/* The first snapshot's name is what dsl_sync_task() opens. */
+	return (dsl_sync_task(nvpair_name(first), dsl_dataset_user_hold_check,
+	    dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
+}
+
+/*
+ * Argument bundle passed to dsl_dataset_user_release_check() and
+ * dsl_dataset_user_release_sync() through dsl_sync_task().
+ */
+typedef struct dsl_dataset_user_release_arg {
+ nvlist_t *ddura_holds; /* snapshot name -> nvlist of hold tags */
+ nvlist_t *ddura_todelete; /* snapshots the check func marked for destroy */
+ nvlist_t *ddura_errlist; /* snapshot name -> int32 error; may be NULL */
+} dsl_dataset_user_release_arg_t;
+
+/*
+ * Verify that every tag named in holds can be released from ds.
+ *
+ * Returns EINVAL if ds is not a snapshot, ESRCH if ds has no userrefs object
+ * or a named tag is not present, any other zap_lookup() error unchanged,
+ * EBUSY if the release would trigger a deferred destroy but the dataset is
+ * long-held, and 0 otherwise. *todelete is set to B_TRUE only when the
+ * snapshot must be destroyed along with the release.
+ */
+static int
+dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
+ nvlist_t *holds, boolean_t *todelete)
+{
+ uint64_t zapobj;
+ nvpair_t *pair;
+ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+ int error;
+ int numholds = 0;
+
+ *todelete = B_FALSE;
+
+ if (!dsl_dataset_is_snapshot(ds))
+ return (EINVAL);
+
+ /* No userrefs object means the dataset has no user holds at all. */
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ if (zapobj == 0)
+ return (ESRCH);
+
+ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(holds, pair)) {
+ /* Make sure the hold exists */
+ uint64_t tmp;
+ error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
+ if (error == ENOENT)
+ error = ESRCH;
+ if (error != 0)
+ return (error);
+ numholds++;
+ }
+
+ /*
+ * The snapshot goes away too if it is marked for deferred destroy,
+ * its child count is 1, and this request releases every remaining hold.
+ */
+ if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
+ ds->ds_userrefs == numholds) {
+ /* we need to destroy the snapshot as well */
+
+ if (dsl_dataset_long_held(ds))
+ return (EBUSY);
+ *todelete = B_TRUE;
+ }
+ return (0);
+}
+
+/*
+ * Check function for dsl_dataset_user_release(). The open-context call is a
+ * no-op that returns 0; validation happens only in syncing context. Each
+ * snapshot is checked with dsl_dataset_user_release_check_one(); failures are
+ * recorded in ddura_errlist (when non-NULL) and the last error seen is
+ * returned. Snapshots that must be destroyed are added to ddura_todelete.
+ */
+static int
+dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_user_release_arg_t *ddura = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ nvpair_t *pair;
+ int rv = 0;
+
+ /* Skip the preliminary (open-context) check entirely. */
+ if (!dmu_tx_is_syncing(tx))
+ return (0);
+
+ for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
+ const char *name = nvpair_name(pair);
+ int error;
+ dsl_dataset_t *ds;
+ nvlist_t *holds;
+
+ /*
+ * A value that is not an nvlist aborts the whole check at once;
+ * it is not recorded in the error list.
+ */
+ error = nvpair_value_nvlist(pair, &holds);
+ if (error != 0)
+ return (EINVAL);
+
+ error = dsl_dataset_hold(dp, name, FTAG, &ds);
+ if (error == 0) {
+ boolean_t deleteme;
+ error = dsl_dataset_user_release_check_one(ds,
+ holds, &deleteme);
+ if (error == 0 && deleteme) {
+ fnvlist_add_boolean(ddura->ddura_todelete,
+ name);
+ }
+ dsl_dataset_rele(ds, FTAG);
+ }
+ if (error != 0) {
+ if (ddura->ddura_errlist != NULL) {
+ fnvlist_add_int32(ddura->ddura_errlist,
+ name, error);
+ }
+ rv = error;
+ }
+ }
+ return (rv);
+}
+
+/*
+ * Release every tag named in holds from ds: drop the in-core hold count,
+ * release the pool-level record of the hold (ENOENT from that call is
+ * tolerated), remove the tag from the dataset's userrefs zap, and write a
+ * history log entry per tag.
+ */
+static void
+dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
+ dmu_tx_t *tx)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ uint64_t zapobj;
+ int error;
+ nvpair_t *pair;
+
+ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(holds, pair)) {
+ ds->ds_userrefs--;
+ error = dsl_pool_user_release(dp, ds->ds_object,
+ nvpair_name(pair), tx);
+ /* ENOENT is accepted: the pool-level entry may be absent. */
+ VERIFY(error == 0 || error == ENOENT);
+ zapobj = ds->ds_phys->ds_userrefs_obj;
+ VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
+
+ spa_history_log_internal_ds(ds, "release", tx,
+ "tag=%s refs=%lld", nvpair_name(pair),
+ (longlong_t)ds->ds_userrefs);
+ }
+}
+
+/*
+ * Sync function for dsl_dataset_user_release(): release the requested tags
+ * on every snapshot in ddura_holds, then destroy each snapshot that the
+ * check function listed in ddura_todelete.
+ */
+static void
+dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_user_release_arg_t *ddura = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ nvpair_t *pair;
+
+ for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
+ dsl_dataset_t *ds;
+
+ VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
+ dsl_dataset_user_release_sync_one(ds,
+ fnvpair_value_nvlist(pair), tx);
+ /* The destroy happens only after all of the holds are gone. */
+ if (nvlist_exists(ddura->ddura_todelete,
+ nvpair_name(pair))) {
+ ASSERT(ds->ds_userrefs == 0 &&
+ ds->ds_phys->ds_num_children == 1 &&
+ DS_IS_DEFER_DESTROY(ds));
+ dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
+ }
+ dsl_dataset_rele(ds, FTAG);
+ }
+}
+
+/*
+ * Release user holds from a set of snapshots with a single sync task.
+ *
+ * holds is an nvlist of snapname -> { holdname, ... }.
+ * errlist will be filled in with snapname -> error.
+ *
+ * If any release fails, all will fail.  An empty holds list returns 0
+ * without dispatching a sync task.
+ */
+int
+dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
+{
+	nvpair_t *first = nvlist_next_nvpair(holds, NULL);
+	dsl_dataset_user_release_arg_t ddura;
+	int error;
+
+	if (first == NULL)
+		return (0);
+
+	/* Scratch list the check func fills with snapshots to destroy. */
+	ddura.ddura_todelete = fnvlist_alloc();
+	ddura.ddura_errlist = errlist;
+	ddura.ddura_holds = holds;
+
+	error = dsl_sync_task(nvpair_name(first),
+	    dsl_dataset_user_release_check, dsl_dataset_user_release_sync,
+	    &ddura, fnvlist_num_pairs(holds));
+
+	fnvlist_free(ddura.ddura_todelete);
+	return (error);
+}
+
+/*
+ * Argument bundle passed to dsl_dataset_user_release_tmp_check() and
+ * dsl_dataset_user_release_tmp_sync() through dsl_sync_task().
+ */
+typedef struct dsl_dataset_user_release_tmp_arg {
+ uint64_t ddurta_dsobj; /* object number of the dataset to release */
+ nvlist_t *ddurta_holds; /* hold tags to release, one nvpair per tag */
+ boolean_t ddurta_deleteme; /* set by check: destroy the snapshot too */
+} dsl_dataset_user_release_tmp_arg_t;
+
+/*
+ * Check function for dsl_dataset_user_release_tmp().  Does nothing in open
+ * context; in syncing context it validates the release against the dataset
+ * identified by ddurta_dsobj and records in ddurta_deleteme whether the
+ * snapshot has to be destroyed as well.
+ */
+static int
+dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dataset_t *ds;
+	int error;
+
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
+	if (error == 0) {
+		error = dsl_dataset_user_release_check_one(ds,
+		    ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
+		dsl_dataset_rele(ds, FTAG);
+	}
+	return (error);
+}
+
+/*
+ * Sync function for dsl_dataset_user_release_tmp(): release the tags in
+ * ddurta_holds from the dataset identified by ddurta_dsobj, and destroy the
+ * snapshot if the check function set ddurta_deleteme.
+ */
+static void
+dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds;
+
+ VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
+ dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
+ if (ddurta->ddurta_deleteme) {
+ ASSERT(ds->ds_userrefs == 0 &&
+ ds->ds_phys->ds_num_children == 1 &&
+ DS_IS_DEFER_DESTROY(ds));
+ dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
+ }
+ dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Called at spa_load time to release a stale temporary user hold.
+ * Also called by the onexit code.
+ *
+ * Releases the single hold htag from the dataset with object number dsobj.
+ * The result of the sync task is deliberately ignored, so a failed release
+ * is not reported to the caller.
+ */
+void
+dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
+{
+ dsl_dataset_user_release_tmp_arg_t ddurta;
+ dsl_dataset_t *ds;
+ int error;
+
+#ifdef _KERNEL
+ /* Make sure it is not mounted. */
+ dsl_pool_config_enter(dp, FTAG);
+ error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (error == 0) {
+ char name[MAXNAMELEN];
+ dsl_dataset_name(ds, name);
+ dsl_dataset_rele(ds, FTAG);
+ /* The config lock is dropped before the unmount call. */
+ dsl_pool_config_exit(dp, FTAG);
+ zfs_unmount_snap(name);
+ } else {
+ dsl_pool_config_exit(dp, FTAG);
+ }
+#endif
+
+ /* Build a one-entry holds list naming just this tag. */
+ ddurta.ddurta_dsobj = dsobj;
+ ddurta.ddurta_holds = fnvlist_alloc();
+ fnvlist_add_boolean(ddurta.ddurta_holds, htag);
+
+ (void) dsl_sync_task(spa_name(dp->dp_spa),
+ dsl_dataset_user_release_tmp_check,
+ dsl_dataset_user_release_tmp_sync, &ddurta, 1);
+ fnvlist_free(ddurta.ddurta_holds);
+}
+
+/*
+ * State saved by dsl_register_onexit_hold_cleanup() for the onexit callback
+ * that later releases a temporary hold.
+ */
+typedef struct zfs_hold_cleanup_arg {
+ char zhca_spaname[MAXNAMELEN]; /* pool name at registration time */
+ uint64_t zhca_spa_load_guid; /* pool load guid at registration time */
+ uint64_t zhca_dsobj; /* object number of the held dataset */
+ char zhca_htag[MAXNAMELEN]; /* hold tag to release */
+} zfs_hold_cleanup_arg_t;
+
+/*
+ * onexit callback registered by dsl_register_onexit_hold_cleanup().
+ *
+ * Releases the temporary hold described by arg, provided the pool can still
+ * be opened and is the same loaded instance (matching load guid) that the
+ * hold was taken on; otherwise only a debug message is logged.
+ *
+ * arg is the kmem_alloc()ed zfs_hold_cleanup_arg_t handed over at
+ * registration.  This callback owns it and frees it on every path,
+ * including the two failure paths, so the argument is never leaked.
+ */
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+	zfs_hold_cleanup_arg_t *ca = arg;
+	spa_t *spa;
+	int error;
+
+	error = spa_open(ca->zhca_spaname, &spa, FTAG);
+	if (error != 0) {
+		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
+		    "because pool is no longer loaded",
+		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
+		goto out;
+	}
+
+	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
+		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
+		    "because pool is no longer loaded (guid doesn't match)",
+		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
+	} else {
+		dsl_dataset_user_release_tmp(spa_get_dsl(spa),
+		    ca->zhca_dsobj, ca->zhca_htag);
+	}
+	spa_close(spa, FTAG);
+
+out:
+	/* Nothing else holds a reference to ca once the callback has run. */
+	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+}
+
+/*
+ * Arrange for the hold htag on ds to be released when the onexit state
+ * identified by minor is torn down.  The pool name, pool load guid, dataset
+ * object number and tag are copied into a freshly allocated
+ * zfs_hold_cleanup_arg_t that is handed to the callback.
+ */
+void
+dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
+    minor_t minor)
+{
+	spa_t *spa = dsl_dataset_get_spa(ds);
+	zfs_hold_cleanup_arg_t *ca;
+
+	ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+
+	ca->zhca_dsobj = ds->ds_object;
+	ca->zhca_spa_load_guid = spa_load_guid(spa);
+	(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
+	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
+	    sizeof (ca->zhca_spaname));
+
+	VERIFY0(zfs_onexit_add_cb(minor,
+	    dsl_dataset_user_release_onexit, ca, NULL));
+}
+
+/*
+ * Add one uint64 entry to nvl for every user hold on the dataset named
+ * dsname: the key is the hold tag and the value is the integer stored with
+ * it in the dataset's userrefs zap object.  A dataset without a userrefs
+ * object contributes nothing.
+ *
+ * Returns 0 on success, or the error from dsl_pool_hold() or
+ * dsl_dataset_hold().
+ */
+int
+dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
+{
+	dsl_pool_t *dp;
+	dsl_dataset_t *ds;
+	int err;
+
+	err = dsl_pool_hold(dsname, FTAG, &dp);
+	if (err != 0)
+		return (err);
+
+	err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
+	if (err == 0) {
+		if (ds->ds_phys->ds_userrefs_obj != 0) {
+			zap_attribute_t *za;
+			zap_cursor_t zc;
+
+			za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+			zap_cursor_init(&zc,
+			    ds->ds_dir->dd_pool->dp_meta_objset,
+			    ds->ds_phys->ds_userrefs_obj);
+			while (zap_cursor_retrieve(&zc, za) == 0) {
+				fnvlist_add_uint64(nvl, za->za_name,
+				    za->za_first_integer);
+				zap_cursor_advance(&zc);
+			}
+			zap_cursor_fini(&zc);
+			kmem_free(za, sizeof (zap_attribute_t));
+		}
+		dsl_dataset_rele(ds, FTAG);
+	}
+
+	dsl_pool_rele(dp, FTAG);
+	return (err);
+}
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index bf9889e183..aae2ccd5d9 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -1866,3 +1866,41 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
return (error);
}
+
+/*
+ * Debug helper for metaslab_check_free(): under the space map's lock,
+ * panic if space_map_find() returns a segment of sm for (off, size).
+ */
+static void
+checkmap(space_map_t *sm, uint64_t off, uint64_t size)
+{
+	avl_index_t where;
+	space_seg_t *seg;
+
+	mutex_enter(sm->sm_lock);
+	seg = space_map_find(sm, off, size, &where);
+	if (seg != NULL) {
+		panic("freeing free block; ss=%p", (void *)seg);
+	}
+	mutex_exit(sm->sm_lock);
+}
+
+/*
+ * Debug check, active only when ZFS_DEBUG_ZIO_FREE is set in zfs_flags.
+ * For every DVA of bp, run checkmap() against the owning metaslab's loaded
+ * map, its per-txg free maps and its deferred maps.  SCL_VDEV is held as
+ * reader for the duration of the walk.
+ */
+void
+metaslab_check_free(spa_t *spa, const blkptr_t *bp)
+{
+	if (!(zfs_flags & ZFS_DEBUG_ZIO_FREE))
+		return;
+
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+	for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
+		uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[d]);
+		vdev_t *vd = vdev_lookup_top(spa, vdid);
+		uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[d]);
+		uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[d]);
+		metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift];
+		int t;
+
+		/* The main map is only consulted when it is loaded. */
+		if (ms->ms_map->sm_loaded) {
+			checkmap(ms->ms_map, off, size);
+		}
+
+		for (t = 0; t < TXG_SIZE; t++) {
+			checkmap(ms->ms_freemap[t], off, size);
+		}
+		for (t = 0; t < TXG_DEFER_SIZE; t++) {
+			checkmap(ms->ms_defermap[t], off, size);
+		}
+	}
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+}
diff --git a/usr/src/uts/common/fs/zfs/refcount.c b/usr/src/uts/common/fs/zfs/refcount.c
index 3a8e144e90..df0f256849 100644
--- a/usr/src/uts/common/fs/zfs/refcount.c
+++ b/usr/src/uts/common/fs/zfs/refcount.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */
#else
int reference_tracking_enable = TRUE;
#endif
-int reference_history = 4; /* tunable */
+int reference_history = 3; /* tunable */
static kmem_cache_t *reference_cache;
static kmem_cache_t *reference_history_cache;
@@ -64,6 +65,14 @@ refcount_create(refcount_t *rc)
offsetof(reference_t, ref_link));
rc->rc_count = 0;
rc->rc_removed_count = 0;
+ rc->rc_tracked = reference_tracking_enable;
+}
+
+/*
+ * Like refcount_create(), but clears rc_tracked afterwards so that this
+ * refcount is never tracked, whatever reference_tracking_enable says.
+ */
+void
+refcount_create_untracked(refcount_t *rc)
+{
+ refcount_create(rc);
+ rc->rc_tracked = B_FALSE;
}
void
@@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc)
int
refcount_is_zero(refcount_t *rc)
{
- ASSERT(rc->rc_count >= 0);
return (rc->rc_count == 0);
}
int64_t
refcount_count(refcount_t *rc)
{
- ASSERT(rc->rc_count >= 0);
return (rc->rc_count);
}
@@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder)
reference_t *ref = NULL;
int64_t count;
- if (reference_tracking_enable) {
+ if (rc->rc_tracked) {
ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
ref->ref_holder = holder;
ref->ref_number = number;
}
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= 0);
- if (reference_tracking_enable)
+ if (rc->rc_tracked)
list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
@@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= number);
- if (!reference_tracking_enable) {
+ if (!rc->rc_tracked) {
rc->rc_count -= number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
KM_SLEEP);
list_insert_head(&rc->rc_removed, ref);
rc->rc_removed_count++;
- if (rc->rc_removed_count >= reference_history) {
+ if (rc->rc_removed_count > reference_history) {
ref = list_tail(&rc->rc_removed);
list_remove(&rc->rc_removed, ref);
kmem_cache_free(reference_history_cache,
diff --git a/usr/src/uts/common/fs/zfs/rrwlock.c b/usr/src/uts/common/fs/zfs/rrwlock.c
index 7f9290bd44..8e80166c7d 100644
--- a/usr/src/uts/common/fs/zfs/rrwlock.c
+++ b/usr/src/uts/common/fs/zfs/rrwlock.c
@@ -75,8 +75,9 @@
uint_t rrw_tsd_key;
typedef struct rrw_node {
- struct rrw_node *rn_next;
- rrwlock_t *rn_rrl;
+ struct rrw_node *rn_next;
+ rrwlock_t *rn_rrl;
+ void *rn_tag;
} rrw_node_t;
static rrw_node_t *
@@ -98,13 +99,14 @@ rrn_find(rrwlock_t *rrl)
* Add a node to the head of the singly linked list.
*/
static void
-rrn_add(rrwlock_t *rrl)
+rrn_add(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rn = kmem_alloc(sizeof (*rn), KM_SLEEP);
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
+ rn->rn_tag = tag;
VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
}
@@ -113,7 +115,7 @@ rrn_add(rrwlock_t *rrl)
* thread's list and return TRUE; otherwise return FALSE.
*/
static boolean_t
-rrn_find_and_remove(rrwlock_t *rrl)
+rrn_find_and_remove(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rrw_node_t *prev = NULL;
@@ -122,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
return (B_FALSE);
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
- if (rn->rn_rrl == rrl) {
+ if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
if (prev)
prev->rn_next = rn->rn_next;
else
@@ -136,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
}
void
-rrw_init(rrwlock_t *rrl)
+rrw_init(rrwlock_t *rrl, boolean_t track_all)
{
mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);
@@ -144,6 +146,7 @@ rrw_init(rrwlock_t *rrl)
refcount_create(&rrl->rr_anon_rcount);
refcount_create(&rrl->rr_linked_rcount);
rrl->rr_writer_wanted = B_FALSE;
+ rrl->rr_track_all = track_all;
}
void
@@ -156,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl)
refcount_destroy(&rrl->rr_linked_rcount);
}
-static void
+void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
#if !defined(DEBUG) && defined(_KERNEL)
- if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
+ if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
+ !rrl->rr_track_all) {
rrl->rr_anon_rcount.rc_count++;
mutex_exit(&rrl->rr_lock);
return;
@@ -171,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
ASSERT(rrl->rr_writer != curthread);
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
- while (rrl->rr_writer || (rrl->rr_writer_wanted &&
+ while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
refcount_is_zero(&rrl->rr_anon_rcount) &&
rrn_find(rrl) == NULL))
cv_wait(&rrl->rr_cv, &rrl->rr_lock);
- if (rrl->rr_writer_wanted) {
+ if (rrl->rr_writer_wanted || rrl->rr_track_all) {
/* may or may not be a re-entrant enter */
- rrn_add(rrl);
+ rrn_add(rrl, tag);
(void) refcount_add(&rrl->rr_linked_rcount, tag);
} else {
(void) refcount_add(&rrl->rr_anon_rcount, tag);
@@ -187,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
-static void
+void
rrw_enter_write(rrwlock_t *rrl)
{
mutex_enter(&rrl->rr_lock);
@@ -233,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag)
if (rrl->rr_writer == NULL) {
int64_t count;
- if (rrn_find_and_remove(rrl))
+ if (rrn_find_and_remove(rrl, tag)) {
count = refcount_remove(&rrl->rr_linked_rcount, tag);
- else
+ } else {
+ ASSERT(!rrl->rr_track_all);
count = refcount_remove(&rrl->rr_anon_rcount, tag);
+ }
if (count == 0)
cv_broadcast(&rrl->rr_cv);
} else {
@@ -249,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
+/*
+ * If the lock was created with track_all, rrw_held(RW_READER) will return
+ * B_TRUE iff the current thread has the lock for reader. Otherwise it may
+ * return B_TRUE if any thread has the lock for reader.
+ */
boolean_t
rrw_held(rrwlock_t *rrl, krw_t rw)
{
@@ -259,7 +270,7 @@ rrw_held(rrwlock_t *rrl, krw_t rw)
held = (rrl->rr_writer == curthread);
} else {
held = (!refcount_is_zero(&rrl->rr_anon_rcount) ||
- !refcount_is_zero(&rrl->rr_linked_rcount));
+ rrn_find(rrl) != NULL);
}
mutex_exit(&rrl->rr_lock);
diff --git a/usr/src/uts/common/fs/zfs/sa.c b/usr/src/uts/common/fs/zfs/sa.c
index cd3a58b5fa..05f329c647 100644
--- a/usr/src/uts/common/fs/zfs/sa.c
+++ b/usr/src/uts/common/fs/zfs/sa.c
@@ -1001,10 +1001,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
sa_attr_type_t *tb;
int error;
- mutex_enter(&os->os_lock);
+ mutex_enter(&os->os_user_ptr_lock);
if (os->os_sa) {
mutex_enter(&os->os_sa->sa_lock);
- mutex_exit(&os->os_lock);
+ mutex_exit(&os->os_user_ptr_lock);
tb = os->os_sa->sa_user_table;
mutex_exit(&os->os_sa->sa_lock);
*user_table = tb;
@@ -1017,7 +1017,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
os->os_sa = sa;
mutex_enter(&sa->sa_lock);
- mutex_exit(&os->os_lock);
+ mutex_exit(&os->os_user_ptr_lock);
avl_create(&sa->sa_layout_num_tree, layout_num_compare,
sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index fdc28700c8..544a0407c1 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -63,6 +63,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/zfeature.h>
+#include <sys/dsl_destroy.h>
#ifdef _KERNEL
#include <sys/bootprops.h>
@@ -129,10 +130,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
};
-static dsl_syncfunc_t spa_sync_version;
-static dsl_syncfunc_t spa_sync_props;
-static dsl_checkfunc_t spa_change_guid_check;
-static dsl_syncfunc_t spa_change_guid_sync;
+static void spa_sync_version(void *arg, dmu_tx_t *tx);
+static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@@ -325,10 +324,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
dsl_dataset_t *ds = NULL;
dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
+ dsl_pool_config_enter(dp, FTAG);
if (err = dsl_dataset_hold_obj(dp,
za.za_first_integer, FTAG, &ds)) {
- rw_exit(&dp->dp_config_rwlock);
+ dsl_pool_config_exit(dp, FTAG);
break;
}
@@ -337,7 +336,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
KM_SLEEP);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
- rw_exit(&dp->dp_config_rwlock);
+ dsl_pool_config_exit(dp, FTAG);
} else {
strval = NULL;
intval = za.za_first_integer;
@@ -491,9 +490,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
if (dmu_objset_type(os) != DMU_OST_ZFS) {
error = ENOTSUP;
- } else if ((error = dsl_prop_get_integer(strval,
+ } else if ((error =
+ dsl_prop_get_int_ds(dmu_objset_ds(os),
zfs_prop_to_name(ZFS_PROP_COMPRESSION),
- &compress, NULL)) == 0 &&
+ &compress)) == 0 &&
!BOOTFS_COMPRESS_VALID(compress)) {
error = ENOTSUP;
} else {
@@ -660,8 +660,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
* read object, the features for write object, or the
* feature descriptions object.
*/
- error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
- spa_sync_version, spa, &ver, 6);
+ error = dsl_sync_task(spa->spa_name, NULL,
+ spa_sync_version, &ver, 6);
if (error)
return (error);
continue;
@@ -672,8 +672,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
}
if (need_sync) {
- return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
- spa, nvp, 6));
+ return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
+ nvp, 6));
}
return (0);
@@ -695,10 +695,10 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
/*ARGSUSED*/
static int
-spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_change_guid_check(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- uint64_t *newguid = arg2;
+ uint64_t *newguid = arg;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
vdev_t *rvd = spa->spa_root_vdev;
uint64_t vdev_state;
@@ -715,10 +715,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
-spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_change_guid_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- uint64_t *newguid = arg2;
+ uint64_t *newguid = arg;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
uint64_t oldguid;
vdev_t *rvd = spa->spa_root_vdev;
@@ -752,8 +752,8 @@ spa_change_guid(spa_t *spa)
mutex_enter(&spa_namespace_lock);
guid = spa_generate_guid(NULL);
- error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
- spa_change_guid_sync, spa, &guid, 5);
+ error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
+ spa_change_guid_sync, &guid, 5);
if (error == 0) {
spa_config_sync(spa, B_FALSE, B_TRUE);
@@ -1687,21 +1687,22 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
/*
* Check for missing log devices
*/
-static int
+static boolean_t
spa_check_logs(spa_t *spa)
{
+ boolean_t rv = B_FALSE;
+
switch (spa->spa_log_state) {
case SPA_LOG_MISSING:
/* need to recheck in case slog has been restored */
case SPA_LOG_UNKNOWN:
- if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
- DS_FIND_CHILDREN)) {
+ rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
+ NULL, DS_FIND_CHILDREN) != 0);
+ if (rv)
spa_set_log_state(spa, SPA_LOG_MISSING);
- return (1);
- }
break;
}
- return (0);
+ return (rv);
}
static boolean_t
@@ -1747,11 +1748,11 @@ spa_activate_log(spa_t *spa)
int
spa_offline_log(spa_t *spa)
{
- int error = 0;
-
- if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
- NULL, DS_FIND_CHILDREN)) == 0) {
+ int error;
+ error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+ NULL, DS_FIND_CHILDREN);
+ if (error == 0) {
/*
* We successfully offlined the log device, sync out the
* current txg so that the "stubby" block can be removed
@@ -3549,7 +3550,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
- spa_sync_props(spa, props, tx);
+ spa_sync_props(props, tx);
}
dmu_tx_commit(tx);
@@ -5813,10 +5814,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
}
static void
-spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_sync_version(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- uint64_t version = *(uint64_t *)arg2;
+ uint64_t *versionp = arg;
+ uint64_t version = *versionp;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
/*
* Setting the version is special cased when first creating the pool.
@@ -5835,11 +5837,11 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
* Set zpool properties.
*/
static void
-spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_sync_props(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
+ nvlist_t *nvp = arg;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
- nvlist_t *nvp = arg2;
nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
@@ -5990,6 +5992,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
ASSERT(spa->spa_sync_pass == 1);
+ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
+
if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
dsl_pool_create_origin(dp, tx);
@@ -6015,6 +6019,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
spa_feature_create_zap_objects(spa, tx);
}
+ rrw_exit(&dp->dp_config_rwlock, FTAG);
}
/*
diff --git a/usr/src/uts/common/fs/zfs/spa_history.c b/usr/src/uts/common/fs/zfs/spa_history.c
index 9ae28739f3..eef642aa13 100644
--- a/usr/src/uts/common/fs/zfs/spa_history.c
+++ b/usr/src/uts/common/fs/zfs/spa_history.c
@@ -195,10 +195,10 @@ spa_history_zone(void)
*/
/*ARGSUSED*/
static void
-spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- nvlist_t *nvl = arg2;
+ nvlist_t *nvl = arg;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
dmu_buf_t *dbp;
spa_history_phys_t *shpp;
@@ -220,7 +220,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* Get the offset of where we need to write via the bonus buffer.
* Update the offset when the write completes.
*/
- VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
+ VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
shpp = dbp->db_data;
dmu_buf_will_dirty(dbp, tx);
@@ -321,8 +321,8 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
/* Kick this off asynchronously; errors are ignored. */
- dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
- spa_history_log_sync, spa, nvarg, 0, tx);
+ dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
+ nvarg, 0, tx);
dmu_tx_commit(tx);
/* spa_history_log_sync will free nvl */
@@ -455,10 +455,10 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
if (dmu_tx_is_syncing(tx)) {
- spa_history_log_sync(spa, nvl, tx);
+ spa_history_log_sync(nvl, tx);
} else {
- dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
- spa_history_log_sync, spa, nvl, 0, tx);
+ dsl_sync_task_nowait(spa_get_dsl(spa),
+ spa_history_log_sync, nvl, 0, tx);
}
/* spa_history_log_sync() will free nvl */
}
@@ -530,15 +530,9 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
void
spa_history_log_version(spa_t *spa, const char *operation)
{
-#ifdef _KERNEL
- uint64_t current_vers = spa_version(spa);
-
spa_history_log_internal(spa, operation, NULL,
"pool version %llu; software version %llu/%d; uts %s %s %s %s",
- (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
+ (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
- cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,
- (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
-#endif
}
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 405d93c6ce..733d2609e5 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -237,8 +237,8 @@ kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
#ifdef ZFS_DEBUG
-/* Everything except dprintf is on by default in debug builds */
-int zfs_flags = ~ZFS_DEBUG_DPRINTF;
+/* Everything except dprintf and spa is on by default in debug builds */
+int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
#else
int zfs_flags = 0;
#endif
@@ -282,7 +282,7 @@ spa_config_lock_init(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
- refcount_create(&scl->scl_count);
+ refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
}
@@ -335,6 +335,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
{
int wlocks_held = 0;
+ ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
+
for (int i = 0; i < SCL_LOCKS; i++) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (scl->scl_writer == curthread)
@@ -413,27 +415,22 @@ spa_lookup(const char *name)
static spa_t search; /* spa_t is large; don't allocate on stack */
spa_t *spa;
avl_index_t where;
- char c;
char *cp;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ (void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
+
/*
* If it's a full dataset name, figure out the pool name and
* just use that.
*/
- cp = strpbrk(name, "/@");
- if (cp) {
- c = *cp;
+ cp = strpbrk(search.spa_name, "/@");
+ if (cp != NULL)
*cp = '\0';
- }
- (void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
spa = avl_find(&spa_namespace_avl, &search, &where);
- if (cp)
- *cp = c;
-
return (spa);
}
@@ -567,6 +564,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
kstat_install(spa->spa_iokstat);
}
+ spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
+
return (spa);
}
diff --git a/usr/src/uts/common/fs/zfs/space_map.c b/usr/src/uts/common/fs/zfs/space_map.c
index 30a35c85da..fb30b34470 100644
--- a/usr/src/uts/common/fs/zfs/space_map.c
+++ b/usr/src/uts/common/fs/zfs/space_map.c
@@ -102,7 +102,7 @@ void
space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
- space_seg_t ssearch, *ss_before, *ss_after, *ss;
+ space_seg_t *ss_before, *ss_after, *ss;
uint64_t end = start + size;
int merge_before, merge_after;
@@ -115,11 +115,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
- ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
-
- if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
+ ss = space_map_find(sm, start, size, &where);
+ if (ss != NULL) {
zfs_panic_recover("zfs: allocating allocated segment"
"(offset=%llu size=%llu)\n",
(longlong_t)start, (longlong_t)size);
@@ -171,19 +168,12 @@ void
space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
- space_seg_t ssearch, *ss, *newseg;
+ space_seg_t *ss, *newseg;
uint64_t end = start + size;
int left_over, right_over;
- ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(!sm->sm_condensing);
- VERIFY(size != 0);
- VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
- VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
-
- ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
+ ss = space_map_find(sm, start, size, &where);
/* Make sure we completely overlap with someone */
if (ss == NULL) {
@@ -226,12 +216,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
sm->sm_space -= size;
}
-boolean_t
-space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
+space_seg_t *
+space_map_find(space_map_t *sm, uint64_t start, uint64_t size,
+ avl_index_t *wherep)
{
- avl_index_t where;
space_seg_t ssearch, *ss;
- uint64_t end = start + size;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(size != 0);
@@ -239,10 +228,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
- ssearch.ss_end = end;
- ss = avl_find(&sm->sm_root, &ssearch, &where);
+ ssearch.ss_end = start + size;
+ ss = avl_find(&sm->sm_root, &ssearch, wherep);
+
+ if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size)
+ return (ss);
+ return (NULL);
+}
+
+boolean_t
+space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ avl_index_t where;
- return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
+ return (space_map_find(sm, start, size, &where) != 0);
}
void
diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h
index 916d2abf14..0e86290f2d 100644
--- a/usr/src/uts/common/fs/zfs/sys/arc.h
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h
@@ -89,7 +89,7 @@ arc_buf_t *arc_loan_buf(spa_t *spa, int size);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
-int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
+boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h
index 8591f15851..a29f7b3ccf 100644
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h
@@ -311,20 +311,17 @@ void dbuf_fini(void);
boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
-#define DBUF_IS_METADATA(_db) \
- (dbuf_is_metadata(_db))
-
#define DBUF_GET_BUFC_TYPE(_db) \
- (DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
+ (dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
#define DBUF_IS_CACHEABLE(_db) \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
- (DBUF_IS_METADATA(_db) && \
+ (dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
#define DBUF_IS_L2CACHEABLE(_db) \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
- (DBUF_IS_METADATA(_db) && \
+ (dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index ef0a6a7c37..1366a998fd 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -217,6 +217,11 @@ typedef enum dmu_object_type {
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
+typedef enum txg_how {
+ TXG_WAIT = 1,
+ TXG_NOWAIT,
+} txg_how_t;
+
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
@@ -255,22 +260,19 @@ void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
-int dmu_objset_evict_dbufs(objset_t *os);
+void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
-int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
- uint64_t flags);
-int dmu_objset_destroy(const char *name, boolean_t defer);
-int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer,
+int dmu_objset_clone(const char *name, const char *origin);
+int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
-int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *);
int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
int dmu_objset_snapshot_tmp(const char *, const char *, int);
-int dmu_objset_rename(const char *name, const char *newname,
- boolean_t recursive);
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
+int dsl_dataset_rename_snapshot(const char *fsname,
+ const char *oldsnapname, const char *newsnapname, boolean_t recursive);
typedef struct dmu_buf {
uint64_t db_object; /* object that this buffer is part of */
@@ -545,7 +547,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
@@ -788,36 +790,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
-int dmu_send(objset_t *tosnap, objset_t *fromsnap,
- int outfd, struct vnode *vp, offset_t *off);
-int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep);
-
-typedef struct dmu_recv_cookie {
- /*
- * This structure is opaque!
- *
- * If logical and real are different, we are recving the stream
- * into the "real" temporary clone, and then switching it with
- * the "logical" target.
- */
- struct dsl_dataset *drc_logical_ds;
- struct dsl_dataset *drc_real_ds;
- struct drr_begin *drc_drrb;
- char *drc_tosnap;
- char *drc_top_ds;
- boolean_t drc_newfs;
- boolean_t drc_force;
- struct avl_tree *drc_guid_to_ds_map;
-} dmu_recv_cookie_t;
-
-int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
- boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
-int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
- int cleanup_fd, uint64_t *action_handlep);
-int dmu_recv_end(dmu_recv_cookie_t *drc);
-
-int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp,
- offset_t *off);
+int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
+ struct vnode *vp, offset_t *offp);
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
index 578b94732e..143e594d1d 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
@@ -43,6 +43,7 @@ extern "C" {
extern krwlock_t os_lock;
+struct dsl_pool;
struct dsl_dataset;
struct dmu_tx;
@@ -114,8 +115,6 @@ struct objset {
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
-
- /* SA layout/attribute registration */
sa_os_t *os_sa;
};
@@ -143,10 +142,11 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
-int dmu_objset_find_spa(spa_t *spa, const char *name,
- int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
+int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
+ int func(struct dsl_pool *, struct dsl_dataset *, void *),
+ void *arg, int flags);
int dmu_objset_prefetch(const char *name, void *arg);
-int dmu_objset_evict_dbufs(objset_t *os);
+void dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
@@ -162,6 +162,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
+int dmu_fsname(const char *snapname, char *buf);
void dmu_objset_init(void);
void dmu_objset_fini(void);
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_send.h b/usr/src/uts/common/fs/zfs/sys/dmu_send.h
new file mode 100644
index 0000000000..ee0885a60f
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_send.h
@@ -0,0 +1,66 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _DMU_SEND_H
+#define _DMU_SEND_H
+
+#include <sys/inttypes.h>
+#include <sys/spa.h>
+
+struct vnode;
+struct dsl_dataset;
+struct drr_begin;
+struct avl_tree;
+
+int dmu_send(const char *tosnap, const char *fromsnap, int outfd,
+ struct vnode *vp, offset_t *off);
+int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
+ uint64_t *sizep);
+int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
+ int outfd, struct vnode *vp, offset_t *off);
+
+typedef struct dmu_recv_cookie {
+ struct dsl_dataset *drc_ds;
+ struct drr_begin *drc_drrb;
+ const char *drc_tofs;
+ const char *drc_tosnap;
+ boolean_t drc_newfs;
+ boolean_t drc_byteswap;
+ boolean_t drc_force;
+ struct avl_tree *drc_guid_to_ds_map;
+ zio_cksum_t drc_cksum;
+ uint64_t drc_newsnapobj;
+} dmu_recv_cookie_t;
+
+int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
+ boolean_t force, char *origin, dmu_recv_cookie_t *drc);
+int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
+ int cleanup_fd, uint64_t *action_handlep);
+int dmu_recv_end(dmu_recv_cookie_t *drc);
+
+#endif /* _DMU_SEND_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
index c5ea50fa8d..dbd2242540 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h
@@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
@@ -108,10 +111,11 @@ typedef struct dmu_tx_callback {
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
-int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
+struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index 272c3ecde2..6729f9f05a 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -35,6 +35,7 @@
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
+#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
@@ -48,10 +49,8 @@ struct dsl_pool;
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
/*
- * NB: nopromote can not yet be set, but we want support for it in this
- * on-disk version, so that we don't need to upgrade for it later. It
- * will be needed when we implement 'zfs split' (where the split off
- * clone should not be promoted).
+ * Note: nopromote cannot yet be set, but we want support for it in this
+ * on-disk version, so that we don't need to upgrade for it later.
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
@@ -76,6 +75,8 @@ struct dsl_pool;
*/
#define DS_FLAG_CI_DATASET (1ULL<<16)
+#define DS_CREATE_FLAG_NODIRTY (1ULL<<24)
+
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
@@ -125,9 +126,6 @@ typedef struct dsl_dataset {
dsl_deadlist_t ds_deadlist;
bplist_t ds_pending_deadlist;
- /* to protect against multiple concurrent incremental recv */
- kmutex_t ds_recvlock;
-
/* protected by lock on pool's dp_dirty_datasets list */
txg_node_t ds_dirty_link;
list_node_t ds_synced_link;
@@ -139,13 +137,15 @@ typedef struct dsl_dataset {
kmutex_t ds_lock;
objset_t *ds_objset;
uint64_t ds_userrefs;
+ void *ds_owner;
/*
- * ds_owner is protected by the ds_rwlock and the ds_lock
+ * Long holds prevent the ds from being destroyed; they allow the
+ * ds to remain held even after dropping the dp_config_rwlock.
+ * Owning counts as a long hold. See the comments above
+ * dsl_pool_hold() for details.
*/
- krwlock_t ds_rwlock;
- kcondvar_t ds_exclusive_cv;
- void *ds_owner;
+ refcount_t ds_longholds;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
@@ -163,76 +163,44 @@ typedef struct dsl_dataset {
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
-struct dsl_ds_destroyarg {
- dsl_dataset_t *ds; /* ds to destroy */
- dsl_dataset_t *rm_origin; /* also remove our origin? */
- boolean_t is_origin_rm; /* set if removing origin snap */
- boolean_t defer; /* destroy -d requested? */
- boolean_t releasing; /* destroying due to release? */
- boolean_t need_prep; /* do we need to retry due to EBUSY? */
-};
-
/*
* The max length of a temporary tag prefix is the number of hex digits
* required to express UINT64_MAX plus one for the hyphen.
*/
#define MAX_TAG_PREFIX_LEN 17
-struct dsl_ds_holdarg {
- dsl_sync_task_group_t *dstg;
- const char *htag;
- char *snapname;
- boolean_t recursive;
- boolean_t gotone;
- boolean_t temphold;
- char failed[MAXPATHLEN];
-};
-
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
-int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
-int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
- void *tag, dsl_dataset_t **);
-int dsl_dataset_own(const char *name, boolean_t inconsistentok,
+int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
+ dsl_dataset_t **dsp);
+int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
+ dsl_dataset_t **);
+void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+int dsl_dataset_own(struct dsl_pool *dp, const char *name,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
- boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
-void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+ void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
-void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
- void *tag);
-void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_name(dsl_dataset_t *ds, char *name);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
-int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
-dsl_checkfunc_t dsl_dataset_destroy_check;
-dsl_syncfunc_t dsl_dataset_destroy_sync;
-dsl_syncfunc_t dsl_dataset_user_hold_sync;
-int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
-void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
-int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
+int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
-int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
- boolean_t recursive, boolean_t temphold, int cleanup_fd);
-int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
- boolean_t temphold);
-int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
- boolean_t recursive);
-int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
- char *htag, boolean_t retry);
-int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
+int dsl_dataset_rename_snapshot(const char *fsname,
+ const char *oldsnapname, const char *newsnapname, boolean_t recursive);
+int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
+ minor_t cleanup_minor, const char *htag);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@@ -269,13 +237,35 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
-int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
+int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
uint64_t quota);
-dsl_syncfunc_t dsl_dataset_set_quota_sync;
-int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
+int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
-int dsl_destroy_inconsistent(const char *dsname, void *arg);
+boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier);
+void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
+boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
+
+int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
+ dsl_dataset_t *origin_head, boolean_t force);
+void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
+ dsl_dataset_t *origin_head, dmu_tx_t *tx);
+int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
+ dmu_tx_t *tx);
+void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
+ dmu_tx_t *tx);
+
+void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
+ dmu_tx_t *tx);
+void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
+int dsl_dataset_get_snapname(dsl_dataset_t *ds);
+int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
+ uint64_t *value);
+int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
+void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
+ zprop_source_t source, uint64_t value, dmu_tx_t *tx);
+int dsl_dataset_rollback(const char *fsname);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_destroy.h b/usr/src/uts/common/fs/zfs/sys/dsl_destroy.h
new file mode 100644
index 0000000000..c5a70bb90e
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_destroy.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_DSL_DESTROY_H /* include guard */
+#define _SYS_DSL_DESTROY_H
+
+#ifdef __cplusplus
+extern "C" { /* give the declarations C linkage for C++ includers */
+#endif
+
+struct nvlist; /* forward declarations: used only through pointers below */
+struct dsl_dataset;
+struct dmu_tx;
+
+int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, /* NOTE(review): boolean_t is used but this header includes nothing; it relies on the includer */
+ struct nvlist *errlist);
+int dsl_destroy_snapshot(const char *name, boolean_t defer);
+int dsl_destroy_head(const char *name);
+int dsl_destroy_head_check_impl(struct dsl_dataset *ds, int expected_holds); /* NOTE(review): presumably the check half paired with _sync_impl below; confirm */
+void dsl_destroy_head_sync_impl(struct dsl_dataset *ds, struct dmu_tx *tx);
+int dsl_destroy_inconsistent(const char *dsname, void *arg);
+void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *ds,
+ boolean_t defer, struct dmu_tx *tx);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _SYS_DSL_DESTROY_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
index 2191635dd8..641bcfcdd3 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@@ -101,18 +102,15 @@ struct dsl_dir {
char dd_myname[MAXNAMELEN];
};
-void dsl_dir_close(dsl_dir_t *dd, void *tag);
-int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
-int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
- const char **tailp);
-int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
+void dsl_dir_rele(dsl_dir_t *dd, void *tag);
+int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
+ dsl_dir_t **, const char **tail);
+int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
const char *name, dmu_tx_t *tx);
-dsl_checkfunc_t dsl_dir_destroy_check;
-dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
@@ -131,14 +129,15 @@ int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
-int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
+int dsl_dir_rename(const char *oldname, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
-int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
+void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
+ dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
index ab1229a2e6..b0160edfb1 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
@@ -36,6 +36,7 @@
#include <sys/arc.h>
#include <sys/bpobj.h>
#include <sys/bptree.h>
+#include <sys/rrwlock.h>
#ifdef __cplusplus
extern "C" {
@@ -113,7 +114,7 @@ typedef struct dsl_pool {
* syncing context does not need to ever have it for read, since
* nobody else could possibly have it for write.
*/
- krwlock_t dp_config_rwlock;
+ rrwlock_t dp_config_rwlock;
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
@@ -139,15 +140,20 @@ void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
int64_t used, int64_t comp, int64_t uncomp);
+void dsl_pool_config_enter(dsl_pool_t *dp, void *tag);
+void dsl_pool_config_exit(dsl_pool_t *dp, void *tag);
+boolean_t dsl_pool_config_held(dsl_pool_t *dp);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
-extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
- const char *tag, uint64_t *now, dmu_tx_t *tx);
-extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
+int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
+ const char *tag, uint64_t now, dmu_tx_t *tx);
+int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, dmu_tx_t *tx);
-extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
+void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
+int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp);
+void dsl_pool_rele(dsl_pool_t *dp, void *tag);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
index b0d9a52cdf..5fe18d6a7c 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
@@ -54,58 +54,47 @@ typedef struct dsl_props_arg {
zprop_source_t pa_source;
} dsl_props_arg_t;
-typedef struct dsl_prop_set_arg {
- const char *psa_name;
- zprop_source_t psa_source;
- int psa_intsz;
- int psa_numints;
- const void *psa_value;
-
- /*
- * Used to handle the special requirements of the quota and reservation
- * properties.
- */
- uint64_t psa_effective_value;
-} dsl_prop_setarg_t;
-
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
-int dsl_prop_numcb(struct dsl_dataset *ds);
+void dsl_prop_notify_all(struct dsl_dir *dd);
+boolean_t dsl_prop_hascb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
-int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
+int dsl_prop_get_received(const char *dsname, nvlist_t **nvp);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
+int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname,
+ uint64_t *valuep);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint,
boolean_t snapshot);
-dsl_syncfunc_t dsl_props_set_sync;
-int dsl_prop_set(const char *ddname, const char *propname,
- zprop_source_t source, int intsz, int numints, const void *buf);
+void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source,
+ nvlist_t *props, dmu_tx_t *tx);
+void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname,
+ zprop_source_t source, int intsz, int numints, const void *value,
+ dmu_tx_t *tx);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
+int dsl_prop_set_int(const char *dsname, const char *propname,
+ zprop_source_t source, uint64_t value);
+int dsl_prop_set_string(const char *dsname, const char *propname,
+ zprop_source_t source, const char *value);
+int dsl_prop_inherit(const char *dsname, const char *propname,
+ zprop_source_t source);
-void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
- zprop_source_t source, uint64_t *value);
-int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
-#ifdef ZFS_DEBUG
-void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
-#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
- dsl_prop_check_prediction((dd), (psa))
-#else
-#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
-#endif
+int dsl_prop_predict(dsl_dir_t *dd, const char *propname,
+ zprop_source_t source, uint64_t value, uint64_t *newvalp);
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
-boolean_t dsl_prop_get_hasrecvd(objset_t *os);
-void dsl_prop_set_hasrecvd(objset_t *os);
-void dsl_prop_unset_hasrecvd(objset_t *os);
+boolean_t dsl_prop_get_hasrecvd(const char *dsname);
+int dsl_prop_set_hasrecvd(const char *dsname);
+void dsl_prop_unset_hasrecvd(const char *dsname);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h b/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h
index 9126290cdb..ef86fb64cf 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SYNCTASK_H
@@ -34,43 +35,26 @@ extern "C" {
struct dsl_pool;
-typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
-typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
+typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *);
+typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *);
typedef struct dsl_sync_task {
- list_node_t dst_node;
+ txg_node_t dst_node;
+ struct dsl_pool *dst_pool;
+ uint64_t dst_txg;
+ int dst_space;
dsl_checkfunc_t *dst_checkfunc;
dsl_syncfunc_t *dst_syncfunc;
- void *dst_arg1;
- void *dst_arg2;
- int dst_err;
+ void *dst_arg;
+ int dst_error;
+ boolean_t dst_nowaiter;
} dsl_sync_task_t;
-typedef struct dsl_sync_task_group {
- txg_node_t dstg_node;
- list_t dstg_tasks;
- struct dsl_pool *dstg_pool;
- uint64_t dstg_txg;
- int dstg_err;
- int dstg_space;
- boolean_t dstg_nowaiter;
-} dsl_sync_task_group_t;
-
-dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
-void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
- dsl_checkfunc_t *, dsl_syncfunc_t *,
- void *arg1, void *arg2, int blocks_modified);
-int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
-void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
-void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
-
-int dsl_sync_task_do(struct dsl_pool *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified);
-void dsl_sync_task_do_nowait(struct dsl_pool *dp,
- dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
- void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
+void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx);
+int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
+ dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified);
+void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc,
+ void *arg, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_userhold.h b/usr/src/uts/common/fs/zfs/sys/dsl_userhold.h
new file mode 100644
index 0000000000..56c6c8f47a
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_userhold.h
@@ -0,0 +1,57 @@
+
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_DSL_USERHOLD_H /* include guard */
+#define _SYS_DSL_USERHOLD_H
+
+#include <sys/nvpair.h> /* nvlist_t */
+#include <sys/types.h> /* presumably minor_t, boolean_t, uint64_t; confirm */
+
+#ifdef __cplusplus
+extern "C" { /* give the declarations C linkage for C++ includers */
+#endif
+
+struct dsl_pool; /* forward declarations: used only through pointers below */
+struct dsl_dataset;
+struct dmu_tx;
+
+int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor,
+ nvlist_t *errlist);
+int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist);
+int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl);
+void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
+ const char *htag);
+int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag, /* NOTE(review): _check_one/_sync_one look like per-dataset check and sync steps; confirm */
+ boolean_t temphold, struct dmu_tx *tx);
+void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag,
+ minor_t minor, uint64_t now, struct dmu_tx *tx);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _SYS_DSL_USERHOLD_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab.h b/usr/src/uts/common/fs/zfs/sys/metaslab.h
index 2cf4d2b489..d6c0bf4c94 100644
--- a/usr/src/uts/common/fs/zfs/sys/metaslab.h
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -56,6 +56,7 @@ extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
+extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp);
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
space_map_ops_t *ops);
diff --git a/usr/src/uts/common/fs/zfs/sys/refcount.h b/usr/src/uts/common/fs/zfs/sys/refcount.h
index 1dcd467f91..9efc5f1e44 100644
--- a/usr/src/uts/common/fs/zfs/sys/refcount.h
+++ b/usr/src/uts/common/fs/zfs/sys/refcount.h
@@ -51,15 +51,17 @@ typedef struct reference {
typedef struct refcount {
kmutex_t rc_mtx;
+ boolean_t rc_tracked;
list_t rc_list;
list_t rc_removed;
uint64_t rc_count;
uint64_t rc_removed_count;
} refcount_t;
-/* Note: refcount_t must be initialized with refcount_create() */
+/* Note: refcount_t must be initialized with refcount_create[_untracked]() */
void refcount_create(refcount_t *rc);
+void refcount_create_untracked(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
@@ -80,6 +82,7 @@ typedef struct refcount {
} refcount_t;
#define refcount_create(rc) ((rc)->rc_count = 0)
+#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
diff --git a/usr/src/uts/common/fs/zfs/sys/rrwlock.h b/usr/src/uts/common/fs/zfs/sys/rrwlock.h
index 239268bd58..e1e6d31223 100644
--- a/usr/src/uts/common/fs/zfs/sys/rrwlock.h
+++ b/usr/src/uts/common/fs/zfs/sys/rrwlock.h
@@ -58,6 +58,7 @@ typedef struct rrwlock {
refcount_t rr_anon_rcount;
refcount_t rr_linked_rcount;
boolean_t rr_writer_wanted;
+ boolean_t rr_track_all;
} rrwlock_t;
/*
@@ -65,15 +66,19 @@ typedef struct rrwlock {
* 'tag' must be the same in a rrw_enter() as in its
* corresponding rrw_exit().
*/
-void rrw_init(rrwlock_t *rrl);
+void rrw_init(rrwlock_t *rrl, boolean_t track_all);
void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
+void rrw_enter_read(rrwlock_t *rrl, void *tag);
+void rrw_enter_write(rrwlock_t *rrl);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
void rrw_tsd_destroy(void *arg);
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
+#define RRW_LOCK_HELD(x) \
+ (rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/space_map.h b/usr/src/uts/common/fs/zfs/sys/space_map.h
index 2da50fb7b3..64223daf62 100644
--- a/usr/src/uts/common/fs/zfs/sys/space_map.h
+++ b/usr/src/uts/common/fs/zfs/sys/space_map.h
@@ -149,6 +149,8 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern boolean_t space_map_contains(space_map_t *sm,
uint64_t start, uint64_t size);
+extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start,
+ uint64_t size, avl_index_t *wherep);
extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
diff --git a/usr/src/uts/common/fs/zfs/sys/txg.h b/usr/src/uts/common/fs/zfs/sys/txg.h
index 1287f09c7e..2df33f0fb0 100644
--- a/usr/src/uts/common/fs/zfs/sys/txg.h
+++ b/usr/src/uts/common/fs/zfs/sys/txg.h
@@ -45,9 +45,6 @@ extern "C" {
/* Number of txgs worth of frees we defer adding to in-core spacemaps */
#define TXG_DEFER_SIZE 2
-#define TXG_WAIT 1ULL
-#define TXG_NOWAIT 2ULL
-
typedef struct tx_cpu tx_cpu_t;
typedef struct txg_handle {
@@ -119,11 +116,11 @@ extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
-extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
-extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
-extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
+extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
diff --git a/usr/src/uts/common/fs/zfs/sys/zfeature.h b/usr/src/uts/common/fs/zfs/sys/zfeature.h
index 481e85b1ba..1a081e422d 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfeature.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfeature.h
@@ -26,7 +26,6 @@
#ifndef _SYS_ZFEATURE_H
#define _SYS_ZFEATURE_H
-#include <sys/dmu.h>
#include <sys/nvpair.h>
#include "zfeature_common.h"
@@ -34,14 +33,18 @@
extern "C" {
#endif
-extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
+struct spa;
+struct dmu_tx;
+struct objset;
+
+extern boolean_t feature_is_supported(struct objset *os, uint64_t obj,
uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat);
-struct spa;
-extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
-extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
-extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *);
+extern void spa_feature_enable(struct spa *, zfeature_info_t *,
+ struct dmu_tx *);
+extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *);
+extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *);
extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h
index 94626229ad..14eb2abdc1 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h
@@ -48,11 +48,13 @@ extern "C" {
extern int zfs_flags;
-#define ZFS_DEBUG_DPRINTF 0x0001
-#define ZFS_DEBUG_DBUF_VERIFY 0x0002
-#define ZFS_DEBUG_DNODE_VERIFY 0x0004
-#define ZFS_DEBUG_SNAPNAMES 0x0008
-#define ZFS_DEBUG_MODIFY 0x0010
+#define ZFS_DEBUG_DPRINTF (1<<0)
+#define ZFS_DEBUG_DBUF_VERIFY (1<<1)
+#define ZFS_DEBUG_DNODE_VERIFY (1<<2)
+#define ZFS_DEBUG_SNAPNAMES (1<<3)
+#define ZFS_DEBUG_MODIFY (1<<4)
+#define ZFS_DEBUG_SPA (1<<5)
+#define ZFS_DEBUG_ZIO_FREE (1<<6)
#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 86e901be0d..874d422568 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -293,7 +293,6 @@ typedef struct zfs_cmd {
uint64_t zc_history; /* really (char *) */
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
- char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
@@ -345,7 +344,8 @@ extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
-extern int zfs_unmount_snap(const char *, void *);
+extern void zfs_unmount_snap(const char *);
+extern void zfs_destroy_unmount_origin(const char *);
/*
* ZFS minor numbers can refer to either a control device instance or
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
index 3e9621a0ee..cf0bbee2ca 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
@@ -240,7 +241,7 @@ typedef struct znode {
*/
#define ZFS_ENTER(zfsvfs) \
{ \
- rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
+ rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \
diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h
index e52c65bb76..a212e4f0e1 100644
--- a/usr/src/uts/common/fs/zfs/sys/zil.h
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h
@@ -411,8 +411,8 @@ extern int zil_check_log_chain(const char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog, uint64_t synced_txg);
-extern int zil_suspend(zilog_t *zilog);
-extern void zil_resume(zilog_t *zilog);
+extern int zil_suspend(const char *osname, void **cookiep);
+extern void zil_resume(void *cookie);
extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp);
extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);
diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c
index 47603872af..58690e325f 100644
--- a/usr/src/uts/common/fs/zfs/txg.c
+++ b/usr/src/uts/common/fs/zfs/txg.c
@@ -576,6 +576,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
+ ASSERT(!dsl_pool_config_held(dp));
+
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@@ -599,6 +601,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
+ ASSERT(!dsl_pool_config_held(dp));
+
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@@ -664,42 +668,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg)
}
/*
- * Add an entry to the list.
- * Returns 0 if it's a new entry, 1 if it's already there.
+ * Add an entry to the head of the list (unless it's already on the list).
+ * Returns B_TRUE if it was actually added; otherwise the list is unchanged.
*/
-int
+boolean_t
txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
- int already_on_list;
+ boolean_t add; /* set when p is not yet a member of slot t */
mutex_enter(&tl->tl_lock);
- already_on_list = tn->tn_member[t];
- if (!already_on_list) {
+ add = (tn->tn_member[t] == 0); /* membership flag tested under tl_lock */
+ if (add) {
tn->tn_member[t] = 1;
tn->tn_next[t] = tl->tl_head[t];
tl->tl_head[t] = tn;
}
mutex_exit(&tl->tl_lock);
- return (already_on_list);
+ return (add); /* true only if this call linked the node in */
}
/*
- * Add an entry to the end of the list (walks list to find end).
- * Returns 0 if it's a new entry, 1 if it's already there.
+ * Add an entry to the end of the list, unless it's already on the list.
+ * (walks list to find end)
+ * Returns B_TRUE if it was actually added.
*/
-int
+boolean_t
txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
- int already_on_list;
+ boolean_t add;
mutex_enter(&tl->tl_lock);
- already_on_list = tn->tn_member[t];
- if (!already_on_list) {
+ add = (tn->tn_member[t] == 0);
+ if (add) {
txg_node_t **tp;
for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
@@ -711,7 +716,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
}
mutex_exit(&tl->tl_lock);
- return (already_on_list);
+ return (add);
}
/*
@@ -762,13 +767,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
return (NULL);
}
-int
+boolean_t
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
- return (tn->tn_member[t]);
+ return (tn->tn_member[t] != 0); /* collapse the per-slot flag to 0 or 1; read without tl_lock */
}
/*
diff --git a/usr/src/uts/common/fs/zfs/zfs_ctldir.c b/usr/src/uts/common/fs/zfs/zfs_ctldir.c
index d902ff637c..ef9a5611a2 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c
@@ -72,6 +72,7 @@
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
+#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
@@ -615,7 +616,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
zfsvfs_t *zfsvfs;
avl_index_t where;
char from[MAXNAMELEN], to[MAXNAMELEN];
- char real[MAXNAMELEN];
+ char real[MAXNAMELEN], fsname[MAXNAMELEN];
int err;
zfsvfs = sdvp->v_vfsp->vfs_data;
@@ -634,12 +635,14 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
ZFS_EXIT(zfsvfs);
+ dmu_objset_name(zfsvfs->z_os, fsname);
+
err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
- if (!err)
+ if (err == 0)
err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
- if (!err)
+ if (err == 0)
err = zfs_secpolicy_rename_perms(from, to, cr);
- if (err)
+ if (err != 0)
return (err);
/*
@@ -659,7 +662,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
return (ENOENT);
}
- err = dmu_objset_rename(from, to, B_FALSE);
+ err = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
if (err == 0)
zfsctl_rename_snap(sdp, sep, tnm);
@@ -699,9 +702,9 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
ZFS_EXIT(zfsvfs);
err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
- if (!err)
+ if (err == 0)
err = zfs_secpolicy_destroy_perms(snapname, cr);
- if (err)
+ if (err != 0)
return (err);
mutex_enter(&sdp->sd_lock);
@@ -711,10 +714,10 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
if (sep) {
avl_remove(&sdp->sd_snaps, sep);
err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
- if (err)
+ if (err != 0)
avl_add(&sdp->sd_snaps, sep);
else
- err = dmu_objset_destroy(snapname, B_FALSE);
+ err = dsl_destroy_snapshot(snapname, B_FALSE);
} else {
err = ENOENT;
}
@@ -746,12 +749,12 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
*vpp = NULL;
err = zfs_secpolicy_snapshot_perms(name, cr);
- if (err)
+ if (err != 0)
return (err);
if (err == 0) {
err = dmu_objset_snapshot_one(name, dirname);
- if (err)
+ if (err != 0)
return (err);
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
}
@@ -831,7 +834,7 @@ zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
*vpp = sep->se_root;
VN_HOLD(*vpp);
err = traverse(vpp);
- if (err) {
+ if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
} else if (*vpp == sep->se_root) {
@@ -857,7 +860,7 @@ zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
* The requested snapshot is not currently mounted, look it up.
*/
err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
- if (err) {
+ if (err != 0) {
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
/*
@@ -930,7 +933,7 @@ domount:
* If we had an error, drop our hold on the vnode and
* zfsctl_snapshot_inactive() will clean up.
*/
- if (err) {
+ if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
}
@@ -982,8 +985,10 @@ zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
ZFS_ENTER(zfsvfs);
cookie = *offp;
+ dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
&cookie, &case_conflict);
+ dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
if (error) {
ZFS_EXIT(zfsvfs);
if (error == ENOENT) {
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 38adc1940f..caad34c5a7 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -156,6 +156,7 @@
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_impl.h>
+#include <sys/dmu_tx.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
@@ -173,6 +174,9 @@
#include <sys/dsl_scan.h>
#include <sharefs/share.h>
#include <sys/dmu_objset.h>
+#include <sys/dmu_send.h>
+#include <sys/dsl_destroy.h>
+#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include "zfs_namecheck.h"
@@ -237,11 +241,7 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
-static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature);
-static int zfs_prop_activate_feature_check(void *arg1, void *arg2,
- dmu_tx_t *tx);
-static void zfs_prop_activate_feature_sync(void *arg1, void *arg2,
- dmu_tx_t *tx);
+static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
void
@@ -461,49 +461,48 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
{
uint64_t zoned;
- rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
- if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+ if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
return (ENOENT);
- }
- rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
return (zfs_dozonecheck_impl(dataset, zoned, cr));
}
static int
-zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
+zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
+ const char *perm, cred_t *cr)
{
int error;
- dsl_dataset_t *ds;
-
- error = dsl_dataset_hold(name, FTAG, &ds);
- if (error != 0)
- return (error);
error = zfs_dozonecheck_ds(name, ds, cr);
if (error == 0) {
error = secpolicy_zfs(cr);
- if (error)
+ if (error != 0)
error = dsl_deleg_access_impl(ds, perm, cr);
}
-
- dsl_dataset_rele(ds, FTAG);
return (error);
}
static int
-zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
- const char *perm, cred_t *cr)
+zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
{
int error;
+ dsl_dataset_t *ds;
+ dsl_pool_t *dp;
- error = zfs_dozonecheck_ds(name, ds, cr);
- if (error == 0) {
- error = secpolicy_zfs(cr);
- if (error)
- error = dsl_deleg_access_impl(ds, perm, cr);
+ error = dsl_pool_hold(name, FTAG, &dp);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dataset_hold(dp, name, FTAG, &ds);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
}
+
+ error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
+
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -525,7 +524,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
/* First get the existing dataset label. */
error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1, sizeof (ds_hexsl), &ds_hexsl, NULL);
- if (error)
+ if (error != 0)
return (EPERM);
if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
@@ -575,7 +574,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
*/
error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
setsl_tag, &os);
- if (error)
+ if (error != 0)
return (EPERM);
dmu_objset_disown(os, setsl_tag);
@@ -663,7 +662,7 @@ zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int error;
error = zfs_dozonecheck(zc->zc_name, cr);
- if (error)
+ if (error != 0)
return (error);
/*
@@ -685,7 +684,6 @@ zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- spa_t *spa;
dsl_pool_t *dp;
dsl_dataset_t *ds;
char *cp;
@@ -698,23 +696,22 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
cp = strchr(zc->zc_name, '@');
if (cp == NULL)
return (EINVAL);
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error)
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
return (error);
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- if (error)
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
return (error);
+ }
dsl_dataset_name(ds, zc->zc_name);
error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
ZFS_DELEG_PERM_SEND, cr);
dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -835,12 +832,21 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (EINVAL);
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nextpair) {
+ dsl_pool_t *dp;
dsl_dataset_t *ds;
+ error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
+ if (error != 0)
+ break;
nextpair = nvlist_next_nvpair(snaps, pair);
- error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds);
- if (error == 0) {
+ error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
+ if (error == 0)
dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ if (error == 0) {
+ error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
+ cr);
} else if (error == ENOENT) {
/*
* Ignore any snapshots that don't exist (we consider
@@ -852,11 +858,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
*/
fnvlist_remove_nvpair(snaps, pair);
error = 0;
- continue;
- } else {
- break;
}
- error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
if (error != 0)
break;
}
@@ -904,41 +906,47 @@ zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- char parentname[MAXNAMELEN];
- objset_t *clone;
+ dsl_pool_t *dp;
+ dsl_dataset_t *clone;
int error;
error = zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_PROMOTE, cr);
- if (error)
+ if (error != 0)
+ return (error);
+
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
return (error);
- error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
+ error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
if (error == 0) {
- dsl_dataset_t *pclone = NULL;
+ char parentname[MAXNAMELEN];
+ dsl_dataset_t *origin = NULL;
dsl_dir_t *dd;
- dd = clone->os_dsl_dataset->ds_dir;
+ dd = clone->ds_dir;
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
error = dsl_dataset_hold_obj(dd->dd_pool,
- dd->dd_phys->dd_origin_obj, FTAG, &pclone);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
- if (error) {
- dmu_objset_rele(clone, FTAG);
+ dd->dd_phys->dd_origin_obj, FTAG, &origin);
+ if (error != 0) {
+ dsl_dataset_rele(clone, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
- error = zfs_secpolicy_write_perms(zc->zc_name,
+ error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
ZFS_DELEG_PERM_MOUNT, cr);
- dsl_dataset_name(pclone, parentname);
- dmu_objset_rele(clone, FTAG);
- dsl_dataset_rele(pclone, FTAG);
- if (error == 0)
- error = zfs_secpolicy_write_perms(parentname,
+ dsl_dataset_name(origin, parentname);
+ if (error == 0) {
+ error = zfs_secpolicy_write_perms_ds(parentname, origin,
ZFS_DELEG_PERM_PROMOTE, cr);
+ }
+ dsl_dataset_rele(clone, FTAG);
+ dsl_dataset_rele(origin, FTAG);
}
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -1147,16 +1155,47 @@ zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- return (zfs_secpolicy_write_perms(zc->zc_name,
- ZFS_DELEG_PERM_HOLD, cr));
+ nvpair_t *pair;
+ nvlist_t *holds;
+ int error;
+
+ error = nvlist_lookup_nvlist(innvl, "holds", &holds);
+ if (error != 0)
+ return (EINVAL);
+
+ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(holds, pair)) {
+ char fsname[MAXNAMELEN];
+ error = dmu_fsname(nvpair_name(pair), fsname);
+ if (error != 0)
+ return (error);
+ error = zfs_secpolicy_write_perms(fsname,
+ ZFS_DELEG_PERM_HOLD, cr);
+ if (error != 0)
+ return (error);
+ }
+ return (0);
}
/* ARGSUSED */
static int
zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- return (zfs_secpolicy_write_perms(zc->zc_name,
- ZFS_DELEG_PERM_RELEASE, cr));
+ nvpair_t *pair;
+ int error;
+
+ for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(innvl, pair)) {
+ char fsname[MAXNAMELEN];
+ error = dmu_fsname(nvpair_name(pair), fsname);
+ if (error != 0)
+ return (error);
+ error = zfs_secpolicy_write_perms(fsname,
+ ZFS_DELEG_PERM_RELEASE, cr);
+ if (error != 0)
+ return (error);
+ }
+ return (0);
}
/*
@@ -1177,11 +1216,11 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (0);
error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
- if (!error)
+ if (error == 0)
error = zfs_secpolicy_hold(zc, innvl, cr);
- if (!error)
+ if (error == 0)
error = zfs_secpolicy_release(zc, innvl, cr);
- if (!error)
+ if (error == 0)
error = zfs_secpolicy_destroy(zc, innvl, cr);
return (error);
}
@@ -1291,7 +1330,7 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
int error;
error = dmu_objset_hold(dsname, FTAG, &os);
- if (error)
+ if (error != 0)
return (error);
if (dmu_objset_type(os) != DMU_OST_ZFS) {
dmu_objset_rele(os, FTAG);
@@ -1394,7 +1433,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
error = zfs_fill_zplprops_root(version, rootprops,
zplprops, NULL);
- if (error)
+ if (error != 0)
goto pool_props_bad;
}
@@ -1667,12 +1706,7 @@ zfs_ioc_pool_reguid(zfs_cmd_t *zc)
static int
zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
{
- int error;
-
- if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
- return (error);
-
- return (0);
+ return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
}
/*
@@ -1988,15 +2022,14 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
static int
zfs_ioc_objset_stats(zfs_cmd_t *zc)
{
- objset_t *os = NULL;
+ objset_t *os;
int error;
- if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
- return (error);
-
- error = zfs_ioc_objset_stats_impl(zc, os);
-
- dmu_objset_rele(os, FTAG);
+ error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+ if (error == 0) {
+ error = zfs_ioc_objset_stats_impl(zc, os);
+ dmu_objset_rele(os, FTAG);
+ }
return (error);
}
@@ -2017,30 +2050,23 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc)
static int
zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
{
- objset_t *os = NULL;
- int error;
+ int error = 0;
nvlist_t *nv;
- if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
- return (error);
-
/*
* Without this check, we would return local property values if the
* caller has not already received properties on or after
* SPA_VERSION_RECVD_PROPS.
*/
- if (!dsl_prop_get_hasrecvd(os)) {
- dmu_objset_rele(os, FTAG);
+ if (!dsl_prop_get_hasrecvd(zc->zc_name))
return (ENOTSUP);
- }
if (zc->zc_nvlist_dst != 0 &&
- (error = dsl_prop_get_received(os, &nv)) == 0) {
+ (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
error = put_nvlist(zc, nv);
nvlist_free(nv);
}
- dmu_objset_rele(os, FTAG);
return (error);
}
@@ -2155,20 +2181,6 @@ top:
(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
p = zc->zc_name + strlen(zc->zc_name);
- /*
- * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0
- * but is not declared void because its called by dmu_objset_find().
- */
- if (zc->zc_cookie == 0) {
- uint64_t cookie = 0;
- int len = sizeof (zc->zc_name) - (p - zc->zc_name);
-
- while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
- if (!dataset_name_hidden(zc->zc_name))
- (void) dmu_objset_prefetch(zc->zc_name, NULL);
- }
- }
-
do {
error = dmu_dir_list_next(os,
sizeof (zc->zc_name) - (p - zc->zc_name), p,
@@ -2211,14 +2223,10 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
objset_t *os;
int error;
-top:
- if (zc->zc_cookie == 0)
- (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
- NULL, DS_FIND_SNAPSHOTS);
-
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
- if (error)
+ if (error != 0) {
return (error == ENOENT ? ESRCH : error);
+ }
/*
* A dataset name of maximum length cannot have any snapshots,
@@ -2238,24 +2246,8 @@ top:
dsl_dataset_t *ds;
dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
- /*
- * Since we probably don't have a hold on this snapshot,
- * it's possible that the objsetid could have been destroyed
- * and reused for a new objset. It's OK if this happens during
- * a zfs send operation, since the new createtxg will be
- * beyond the range we're interested in.
- */
- rw_enter(&dp->dp_config_rwlock, RW_READER);
error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- if (error) {
- if (error == ENOENT) {
- /* Racing with destroy, get the next one. */
- *strchr(zc->zc_name, '@') = '\0';
- dmu_objset_rele(os, FTAG);
- goto top;
- }
- } else {
+ if (error == 0) {
objset_t *ossnap;
error = dmu_objset_from_ds(ds, &ossnap);
@@ -2269,7 +2261,7 @@ top:
dmu_objset_rele(os, FTAG);
/* if we failed, undo the @ that we tacked on to zc_name */
- if (error)
+ if (error != 0)
*strchr(zc->zc_name, '@') = '\0';
return (error);
}
@@ -2359,13 +2351,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
err = dsl_dir_set_quota(dsname, source, intval);
break;
case ZFS_PROP_REFQUOTA:
- err = dsl_dataset_set_quota(dsname, source, intval);
+ err = dsl_dataset_set_refquota(dsname, source, intval);
break;
case ZFS_PROP_RESERVATION:
err = dsl_dir_set_reservation(dsname, source, intval);
break;
case ZFS_PROP_REFRESERVATION:
- err = dsl_dataset_set_reservation(dsname, source, intval);
+ err = dsl_dataset_set_refreservation(dsname, source, intval);
break;
case ZFS_PROP_VOLSIZE:
err = zvol_set_volsize(dsname, intval);
@@ -2396,19 +2388,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
zfeature_info_t *feature =
&spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
spa_t *spa;
- dsl_pool_t *dp;
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
return (err);
- dp = spa->spa_dsl_pool;
-
/*
* Setting the LZ4 compression algorithm activates
* the feature.
*/
if (!spa_feature_is_active(spa, feature)) {
- if ((err = zfs_prop_activate_feature(dp,
+ if ((err = zfs_prop_activate_feature(spa,
feature)) != 0) {
spa_close(spa, FTAG);
return (err);
@@ -2567,12 +2556,12 @@ retry:
if (nvpair_type(propval) == DATA_TYPE_STRING) {
strval = fnvpair_value_string(propval);
- err = dsl_prop_set(dsname, propname, source, 1,
- strlen(strval) + 1, strval);
+ err = dsl_prop_set_string(dsname, propname,
+ source, strval);
} else {
intval = fnvpair_value_uint64(propval);
- err = dsl_prop_set(dsname, propname, source, 8,
- 1, &intval);
+ err = dsl_prop_set_int(dsname, propname, source,
+ intval);
}
if (err != 0) {
@@ -2638,7 +2627,7 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
}
static int
-clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
+clear_received_props(const char *dsname, nvlist_t *props,
nvlist_t *skipped)
{
int err = 0;
@@ -2650,8 +2639,8 @@ clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
* properties at least once on or after SPA_VERSION_RECVD_PROPS.
*/
zprop_source_t flags = (ZPROP_SRC_NONE |
- (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
- err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
+ (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
+ err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
}
nvlist_free(cleared_props);
return (err);
@@ -2683,22 +2672,19 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)
if (received) {
nvlist_t *origprops;
- objset_t *os;
-
- if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
- if (dsl_prop_get_received(os, &origprops) == 0) {
- (void) clear_received_props(os,
- zc->zc_name, origprops, nvl);
- nvlist_free(origprops);
- }
- dsl_prop_set_hasrecvd(os);
- dmu_objset_rele(os, FTAG);
+ if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
+ (void) clear_received_props(zc->zc_name,
+ origprops, nvl);
+ nvlist_free(origprops);
}
+
+ error = dsl_prop_set_hasrecvd(zc->zc_name);
}
errors = fnvlist_alloc();
- error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
+ if (error == 0)
+ error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
if (zc->zc_nvlist_dst != NULL && errors != NULL) {
(void) put_nvlist(zc, errors);
@@ -2781,7 +2767,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
}
/* property name has been validated by zfs_secpolicy_inherit_prop() */
- return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
+ return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
}
static int
@@ -2893,7 +2879,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc)
*/
error = secpolicy_zfs(CRED());
- if (error) {
+ if (error != 0) {
if (zc->zc_perm_action == B_FALSE) {
error = dsl_deleg_can_allow(zc->zc_name,
fsaclnv, CRED());
@@ -3221,7 +3207,7 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
nvprops, outnvl);
if (error != 0)
- (void) dmu_objset_destroy(fsname, B_FALSE);
+ (void) dsl_destroy_head(fsname);
}
return (error);
}
@@ -3240,7 +3226,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
int error = 0;
nvlist_t *nvprops = NULL;
char *origin_name;
- dsl_dataset_t *origin;
if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
return (EINVAL);
@@ -3252,14 +3237,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (dataset_namecheck(origin_name, NULL, NULL) != 0)
return (EINVAL);
-
- error = dsl_dataset_hold(origin_name, FTAG, &origin);
- if (error)
- return (error);
-
- error = dmu_objset_clone(fsname, origin, 0);
- dsl_dataset_rele(origin, FTAG);
- if (error)
+ error = dmu_objset_clone(fsname, origin_name);
+ if (error != 0)
return (error);
/*
@@ -3269,7 +3248,7 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
nvprops, outnvl);
if (error != 0)
- (void) dmu_objset_destroy(fsname, B_FALSE);
+ (void) dsl_destroy_head(fsname);
}
return (error);
}
@@ -3281,7 +3260,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
* }
*
* outnvl: snapshot -> error code (int32)
- *
*/
static int
zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
@@ -3331,7 +3309,7 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
}
}
- error = dmu_objset_snapshot(snaps, props, outnvl);
+ error = dsl_dataset_snapshot(snaps, props, outnvl);
return (error);
}
@@ -3376,30 +3354,73 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
-/* ARGSUSED */
-int
-zfs_unmount_snap(const char *name, void *arg)
+/*
+ * The dp_config_rwlock must not be held when calling this, because the
+ * unmount may need to write out data.
+ *
+ * This function is best-effort. Callers must deal gracefully if it
+ * remains mounted (or is remounted after this call).
+ */
+void
+zfs_unmount_snap(const char *snapname)
{
vfs_t *vfsp;
- int err;
+ zfsvfs_t *zfsvfs;
- if (strchr(name, '@') == NULL)
- return (0);
+ if (strchr(snapname, '@') == NULL)
+ return;
- vfsp = zfs_get_vfs(name);
+ vfsp = zfs_get_vfs(snapname);
if (vfsp == NULL)
- return (0);
+ return;
- if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
+ zfsvfs = vfsp->vfs_data;
+ ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
+
+ if (vn_vfswlock(vfsp->vfs_vnodecovered) != 0) {
VFS_RELE(vfsp);
- return (err);
+ return;
}
VFS_RELE(vfsp);
/*
* Always force the unmount for snapshots.
*/
- return (dounmount(vfsp, MS_FORCE, kcred));
+ (void) dounmount(vfsp, MS_FORCE, kcred);
+}
+
+/* ARGSUSED */
+static int
+zfs_unmount_snap_cb(const char *snapname, void *arg)
+{
+ zfs_unmount_snap(snapname);
+ return (0);
+}
+
+/*
+ * When a clone is destroyed, its origin may also need to be destroyed,
+ * in which case it must be unmounted. This routine will do that unmount
+ * if necessary.
+ */
+void
+zfs_destroy_unmount_origin(const char *fsname)
+{
+ int error;
+ objset_t *os;
+ dsl_dataset_t *ds;
+
+ error = dmu_objset_hold(fsname, FTAG, &os);
+ if (error != 0)
+ return;
+ ds = dmu_objset_ds(os);
+ if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
+ char originname[MAXNAMELEN];
+ dsl_dataset_name(ds->ds_prev, originname);
+ dmu_objset_rele(os, FTAG);
+ zfs_unmount_snap(originname);
+ } else {
+ dmu_objset_rele(os, FTAG);
+ }
}
/*
@@ -3435,14 +3456,10 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
(name[poollen] != '/' && name[poollen] != '@'))
return (EXDEV);
- /*
- * Ignore failures to unmount; dmu_snapshots_destroy_nvl()
- * will deal with this gracefully (by filling in outnvl).
- */
- (void) zfs_unmount_snap(name, NULL);
+ zfs_unmount_snap(name);
}
- return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl));
+ return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
}
/*
@@ -3457,13 +3474,13 @@ static int
zfs_ioc_destroy(zfs_cmd_t *zc)
{
int err;
- if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
- err = zfs_unmount_snap(zc->zc_name, NULL);
- if (err)
- return (err);
- }
+ if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS)
+ zfs_unmount_snap(zc->zc_name);
- err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
+ if (strchr(zc->zc_name, '@'))
+ err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
+ else
+ err = dsl_destroy_head(zc->zc_name);
if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
(void) zvol_remove_minor(zc->zc_name);
return (err);
@@ -3478,79 +3495,34 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
- dsl_dataset_t *ds, *clone;
- int error;
zfsvfs_t *zfsvfs;
- char *clone_name;
-
- error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
- if (error)
- return (error);
-
- /* must not be a snapshot */
- if (dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_rele(ds, FTAG);
- return (EINVAL);
- }
-
- /* must have a most recent snapshot */
- if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
- dsl_dataset_rele(ds, FTAG);
- return (EINVAL);
- }
-
- /*
- * Create clone of most recent snapshot.
- */
- clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
- error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
- if (error)
- goto out;
-
- error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
- if (error)
- goto out;
+ int error;
- /*
- * Do clone swap.
- */
if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
error = zfs_suspend_fs(zfsvfs);
if (error == 0) {
int resume_err;
- if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
- error = dsl_dataset_clone_swap(clone, ds,
- B_TRUE);
- dsl_dataset_disown(ds, FTAG);
- ds = NULL;
- } else {
- error = EBUSY;
- }
+ error = dsl_dataset_rollback(zc->zc_name);
resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
error = error ? error : resume_err;
}
VFS_RELE(zfsvfs->z_vfs);
} else {
- if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
- error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
- dsl_dataset_disown(ds, FTAG);
- ds = NULL;
- } else {
- error = EBUSY;
- }
+ error = dsl_dataset_rollback(zc->zc_name);
}
+ return (error);
+}
- /*
- * Destroy clone (which also closes it).
- */
- (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
+static int
+recursive_unmount(const char *fsname, void *arg)
+{
+ const char *snapname = arg;
+ char fullname[MAXNAMELEN];
-out:
- strfree(clone_name);
- if (ds)
- dsl_dataset_rele(ds, FTAG);
- return (error);
+ (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
+ zfs_unmount_snap(fullname);
+ return (0);
}
/*
@@ -3565,26 +3537,33 @@ static int
zfs_ioc_rename(zfs_cmd_t *zc)
{
boolean_t recursive = zc->zc_cookie & 1;
+ char *at;
zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
strchr(zc->zc_value, '%'))
return (EINVAL);
- /*
- * Unmount snapshot unless we're doing a recursive rename,
- * in which case the dataset code figures out which snapshots
- * to unmount.
- */
- if (!recursive && strchr(zc->zc_name, '@') != NULL &&
- zc->zc_objset_type == DMU_OST_ZFS) {
- int err = zfs_unmount_snap(zc->zc_name, NULL);
- if (err)
- return (err);
+ at = strchr(zc->zc_name, '@');
+ if (at != NULL) {
+ /* snaps must be in same fs */
+ if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
+ return (EXDEV);
+ *at = '\0';
+ if (zc->zc_objset_type == DMU_OST_ZFS) {
+ int error = dmu_objset_find(zc->zc_name,
+ recursive_unmount, at + 1,
+ recursive ? DS_FIND_CHILDREN : 0);
+ if (error != 0)
+ return (error);
+ }
+ return (dsl_dataset_rename_snapshot(zc->zc_name,
+ at + 1, strchr(zc->zc_value, '@') + 1, recursive));
+ } else {
+ if (zc->zc_objset_type == DMU_OST_ZVOL)
+ (void) zvol_remove_minor(zc->zc_name);
+ return (dsl_dir_rename(zc->zc_name, zc->zc_value));
}
- if (zc->zc_objset_type == DMU_OST_ZVOL)
- (void) zvol_remove_minor(zc->zc_name);
- return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
}
static int
@@ -3728,35 +3707,14 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
}
/*
- * Activates a feature on a pool in response to a property setting. This
- * creates a new sync task which modifies the pool to reflect the feature
- * as being active.
- */
-static int
-zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature)
-{
- int err;
-
- /* EBUSY here indicates that the feature is already active */
- err = dsl_sync_task_do(dp, zfs_prop_activate_feature_check,
- zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2);
-
- if (err != 0 && err != EBUSY)
- return (err);
- else
- return (0);
-}
-
-/*
* Checks for a race condition to make sure we don't increment a feature flag
* multiple times.
*/
-/*ARGSUSED*/
static int
-zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx)
+zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- zfeature_info_t *feature = arg2;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ zfeature_info_t *feature = arg;
if (!spa_feature_is_active(spa, feature))
return (0);
@@ -3769,15 +3727,36 @@ zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx)
* zfs_prop_activate_feature.
*/
static void
-zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
{
- spa_t *spa = arg1;
- zfeature_info_t *feature = arg2;
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ zfeature_info_t *feature = arg;
spa_feature_incr(spa, feature, tx);
}
/*
+ * Activates a feature on a pool in response to a property setting. This
+ * creates a new sync task which modifies the pool to reflect the feature
+ * as being active.
+ */
+static int
+zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
+{
+ int err;
+
+ /* EBUSY here indicates that the feature is already active */
+ err = dsl_sync_task(spa_name(spa),
+ zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
+ feature, 2);
+
+ if (err != 0 && err != EBUSY)
+ return (err);
+ else
+ return (0);
+}
+
+/*
* Removes properties from the given props list that fail permission checks
* needed to clear them and to restore them in case of a receive error. For each
* property, make sure we have both set and inherit permissions.
@@ -3931,7 +3910,6 @@ static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
file_t *fp;
- objset_t *os;
dmu_recv_cookie_t drc;
boolean_t force = (boolean_t)zc->zc_guid;
int fd;
@@ -3941,7 +3919,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
offset_t off;
nvlist_t *props = NULL; /* sent properties */
nvlist_t *origprops = NULL; /* existing properties */
- objset_t *origin = NULL;
+ char *origin = NULL;
char *tosnap;
char tofs[ZFS_MAXNAMELEN];
boolean_t first_recvd_props = B_FALSE;
@@ -3969,18 +3947,31 @@ zfs_ioc_recv(zfs_cmd_t *zc)
VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
- if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
- !dsl_prop_get_hasrecvd(os)) {
+ if (zc->zc_string[0])
+ origin = zc->zc_string;
+
+ error = dmu_recv_begin(tofs, tosnap,
+ &zc->zc_begin_record, force, origin, &drc);
+ if (error != 0)
+ goto out;
+
+ /*
+ * Set properties before we receive the stream so that they are applied
+ * to the new data. Note that we must call dmu_recv_stream() if
+ * dmu_recv_begin() succeeds.
+ */
+ if (props != NULL && !drc.drc_newfs) {
+ if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
+ SPA_VERSION_RECVD_PROPS &&
+ !dsl_prop_get_hasrecvd(tofs))
first_recvd_props = B_TRUE;
- }
/*
* If new received properties are supplied, they are to
* completely replace the existing received properties, so stash
* away the existing ones.
*/
- if (dsl_prop_get_received(os, &origprops) == 0) {
+ if (dsl_prop_get_received(tofs, &origprops) == 0) {
nvlist_t *errlist = NULL;
/*
* Don't bother writing a property if its value won't
@@ -3992,53 +3983,25 @@ zfs_ioc_recv(zfs_cmd_t *zc)
*/
if (!first_recvd_props)
props_reduce(props, origprops);
- if (zfs_check_clearable(tofs, origprops,
- &errlist) != 0)
+ if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
(void) nvlist_merge(errors, errlist, 0);
nvlist_free(errlist);
- }
- dmu_objset_rele(os, FTAG);
- }
-
- if (zc->zc_string[0]) {
- error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
- if (error)
- goto out;
- }
-
- error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
- &zc->zc_begin_record, force, origin, &drc);
- if (origin)
- dmu_objset_rele(origin, FTAG);
- if (error)
- goto out;
-
- /*
- * Set properties before we receive the stream so that they are applied
- * to the new data. Note that we must call dmu_recv_stream() if
- * dmu_recv_begin() succeeds.
- */
- if (props) {
- if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
- if (drc.drc_newfs) {
- if (spa_version(os->os_spa) >=
- SPA_VERSION_RECVD_PROPS)
- first_recvd_props = B_TRUE;
- } else if (origprops != NULL) {
- if (clear_received_props(os, tofs, origprops,
- first_recvd_props ? NULL : props) != 0)
- zc->zc_obj |= ZPROP_ERR_NOCLEAR;
- } else {
+ if (clear_received_props(tofs, origprops,
+ first_recvd_props ? NULL : props) != 0)
zc->zc_obj |= ZPROP_ERR_NOCLEAR;
- }
- dsl_prop_set_hasrecvd(os);
- } else if (!drc.drc_newfs) {
+ } else {
zc->zc_obj |= ZPROP_ERR_NOCLEAR;
}
+ }
+
+ if (props != NULL) {
+ props_error = dsl_prop_set_hasrecvd(tofs);
- (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
- props, errors);
+ if (props_error == 0) {
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
+ props, errors);
+ }
}
if (zc->zc_nvlist_dst_size != 0 &&
@@ -4090,22 +4053,16 @@ zfs_ioc_recv(zfs_cmd_t *zc)
/*
* On error, restore the original props.
*/
- if (error && props) {
- if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
- if (clear_received_props(os, tofs, props, NULL) != 0) {
- /*
- * We failed to clear the received properties.
- * Since we may have left a $recvd value on the
- * system, we can't clear the $hasrecvd flag.
- */
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
- } else if (first_recvd_props) {
- dsl_prop_unset_hasrecvd(os);
- }
- dmu_objset_rele(os, FTAG);
- } else if (!drc.drc_newfs) {
- /* We failed to clear the received properties. */
+ if (error != 0 && props != NULL && !drc.drc_newfs) {
+ if (clear_received_props(tofs, props, NULL) != 0) {
+ /*
+ * We failed to clear the received properties.
+ * Since we may have left a $recvd value on the
+ * system, we can't clear the $hasrecvd flag.
+ */
zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ } else if (first_recvd_props) {
+ dsl_prop_unset_hasrecvd(tofs);
}
if (origprops == NULL && !drc.drc_newfs) {
@@ -4157,100 +4114,75 @@ out:
static int
zfs_ioc_send(zfs_cmd_t *zc)
{
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
int error;
offset_t off;
- dsl_dataset_t *ds;
- dsl_dataset_t *dsfrom = NULL;
- spa_t *spa;
- dsl_pool_t *dp;
boolean_t estimate = (zc->zc_guid != 0);
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error)
- return (error);
+ if (zc->zc_obj != 0) {
+ dsl_pool_t *dp;
+ dsl_dataset_t *tosnap;
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- if (error)
- return (error);
-
- error = dmu_objset_from_ds(ds, &tosnap);
- if (error) {
- dsl_dataset_rele(ds, FTAG);
- return (error);
- }
-
- if (zc->zc_fromobj != 0) {
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
- rw_exit(&dp->dp_config_rwlock);
- if (error) {
- dsl_dataset_rele(ds, FTAG);
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
return (error);
- }
- error = dmu_objset_from_ds(dsfrom, &fromsnap);
- if (error) {
- dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
+
+ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
return (error);
}
+
+ if (dsl_dir_is_clone(tosnap->ds_dir))
+ zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj;
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
}
- if (zc->zc_obj) {
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ if (estimate) {
+ dsl_pool_t *dp;
+ dsl_dataset_t *tosnap;
+ dsl_dataset_t *fromsnap = NULL;
+
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
+ return (error);
- if (fromsnap != NULL) {
- dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
- return (EINVAL);
+ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
}
- if (dsl_dir_is_clone(ds->ds_dir)) {
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp,
- ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom);
- rw_exit(&dp->dp_config_rwlock);
- if (error) {
- dsl_dataset_rele(ds, FTAG);
- return (error);
- }
- error = dmu_objset_from_ds(dsfrom, &fromsnap);
- if (error) {
- dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
+ if (zc->zc_fromobj != 0) {
+ error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
+ FTAG, &fromsnap);
+ if (error != 0) {
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
}
- }
- if (estimate) {
error = dmu_send_estimate(tosnap, fromsnap,
&zc->zc_objset_type);
+
+ if (fromsnap != NULL)
+ dsl_dataset_rele(fromsnap, FTAG);
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
} else {
file_t *fp = getf(zc->zc_cookie);
- if (fp == NULL) {
- dsl_dataset_rele(ds, FTAG);
- if (dsfrom)
- dsl_dataset_rele(dsfrom, FTAG);
+ if (fp == NULL)
return (EBADF);
- }
off = fp->f_offset;
- error = dmu_send(tosnap, fromsnap,
- zc->zc_cookie, fp->f_vnode, &off);
+ error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
+ zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
releasef(zc->zc_cookie);
}
- if (dsfrom)
- dsl_dataset_rele(dsfrom, FTAG);
- dsl_dataset_rele(ds, FTAG);
return (error);
}
@@ -4265,12 +4197,20 @@ zfs_ioc_send(zfs_cmd_t *zc)
static int
zfs_ioc_send_progress(zfs_cmd_t *zc)
{
+ dsl_pool_t *dp;
dsl_dataset_t *ds;
dmu_sendarg_t *dsp = NULL;
int error;
- if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
return (error);
+ }
mutex_enter(&ds->ds_sendstream_lock);
@@ -4294,6 +4234,7 @@ zfs_ioc_send_progress(zfs_cmd_t *zc)
mutex_exit(&ds->ds_sendstream_lock);
dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -4400,7 +4341,7 @@ zfs_ioc_clear(zfs_cmd_t *zc)
}
}
- if (error)
+ if (error != 0)
return (error);
spa_vdev_state_enter(spa, SCL_NONE);
@@ -4438,7 +4379,7 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
int error;
error = spa_open(zc->zc_name, &spa, FTAG);
- if (error)
+ if (error != 0)
return (error);
spa_vdev_state_enter(spa, SCL_NONE);
@@ -4478,7 +4419,7 @@ zfs_ioc_promote(zfs_cmd_t *zc)
if (cp)
*cp = '\0';
(void) dmu_objset_find(zc->zc_value,
- zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
+ zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
}
@@ -4504,7 +4445,7 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
return (EINVAL);
error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
- if (error)
+ if (error != 0)
return (error);
error = zfs_userspace_one(zfsvfs,
@@ -4535,7 +4476,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
return (ENOMEM);
int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
- if (error)
+ if (error != 0)
return (error);
void *buf = kmem_alloc(bufsize, KM_SLEEP);
@@ -4585,7 +4526,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
} else {
/* XXX kind of reading contents without owning */
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
- if (error)
+ if (error != 0)
return (error);
error = dmu_objset_userspace_upgrade(os);
@@ -4658,7 +4599,7 @@ zfs_ioc_share(zfs_cmd_t *zc)
return (ENOSYS);
}
error = zfs_init_sharefs();
- if (error) {
+ if (error != 0) {
mutex_exit(&zfs_share_lock);
return (ENOSYS);
}
@@ -4683,7 +4624,7 @@ zfs_ioc_share(zfs_cmd_t *zc)
return (ENOSYS);
}
error = zfs_init_sharefs();
- if (error) {
+ if (error != 0) {
mutex_exit(&zfs_share_lock);
return (ENOSYS);
}
@@ -4748,7 +4689,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)
int error;
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
- if (error)
+ if (error != 0)
return (error);
error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
@@ -4771,25 +4712,26 @@ static int
zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
{
char *snap_name;
+ char *hold_name;
int error;
+ minor_t minor;
- snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value,
- (u_longlong_t)ddi_get_lbolt64());
-
- if (strlen(snap_name) >= MAXPATHLEN) {
- strfree(snap_name);
- return (E2BIG);
- }
-
- error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd);
- if (error != 0) {
- strfree(snap_name);
+ error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+ if (error != 0)
return (error);
- }
- (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1);
+ snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
+ (u_longlong_t)ddi_get_lbolt64());
+ hold_name = kmem_asprintf("%%%s", zc->zc_value);
+
+ error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+ hold_name);
+ if (error == 0)
+ (void) strcpy(zc->zc_value, snap_name);
strfree(snap_name);
- return (0);
+ strfree(hold_name);
+ zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+ return (error);
}
/*
@@ -4804,39 +4746,22 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
static int
zfs_ioc_diff(zfs_cmd_t *zc)
{
- objset_t *fromsnap;
- objset_t *tosnap;
file_t *fp;
offset_t off;
int error;
- error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
- if (error)
- return (error);
-
- error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
- if (error) {
- dmu_objset_rele(tosnap, FTAG);
- return (error);
- }
-
fp = getf(zc->zc_cookie);
- if (fp == NULL) {
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
+ if (fp == NULL)
return (EBADF);
- }
off = fp->f_offset;
- error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
+ error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
releasef(zc->zc_cookie);
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
return (error);
}
@@ -4905,13 +4830,13 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
ZFS_SHARES_DIR);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
+ if (error != 0) {
dmu_tx_abort(tx);
} else {
error = zfs_create_share_dir(zfsvfs, tx);
dmu_tx_commit(tx);
}
- if (error) {
+ if (error != 0) {
mutex_exit(&zfsvfs->z_lock);
VN_RELE(vp);
ZFS_EXIT(zfsvfs);
@@ -4990,124 +4915,82 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
}
/*
- * inputs:
- * zc_name name of filesystem
- * zc_value short name of snap
- * zc_string user-supplied tag for this hold
- * zc_cookie recursive flag
- * zc_temphold set if hold is temporary
- * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
- * zc_sendobj if non-zero, the objid for zc_name@zc_value
- * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg
+ * innvl: {
+ * "holds" -> { snapname -> holdname (string), ... }
+ * (optional) "cleanup_fd" -> fd (int32)
+ * }
*
- * outputs: none
+ * outnvl: {
+ * snapname -> error value (int32)
+ * ...
+ * }
*/
+/* ARGSUSED */
static int
-zfs_ioc_hold(zfs_cmd_t *zc)
+zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
{
- boolean_t recursive = zc->zc_cookie;
- spa_t *spa;
- dsl_pool_t *dp;
- dsl_dataset_t *ds;
+ nvlist_t *holds;
+ int cleanup_fd = -1;
int error;
minor_t minor = 0;
- if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
-
- if (zc->zc_sendobj == 0) {
- return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
- zc->zc_string, recursive, zc->zc_temphold,
- zc->zc_cleanup_fd));
- }
-
- if (recursive)
+ error = nvlist_lookup_nvlist(args, "holds", &holds);
+ if (error != 0)
return (EINVAL);
- error = spa_open(zc->zc_name, &spa, FTAG);
- if (error)
- return (error);
-
- dp = spa_get_dsl(spa);
- rw_enter(&dp->dp_config_rwlock, RW_READER);
- error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
- rw_exit(&dp->dp_config_rwlock);
- spa_close(spa, FTAG);
- if (error)
- return (error);
-
- /*
- * Until we have a hold on this snapshot, it's possible that
- * zc_sendobj could've been destroyed and reused as part
- * of a later txg. Make sure we're looking at the right object.
- */
- if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
- dsl_dataset_rele(ds, FTAG);
- return (ENOENT);
- }
-
- if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
- error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
- if (error) {
- dsl_dataset_rele(ds, FTAG);
+ if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
+ error = zfs_onexit_fd_hold(cleanup_fd, &minor);
+ if (error != 0)
return (error);
- }
}
- error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
- zc->zc_temphold);
- if (minor != 0) {
- if (error == 0) {
- dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
- minor);
- }
- zfs_onexit_fd_rele(zc->zc_cleanup_fd);
- }
- dsl_dataset_rele(ds, FTAG);
-
+ error = dsl_dataset_user_hold(holds, minor, errlist);
+ if (minor != 0)
+ zfs_onexit_fd_rele(cleanup_fd);
return (error);
}
/*
- * inputs:
- * zc_name name of dataset from which we're releasing a user hold
- * zc_value short name of snap
- * zc_string user-supplied tag for this hold
- * zc_cookie recursive flag
+ * innvl is not used.
*
- * outputs: none
+ * outnvl: {
+ * holdname -> time added (uint64 seconds since epoch)
+ * ...
+ * }
*/
+/* ARGSUSED */
static int
-zfs_ioc_release(zfs_cmd_t *zc)
+zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
{
- boolean_t recursive = zc->zc_cookie;
-
- if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
- return (EINVAL);
-
- return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
- zc->zc_string, recursive));
+ return (dsl_dataset_get_holds(snapname, outnvl));
}
/*
- * inputs:
- * zc_name name of filesystem
+ * innvl: {
+ * snapname -> { holdname, ... }
+ * ...
+ * }
*
- * outputs:
- * zc_nvlist_src{_size} nvlist of snapshot holds
+ * outnvl: {
+ * snapname -> error value (int32)
+ * ...
+ * }
*/
+/* ARGSUSED */
static int
-zfs_ioc_get_holds(zfs_cmd_t *zc)
+zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
{
- nvlist_t *nvp;
- int error;
+ nvpair_t *pair;
- if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
- error = put_nvlist(zc, nvp);
- nvlist_free(nvp);
- }
+ /*
+ * The release may cause the snapshot to be destroyed; make sure it
+ * is not mounted.
+ */
+ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(holds, pair))
+ zfs_unmount_snap(nvpair_name(pair));
- return (error);
+ return (dsl_dataset_user_release(holds, errlist));
}
/*
@@ -5124,14 +5007,21 @@ static int
zfs_ioc_space_written(zfs_cmd_t *zc)
{
int error;
+ dsl_pool_t *dp;
dsl_dataset_t *new, *old;
- error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
if (error != 0)
return (error);
- error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
+ error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+ error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
if (error != 0) {
dsl_dataset_rele(new, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -5139,8 +5029,10 @@ zfs_ioc_space_written(zfs_cmd_t *zc)
&zc->zc_objset_type, &zc->zc_perm_action);
dsl_dataset_rele(old, FTAG);
dsl_dataset_rele(new, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
+
/*
* innvl: {
* "firstsnap" -> snapshot name
@@ -5156,6 +5048,7 @@ static int
zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
{
int error;
+ dsl_pool_t *dp;
dsl_dataset_t *new, *old;
char *firstsnap;
uint64_t used, comp, uncomp;
@@ -5163,18 +5056,26 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
return (EINVAL);
- error = dsl_dataset_hold(lastsnap, FTAG, &new);
+ error = dsl_pool_hold(lastsnap, FTAG, &dp);
if (error != 0)
return (error);
- error = dsl_dataset_hold(firstsnap, FTAG, &old);
+
+ error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+ error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
if (error != 0) {
dsl_dataset_rele(new, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
dsl_dataset_rele(old, FTAG);
dsl_dataset_rele(new, FTAG);
+ dsl_pool_rele(dp, FTAG);
fnvlist_add_uint64(outnvl, "used", used);
fnvlist_add_uint64(outnvl, "compressed", comp);
fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
@@ -5193,47 +5094,27 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
static int
zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
int error;
offset_t off;
- char *fromname;
+ char *fromname = NULL;
int fd;
error = nvlist_lookup_int32(innvl, "fd", &fd);
if (error != 0)
return (EINVAL);
- error = dmu_objset_hold(snapname, FTAG, &tosnap);
- if (error)
- return (error);
-
- error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
- if (error == 0) {
- error = dmu_objset_hold(fromname, FTAG, &fromsnap);
- if (error) {
- dmu_objset_rele(tosnap, FTAG);
- return (error);
- }
- }
+ (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
file_t *fp = getf(fd);
- if (fp == NULL) {
- dmu_objset_rele(tosnap, FTAG);
- if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
+ if (fp == NULL)
return (EBADF);
- }
off = fp->f_offset;
- error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off);
+ error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
releasef(fd);
- if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
return (error);
}
@@ -5252,21 +5133,29 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
- objset_t *fromsnap = NULL;
- objset_t *tosnap;
+ dsl_pool_t *dp;
+ dsl_dataset_t *fromsnap = NULL;
+ dsl_dataset_t *tosnap;
int error;
char *fromname;
uint64_t space;
- error = dmu_objset_hold(snapname, FTAG, &tosnap);
- if (error)
+ error = dsl_pool_hold(snapname, FTAG, &dp);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
return (error);
+ }
error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
if (error == 0) {
- error = dmu_objset_hold(fromname, FTAG, &fromsnap);
- if (error) {
- dmu_objset_rele(tosnap, FTAG);
+ error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+ if (error != 0) {
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
}
@@ -5275,8 +5164,9 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
fnvlist_add_uint64(outnvl, "space", space);
if (fromsnap != NULL)
- dmu_objset_rele(fromsnap, FTAG);
- dmu_objset_rele(tosnap, FTAG);
+ dsl_dataset_rele(fromsnap, FTAG);
+ dsl_dataset_rele(tosnap, FTAG);
+ dsl_pool_rele(dp, FTAG);
return (error);
}
@@ -5421,6 +5311,17 @@ zfs_ioctl_init(void)
zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+ zfs_ioctl_register("hold", ZFS_IOC_HOLD,
+ zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+ zfs_ioctl_register("release", ZFS_IOC_RELEASE,
+ zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+ zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
+ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
+ POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5498,8 +5399,6 @@ zfs_ioctl_init(void)
zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
zfs_ioc_space_written);
- zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS,
- zfs_ioc_get_holds);
zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
zfs_ioc_objset_recvd_props);
zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
@@ -5542,10 +5441,6 @@ zfs_ioctl_init(void)
zfs_secpolicy_recv);
zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
zfs_secpolicy_promote);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold,
- zfs_secpolicy_hold);
- zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release,
- zfs_secpolicy_release);
zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index 2a2501707b..42486ea23c 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -513,27 +513,31 @@ zfs_register_callbacks(vfs_t *vfsp)
* overboard...
*/
ds = dmu_objset_ds(os);
- error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
+ dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+ error = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "xattr", xattr_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "recordsize", blksz_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "readonly", readonly_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "devices", devices_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "setuid", setuid_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "exec", exec_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "snapdir", snapdir_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclmode", acl_mode_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "aclinherit", acl_inherit_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
+ zfsvfs);
error = error ? error : dsl_prop_register(ds,
- "vscan", vscan_changed_cb, zfsvfs);
+ zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
+ dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
@@ -563,28 +567,35 @@ unregister:
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
- (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
- (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
- zfsvfs);
- (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
+ atime_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
+ xattr_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
+ blksz_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
+ readonly_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
+ devices_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
+ setuid_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
+ exec_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
+ snapdir_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE),
+ acl_mode_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
+ acl_inherit_changed_cb, zfsvfs);
+ (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
+ vscan_changed_cb, zfsvfs);
return (error);
-
}
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
- int error = 0;
-
/*
* Is it a valid type of object to track?
*/
@@ -641,7 +652,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
*groupp = BSWAP_64(*groupp);
}
}
- return (error);
+ return (0);
}
static void
@@ -993,7 +1004,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
- rrw_init(&zfsvfs->z_teardown_lock);
+ rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1444,8 +1455,9 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname)
char *str = NULL;
if (l_to_str_internal(mnt_sl, &str) == 0 &&
- dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
- ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
+ dsl_prop_set_string(osname,
+ zfs_prop_to_name(ZFS_PROP_MLSLABEL),
+ ZPROP_SRC_LOCAL, str) == 0)
retv = 0;
if (str != NULL)
kmem_free(str, strlen(str) + 1);
@@ -1856,7 +1868,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
- (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
+ dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 81d2bb5a97..e9616f8f65 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -235,7 +235,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}
- VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+ VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
return (error);
@@ -332,7 +332,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
- if (error)
+ if (error != 0)
break;
for (lrp = lrbuf; lrp < end; lrp += reclen) {
@@ -467,7 +467,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
if (dsl_dataset_is_snapshot(ds))
panic("dirtying snapshot!");
- if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
+ if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, zilog);
}
@@ -626,8 +626,8 @@ zil_claim(const char *osname, void *txarg)
objset_t *os;
int error;
- error = dmu_objset_hold(osname, FTAG, &os);
- if (error) {
+ error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
+ if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@@ -640,7 +640,7 @@ zil_claim(const char *osname, void *txarg)
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log);
dsl_dataset_dirty(dmu_objset_ds(os), tx);
- dmu_objset_rele(os, FTAG);
+ dmu_objset_disown(os, FTAG);
return (0);
}
@@ -665,7 +665,7 @@ zil_claim(const char *osname, void *txarg)
}
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
- dmu_objset_rele(os, FTAG);
+ dmu_objset_disown(os, FTAG);
return (0);
}
@@ -685,7 +685,7 @@ zil_check_log_chain(const char *osname, void *tx)
ASSERT(tx == NULL);
error = dmu_objset_hold(osname, FTAG, &os);
- if (error) {
+ if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@@ -973,7 +973,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
/* pass the old blkptr in order to spread log blocks across devs */
error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
USE_SLOG(zilog));
- if (!error) {
+ if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
@@ -1084,7 +1084,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
}
- if (error) {
+ if (error != 0) {
ASSERT(error == ENOENT || error == EEXIST ||
error == EALREADY);
return (lwb);
@@ -1708,6 +1708,9 @@ zil_free(zilog_t *zilog)
{
zilog->zl_stop_sync = 1;
+ ASSERT0(zilog->zl_suspend);
+ ASSERT0(zilog->zl_suspending);
+
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
@@ -1803,32 +1806,100 @@ zil_close(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
}
+static char *suspend_tag = "zil suspending";
+
/*
* Suspend an intent log. While in suspended mode, we still honor
* synchronous semantics, but we rely on txg_wait_synced() to do it.
- * We suspend the log briefly when taking a snapshot so that the snapshot
- * contains all the data it's supposed to, and has an empty intent log.
+ * On old version pools, we suspend the log briefly when taking a
+ * snapshot so that it will have an empty intent log.
+ *
+ * Long holds are not really intended to be used the way we do here --
+ * held for such a short time. A concurrent caller of dsl_dataset_long_held()
+ * could fail. Therefore we take pains to only put a long hold if it is
+ * actually necessary. Fortunately, it will only be necessary if the
+ * objset is currently mounted (or the ZVOL equivalent). In that case it
+ * will already have a long hold, so we are not really making things any worse.
+ *
+ * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
+ * zvol_state_t), and use their mechanism to prevent their hold from being
+ * dropped (e.g. VFS_HOLD()). However, that would be even more pain for
+ * very little gain.
+ *
+ * if cookiep == NULL, this does both the suspend & resume.
+ * Otherwise, it returns with the dataset "long held", and the cookie
+ * should be passed into zil_resume().
*/
int
-zil_suspend(zilog_t *zilog)
+zil_suspend(const char *osname, void **cookiep)
{
- const zil_header_t *zh = zilog->zl_header;
+ objset_t *os;
+ zilog_t *zilog;
+ const zil_header_t *zh;
+ int error;
+
+ error = dmu_objset_hold(osname, suspend_tag, &os);
+ if (error != 0)
+ return (error);
+ zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
+ zh = zilog->zl_header;
+
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
+ dmu_objset_rele(os, suspend_tag);
return (EBUSY);
}
- if (zilog->zl_suspend++ != 0) {
+
+ /*
+ * Don't put a long hold in the cases where we can avoid it. This
+ * is when there is no cookie so we are doing a suspend & resume
+ * (i.e. called from zil_vdev_offline()), and there's nothing to do
+ * for the suspend because it's already suspended, or there's no ZIL.
+ */
+ if (cookiep == NULL && !zilog->zl_suspending &&
+ (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
+ mutex_exit(&zilog->zl_lock);
+ dmu_objset_rele(os, suspend_tag);
+ return (0);
+ }
+
+ dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
+ dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
+
+ zilog->zl_suspend++;
+
+ if (zilog->zl_suspend > 1) {
/*
- * Someone else already began a suspend.
+ * Someone else is already suspending it.
* Just wait for them to finish.
*/
+
while (zilog->zl_suspending)
cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
mutex_exit(&zilog->zl_lock);
+
+ if (cookiep == NULL)
+ zil_resume(os);
+ else
+ *cookiep = os;
+ return (0);
+ }
+
+ /*
+ * If there is no pointer to an on-disk block, this ZIL must not
+ * be active (e.g. filesystem not mounted), so there's nothing
+ * to clean up.
+ */
+ if (BP_IS_HOLE(&zh->zh_log)) {
+ ASSERT(cookiep != NULL); /* fast path already handled */
+
+ *cookiep = os;
+ mutex_exit(&zilog->zl_lock);
return (0);
}
+
zilog->zl_suspending = B_TRUE;
mutex_exit(&zilog->zl_lock);
@@ -1841,16 +1912,25 @@ zil_suspend(zilog_t *zilog)
cv_broadcast(&zilog->zl_cv_suspend);
mutex_exit(&zilog->zl_lock);
+ if (cookiep == NULL)
+ zil_resume(os);
+ else
+ *cookiep = os;
return (0);
}
void
-zil_resume(zilog_t *zilog)
+zil_resume(void *cookie)
{
+ objset_t *os = cookie;
+ zilog_t *zilog = dmu_objset_zil(os);
+
mutex_enter(&zilog->zl_lock);
ASSERT(zilog->zl_suspend != 0);
zilog->zl_suspend--;
mutex_exit(&zilog->zl_lock);
+ dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
+ dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
}
typedef struct zil_replay_arg {
@@ -1923,7 +2003,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
error = zil_read_log_data(zilog, (lr_write_t *)lr,
zr->zr_lr + reclen);
- if (error)
+ if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
@@ -1944,7 +2024,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
* is updated if we are in replay mode.
*/
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);
- if (error) {
+ if (error != 0) {
/*
* The DMU's dnode layer doesn't see removes until the txg
* commits, so a subsequent claim can spuriously fail with
@@ -1954,7 +2034,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
*/
txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE);
- if (error)
+ if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
return (0);
@@ -2026,19 +2106,10 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx)
int
zil_vdev_offline(const char *osname, void *arg)
{
- objset_t *os;
- zilog_t *zilog;
int error;
- error = dmu_objset_hold(osname, FTAG, &os);
- if (error)
- return (error);
-
- zilog = dmu_objset_zil(os);
- if (zil_suspend(zilog) != 0)
- error = EEXIST;
- else
- zil_resume(zilog);
- dmu_objset_rele(os, FTAG);
- return (error);
+ error = zil_suspend(osname, NULL);
+ if (error != 0)
+ return (EEXIST);
+ return (0);
}
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index 432a992b26..7940e201ef 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -697,6 +697,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
void
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
{
+ metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
}
@@ -713,6 +714,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(spa_syncing_txg(spa) == txg);
ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
+ metaslab_check_free(spa, bp);
+
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
@@ -2010,7 +2013,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
bcmp(abuf->b_data, zio->io_orig_data,
zio->io_orig_size) != 0)
error = EEXIST;
- VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+ VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
ddt_enter(ddt);
@@ -2600,8 +2603,9 @@ zio_vdev_io_assess(zio_t *zio)
* set vdev_cant_write so that we stop trying to allocate from it.
*/
if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE &&
- vd != NULL && !vd->vdev_ops->vdev_op_leaf)
+ vd != NULL && !vd->vdev_ops->vdev_op_leaf) {
vd->vdev_cant_write = B_TRUE;
+ }
if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index b413f5ed8b..5911fd3d2d 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -653,7 +653,7 @@ zvol_last_close(zvol_state_t *zv)
if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
!(zv->zv_flags & ZVOL_RDONLY))
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
- (void) dmu_objset_evict_dbufs(zv->zv_objset);
+ dmu_objset_evict_dbufs(zv->zv_objset);
dmu_objset_disown(zv->zv_objset, zvol_tag);
zv->zv_objset = NULL;
@@ -698,7 +698,7 @@ zvol_prealloc(zvol_state_t *zv)
return (0);
}
-int
+static int
zvol_update_volsize(objset_t *os, uint64_t volsize)
{
dmu_tx_t *tx;
@@ -749,13 +749,12 @@ zvol_remove_minors(const char *name)
}
static int
-zvol_set_volsize_impl(objset_t *os, zvol_state_t *zv, uint64_t volsize)
+zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize)
{
uint64_t old_volsize = 0ULL;
- int error;
+ int error = 0;
ASSERT(MUTEX_HELD(&zfsdev_state_lock));
- error = zvol_update_volsize(os, volsize);
/*
* Reinitialize the dump area to the new size. If we
@@ -764,27 +763,25 @@ zvol_set_volsize_impl(objset_t *os, zvol_state_t *zv, uint64_t volsize)
* to calling dumpvp_resize() to ensure that the devices'
* size(9P) is not visible by the dump subsystem.
*/
- if (zv && error == 0) {
- old_volsize = zv->zv_volsize;
- zvol_size_changed(zv, volsize);
-
- if (zv->zv_flags & ZVOL_DUMPIFIED) {
- if ((error = zvol_dumpify(zv)) != 0 ||
- (error = dumpvp_resize()) != 0) {
- int dumpify_error;
-
- (void) zvol_update_volsize(os, old_volsize);
- zvol_size_changed(zv, old_volsize);
- dumpify_error = zvol_dumpify(zv);
- error = dumpify_error ? dumpify_error : error;
- }
+ old_volsize = zv->zv_volsize;
+ zvol_size_changed(zv, volsize);
+
+ if (zv->zv_flags & ZVOL_DUMPIFIED) {
+ if ((error = zvol_dumpify(zv)) != 0 ||
+ (error = dumpvp_resize()) != 0) {
+ int dumpify_error;
+
+ (void) zvol_update_volsize(zv->zv_objset, old_volsize);
+ zvol_size_changed(zv, old_volsize);
+ dumpify_error = zvol_dumpify(zv);
+ error = dumpify_error ? dumpify_error : error;
}
}
/*
* Generate a LUN expansion event.
*/
- if (zv && error == 0) {
+ if (error == 0) {
sysevent_id_t eid;
nvlist_t *attr;
char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
@@ -812,29 +809,45 @@ zvol_set_volsize(const char *name, uint64_t volsize)
int error;
dmu_object_info_t doi;
uint64_t readonly;
+ boolean_t owned = B_FALSE;
+
+ error = dsl_prop_get_integer(name,
+ zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
+ if (error != 0)
+ return (error);
+ if (readonly)
+ return (EROFS);
mutex_enter(&zfsdev_state_lock);
zv = zvol_minor_lookup(name);
- if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
- mutex_exit(&zfsdev_state_lock);
- return (error);
+
+ if (zv == NULL || zv->zv_objset == NULL) {
+ if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
+ FTAG, &os)) != 0) {
+ mutex_exit(&zfsdev_state_lock);
+ return (error);
+ }
+ owned = B_TRUE;
+ if (zv != NULL)
+ zv->zv_objset = os;
+ } else {
+ os = zv->zv_objset;
}
if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
- (error = zvol_check_volsize(volsize,
- doi.doi_data_block_size)) != 0)
+ (error = zvol_check_volsize(volsize, doi.doi_data_block_size)) != 0)
goto out;
- VERIFY3U(dsl_prop_get_integer(name,
- zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL), ==, 0);
- if (readonly) {
- error = EROFS;
- goto out;
- }
+ error = zvol_update_volsize(os, volsize);
- error = zvol_set_volsize_impl(os, zv, volsize);
+ if (error == 0 && zv != NULL)
+ error = zvol_update_live_volsize(zv, volsize);
out:
- dmu_objset_rele(os, FTAG);
+ if (owned) {
+ dmu_objset_disown(os, FTAG);
+ if (zv != NULL)
+ zv->zv_objset = NULL;
+ }
mutex_exit(&zfsdev_state_lock);
return (error);
}
@@ -1155,6 +1168,9 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
ze = list_next(&zv->zv_extents, ze);
}
+ if (ze == NULL)
+ return (EINVAL);
+
if (!ddi_in_panic())
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
@@ -1308,6 +1324,9 @@ zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
if (zv == NULL)
return (ENXIO);
+ if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0)
+ return (EINVAL);
+
boff = ldbtob(blkno);
resid = ldbtob(nblocks);
diff --git a/usr/src/uts/common/sys/nvpair.h b/usr/src/uts/common/sys/nvpair.h
index ad25effc29..e4d637b007 100644
--- a/usr/src/uts/common/sys/nvpair.h
+++ b/usr/src/uts/common/sys/nvpair.h
@@ -284,6 +284,7 @@ void fnvlist_pack_free(char *, size_t);
nvlist_t *fnvlist_unpack(char *, size_t);
nvlist_t *fnvlist_dup(nvlist_t *);
void fnvlist_merge(nvlist_t *, nvlist_t *);
+size_t fnvlist_num_pairs(nvlist_t *);
void fnvlist_add_boolean(nvlist_t *, const char *);
void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);