summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Ahrens <Matthew.Ahrens@Sun.COM>2009-08-06 22:16:07 -0700
committerMatthew Ahrens <Matthew.Ahrens@Sun.COM>2009-08-06 22:16:07 -0700
commitae46e4c775f2becc5343ff90b60a95acb79735f9 (patch)
tree7cf841cc2f35b6ad2c2b9e386313e610a7656147
parent9d0d62ad2e60e8f742a2e723d06e88352ee6a1f3 (diff)
downloadillumos-gate-ae46e4c775f2becc5343ff90b60a95acb79735f9.tar.gz
6861581 ZFS frees in synching context during rollback
6869470 panic from refcount_remove when destroy clone
-rw-r--r--usr/src/cmd/ztest/ztest.c12
-rw-r--r--usr/src/lib/libzpool/common/kernel.c24
-rw-r--r--usr/src/lib/libzpool/common/sys/zfs_context.h3
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c13
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_objset.c116
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_send.c303
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dataset.c215
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu.h6
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu_objset.h6
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_dataset.h1
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c74
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c16
-rw-r--r--usr/src/uts/common/os/strext.c33
-rw-r--r--usr/src/uts/common/sys/systm.h1
14 files changed, 339 insertions, 484 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 5f49fd5a06..80512d427a 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -1477,7 +1477,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
/*
* Verify that we can create a new dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
+ error = dmu_objset_create(name, DMU_OST_OTHER, 0,
ztest_create_cb, NULL);
if (error) {
if (error == ENOSPC) {
@@ -1533,7 +1533,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
/*
* Verify that we cannot create an existing dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL);
+ error = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
if (error != EEXIST)
fatal(0, "created existing dataset, error = %d", error);
@@ -1675,8 +1675,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
- error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
+ error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
dmu_objset_close(clone);
if (error) {
if (error == ENOSPC) {
@@ -1711,8 +1710,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
- error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
+ error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
dmu_objset_close(clone);
if (error) {
if (error == ENOSPC) {
@@ -3796,7 +3794,7 @@ ztest_run(char *pool)
int test_future = FALSE;
(void) rw_rdlock(&ztest_shared->zs_name_lock);
(void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
+ error = dmu_objset_create(name, DMU_OST_OTHER, 0,
ztest_create_cb, NULL);
if (error == EEXIST) {
test_future = TRUE;
diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c
index 89108fe5b2..cfdb32e405 100644
--- a/usr/src/lib/libzpool/common/kernel.c
+++ b/usr/src/lib/libzpool/common/kernel.c
@@ -884,3 +884,27 @@ ksiddomain_rele(ksiddomain_t *ksid)
spa_strfree(ksid->kd_name);
umem_free(ksid, sizeof (ksiddomain_t));
}
+
+/*
+ * Returns a kmem_alloc()ed string sized to fit exactly. Do not change its
+ * length: it must be freed with strfree(), which sizes the free by strlen().
+ */
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+ int size;
+ va_list adx;
+ char *buf;
+
+ va_start(adx, fmt);
+ size = vsnprintf(NULL, 0, fmt, adx) + 1;
+ va_end(adx);
+
+ buf = kmem_alloc(size, KM_SLEEP);
+
+ va_start(adx, fmt);
+ (void) vsnprintf(buf, size, fmt, adx);
+ va_end(adx);
+
+ return (buf);
+}
diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h
index 230c233a24..d89a4ae9d5 100644
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h
@@ -490,6 +490,9 @@ typedef struct callb_cpr {
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
+extern char *kmem_asprintf(const char *fmt, ...);
+#define strfree(str) kmem_free((str), strlen(str)+1)
+
/*
* Hostname information
*/
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index d5e5aa544b..cfde0e7e34 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -3160,11 +3160,14 @@ arc_write_done(zio_t *zio)
* sync-to-convergence, because we remove
* buffers from the hash table when we arc_free().
*/
- ASSERT(zio->io_flags & ZIO_FLAG_IO_REWRITE);
- ASSERT(DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig),
- BP_IDENTITY(zio->io_bp)));
- ASSERT3U(zio->io_bp_orig.blk_birth, ==,
- zio->io_bp->blk_birth);
+ if (!(zio->io_flags & ZIO_FLAG_IO_REWRITE) ||
+ !DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig),
+ BP_IDENTITY(zio->io_bp)) ||
+ zio->io_bp_orig.blk_birth !=
+ zio->io_bp->blk_birth) {
+ panic("bad overwrite, hdr=%p exists=%p",
+ (void *)hdr, (void *)exists);
+ }
ASSERT(refcount_is_zero(&exists->b_refcnt));
arc_change_state(arc_anon, exists, hash_lock);
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 5a9d25b774..583d1de5fc 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -564,7 +564,7 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
struct oscarg {
void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
void *userarg;
- dsl_dataset_t *clone_parent;
+ dsl_dataset_t *clone_origin;
const char *lastname;
dmu_objset_type_t type;
uint64_t flags;
@@ -585,17 +585,13 @@ dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (err != ENOENT)
return (err ? err : EEXIST);
- if (oa->clone_parent != NULL) {
- /*
- * You can't clone across pools.
- */
- if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool)
+ if (oa->clone_origin != NULL) {
+ /* You can't clone across pools. */
+ if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool)
return (EXDEV);
- /*
- * You can only clone snapshots, not the head datasets.
- */
- if (oa->clone_parent->ds_phys->ds_num_children == 0)
+ /* You can only clone snapshots, not the head datasets. */
+ if (!dsl_dataset_is_snapshot(oa->clone_origin))
return (EINVAL);
}
@@ -607,37 +603,37 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct oscarg *oa = arg2;
- dsl_dataset_t *ds;
- blkptr_t *bp;
uint64_t dsobj;
ASSERT(dmu_tx_is_syncing(tx));
dsobj = dsl_dataset_create_sync(dd, oa->lastname,
- oa->clone_parent, oa->flags, cr, tx);
+ oa->clone_origin, oa->flags, cr, tx);
- VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds));
- bp = dsl_dataset_get_blkptr(ds);
- if (BP_IS_HOLE(bp)) {
+ if (oa->clone_origin == NULL) {
+ dsl_dataset_t *ds;
+ blkptr_t *bp;
objset_impl_t *osi;
- /* This is an empty dmu_objset; not a clone. */
+ VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj,
+ FTAG, &ds));
+ bp = dsl_dataset_get_blkptr(ds);
+ ASSERT(BP_IS_HOLE(bp));
+
osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
ds, bp, oa->type, tx);
if (oa->userfunc)
oa->userfunc(&osi->os, oa->userarg, cr, tx);
+ dsl_dataset_rele(ds, FTAG);
}
spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
tx, cr, "dataset = %llu", dsobj);
-
- dsl_dataset_rele(ds, FTAG);
}
int
-dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent, uint64_t flags,
+dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
{
dsl_dir_t *pdd;
@@ -654,24 +650,39 @@ dmu_objset_create(const char *name, dmu_objset_type_t type,
return (EEXIST);
}
- dprintf("name=%s\n", name);
-
oa.userfunc = func;
oa.userarg = arg;
oa.lastname = tail;
oa.type = type;
oa.flags = flags;
- if (clone_parent != NULL) {
- /*
- * You can't clone to a different type.
- */
- if (clone_parent->os->os_phys->os_type != type) {
- dsl_dir_close(pdd, FTAG);
- return (EINVAL);
- }
- oa.clone_parent = clone_parent->os->os_dsl_dataset;
+ err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
+ dmu_objset_create_sync, pdd, &oa, 5);
+ dsl_dir_close(pdd, FTAG);
+ return (err);
+}
+
+int
+dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags)
+{
+ dsl_dir_t *pdd;
+ const char *tail;
+ int err = 0;
+ struct oscarg oa = { 0 };
+
+ ASSERT(strchr(name, '@') == NULL);
+ err = dsl_dir_open(name, FTAG, &pdd, &tail);
+ if (err)
+ return (err);
+ if (tail == NULL) {
+ dsl_dir_close(pdd, FTAG);
+ return (EEXIST);
}
+
+ oa.lastname = tail;
+ oa.clone_origin = clone_origin;
+ oa.flags = flags;
+
err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
dmu_objset_create_sync, pdd, &oa, 5);
dsl_dir_close(pdd, FTAG);
@@ -685,10 +696,11 @@ dmu_objset_destroy(const char *name, boolean_t defer)
int error;
/*
- * If it looks like we'll be able to destroy it, and there's
- * an unplayed replay log sitting around, destroy the log.
- * It would be nicer to do this in dsl_dataset_destroy_sync(),
- * but the replay log objset is modified in open context.
+ * dsl_dataset_destroy() can free any claimed-but-unplayed
+ * intent log, but if there is an active log, it has blocks that
+ * are allocated, but may not yet be reflected in the on-disk
+ * structure. Only the ZIL knows how to free them, so we have
+ * to call into it here.
*/
error = dmu_objset_open(name, DMU_OST_ANY,
DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os);
@@ -697,43 +709,13 @@ dmu_objset_destroy(const char *name, boolean_t defer)
zil_destroy(dmu_objset_zil(os), B_FALSE);
error = dsl_dataset_destroy(ds, os, defer);
- /*
- * dsl_dataset_destroy() closes the ds.
- */
+ /* dsl_dataset_destroy() closes the ds. */
kmem_free(os, sizeof (objset_t));
}
return (error);
}
-/*
- * This will close the objset.
- */
-int
-dmu_objset_rollback(objset_t *os)
-{
- int err;
- dsl_dataset_t *ds;
-
- ds = os->os->os_dsl_dataset;
-
- if (!dsl_dataset_tryown(ds, TRUE, os)) {
- dmu_objset_close(os);
- return (EBUSY);
- }
-
- err = dsl_dataset_rollback(ds, os->os->os_phys->os_type);
-
- /*
- * NB: we close the objset manually because the rollback
- * actually implicitly called dmu_objset_evict(), thus freeing
- * the objset_impl_t.
- */
- dsl_dataset_disown(ds, os);
- kmem_free(os, sizeof (objset_t));
- return (err);
-}
-
struct snaparg {
dsl_sync_task_group_t *dstg;
char *snapname;
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index ce59aac508..67fae08aff 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -321,31 +321,9 @@ struct recvbeginsyncarg {
dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
};
-static dsl_dataset_t *
-recv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type,
- cred_t *cr, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds;
-
- /* This should always work, since we just created it */
- /* XXX - create should return an owned ds */
- VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
- DS_MODE_INCONSISTENT, dmu_recv_tag, &ds));
-
- if (type != DMU_OST_NONE) {
- (void) dmu_objset_create_impl(dp->dp_spa,
- ds, &ds->ds_phys->ds_bp, type, tx);
- }
-
- spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
- dp->dp_spa, tx, cr, "dataset = %lld", dsobj);
-
- return (ds);
-}
-
/* ARGSUSED */
static int
-recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct recvbeginsyncarg *rbsa = arg2;
@@ -363,7 +341,7 @@ recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* make sure it's a snap in the same pool */
if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
return (EXDEV);
- if (rbsa->origin->ds_phys->ds_num_children == 0)
+ if (!dsl_dataset_is_snapshot(rbsa->origin))
return (EINVAL);
if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
return (ENODEV);
@@ -373,82 +351,31 @@ recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
-recv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_new_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct recvbeginsyncarg *rbsa = arg2;
uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
uint64_t dsobj;
+ /* Create and open new dataset. */
dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
rbsa->origin, flags, cr, tx);
+ VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
+ DS_MODE_INCONSISTENT, dmu_recv_tag, &rbsa->ds));
- rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
- rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
-}
-
-static int
-recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- int err;
- struct dsl_ds_destroyarg dsda = {0};
-
- /* must be a head ds */
- if (ds->ds_phys->ds_next_snap_obj != 0)
- return (EINVAL);
-
- /* must not be a clone ds */
- if (dsl_dir_is_clone(ds->ds_dir))
- return (EINVAL);
-
- dsda.ds = ds;
- err = dsl_dataset_destroy_check(&dsda, rbsa->tag, tx);
- if (err)
- return (err);
-
- if (rbsa->origin) {
- /* make sure it's a snap in the same pool */
- if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool)
- return (EXDEV);
- if (rbsa->origin->ds_phys->ds_num_children == 0)
- return (EINVAL);
- if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
- return (ENODEV);
+ if (rbsa->origin == NULL) {
+ (void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
+ rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
}
- return (0);
-}
-
-static void
-recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- struct recvbeginsyncarg *rbsa = arg2;
- dsl_dir_t *dd = ds->ds_dir;
- uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
- uint64_t dsobj;
- struct dsl_ds_destroyarg dsda = {0};
-
- /*
- * NB: caller must provide an extra hold on the dsl_dir_t, so it
- * won't go away when dsl_dataset_destroy_sync() closes the
- * dataset.
- */
- dsda.ds = ds;
- dsl_dataset_destroy_sync(&dsda, rbsa->tag, cr, tx);
- ASSERT3P(dsda.rm_origin, ==, NULL);
-
- dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx);
-
- rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
- rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
+ spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
+ dd->dd_pool->dp_spa, tx, cr, "dataset = %lld", dsobj);
}
/* ARGSUSED */
static int
-recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
struct recvbeginsyncarg *rbsa = arg2;
@@ -459,13 +386,17 @@ recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
return (ETXTBSY);
- /* must already be a snapshot of this fs */
- if (ds->ds_phys->ds_prev_snap_obj == 0)
- return (ENODEV);
-
- /* most recent snapshot must match fromguid */
- if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid)
- return (ENODEV);
+ if (rbsa->fromguid) {
+ /* if incremental, most recent snapshot must match fromguid */
+ if (ds->ds_prev == NULL)
+ return (ENODEV);
+ if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid)
+ return (ENODEV);
+ } else {
+ /* if full, most recent snapshot must be $ORIGIN */
+ if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
+ return (ENODEV);
+ }
/* temporary clone name must not exist */
err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
@@ -488,26 +419,30 @@ recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* ARGSUSED */
static void
-recv_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dataset_t *ohds = arg1;
struct recvbeginsyncarg *rbsa = arg2;
dsl_pool_t *dp = ohds->ds_dir->dd_pool;
- dsl_dataset_t *ods, *cds;
+ dsl_dataset_t *cds;
uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
uint64_t dsobj;
- /* create the temporary clone */
- VERIFY(0 == dsl_dataset_hold_obj(dp, ohds->ds_phys->ds_prev_snap_obj,
- FTAG, &ods));
- dsobj = dsl_dataset_create_sync(ohds->ds_dir,
- rbsa->clonelastname, ods, flags, cr, tx);
- dsl_dataset_rele(ods, FTAG);
-
- /* open the temporary clone */
+ /* create and open the temporary clone */
+ dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
+ ohds->ds_prev, flags, cr, tx);
VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
DS_MODE_INCONSISTENT, dmu_recv_tag, &cds));
+ /*
+ * If we actually created a non-clone, we need to create the
+ * objset in our new dataset.
+ */
+ if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) {
+ (void) dmu_objset_create_impl(dp->dp_spa,
+ cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx);
+ }
+
/* copy the refquota from the target fs to the clone */
if (ohds->ds_quota > 0)
dsl_dataset_set_quota_sync(cds, &ohds->ds_quota, cr, tx);
@@ -528,7 +463,7 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
{
int err = 0;
boolean_t byteswap;
- struct recvbeginsyncarg rbsa;
+ struct recvbeginsyncarg rbsa = { 0 };
uint64_t version;
int flags;
dsl_dataset_t *ds;
@@ -573,17 +508,17 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
/*
* Process the begin in syncing context.
*/
- if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) {
- /* incremental receive */
- /* tmp clone name is: tofs/%tosnap" */
- (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
- "%%%s", tosnap);
+ /* open the dataset we are logically receiving into */
+ err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
+ if (err == 0) {
+ /* target fs already exists; recv into temp clone */
- /* open the dataset we are logically receiving into */
- err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
- if (err)
- return (err);
+ /* Can't recv a clone into an existing fs */
+ if (flags & DRR_FLAG_CLONE) {
+ dsl_dataset_rele(ds, dmu_recv_tag);
+ return (EINVAL);
+ }
/* must not have an incremental recv already in progress */
if (!mutex_tryenter(&ds->ds_recvlock)) {
@@ -591,10 +526,12 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
return (EBUSY);
}
+ /* tmp clone name is: tofs/%tosnap */
+ (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
+ "%%%s", tosnap);
rbsa.force = force;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- recv_incremental_check,
- recv_incremental_sync, ds, &rbsa, 5);
+ recv_existing_check, recv_existing_sync, ds, &rbsa, 5);
if (err) {
mutex_exit(&ds->ds_recvlock);
dsl_dataset_rele(ds, dmu_recv_tag);
@@ -602,47 +539,36 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
}
drc->drc_logical_ds = ds;
drc->drc_real_ds = rbsa.ds;
- } else {
- /* create new fs -- full backup or clone */
- dsl_dir_t *dd = NULL;
- const char *tail;
+ } else if (err == ENOENT) {
+ /* target fs does not exist; must be a full backup or clone */
+ dsl_dataset_t *parent;
+ char *cp;
- err = dsl_dir_open(tofs, FTAG, &dd, &tail);
+ /*
+ * If it's a non-clone incremental, we are missing the
+ * target fs, so fail the recv.
+ */
+ if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE))
+ return (ENOENT);
+
+ /* Open the parent of tofs; 'ds' is not valid in this branch */
+ cp = strrchr(tofs, '/');
+ *cp = '\0';
+ err = dsl_dataset_hold(tofs, FTAG, &parent);
+ *cp = '/';
if (err)
return (err);
- if (tail == NULL) {
- if (!force) {
- dsl_dir_close(dd, FTAG);
- return (EEXIST);
- }
-
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- err = dsl_dataset_own_obj(dd->dd_pool,
- dd->dd_phys->dd_head_dataset_obj,
- DS_MODE_INCONSISTENT, FTAG, &ds);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
- if (err) {
- dsl_dir_close(dd, FTAG);
- return (err);
- }
- dsl_dataset_make_exclusive(ds, FTAG);
- err = dsl_sync_task_do(dd->dd_pool,
- recv_full_existing_check,
- recv_full_existing_sync, ds, &rbsa, 5);
- dsl_dataset_disown(ds, FTAG);
- } else {
- err = dsl_sync_task_do(dd->dd_pool, recv_full_check,
- recv_full_sync, dd, &rbsa, 5);
- }
- dsl_dir_close(dd, FTAG);
+ err = dsl_sync_task_do(parent->ds_dir->dd_pool,
+ recv_new_check, recv_new_sync, parent->ds_dir, &rbsa, 5);
+ dsl_dataset_rele(parent, FTAG);
if (err)
return (err);
drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
drc->drc_newfs = B_TRUE;
}
- return (0);
+ return (err);
}
struct restorearg {
@@ -1079,37 +1005,31 @@ recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
}
-int
-dmu_recv_end(dmu_recv_cookie_t *drc)
+static int
+dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
struct recvendsyncarg resa;
dsl_dataset_t *ds = drc->drc_logical_ds;
int err;
/*
- * XXX hack; seems the ds is still dirty and
- * dsl_pool_zil_clean() expects it to have a ds_user_ptr
- * (and zil), but clone_swap() can close it.
+ * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
+ * expects it to have a ds_user_ptr (and zil), but clone_swap()
+ * can close it.
*/
txg_wait_synced(ds->ds_dir->dd_pool, 0);
- if (ds != drc->drc_real_ds) {
- /* we are doing an online recv */
- if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
- err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
- drc->drc_force);
- if (err)
- dsl_dataset_disown(ds, dmu_recv_tag);
- } else {
- err = EBUSY;
- dsl_dataset_rele(ds, dmu_recv_tag);
- }
- /* dsl_dataset_destroy() will disown the ds */
+ if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
+ err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
+ drc->drc_force);
+ if (err)
+ goto out;
+ } else {
+ mutex_exit(&ds->ds_recvlock);
+ dsl_dataset_rele(ds, dmu_recv_tag);
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
B_FALSE);
- mutex_exit(&drc->drc_logical_ds->ds_recvlock);
- if (err)
- return (err);
+ return (EBUSY);
}
resa.creation_time = drc->drc_drrb->drr_creation_time;
@@ -1119,17 +1039,52 @@ dmu_recv_end(dmu_recv_cookie_t *drc)
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
recv_end_check, recv_end_sync, ds, &resa, 3);
if (err) {
- if (drc->drc_newfs) {
- ASSERT(ds == drc->drc_real_ds);
- (void) dsl_dataset_destroy(ds, dmu_recv_tag,
- B_FALSE);
- return (err);
- } else {
- (void) dsl_dataset_rollback(ds, DMU_OST_NONE);
- }
+ /* swap back */
+ (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE);
}
- /* release the hold from dmu_recv_begin */
+out:
+ mutex_exit(&ds->ds_recvlock);
dsl_dataset_disown(ds, dmu_recv_tag);
+ (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
return (err);
}
+
+static int
+dmu_recv_new_end(dmu_recv_cookie_t *drc)
+{
+ dsl_dataset_t *newds = drc->drc_logical_ds;
+ dsl_pool_t *dp = newds->ds_dir->dd_pool;
+ struct recvendsyncarg resa;
+ int err;
+
+ /*
+ * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
+ * expects it to have a ds_user_ptr (and zil), but clone_swap()
+ * can close it.
+ */
+ txg_wait_synced(dp, 0);
+
+ resa.tosnap = drc->drc_tosnap;
+ resa.toguid = drc->drc_drrb->drr_toguid;
+ resa.creation_time = drc->drc_drrb->drr_creation_time;
+ err = dsl_sync_task_do(dp, recv_end_check, recv_end_sync,
+ newds, &resa, 3);
+ if (err == 0) {
+ /* success: give up the ownership taken in dmu_recv_begin */
+ dsl_dataset_disown(newds, dmu_recv_tag);
+ } else {
+ /* failure: destroy the fs we just recv'd into */
+ (void) dsl_dataset_destroy(newds, dmu_recv_tag, B_FALSE);
+ }
+ return (err);
+}
+
+int
+dmu_recv_end(dmu_recv_cookie_t *drc)
+{
+ /* logical == real: we received directly into a newly created fs; */
+ /* otherwise the stream went into a temporary clone of an existing fs */
+ return (drc->drc_logical_ds == drc->drc_real_ds ?
+ dmu_recv_new_end(drc) : dmu_recv_existing_end(drc));
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index 5e33e024ea..63d01d1072 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -45,8 +45,6 @@ static char *dsl_reaper = "the grim reaper";
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
-static dsl_checkfunc_t dsl_dataset_rollback_check;
-static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
#define DS_REF_MAX (1ULL << 62)
@@ -244,8 +242,6 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
- dprintf_ds(ds, "evicting %s\n", "");
-
unique_remove(ds->ds_fsid_guid);
if (ds->ds_user_ptr != NULL)
@@ -867,15 +863,11 @@ dsl_snapshot_destroy_one(char *name, void *arg)
dsl_dataset_t *ds;
int err;
char *dsname;
- size_t buflen;
- /* alloc a buffer to hold name@snapname, plus the terminating NULL */
- buflen = strlen(name) + strlen(da->snapname) + 2;
- dsname = kmem_alloc(buflen, KM_SLEEP);
- (void) snprintf(dsname, buflen, "%s@%s", name, da->snapname);
+ dsname = kmem_asprintf("%s@%s", name, da->snapname);
err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
da->dstg, &ds);
- kmem_free(dsname, buflen);
+ strfree(dsname);
if (err == 0) {
struct dsl_ds_destroyarg *dsda;
@@ -1181,25 +1173,6 @@ out:
return (err);
}
-int
-dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
-{
- int err;
-
- ASSERT(ds->ds_owner);
-
- dsl_dataset_make_exclusive(ds, ds->ds_owner);
- err = dsl_sync_task_do(ds->ds_dir->dd_pool,
- dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
- ds, &ost, 0);
- /* drop exclusive access */
- mutex_enter(&ds->ds_lock);
- rw_exit(&ds->ds_rwlock);
- cv_broadcast(&ds->ds_exclusive_cv);
- mutex_exit(&ds->ds_lock);
- return (err);
-}
-
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func)
@@ -1345,145 +1318,6 @@ kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
/* ARGSUSED */
static int
-dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- dmu_objset_type_t *ost = arg2;
-
- /*
- * We can only roll back to emptyness if it is a ZPL objset.
- */
- if (*ost != DMU_OST_ZFS &&
- ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL)
- return (EINVAL);
-
- /*
- * This must not be a snapshot.
- */
- if (ds->ds_phys->ds_next_snap_obj != 0)
- return (EINVAL);
-
- /*
- * If we made changes this txg, traverse_dataset won't find
- * them. Try again.
- */
- if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
- return (EAGAIN);
-
- return (0);
-}
-
-/* ARGSUSED */
-static void
-dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
-{
- dsl_dataset_t *ds = arg1;
- dmu_objset_type_t *ost = arg2;
- objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
-
- if (ds->ds_user_ptr != NULL) {
- /*
- * We need to make sure that the objset_impl_t is reopened after
- * we do the rollback, otherwise it will have the wrong
- * objset_phys_t. Normally this would happen when this
- * dataset-open is closed, thus causing the
- * dataset to be immediately evicted. But when doing "zfs recv
- * -F", we reopen the objset before that, so that there is no
- * window where the dataset is closed and inconsistent.
- */
- ds->ds_user_evict_func(ds, ds->ds_user_ptr);
- ds->ds_user_ptr = NULL;
- }
-
- /* Transfer space that was freed since last snap back to the head. */
- {
- uint64_t used;
-
- VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
- ds->ds_origin_txg, UINT64_MAX, &used));
- dsl_dir_transfer_space(ds->ds_dir, used,
- DD_USED_SNAP, DD_USED_HEAD, tx);
- }
-
- /* Zero out the deadlist. */
- bplist_close(&ds->ds_deadlist);
- bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
- ds->ds_phys->ds_deadlist_obj =
- bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
- VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
- ds->ds_phys->ds_deadlist_obj));
-
- {
- /*
- * Free blkptrs that we gave birth to - this covers
- * claimed but not played log blocks too.
- */
- zio_t *zio;
- struct killarg ka;
-
- zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
- ZIO_FLAG_MUSTSUCCEED);
- ka.ds = ds;
- ka.zio = zio;
- ka.tx = tx;
- (void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- TRAVERSE_POST, kill_blkptr, &ka);
- (void) zio_wait(zio);
- }
-
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
-
- if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
- /* Change our contents to that of the prev snapshot */
-
- ASSERT3U(ds->ds_prev->ds_object, ==,
- ds->ds_phys->ds_prev_snap_obj);
- ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
- ds->ds_prev->ds_phys->ds_used_bytes);
-
- ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
- ds->ds_phys->ds_used_bytes =
- ds->ds_prev->ds_phys->ds_used_bytes;
- ds->ds_phys->ds_compressed_bytes =
- ds->ds_prev->ds_phys->ds_compressed_bytes;
- ds->ds_phys->ds_uncompressed_bytes =
- ds->ds_prev->ds_phys->ds_uncompressed_bytes;
- ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
-
- if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
- dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
- ds->ds_prev->ds_phys->ds_unique_bytes = 0;
- }
- } else {
- objset_impl_t *osi;
-
- ASSERT(*ost != DMU_OST_ZVOL);
- ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
- ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
- ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);
-
- bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
- ds->ds_phys->ds_flags = 0;
- ds->ds_phys->ds_unique_bytes = 0;
- if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
- SPA_VERSION_UNIQUE_ACCURATE)
- ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
-
- osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
- &ds->ds_phys->ds_bp, *ost, tx);
-#ifdef _KERNEL
- zfs_create_fs(&osi->os, kcred, NULL, tx);
-#endif
- }
-
- spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
- tx, cr, "dataset = %llu", ds->ds_object);
-}
-
-/* ARGSUSED */
-static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
@@ -1991,7 +1825,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
origin->ds_user_ptr = NULL;
}
- dsl_dataset_rele(origin, tag);
+ dsl_dataset_rele(origin, ds);
ds->ds_prev = NULL;
ndsda.ds = origin;
@@ -3002,9 +2836,11 @@ dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (csa->cds->ds_prev != csa->ohds->ds_prev)
return (EINVAL);
- /* cds should be the clone */
- if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
- csa->ohds->ds_object)
+ /* cds should be the clone (unless they are unrelated) */
+ if (csa->cds->ds_prev != NULL &&
+ csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
+ csa->ohds->ds_object !=
+ csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
return (EINVAL);
/* the clone should be a child of the origin */
@@ -3042,7 +2878,6 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
- dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
if (csa->cds->ds_user_ptr != NULL) {
csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
@@ -3055,10 +2890,16 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
csa->ohds->ds_user_ptr = NULL;
}
- /* reset origin's unique bytes */
- VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
- csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX,
- &csa->cds->ds_prev->ds_phys->ds_unique_bytes));
+ /*
+ * Reset origin's unique bytes, if it exists.
+ */
+ if (csa->cds->ds_prev) {
+ dsl_dataset_t *origin = csa->cds->ds_prev;
+ dmu_buf_will_dirty(origin->ds_dbuf, tx);
+ VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
+ origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
+ &origin->ds_phys->ds_unique_bytes));
+ }
/* swap blkptrs */
{
@@ -3144,8 +2985,10 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
}
/*
- * Swap 'clone' with its origin head file system. Used at the end
- * of "online recv" to swizzle the file system to the new version.
+ * Swap 'clone' with its origin head dataset. Used at the end of "zfs
+ * recv" into an existing fs to swizzle the file system to the new
+ * version, and by "zfs rollback". Can also be used to swap two
+ * independent head datasets if neither has any snapshots.
*/
int
dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
@@ -3481,14 +3324,11 @@ dsl_dataset_user_hold_one(char *dsname, void *arg)
dsl_dataset_t *ds;
int error;
char *name;
- size_t buflen;
/* alloc a buffer to hold dsname@snapname plus terminating NULL */
- buflen = strlen(dsname) + strlen(ha->snapname) + 2;
- name = kmem_alloc(buflen, KM_SLEEP);
- (void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
+ name = kmem_asprintf("%s@%s", dsname, ha->snapname);
error = dsl_dataset_hold(name, ha->dstg, &ds);
- kmem_free(name, buflen);
+ strfree(name);
if (error == 0) {
ha->gotone = B_TRUE;
dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
@@ -3678,7 +3518,6 @@ dsl_dataset_user_release_one(char *dsname, void *arg)
int error;
void *dtag = ha->dstg;
char *name;
- size_t buflen;
boolean_t own = B_FALSE;
boolean_t might_destroy;
@@ -3686,11 +3525,9 @@ dsl_dataset_user_release_one(char *dsname, void *arg)
return (ENAMETOOLONG);
/* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
- buflen = strlen(dsname) + strlen(ha->snapname) + 2;
- name = kmem_alloc(buflen, KM_SLEEP);
- (void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
+ name = kmem_asprintf("%s@%s", dsname, ha->snapname);
error = dsl_dataset_hold(name, dtag, &ds);
- kmem_free(name, buflen);
+ strfree(name);
if (error == ENOENT && ha->recursive)
return (0);
(void) strcpy(ha->failed, dsname);
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 3ff71b3b7f..cb23a22a7d 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -172,12 +172,12 @@ int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_evict_dbufs(objset_t *os);
-int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent, uint64_t flags,
+int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
+ uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
-int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
index 052cb8dd91..13fed60b9d 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
@@ -114,11 +114,11 @@ typedef struct objset_impl {
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
-int dmu_objset_create(const char *name, dmu_objset_type_t type,
- objset_t *clone_parent, uint64_t flags,
+int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
+ uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
-int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index b51036d38a..2d02c2805e 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -195,7 +195,6 @@ dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
-int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 9cb40816d8..f31e09dab3 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -2406,14 +2406,12 @@ zfs_ioc_create(zfs_cmd_t *zc)
return (error);
}
- error = dmu_objset_create(zc->zc_name, type, clone, 0,
- NULL, NULL);
+ error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
+ dmu_objset_close(clone);
if (error) {
- dmu_objset_close(clone);
nvlist_free(nvprops);
return (error);
}
- dmu_objset_close(clone);
} else {
boolean_t is_insensitive = B_FALSE;
@@ -2470,7 +2468,7 @@ zfs_ioc_create(zfs_cmd_t *zc)
return (error);
}
}
- error = dmu_objset_create(zc->zc_name, type, NULL,
+ error = dmu_objset_create(zc->zc_name, type,
is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
nvlist_free(zct.zct_zplprops);
}
@@ -2617,19 +2615,42 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
- objset_t *os;
+ dsl_dataset_t *ds, *clone;
int error;
- zfsvfs_t *zfsvfs = NULL;
+ zfsvfs_t *zfsvfs;
+ char *clone_name;
+
+ error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
+ if (error)
+ return (error);
+
+ /* must not be a snapshot */
+ if (dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
+
+ /* must have a most recent snapshot */
+ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
+ dsl_dataset_rele(ds, FTAG);
+ return (EINVAL);
+ }
/*
- * Get the zfsvfs for the receiving objset. There
- * won't be one if we're operating on a zvol, if the
- * objset doesn't exist yet, or is not mounted.
+ * Create clone of most recent snapshot.
*/
- error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_USER, &os);
+ clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
+ error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
if (error)
- return (error);
+ goto out;
+ error = dsl_dataset_own(clone_name, DS_MODE_INCONSISTENT, FTAG, &clone);
+ if (error)
+ goto out;
+
+ /*
+ * Do clone swap.
+ */
if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
int mode;
@@ -2637,18 +2658,37 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
if (error == 0) {
int resume_err;
- error = dmu_objset_rollback(os);
+ if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
+ error = dsl_dataset_clone_swap(clone, ds,
+ B_TRUE);
+ dsl_dataset_disown(ds, FTAG);
+ ds = NULL;
+ } else {
+ error = EBUSY;
+ }
resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode);
error = error ? error : resume_err;
- } else {
- dmu_objset_close(os);
}
VFS_RELE(zfsvfs->z_vfs);
} else {
- error = dmu_objset_rollback(os);
+ if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
+ error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
+ dsl_dataset_disown(ds, FTAG);
+ ds = NULL;
+ } else {
+ error = EBUSY;
+ }
}
- /* Note, the dmu_objset_rollback() releases the objset for us. */
+ /*
+ * Destroy clone (which also closes it).
+ */
+ (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
+
+out:
+ strfree(clone_name);
+ if (ds)
+ dsl_dataset_rele(ds, FTAG);
return (error);
}
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index cfd3b3dbdb..107f135b48 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -435,7 +435,6 @@ zvol_create_minor(const char *name, major_t maj)
int ds_mode = DS_MODE_OWNER;
vnode_t *vp = NULL;
char *devpath;
- size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(name) + 1;
char chrbuf[30], blkbuf[30];
int error;
@@ -468,13 +467,9 @@ zvol_create_minor(const char *name, major_t maj)
* If there's an existing /dev/zvol symlink, try to use the
* same minor number we used last time.
*/
- devpath = kmem_alloc(devpathlen, KM_SLEEP);
-
- (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, name);
-
+ devpath = kmem_asprintf("%s%s", ZVOL_FULL_DEV_DIR, name);
error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp);
-
- kmem_free(devpath, devpathlen);
+ strfree(devpath);
if (error == 0 && vp->v_type != VLNK)
error = EINVAL;
@@ -1612,14 +1607,11 @@ zvol_is_swap(zvol_state_t *zv)
vnode_t *vp;
boolean_t ret = B_FALSE;
char *devpath;
- size_t devpathlen;
int error;
- devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(zv->zv_name) + 1;
- devpath = kmem_alloc(devpathlen, KM_SLEEP);
- (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name);
+ devpath = kmem_asprintf("%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name);
error = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
- kmem_free(devpath, devpathlen);
+ strfree(devpath);
ret = !error && IS_SWAPVP(common_specvp(vp));
diff --git a/usr/src/uts/common/os/strext.c b/usr/src/uts/common/os/strext.c
index 1e45415ca2..8ba7116047 100644
--- a/usr/src/uts/common/os/strext.c
+++ b/usr/src/uts/common/os/strext.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
@@ -147,3 +144,27 @@ vsprintf(char *buf, const char *fmt, va_list args)
(void) vsnprintf(buf, INT_MAX, fmt, args);
return (buf);
}
+
+/*
+ * Format into a freshly kmem_alloc()ed buffer sized by a measuring pass.
+ * Free the result with strfree(), and do not change the string's length.
+ */
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+ int size;
+ va_list adx;
+ char *buf;
+
+ va_start(adx, fmt);
+ size = vsnprintf(NULL, 0, fmt, adx) + 1; /* length plus the NUL */
+ va_end(adx);
+
+ buf = kmem_alloc(size, KM_SLEEP);
+
+ va_start(adx, fmt); /* the first va_list was consumed above */
+ (void) vsnprintf(buf, size, fmt, adx);
+ va_end(adx);
+
+ return (buf);
+}
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index 6857f5f057..5963460441 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -183,6 +183,7 @@ dev_t expldev(dev32_t);
int bcmp(const void *, const void *, size_t) __PURE;
int stoi(char **);
void numtos(ulong_t, char *);
+char *kmem_asprintf(const char *fmt, ...);
int strident_valid(const char *);
void strident_canon(char *, size_t);
int getsubopt(char **optionsp, char * const *tokens, char **valuep);