diff options
author | Matthew Ahrens <Matthew.Ahrens@Sun.COM> | 2009-08-06 22:16:07 -0700 |
---|---|---|
committer | Matthew Ahrens <Matthew.Ahrens@Sun.COM> | 2009-08-06 22:16:07 -0700 |
commit | ae46e4c775f2becc5343ff90b60a95acb79735f9 (patch) | |
tree | 7cf841cc2f35b6ad2c2b9e386313e610a7656147 | |
parent | 9d0d62ad2e60e8f742a2e723d06e88352ee6a1f3 (diff) | |
download | illumos-gate-ae46e4c775f2becc5343ff90b60a95acb79735f9.tar.gz |
6861581 ZFS frees in synching context during rollback
6869470 panic from refcount_remove when destroy clone
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 12 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/kernel.c | 24 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/sys/zfs_context.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 13 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_objset.c | 116 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_send.c | 303 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dsl_dataset.c | 215 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu_objset.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dsl_dataset.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_ioctl.c | 74 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zvol.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/os/strext.c | 33 | ||||
-rw-r--r-- | usr/src/uts/common/sys/systm.h | 1 |
14 files changed, 339 insertions, 484 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 5f49fd5a06..80512d427a 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -1477,7 +1477,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za) /* * Verify that we can create a new dataset. */ - error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, + error = dmu_objset_create(name, DMU_OST_OTHER, 0, ztest_create_cb, NULL); if (error) { if (error == ENOSPC) { @@ -1533,7 +1533,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za) /* * Verify that we cannot create an existing dataset. */ - error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL); + error = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); if (error != EEXIST) fatal(0, "created existing dataset, error = %d", error); @@ -1675,8 +1675,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za) if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); - error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0, - NULL, NULL); + error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); dmu_objset_close(clone); if (error) { if (error == ENOSPC) { @@ -1711,8 +1710,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za) if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); - error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0, - NULL, NULL); + error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); dmu_objset_close(clone); if (error) { if (error == ENOSPC) { @@ -3796,7 +3794,7 @@ ztest_run(char *pool) int test_future = FALSE; (void) rw_rdlock(&ztest_shared->zs_name_lock); (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d); - error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, + error = dmu_objset_create(name, DMU_OST_OTHER, 0, ztest_create_cb, NULL); if (error == EEXIST) { test_future = TRUE; diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index 89108fe5b2..cfdb32e405 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -884,3 +884,27 @@ ksiddomain_rele(ksiddomain_t *ksid) spa_strfree(ksid->kd_name); umem_free(ksid, sizeof (ksiddomain_t)); } + +/* + * Do not change the length of the returned string; it must be freed + * with strfree(). + */ +char * +kmem_asprintf(const char *fmt, ...) +{ + int size; + va_list adx; + char *buf; + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + + buf = kmem_alloc(size, KM_SLEEP); + + va_start(adx, fmt); + size = vsnprintf(buf, size, fmt, adx); + va_end(adx); + + return (buf); +} diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h index 230c233a24..d89a4ae9d5 100644 --- a/usr/src/lib/libzpool/common/sys/zfs_context.h +++ b/usr/src/lib/libzpool/common/sys/zfs_context.h @@ -490,6 +490,9 @@ typedef struct callb_cpr { #define zone_dataset_visible(x, y) (1) #define INGLOBALZONE(z) (1) +extern char *kmem_asprintf(const char *fmt, ...); +#define strfree(str) kmem_free((str), strlen(str)+1) + /* * Hostname information */ diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index d5e5aa544b..cfde0e7e34 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -3160,11 +3160,14 @@ arc_write_done(zio_t *zio) * sync-to-convergence, because we remove * buffers from the hash table when we arc_free(). */ - ASSERT(zio->io_flags & ZIO_FLAG_IO_REWRITE); - ASSERT(DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig), - BP_IDENTITY(zio->io_bp))); - ASSERT3U(zio->io_bp_orig.blk_birth, ==, - zio->io_bp->blk_birth); + if (!(zio->io_flags & ZIO_FLAG_IO_REWRITE) || + !DVA_EQUAL(BP_IDENTITY(&zio->io_bp_orig), + BP_IDENTITY(zio->io_bp)) || + zio->io_bp_orig.blk_birth != + zio->io_bp->blk_birth) { + panic("bad overwrite, hdr=%p exists=%p", + (void *)hdr, (void *)exists); + } ASSERT(refcount_is_zero(&exists->b_refcnt)); arc_change_state(arc_anon, exists, hash_lock); diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 5a9d25b774..583d1de5fc 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -564,7 +564,7 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, struct oscarg { void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); void *userarg; - dsl_dataset_t *clone_parent; + dsl_dataset_t *clone_origin; const char *lastname; dmu_objset_type_t type; uint64_t flags; @@ -585,17 +585,13 @@ dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) if (err != ENOENT) return (err ? err : EEXIST); - if (oa->clone_parent != NULL) { - /* - * You can't clone across pools. - */ - if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) + if (oa->clone_origin != NULL) { + /* You can't clone across pools. */ + if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) return (EXDEV); - /* - * You can only clone snapshots, not the head datasets. - */ - if (oa->clone_parent->ds_phys->ds_num_children == 0) + /* You can only clone snapshots, not the head datasets. */ + if (!dsl_dataset_is_snapshot(oa->clone_origin)) return (EINVAL); } @@ -607,37 +603,37 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct oscarg *oa = arg2; - dsl_dataset_t *ds; - blkptr_t *bp; uint64_t dsobj; ASSERT(dmu_tx_is_syncing(tx)); dsobj = dsl_dataset_create_sync(dd, oa->lastname, - oa->clone_parent, oa->flags, cr, tx); + oa->clone_origin, oa->flags, cr, tx); - VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - if (BP_IS_HOLE(bp)) { + if (oa->clone_origin == NULL) { + dsl_dataset_t *ds; + blkptr_t *bp; objset_impl_t *osi; - /* This is an empty dmu_objset; not a clone. */ + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, + FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + ASSERT(BP_IS_HOLE(bp)); + osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), ds, bp, oa->type, tx); if (oa->userfunc) oa->userfunc(&osi->os, oa->userarg, cr, tx); + dsl_dataset_rele(ds, FTAG); } spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa, tx, cr, "dataset = %llu", dsobj); - - dsl_dataset_rele(ds, FTAG); } int -dmu_objset_create(const char *name, dmu_objset_type_t type, - objset_t *clone_parent, uint64_t flags, +dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) { dsl_dir_t *pdd; @@ -654,24 +650,39 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, return (EEXIST); } - dprintf("name=%s\n", name); - oa.userfunc = func; oa.userarg = arg; oa.lastname = tail; oa.type = type; oa.flags = flags; - if (clone_parent != NULL) { - /* - * You can't clone to a different type. - */ - if (clone_parent->os->os_phys->os_type != type) { - dsl_dir_close(pdd, FTAG); - return (EINVAL); - } - oa.clone_parent = clone_parent->os->os_dsl_dataset; + err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, + dmu_objset_create_sync, pdd, &oa, 5); + dsl_dir_close(pdd, FTAG); + return (err); +} + +int +dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) +{ + dsl_dir_t *pdd; + const char *tail; + int err = 0; + struct oscarg oa = { 0 }; + + ASSERT(strchr(name, '@') == NULL); + err = dsl_dir_open(name, FTAG, &pdd, &tail); + if (err) + return (err); + if (tail == NULL) { + dsl_dir_close(pdd, FTAG); + return (EEXIST); } + + oa.lastname = tail; + oa.clone_origin = clone_origin; + oa.flags = flags; + err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, dmu_objset_create_sync, pdd, &oa, 5); dsl_dir_close(pdd, FTAG); @@ -685,10 +696,11 @@ dmu_objset_destroy(const char *name, boolean_t defer) int error; /* - * If it looks like we'll be able to destroy it, and there's - * an unplayed replay log sitting around, destroy the log. - * It would be nicer to do this in dsl_dataset_destroy_sync(), - * but the replay log objset is modified in open context. + * dsl_dataset_destroy() can free any claimed-but-unplayed + * intent log, but if there is an active log, it has blocks that + * are allocated, but may not yet be reflected in the on-disk + * structure. Only the ZIL knows how to free them, so we have + * to call into it here. */ error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os); @@ -697,43 +709,13 @@ dmu_objset_destroy(const char *name, boolean_t defer) zil_destroy(dmu_objset_zil(os), B_FALSE); error = dsl_dataset_destroy(ds, os, defer); - /* - * dsl_dataset_destroy() closes the ds. - */ + /* dsl_dataset_destroy() closes the ds. */ kmem_free(os, sizeof (objset_t)); } return (error); } -/* - * This will close the objset. - */ -int -dmu_objset_rollback(objset_t *os) -{ - int err; - dsl_dataset_t *ds; - - ds = os->os->os_dsl_dataset; - - if (!dsl_dataset_tryown(ds, TRUE, os)) { - dmu_objset_close(os); - return (EBUSY); - } - - err = dsl_dataset_rollback(ds, os->os->os_phys->os_type); - - /* - * NB: we close the objset manually because the rollback - * actually implicitly called dmu_objset_evict(), thus freeing - * the objset_impl_t. - */ - dsl_dataset_disown(ds, os); - kmem_free(os, sizeof (objset_t)); - return (err); -} - struct snaparg { dsl_sync_task_group_t *dstg; char *snapname; diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index ce59aac508..67fae08aff 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -321,31 +321,9 @@ struct recvbeginsyncarg { dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ }; -static dsl_dataset_t * -recv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type, - cred_t *cr, dmu_tx_t *tx) -{ - dsl_dataset_t *ds; - - /* This should always work, since we just created it */ - /* XXX - create should return an owned ds */ - VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, - DS_MODE_INCONSISTENT, dmu_recv_tag, &ds)); - - if (type != DMU_OST_NONE) { - (void) dmu_objset_create_impl(dp->dp_spa, - ds, &ds->ds_phys->ds_bp, type, tx); - } - - spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, - dp->dp_spa, tx, cr, "dataset = %lld", dsobj); - - return (ds); -} - /* ARGSUSED */ static int -recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct recvbeginsyncarg *rbsa = arg2; @@ -363,7 +341,7 @@ recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx) /* make sure it's a snap in the same pool */ if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) return (EXDEV); - if (rbsa->origin->ds_phys->ds_num_children == 0) + if (!dsl_dataset_is_snapshot(rbsa->origin)) return (EINVAL); if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) return (ENODEV); @@ -373,82 +351,31 @@ recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -recv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +recv_new_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct recvbeginsyncarg *rbsa = arg2; uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; uint64_t dsobj; + /* Create and open new dataset. */ dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, rbsa->origin, flags, cr, tx); + VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, + DS_MODE_INCONSISTENT, dmu_recv_tag, &rbsa->ds)); - rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, - rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx); -} - -static int -recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - int err; - struct dsl_ds_destroyarg dsda = {0}; - - /* must be a head ds */ - if (ds->ds_phys->ds_next_snap_obj != 0) - return (EINVAL); - - /* must not be a clone ds */ - if (dsl_dir_is_clone(ds->ds_dir)) - return (EINVAL); - - dsda.ds = ds; - err = dsl_dataset_destroy_check(&dsda, rbsa->tag, tx); - if (err) - return (err); - - if (rbsa->origin) { - /* make sure it's a snap in the same pool */ - if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool) - return (EXDEV); - if (rbsa->origin->ds_phys->ds_num_children == 0) - return (EINVAL); - if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) - return (ENODEV); + if (rbsa->origin == NULL) { + (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, + rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } - return (0); -} - -static void -recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - dsl_dir_t *dd = ds->ds_dir; - uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; - uint64_t dsobj; - struct dsl_ds_destroyarg dsda = {0}; - - /* - * NB: caller must provide an extra hold on the dsl_dir_t, so it - * won't go away when dsl_dataset_destroy_sync() closes the - * dataset. - */ - dsda.ds = ds; - dsl_dataset_destroy_sync(&dsda, rbsa->tag, cr, tx); - ASSERT3P(dsda.rm_origin, ==, NULL); - - dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx); - - rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, - rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx); + spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, + dd->dd_pool->dp_spa, tx, cr, "dataset = %lld", dsobj); } /* ARGSUSED */ static int -recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct recvbeginsyncarg *rbsa = arg2; @@ -459,13 +386,17 @@ recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) return (ETXTBSY); - /* must already be a snapshot of this fs */ - if (ds->ds_phys->ds_prev_snap_obj == 0) - return (ENODEV); - - /* most recent snapshot must match fromguid */ - if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) - return (ENODEV); + if (rbsa->fromguid) { + /* if incremental, most recent snapshot must match fromguid */ + if (ds->ds_prev == NULL) + return (ENODEV); + if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) + return (ENODEV); + } else { + /* if full, most recent snapshot must be $ORIGIN */ + if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) + return (ENODEV); + } /* temporary clone name must not exist */ err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, @@ -488,26 +419,30 @@ recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) /* ARGSUSED */ static void -recv_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +recv_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dataset_t *ohds = arg1; struct recvbeginsyncarg *rbsa = arg2; dsl_pool_t *dp = ohds->ds_dir->dd_pool; - dsl_dataset_t *ods, *cds; + dsl_dataset_t *cds; uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; uint64_t dsobj; - /* create the temporary clone */ - VERIFY(0 == dsl_dataset_hold_obj(dp, ohds->ds_phys->ds_prev_snap_obj, - FTAG, &ods)); - dsobj = dsl_dataset_create_sync(ohds->ds_dir, - rbsa->clonelastname, ods, flags, cr, tx); - dsl_dataset_rele(ods, FTAG); - - /* open the temporary clone */ + /* create and open the temporary clone */ + dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, + ohds->ds_prev, flags, cr, tx); VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, DS_MODE_INCONSISTENT, dmu_recv_tag, &cds)); + /* + * If we actually created a non-clone, we need to create the + * objset in our new dataset. + */ + if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { + (void) dmu_objset_create_impl(dp->dp_spa, + cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); + } + /* copy the refquota from the target fs to the clone */ if (ohds->ds_quota > 0) dsl_dataset_set_quota_sync(cds, &ohds->ds_quota, cr, tx); @@ -528,7 +463,7 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, { int err = 0; boolean_t byteswap; - struct recvbeginsyncarg rbsa; + struct recvbeginsyncarg rbsa = { 0 }; uint64_t version; int flags; dsl_dataset_t *ds; @@ -573,17 +508,17 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, /* * Process the begin in syncing context. */ - if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) { - /* incremental receive */ - /* tmp clone name is: tofs/%tosnap" */ - (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), - "%%%s", tosnap); + /* open the dataset we are logically receiving into */ + err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); + if (err == 0) { + /* target fs already exists; recv into temp clone */ - /* open the dataset we are logically receiving into */ - err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); - if (err) - return (err); + /* Can't recv a clone into an existing fs */ + if (flags & DRR_FLAG_CLONE) { + dsl_dataset_rele(ds, dmu_recv_tag); + return (EINVAL); + } /* must not have an incremental recv already in progress */ if (!mutex_tryenter(&ds->ds_recvlock)) { @@ -591,10 +526,12 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, return (EBUSY); } + /* tmp clone name is: tofs/%tosnap" */ + (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), + "%%%s", tosnap); rbsa.force = force; err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_incremental_check, - recv_incremental_sync, ds, &rbsa, 5); + recv_existing_check, recv_existing_sync, ds, &rbsa, 5); if (err) { mutex_exit(&ds->ds_recvlock); dsl_dataset_rele(ds, dmu_recv_tag); @@ -602,47 +539,36 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, } drc->drc_logical_ds = ds; drc->drc_real_ds = rbsa.ds; - } else { - /* create new fs -- full backup or clone */ - dsl_dir_t *dd = NULL; - const char *tail; + } else if (err == ENOENT) { + /* target fs does not exist; must be a full backup or clone */ + dsl_dataset_t *parent; + char *cp; - err = dsl_dir_open(tofs, FTAG, &dd, &tail); + /* + * If it's a non-clone incremental, we are missing the + * target fs, so fail the recv. + */ + if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) + return (ENOENT); + + /* Open the parent of tofs */ + cp = strrchr(tofs, '/'); + *cp = '\0'; + err = dsl_dataset_hold(tofs, FTAG, &parent); + *cp = '/'; if (err) return (err); - if (tail == NULL) { - if (!force) { - dsl_dir_close(dd, FTAG); - return (EEXIST); - } - - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_dataset_own_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, - DS_MODE_INCONSISTENT, FTAG, &ds); - rw_exit(&dd->dd_pool->dp_config_rwlock); - if (err) { - dsl_dir_close(dd, FTAG); - return (err); - } - dsl_dataset_make_exclusive(ds, FTAG); - err = dsl_sync_task_do(dd->dd_pool, - recv_full_existing_check, - recv_full_existing_sync, ds, &rbsa, 5); - dsl_dataset_disown(ds, FTAG); - } else { - err = dsl_sync_task_do(dd->dd_pool, recv_full_check, - recv_full_sync, dd, &rbsa, 5); - } - dsl_dir_close(dd, FTAG); + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); + dsl_dataset_rele(parent, FTAG); if (err) return (err); drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; drc->drc_newfs = B_TRUE; } - return (0); + return (err); } struct restorearg { @@ -1079,37 +1005,31 @@ recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; } -int -dmu_recv_end(dmu_recv_cookie_t *drc) +static int +dmu_recv_existing_end(dmu_recv_cookie_t *drc) { struct recvendsyncarg resa; dsl_dataset_t *ds = drc->drc_logical_ds; int err; /* - * XXX hack; seems the ds is still dirty and - * dsl_pool_zil_clean() expects it to have a ds_user_ptr - * (and zil), but clone_swap() can close it. + * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() + * expects it to have a ds_user_ptr (and zil), but clone_swap() + * can close it. */ txg_wait_synced(ds->ds_dir->dd_pool, 0); - if (ds != drc->drc_real_ds) { - /* we are doing an online recv */ - if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { - err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, - drc->drc_force); - if (err) - dsl_dataset_disown(ds, dmu_recv_tag); - } else { - err = EBUSY; - dsl_dataset_rele(ds, dmu_recv_tag); - } - /* dsl_dataset_destroy() will disown the ds */ + if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { + err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, + drc->drc_force); + if (err) + goto out; + } else { + mutex_exit(&ds->ds_recvlock); + dsl_dataset_rele(ds, dmu_recv_tag); (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); - mutex_exit(&drc->drc_logical_ds->ds_recvlock); - if (err) - return (err); + return (EBUSY); } resa.creation_time = drc->drc_drrb->drr_creation_time; @@ -1119,17 +1039,52 @@ dmu_recv_end(dmu_recv_cookie_t *drc) err = dsl_sync_task_do(ds->ds_dir->dd_pool, recv_end_check, recv_end_sync, ds, &resa, 3); if (err) { - if (drc->drc_newfs) { - ASSERT(ds == drc->drc_real_ds); - (void) dsl_dataset_destroy(ds, dmu_recv_tag, - B_FALSE); - return (err); - } else { - (void) dsl_dataset_rollback(ds, DMU_OST_NONE); - } + /* swap back */ + (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); } - /* release the hold from dmu_recv_begin */ +out: + mutex_exit(&ds->ds_recvlock); dsl_dataset_disown(ds, dmu_recv_tag); + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); return (err); } + +static int +dmu_recv_new_end(dmu_recv_cookie_t *drc) +{ + struct recvendsyncarg resa; + dsl_dataset_t *ds = drc->drc_logical_ds; + int err; + + /* + * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() + * expects it to have a ds_user_ptr (and zil), but clone_swap() + * can close it. + */ + txg_wait_synced(ds->ds_dir->dd_pool, 0); + + resa.creation_time = drc->drc_drrb->drr_creation_time; + resa.toguid = drc->drc_drrb->drr_toguid; + resa.tosnap = drc->drc_tosnap; + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_end_check, recv_end_sync, ds, &resa, 3); + if (err) { + /* clean up the fs we just recv'd into */ + (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); + } else { + /* release the hold from dmu_recv_begin */ + dsl_dataset_disown(ds, dmu_recv_tag); + } + return (err); +} + +int +dmu_recv_end(dmu_recv_cookie_t *drc) +{ + if (drc->drc_logical_ds != drc->drc_real_ds) + return (dmu_recv_existing_end(drc)); + else + return (dmu_recv_new_end(drc)); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 5e33e024ea..63d01d1072 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -45,8 +45,6 @@ static char *dsl_reaper = "the grim reaper"; static dsl_checkfunc_t dsl_dataset_destroy_begin_check; static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; -static dsl_checkfunc_t dsl_dataset_rollback_check; -static dsl_syncfunc_t dsl_dataset_rollback_sync; static dsl_syncfunc_t dsl_dataset_set_reservation_sync; #define DS_REF_MAX (1ULL << 62) @@ -244,8 +242,6 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); - dprintf_ds(ds, "evicting %s\n", ""); - unique_remove(ds->ds_fsid_guid); if (ds->ds_user_ptr != NULL) @@ -867,15 +863,11 @@ dsl_snapshot_destroy_one(char *name, void *arg) dsl_dataset_t *ds; int err; char *dsname; - size_t buflen; - /* alloc a buffer to hold name@snapname, plus the terminating NULL */ - buflen = strlen(name) + strlen(da->snapname) + 2; - dsname = kmem_alloc(buflen, KM_SLEEP); - (void) snprintf(dsname, buflen, "%s@%s", name, da->snapname); + dsname = kmem_asprintf("%s@%s", name, da->snapname); err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT, da->dstg, &ds); - kmem_free(dsname, buflen); + strfree(dsname); if (err == 0) { struct dsl_ds_destroyarg *dsda; @@ -1181,25 +1173,6 @@ out: return (err); } -int -dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) -{ - int err; - - ASSERT(ds->ds_owner); - - dsl_dataset_make_exclusive(ds, ds->ds_owner); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_rollback_check, dsl_dataset_rollback_sync, - ds, &ost, 0); - /* drop exclusive access */ - mutex_enter(&ds->ds_lock); - rw_exit(&ds->ds_rwlock); - cv_broadcast(&ds->ds_exclusive_cv); - mutex_exit(&ds->ds_lock); - return (err); -} - void * dsl_dataset_set_user_ptr(dsl_dataset_t *ds, void *p, dsl_dataset_evict_func_t func) @@ -1345,145 +1318,6 @@ kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, /* ARGSUSED */ static int -dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dmu_objset_type_t *ost = arg2; - - /* - * We can only roll back to emptyness if it is a ZPL objset. - */ - if (*ost != DMU_OST_ZFS && - ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) - return (EINVAL); - - /* - * This must not be a snapshot. - */ - if (ds->ds_phys->ds_next_snap_obj != 0) - return (EINVAL); - - /* - * If we made changes this txg, traverse_dataset won't find - * them. Try again. - */ - if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) - return (EAGAIN); - - return (0); -} - -/* ARGSUSED */ -static void -dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dmu_objset_type_t *ost = arg2; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - - dmu_buf_will_dirty(ds->ds_dbuf, tx); - - if (ds->ds_user_ptr != NULL) { - /* - * We need to make sure that the objset_impl_t is reopened after - * we do the rollback, otherwise it will have the wrong - * objset_phys_t. Normally this would happen when this - * dataset-open is closed, thus causing the - * dataset to be immediately evicted. But when doing "zfs recv - * -F", we reopen the objset before that, so that there is no - * window where the dataset is closed and inconsistent. - */ - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; - } - - /* Transfer space that was freed since last snap back to the head. */ - { - uint64_t used; - - VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist, - ds->ds_origin_txg, UINT64_MAX, &used)); - dsl_dir_transfer_space(ds->ds_dir, used, - DD_USED_SNAP, DD_USED_HEAD, tx); - } - - /* Zero out the deadlist. */ - bplist_close(&ds->ds_deadlist); - bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); - ds->ds_phys->ds_deadlist_obj = - bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); - VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, - ds->ds_phys->ds_deadlist_obj)); - - { - /* - * Free blkptrs that we gave birth to - this covers - * claimed but not played log blocks too. - */ - zio_t *zio; - struct killarg ka; - - zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, - ZIO_FLAG_MUSTSUCCEED); - ka.ds = ds; - ka.zio = zio; - ka.tx = tx; - (void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, - TRAVERSE_POST, kill_blkptr, &ka); - (void) zio_wait(zio); - } - - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); - - if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) { - /* Change our contents to that of the prev snapshot */ - - ASSERT3U(ds->ds_prev->ds_object, ==, - ds->ds_phys->ds_prev_snap_obj); - ASSERT3U(ds->ds_phys->ds_used_bytes, <=, - ds->ds_prev->ds_phys->ds_used_bytes); - - ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; - ds->ds_phys->ds_used_bytes = - ds->ds_prev->ds_phys->ds_used_bytes; - ds->ds_phys->ds_compressed_bytes = - ds->ds_prev->ds_phys->ds_compressed_bytes; - ds->ds_phys->ds_uncompressed_bytes = - ds->ds_prev->ds_phys->ds_uncompressed_bytes; - ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; - - if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { - dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ds->ds_prev->ds_phys->ds_unique_bytes = 0; - } - } else { - objset_impl_t *osi; - - ASSERT(*ost != DMU_OST_ZVOL); - ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0); - ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0); - ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0); - - bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); - ds->ds_phys->ds_flags = 0; - ds->ds_phys->ds_unique_bytes = 0; - if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= - SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; - - osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, - &ds->ds_phys->ds_bp, *ost, tx); -#ifdef _KERNEL - zfs_create_fs(&osi->os, kcred, NULL, tx); -#endif - } - - spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, - tx, cr, "dataset = %llu", ds->ds_object); -} - -/* ARGSUSED */ -static int dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; @@ -1991,7 +1825,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) origin->ds_user_ptr = NULL; } - dsl_dataset_rele(origin, tag); + dsl_dataset_rele(origin, ds); ds->ds_prev = NULL; ndsda.ds = origin; @@ -3002,9 +2836,11 @@ dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) if (csa->cds->ds_prev != csa->ohds->ds_prev) return (EINVAL); - /* cds should be the clone */ - if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != - csa->ohds->ds_object) + /* cds should be the clone (unless they are unrelated) */ + if (csa->cds->ds_prev != NULL && + csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && + csa->ohds->ds_object != + csa->cds->ds_prev->ds_phys->ds_next_snap_obj) return (EINVAL); /* the clone should be a child of the origin */ @@ -3042,7 +2878,6 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); - dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); if (csa->cds->ds_user_ptr != NULL) { csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); @@ -3055,10 +2890,16 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) csa->ohds->ds_user_ptr = NULL; } - /* reset origin's unique bytes */ - VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, - csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &csa->cds->ds_prev->ds_phys->ds_unique_bytes)); + /* + * Reset origin's unique bytes, if it exists. + */ + if (csa->cds->ds_prev) { + dsl_dataset_t *origin = csa->cds->ds_prev; + dmu_buf_will_dirty(origin->ds_dbuf, tx); + VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, + origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &origin->ds_phys->ds_unique_bytes)); + } /* swap blkptrs */ { @@ -3144,8 +2985,10 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) } /* - * Swap 'clone' with its origin head file system. Used at the end - * of "online recv" to swizzle the file system to the new version. + * Swap 'clone' with its origin head datasets. Used at the end of "zfs + * recv" into an existing fs to swizzle the file system to the new + * version, and by "zfs rollback". Can also be used to swap two + * independent head datasets if neither has any snapshots. */ int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, @@ -3481,14 +3324,11 @@ dsl_dataset_user_hold_one(char *dsname, void *arg) dsl_dataset_t *ds; int error; char *name; - size_t buflen; /* alloc a buffer to hold dsname@snapname plus terminating NULL */ - buflen = strlen(dsname) + strlen(ha->snapname) + 2; - name = kmem_alloc(buflen, KM_SLEEP); - (void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname); + name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, ha->dstg, &ds); - kmem_free(name, buflen); + strfree(name); if (error == 0) { ha->gotone = B_TRUE; dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, @@ -3678,7 +3518,6 @@ dsl_dataset_user_release_one(char *dsname, void *arg) int error; void *dtag = ha->dstg; char *name; - size_t buflen; boolean_t own = B_FALSE; boolean_t might_destroy; @@ -3686,11 +3525,9 @@ dsl_dataset_user_release_one(char *dsname, void *arg) return (ENAMETOOLONG); /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ - buflen = strlen(dsname) + strlen(ha->snapname) + 2; - name = kmem_alloc(buflen, KM_SLEEP); - (void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname); + name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, dtag, &ds); - kmem_free(name, buflen); + strfree(name); if (error == ENOENT && ha->recursive) return (0); (void) strcpy(ha->failed, dsname); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 3ff71b3b7f..cb23a22a7d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -172,12 +172,12 @@ int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type, objset_t **osp); void dmu_objset_close(objset_t *os); int dmu_objset_evict_dbufs(objset_t *os); -int dmu_objset_create(const char *name, dmu_objset_type_t type, - objset_t *clone_parent, uint64_t flags, +int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); +int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, + uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); -int dmu_objset_rollback(objset_t *os); int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props, boolean_t recursive); int dmu_objset_rename(const char *name, const char *newname, diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index 052cb8dd91..13fed60b9d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -114,11 +114,11 @@ typedef struct objset_impl { int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, objset_t **osp); void dmu_objset_close(objset_t *os); -int dmu_objset_create(const char *name, dmu_objset_type_t type, - objset_t *clone_parent, uint64_t flags, +int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); +int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, + uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_objset_rollback(objset_t *os); int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props, boolean_t recursive); void dmu_objset_stats(objset_t *os, nvlist_t *nv); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index b51036d38a..2d02c2805e 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -195,7 +195,6 @@ dsl_checkfunc_t dsl_dataset_destroy_check; dsl_syncfunc_t dsl_dataset_destroy_sync; dsl_checkfunc_t dsl_dataset_snapshot_check; dsl_syncfunc_t dsl_dataset_snapshot_sync; -int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost); int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 9cb40816d8..f31e09dab3 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -2406,14 +2406,12 @@ zfs_ioc_create(zfs_cmd_t *zc) return (error); } - error = dmu_objset_create(zc->zc_name, type, clone, 0, - NULL, NULL); + error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); + dmu_objset_close(clone); if (error) { - dmu_objset_close(clone); nvlist_free(nvprops); return (error); } - dmu_objset_close(clone); } else { boolean_t is_insensitive = B_FALSE; @@ -2470,7 +2468,7 @@ zfs_ioc_create(zfs_cmd_t *zc) return (error); } } - error = dmu_objset_create(zc->zc_name, type, NULL, + error = dmu_objset_create(zc->zc_name, type, is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); nvlist_free(zct.zct_zplprops); } @@ -2617,19 +2615,42 @@ zfs_ioc_destroy(zfs_cmd_t *zc) static int zfs_ioc_rollback(zfs_cmd_t *zc) { - objset_t *os; + dsl_dataset_t *ds, *clone; int error; - zfsvfs_t *zfsvfs = NULL; + zfsvfs_t *zfsvfs; + char *clone_name; + + error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); + if (error) + return (error); + + /* must not be a snapshot */ + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + /* must have a most recent snapshot */ + if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } /* - * Get the zfsvfs for the receiving objset. There - * won't be one if we're operating on a zvol, if the - * objset doesn't exist yet, or is not mounted. + * Create clone of most recent snapshot. */ - error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_USER, &os); + clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); + error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); if (error) - return (error); + goto out; + error = dsl_dataset_own(clone_name, DS_MODE_INCONSISTENT, FTAG, &clone); + if (error) + goto out; + + /* + * Do clone swap. + */ if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { int mode; @@ -2637,18 +2658,37 @@ zfs_ioc_rollback(zfs_cmd_t *zc) if (error == 0) { int resume_err; - error = dmu_objset_rollback(os); + if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { + error = dsl_dataset_clone_swap(clone, ds, + B_TRUE); + dsl_dataset_disown(ds, FTAG); + ds = NULL; + } else { + error = EBUSY; + } resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode); error = error ? error : resume_err; - } else { - dmu_objset_close(os); } VFS_RELE(zfsvfs->z_vfs); } else { - error = dmu_objset_rollback(os); + if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { + error = dsl_dataset_clone_swap(clone, ds, B_TRUE); + dsl_dataset_disown(ds, FTAG); + ds = NULL; + } else { + error = EBUSY; + } } - /* Note, the dmu_objset_rollback() releases the objset for us. */ + /* + * Destroy clone (which also closes it). + */ + (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); + +out: + strfree(clone_name); + if (ds) + dsl_dataset_rele(ds, FTAG); return (error); } diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index cfd3b3dbdb..107f135b48 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -435,7 +435,6 @@ zvol_create_minor(const char *name, major_t maj) int ds_mode = DS_MODE_OWNER; vnode_t *vp = NULL; char *devpath; - size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(name) + 1; char chrbuf[30], blkbuf[30]; int error; @@ -468,13 +467,9 @@ zvol_create_minor(const char *name, major_t maj) * If there's an existing /dev/zvol symlink, try to use the * same minor number we used last time. */ - devpath = kmem_alloc(devpathlen, KM_SLEEP); - - (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, name); - + devpath = kmem_asprintf("%s%s", ZVOL_FULL_DEV_DIR, name); error = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp); - - kmem_free(devpath, devpathlen); + strfree(devpath); if (error == 0 && vp->v_type != VLNK) error = EINVAL; @@ -1612,14 +1607,11 @@ zvol_is_swap(zvol_state_t *zv) vnode_t *vp; boolean_t ret = B_FALSE; char *devpath; - size_t devpathlen; int error; - devpathlen = strlen(ZVOL_FULL_DEV_DIR) + strlen(zv->zv_name) + 1; - devpath = kmem_alloc(devpathlen, KM_SLEEP); - (void) sprintf(devpath, "%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name); + devpath = kmem_asprintf("%s%s", ZVOL_FULL_DEV_DIR, zv->zv_name); error = lookupname(devpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); - kmem_free(devpath, devpathlen); + strfree(devpath); ret = !error && IS_SWAPVP(common_specvp(vp)); diff --git a/usr/src/uts/common/os/strext.c b/usr/src/uts/common/os/strext.c index 1e45415ca2..8ba7116047 100644 --- a/usr/src/uts/common/os/strext.c +++ b/usr/src/uts/common/os/strext.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/cmn_err.h> #include <sys/systm.h> @@ -147,3 +144,27 @@ vsprintf(char *buf, const char *fmt, va_list args) (void) vsnprintf(buf, INT_MAX, fmt, args); return (buf); } + +/* + * Do not change the length of the returned string; it must be freed + * with strfree(). + */ +char * +kmem_asprintf(const char *fmt, ...) +{ + int size; + va_list adx; + char *buf; + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + + buf = kmem_alloc(size, KM_SLEEP); + + va_start(adx, fmt); + size = vsnprintf(buf, size, fmt, adx); + va_end(adx); + + return (buf); +} diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h index 6857f5f057..5963460441 100644 --- a/usr/src/uts/common/sys/systm.h +++ b/usr/src/uts/common/sys/systm.h @@ -183,6 +183,7 @@ dev_t expldev(dev32_t); int bcmp(const void *, const void *, size_t) __PURE; int stoi(char **); void numtos(ulong_t, char *); +char *kmem_asprintf(const char *fmt, ...); int strident_valid(const char *); void strident_canon(char *, size_t); int getsubopt(char **optionsp, char * const *tokens, char **valuep); |