diff options
author | ahrens <none@none> | 2006-06-14 23:16:39 -0700 |
---|---|---|
committer | ahrens <none@none> | 2006-06-14 23:16:39 -0700 |
commit | 1d452cf5123cb6ac0a013a4dbd4dcceeb0da314d (patch) | |
tree | 4c6acabeb0476b9d46ef194f560e953da44094b0 | |
parent | 7a0b67e3ef0ce92ca436e68c45383a76e14311a0 (diff) | |
download | illumos-joyent-1d452cf5123cb6ac0a013a4dbd4dcceeb0da314d.tar.gz |
PSARC 2006/388 snapshot -r
6373978 want to take lots of snapshots quickly ('zfs snapshot -r')
31 files changed, 1659 insertions, 1113 deletions
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 37e79f6322..f95c90b174 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -939,6 +939,10 @@ const struct ioc { "zfs_cmd_t" }, { (uint_t)ZFS_IOC_PROMOTE, "ZFS_IOC_PROMOTE", "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_DESTROY_SNAPS, "ZFS_IOC_DESTROY_SNAPS", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_SNAPSHOT, "ZFS_IOC_SNAPSHOT", + "zfs_cmd_t" }, /* kssl ioctls */ { (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY", diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 0af9a59690..24ff9d768e 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -1207,7 +1207,7 @@ dump_label(const char *dev) } /*ARGSUSED*/ -static void +static int dump_one_dir(char *dsname, void *arg) { int error; @@ -1217,10 +1217,11 @@ dump_one_dir(char *dsname, void *arg) DS_MODE_STANDARD | DS_MODE_READONLY, &os); if (error) { (void) printf("Could not open %s\n", dsname); - return; + return (0); } dump_dir(os); dmu_objset_close(os); + return (0); } static void @@ -1717,7 +1718,7 @@ dump_zpool(spa_t *spa) dump_dtl(spa->spa_root_vdev, 0); dump_metaslabs(spa); } - dmu_objset_find(spa->spa_name, dump_one_dir, NULL, + (void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL, DS_FIND_SNAPSHOTS); } diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index a4b6a1390d..956b8b6748 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -201,7 +201,8 @@ get_usage(zfs_help_t idx) return (gettext("\tshare -a\n" "\tshare <filesystem>\n")); case HELP_SNAPSHOT: - return (gettext("\tsnapshot <filesystem@name|volume@name>\n")); + return (gettext("\tsnapshot [-r] " + "<filesystem@name|volume@name>\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-f] -a\n" "\tunmount [-f] <filesystem|mountpoint>\n")); @@ -372,7 +373,7 @@ zfs_do_clone(int argc, char **argv) /* * zfs create fs - * zfs create [-s] -V vol size + * zfs create [-s] [-b blocksize] -V vol size * * Create a new dataset. 
This command can be used to create filesystems * and volumes. Snapshot creation is handled by 'zfs snapshot'. @@ -499,6 +500,7 @@ typedef struct destroy_cbdata { int cb_needforce; int cb_doclones; zfs_handle_t *cb_target; + char *cb_snapname; } destroy_cbdata_t; /* @@ -588,6 +590,29 @@ destroy_callback(zfs_handle_t *zhp, void *data) return (0); } +static int +destroy_snap_clones(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cbp = arg; + char thissnap[MAXPATHLEN]; + zfs_handle_t *szhp; + + (void) snprintf(thissnap, sizeof (thissnap), + "%s@%s", zfs_get_name(zhp), cbp->cb_snapname); + + libzfs_print_on_error(g_zfs, B_FALSE); + szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT); + libzfs_print_on_error(g_zfs, B_TRUE); + if (szhp) { + /* + * Destroy any clones of this snapshot + */ + (void) zfs_iter_dependents(szhp, destroy_callback, cbp); + zfs_close(szhp); + } + + return (zfs_iter_filesystems(zhp, destroy_snap_clones, arg)); +} static int zfs_do_destroy(int argc, char **argv) @@ -595,6 +620,7 @@ zfs_do_destroy(int argc, char **argv) destroy_cbdata_t cb = { 0 }; int c; zfs_handle_t *zhp; + char *cp; /* check options */ while ((c = getopt(argc, argv, "frR")) != -1) { @@ -630,6 +656,34 @@ zfs_do_destroy(int argc, char **argv) usage(B_FALSE); } + /* + * If we are doing recursive destroy of a snapshot, then the + * named snapshot may not exist. Go straight to libzfs. 
+ */ + if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) { + int ret; + + *cp = '\0'; + if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL) + return (1); + *cp = '@'; + cp++; + + if (cb.cb_doclones) { + cb.cb_snapname = cp; + (void) destroy_snap_clones(zhp, &cb); + } + + ret = zfs_destroy_snaps(zhp, cp); + zfs_close(zhp); + if (ret) { + (void) fprintf(stderr, + gettext("no snapshots destroyed\n")); + } + return (ret != 0); + } + + /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL) return (1); @@ -653,7 +707,6 @@ zfs_do_destroy(int argc, char **argv) return (1); } - /* * Check for any dependents and/or clones. */ @@ -1728,7 +1781,7 @@ zfs_do_set(int argc, char **argv) } /* - * zfs snapshot <fs@snap> + * zfs snapshot [-r] <fs@snap> * * Creates a snapshot with the given name. While functionally equivalent to * 'zfs create', it is a separate command to diffferentiate intent. @@ -1736,24 +1789,41 @@ zfs_do_set(int argc, char **argv) static int zfs_do_snapshot(int argc, char **argv) { + int recursive = B_FALSE; + int ret; + char c; + /* check options */ - if (argc > 1 && argv[1][0] == '-') { - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - argv[1][1]); - usage(B_FALSE); + while ((c = getopt(argc, argv, ":r")) != -1) { + switch (c) { + case 'r': + recursive = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } } + argc -= optind; + argv += optind; + /* check number of arguments */ - if (argc < 2) { + if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } - if (argc > 2) { + if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } - return (zfs_snapshot(g_zfs, argv[1]) != 0); + ret = zfs_snapshot(g_zfs, argv[0], recursive); + if (ret && recursive) + (void) fprintf(stderr, gettext("no snapshots were created\n")); + return (ret != 0); + } /* diff --git 
a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c index eb9822781a..d53d32271c 100644 --- a/usr/src/cmd/zoneadm/zfs.c +++ b/usr/src/cmd/zoneadm/zfs.c @@ -296,7 +296,7 @@ take_snapshot(char *source_zone, zfs_handle_t *zhp, char *snapshot_name, if (pre_snapshot(source_zone) != Z_OK) return (Z_ERR); - res = zfs_snapshot(g_zfs, snapshot_name); + res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE); if (post_snapshot(source_zone) != Z_OK) return (Z_ERR); diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index a3de4f4e7f..e6e26beec3 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -1071,7 +1071,7 @@ ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx) } /* ARGSUSED */ -static void +static int ztest_destroy_cb(char *name, void *arg) { objset_t *os; @@ -1098,6 +1098,7 @@ ztest_destroy_cb(char *name, void *arg) */ error = dmu_objset_destroy(name); ASSERT3U(error, ==, 0); + return (0); } /* @@ -1191,7 +1192,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za) * create lying around from a previous run. If so, destroy it * and all of its snapshots. */ - dmu_objset_find(name, ztest_destroy_cb, NULL, DS_FIND_SNAPSHOTS); + (void) dmu_objset_find(name, ztest_destroy_cb, NULL, DS_FIND_SNAPSHOTS); /* * Verify that the destroyed dataset is no longer in the namespace. 
@@ -1310,7 +1311,7 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za) error = dmu_objset_destroy(snapname); if (error != 0 && error != ENOENT) fatal(0, "dmu_objset_destroy() = %d", error); - error = dmu_objset_create(snapname, DMU_OST_OTHER, NULL, NULL, NULL); + error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE); if (error == ENOSPC) ztest_record_enospc("dmu_take_snapshot"); else if (error != 0 && error != EEXIST) @@ -3144,7 +3145,7 @@ ztest_run(char *pool) (int)ztest_random(zopt_datasets)); if (zopt_verbose >= 3) (void) printf("Destroying %s to free up space\n", name); - dmu_objset_find(name, ztest_destroy_cb, NULL, + (void) dmu_objset_find(name, ztest_destroy_cb, NULL, DS_FIND_SNAPSHOTS); (void) rw_unlock(&ztest_shared->zs_name_lock); } diff --git a/usr/src/common/zfs/zfs_namecheck.c b/usr/src/common/zfs/zfs_namecheck.c index 821c06fd30..61a80f420e 100644 --- a/usr/src/common/zfs/zfs_namecheck.c +++ b/usr/src/common/zfs/zfs_namecheck.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -57,6 +56,41 @@ valid_char(char c) } /* + * Snapshot names must be made up of alphanumeric characters plus the following + * characters: + * + * [-_.:] + */ +int +snapshot_namecheck(const char *path, namecheck_err_t *why, char *what) +{ + const char *loc; + + if (strlen(path) >= MAXNAMELEN) { + if (why) + *why = NAME_ERR_TOOLONG; + return (-1); + } + + if (path[0] == '\0') { + if (why) + *why = NAME_ERR_EMPTY_COMPONENT; + return (-1); + } + + for (loc = path; *loc; loc++) { + if (!valid_char(*loc)) { + if (why) { + *why = NAME_ERR_INVALCHAR; + *what = *loc; + } + return (-1); + } + } + return (0); +} + +/* * Dataset names must be of the following form: * * [component][/]*[component][@component] diff --git a/usr/src/common/zfs/zfs_namecheck.h b/usr/src/common/zfs/zfs_namecheck.h index c7fa504906..7e0cda974c 100644 --- a/usr/src/common/zfs/zfs_namecheck.h +++ b/usr/src/common/zfs/zfs_namecheck.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -48,6 +47,7 @@ typedef enum { int pool_namecheck(const char *, namecheck_err_t *, char *); int dataset_namecheck(const char *, namecheck_err_t *, char *); int dataset_name_hidden(const char *); +int snapshot_namecheck(const char *, namecheck_err_t *, char *); #ifdef __cplusplus } diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 410649bdcb..dcaccba2d3 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -284,8 +284,9 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, const char *, const char *); extern int zfs_destroy(zfs_handle_t *); +extern int zfs_destroy_snaps(zfs_handle_t *, char *); extern int zfs_clone(zfs_handle_t *, const char *); -extern int zfs_snapshot(libzfs_handle_t *, const char *); +extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int); extern int zfs_rename(zfs_handle_t *, const char *); extern int zfs_send(zfs_handle_t *, zfs_handle_t *); diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 9ff3a083be..148bacdcaf 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -169,6 +169,13 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type) return (0); } + if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { + if (hdl != NULL) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing '@' delimeter in snapshot name")); + return (0); + } + return (-1); } @@ -1943,7 +1950,6 @@ zfs_destroy(zfs_handle_t *zhp) { zfs_cmd_t zc = { 0 }; int ret; - char errbuf[1024]; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -1961,20 +1967,95 @@ zfs_destroy(zfs_handle_t *zhp) } ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); - - (void) snprintf(errbuf, sizeof (errbuf), 
dgettext(TEXT_DOMAIN, - "cannot destroy '%s'"), zhp->zfs_name); - - if (ret != 0) + if (ret != 0) { return (zfs_standard_error(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); + } remove_mountpoint(zhp); return (0); } +struct destroydata { + char *snapname; + boolean_t gotone; +}; + +static int +zfs_remove_link_cb(zfs_handle_t *zhp, void *arg) +{ + struct destroydata *dd = arg; + zfs_handle_t *szhp; + char name[ZFS_MAXNAMELEN]; + + (void) strcpy(name, zhp->zfs_name); + (void) strcat(name, "@"); + (void) strcat(name, dd->snapname); + + szhp = make_dataset_handle(zhp->zfs_hdl, name); + if (szhp) { + dd->gotone = B_TRUE; + zfs_close(szhp); + } + + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + (void) zvol_remove_link(zhp->zfs_hdl, name); + /* + * NB: this is simply a best-effort. We don't want to + * return an error, because then we wouldn't visit all + * the volumes. + */ + } + + return (zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg)); +} + +/* + * Destroys all snapshots with the given name in zhp & descendants. 
+ */ +int +zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname) +{ + zfs_cmd_t zc = { 0 }; + int ret; + struct destroydata dd = { 0 }; + + dd.snapname = snapname; + (void) zfs_remove_link_cb(zhp, &dd); + + if (!dd.gotone) { + return (zfs_standard_error(zhp->zfs_hdl, ENOENT, + dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), + zhp->zfs_name, snapname)); + } + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_prop_value, snapname, sizeof (zc.zc_prop_value)); + + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY_SNAPS, &zc); + if (ret != 0) { + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot destroy '%s@%s'"), zc.zc_name, snapname); + + switch (errno) { + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "snapshot is cloned")); + return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); + + default: + return (zfs_standard_error(zhp->zfs_hdl, errno, + errbuf)); + } + } + + return (0); +} + /* * Clones the given dataset. The target must be of the same type as the source. */ @@ -2171,11 +2252,32 @@ zfs_promote(zfs_handle_t *zhp) return (ret); } +static int +zfs_create_link_cb(zfs_handle_t *zhp, void *arg) +{ + char *snapname = arg; + + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + char name[MAXPATHLEN]; + + (void) strcpy(name, zhp->zfs_name); + (void) strcat(name, "@"); + (void) strcat(name, snapname); + (void) zvol_create_link(zhp->zfs_hdl, name); + /* + * NB: this is simply a best-effort. We don't want to + * return an error, because then we wouldn't visit all + * the volumes. 
+ */ + } + return (zfs_iter_filesystems(zhp, zfs_create_link_cb, snapname)); +} + /* * Takes a snapshot of the given dataset */ int -zfs_snapshot(libzfs_handle_t *hdl, const char *path) +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive) { const char *delim; char *parent; @@ -2191,14 +2293,8 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path) if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - /* make sure we have a snapshot */ - if ((delim = strchr(path, '@')) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "missing '@' delimeter in snapshot name")); - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); - } - /* make sure the parent exists and is of the appropriate type */ + delim = strchr(path, '@'); if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL) return (-1); (void) strncpy(parent, path, delim - path); @@ -2210,20 +2306,27 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path) return (-1); } - (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); - - if (zhp->zfs_type == ZFS_TYPE_VOLUME) - zc.zc_objset_type = DMU_OST_ZVOL; - else - zc.zc_objset_type = DMU_OST_ZFS; - - ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_prop_value, delim+1, sizeof (zc.zc_prop_value)); + zc.zc_cookie = recursive; + ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT, &zc); + /* + * if it was recursive, the one that actually failed will be in + * zc.zc_name. 
+ */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_prop_value); + if (ret == 0 && recursive) { + (void) zfs_iter_filesystems(zhp, + zfs_create_link_cb, (char *)delim+1); + } if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { ret = zvol_create_link(zhp->zfs_hdl, path); - if (ret != 0) + if (ret != 0) { (void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); + } } if (ret != 0) diff --git a/usr/src/lib/libzfs/spec/libzfs.spec b/usr/src/lib/libzfs/spec/libzfs.spec index 6120603e18..0ebb03f0b8 100644 --- a/usr/src/lib/libzfs/spec/libzfs.spec +++ b/usr/src/lib/libzfs/spec/libzfs.spec @@ -64,6 +64,10 @@ function zfs_destroy version SUNWprivate_1.1 end +function zfs_destroy_snaps +version SUNWprivate_1.1 +end + function zfs_get_handle version SUNWprivate_1.1 end diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index cc6769989b..58354340f7 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -853,6 +853,7 @@ ZFS_COMMON_OBJS += \ dsl_dir.o \ dsl_dataset.o \ dsl_pool.o \ + dsl_synctask.o \ dmu_zfetch.o \ dsl_prop.o \ fletcher.o \ diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 77886f5e24..e48238f29b 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -36,6 +36,7 @@ #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> #include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> #include <sys/dmu_zfetch.h> #include <sys/zfs_ioctl.h> #include <sys/zap.h> @@ -468,6 +469,10 @@ dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } #endif +/* + * XXX move send/recv stuff to its own new file! 
+ */ + struct backuparg { dmu_replay_record_t *drr; vnode_t *vp; @@ -718,124 +723,94 @@ struct restorearg { zio_cksum_t zc; }; +/* ARGSUSED */ static int -replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +replay_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct drr_begin *drrb = arg; - dsl_dataset_t *ds = NULL; - dsl_dataset_t *ds_prev = NULL; + dsl_dataset_t *ds = arg1; + struct drr_begin *drrb = arg2; const char *snapname; - int err = EINVAL; + int err; uint64_t val; - /* this must be a filesytem */ - if (dd->dd_phys->dd_head_dataset_obj == 0) - goto die; - - err = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, - NULL, DS_MODE_EXCLUSIVE, FTAG, &ds); - if (err) - goto die; - - if (ds == NULL) { - err = EBUSY; - goto die; - } - /* must already be a snapshot of this fs */ - if (ds->ds_phys->ds_prev_snap_obj == 0) { - err = ENODEV; - goto die; - } + if (ds->ds_phys->ds_prev_snap_obj == 0) + return (ENODEV); /* most recent snapshot must match fromguid */ - err = dsl_dataset_open_obj(dd->dd_pool, - ds->ds_phys->ds_prev_snap_obj, NULL, - DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds_prev); - if (err) - goto die; - if (ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) { - err = ENODEV; - goto die; - } - + if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) + return (ENODEV); /* must not have any changes since most recent snapshot */ if (ds->ds_phys->ds_bp.blk_birth > - ds_prev->ds_phys->ds_creation_txg) { - err = ETXTBSY; - goto die; - } + ds->ds_prev->ds_phys->ds_creation_txg) + return (ETXTBSY); /* new snapshot name must not exist */ snapname = strrchr(drrb->drr_toname, '@'); - if (snapname == NULL) { - err = EEXIST; - goto die; - } + if (snapname == NULL) + return (EEXIST); + snapname++; - err = zap_lookup(dd->dd_pool->dp_meta_objset, + err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); - if (err != ENOENT) { - if (err == 0) - err = EEXIST; - 
dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); + if (err == 0) + return (EEXIST); + if (err != ENOENT) return (err); - } - - dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); - /* The point of no (unsuccessful) return. */ + return (0); +} +/* ARGSUSED */ +static void +replay_incremental_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (0); - -die: - if (ds_prev) - dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); - if (ds) - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (err); } +/* ARGSUSED */ static int -replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct drr_begin *drrb = arg; + dsl_dir_t *dd = arg1; + struct drr_begin *drrb = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + char *cp; + uint64_t val; int err; - char *fsfullname, *fslastname, *cp; - dsl_dataset_t *ds; - fsfullname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - (void) strncpy(fsfullname, drrb->drr_toname, MAXNAMELEN); - cp = strchr(fsfullname, '@'); - if (cp == NULL) { - kmem_free(fsfullname, MAXNAMELEN); - return (EINVAL); - } + cp = strchr(drrb->drr_toname, '@'); *cp = '\0'; - fslastname = strrchr(fsfullname, '/'); - if (fslastname == NULL) { - kmem_free(fsfullname, MAXNAMELEN); - return (EINVAL); - } - fslastname++; + err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, + strrchr(drrb->drr_toname, '/') + 1, + sizeof (uint64_t), 1, &val); + *cp = '@'; - err = dsl_dataset_create_sync(dd, fsfullname, fslastname, NULL, tx); - if (err) { - kmem_free(fsfullname, MAXNAMELEN); - return (err); - } + if (err != ENOENT) + return (err ? 
err : EEXIST); + + return (0); +} - /* the point of no (unsuccessful) return */ +static void +replay_full_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct drr_begin *drrb = arg2; + char *cp; + dsl_dataset_t *ds; + uint64_t dsobj; + + cp = strchr(drrb->drr_toname, '@'); + *cp = '\0'; + dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1, + NULL, tx); + *cp = '@'; - VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, fsfullname, + VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, DS_MODE_EXCLUSIVE, FTAG, &ds)); - kmem_free(fsfullname, MAXNAMELEN); (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), ds, drrb->drr_type, tx); @@ -844,16 +819,14 @@ replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (0); } static int -replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct drr_begin *drrb = arg; - int err; + objset_t *os = arg1; + struct drr_begin *drrb = arg2; char *snapname; - dsl_dataset_t *ds; /* XXX verify that drr_toname is in dd */ @@ -862,13 +835,25 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) return (EINVAL); snapname++; - /* create snapshot */ - err = dsl_dataset_snapshot_sync(dd, snapname, tx); - if (err) - return (err); + return (dsl_dataset_snapshot_check(os, snapname, tx)); +} + +static void +replay_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + objset_t *os = arg1; + struct drr_begin *drrb = arg2; + char *snapname; + dsl_dataset_t *ds, *hds; + + snapname = strchr(drrb->drr_toname, '@') + 1; + + dsl_dataset_snapshot_sync(os, snapname, tx); /* set snapshot's creation time and guid */ - VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, drrb->drr_toname, + hds = os->os->os_dsl_dataset; + VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool, + hds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_PRIMARY | 
DS_MODE_READONLY | DS_MODE_INCONSISTENT, FTAG, &ds)); @@ -879,14 +864,8 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, - NULL, DS_MODE_STANDARD | DS_MODE_INCONSISTENT, FTAG, &ds)); - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; - dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); - - return (0); + dmu_buf_will_dirty(hds->ds_dbuf, tx); + hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; } void * @@ -1169,7 +1148,6 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, struct restorearg ra; dmu_replay_record_t *drr; char *cp; - dsl_dir_t *dd = NULL; objset_t *os = NULL; zio_cksum_t pzc; @@ -1228,20 +1206,30 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, */ if (drrb->drr_fromguid) { /* incremental backup */ + dsl_dataset_t *ds = NULL; cp = strchr(tosnap, '@'); *cp = '\0'; - ra.err = dsl_dir_open(tosnap, FTAG, &dd, NULL); + ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds); *cp = '@'; if (ra.err) goto out; - ra.err = dsl_dir_sync_task(dd, replay_incremental_sync, - drrb, 1<<20); + ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool, + replay_incremental_check, replay_incremental_sync, + ds, drrb, 1); + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); } else { /* full backup */ + dsl_dir_t *dd = NULL; const char *tail; + /* can't restore full backup into topmost fs, for now */ + if (strrchr(drrb->drr_toname, '/') == NULL) { + ra.err = EINVAL; + goto out; + } + cp = strchr(tosnap, '@'); *cp = '\0'; ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail); @@ -1253,8 +1241,9 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, goto out; } - ra.err = dsl_dir_sync_task(dd, replay_full_sync, - drrb, 1<<20); + ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check, + replay_full_sync, dd, drrb, 5); + dsl_dir_close(dd, FTAG); } if 
(ra.err) goto out; @@ -1332,12 +1321,9 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, goto out; } - /* - * dd may be the parent of the dd we are - * restoring into (eg. if it's a full backup). - */ - ra.err = dsl_dir_sync_task(dmu_objset_ds(os)-> - ds_dir, replay_end_sync, drrb, 1<<20); + ra.err = dsl_sync_task_do(dmu_objset_ds(os)-> + ds_dir->dd_pool, replay_end_check, replay_end_sync, + os, drrb, 3); goto out; } default: @@ -1356,33 +1342,34 @@ out: * processed the begin properly. 'os' will only be set if this * is the case. */ - if (ra.err && os && dd && tosnap && strchr(tosnap, '@')) { + if (ra.err && os && tosnap && strchr(tosnap, '@')) { /* * rollback or destroy what we created, so we don't * leave it in the restoring state. */ - txg_wait_synced(dd->dd_pool, 0); - if (drrb->drr_fromguid) { - /* incremental: rollback to most recent snapshot */ - (void) dsl_dir_sync_task(dd, - dsl_dataset_rollback_sync, NULL, 0); - } else { - /* full: destroy whole fs */ - cp = strchr(tosnap, '@'); - *cp = '\0'; - cp = strchr(tosnap, '/'); - if (cp) { - (void) dsl_dir_sync_task(dd, - dsl_dir_destroy_sync, cp+1, 0); + dsl_dataset_t *ds; + int err; + + cp = strchr(tosnap, '@'); + *cp = '\0'; + err = dsl_dataset_open(tosnap, + DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, + FTAG, &ds); + if (err == 0) { + txg_wait_synced(ds->ds_dir->dd_pool, 0); + if (drrb->drr_fromguid) { + /* incremental: rollback to most recent snap */ + (void) dsl_dataset_rollback(ds); + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + } else { + /* full: destroy whole fs */ + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + (void) dsl_dataset_destroy(tosnap); } - cp = strchr(tosnap, '\0'); - *cp = '@'; } - + *cp = '@'; } - if (dd) - dsl_dir_close(dd, FTAG); kmem_free(ra.buf, ra.bufsize); if (sizep) *sizep = ra.voff; diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 3d5f1f7b5c..3f7128c5f4 100644 --- 
a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -31,6 +31,7 @@ #include <sys/dsl_dataset.h> #include <sys/dsl_prop.h> #include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> #include <sys/dnode.h> #include <sys/dbuf.h> #include <sys/dmu_tx.h> @@ -418,29 +419,56 @@ struct oscarg { void (*userfunc)(objset_t *os, void *arg, dmu_tx_t *tx); void *userarg; dsl_dataset_t *clone_parent; - const char *fullname; const char *lastname; dmu_objset_type_t type; }; +/* ARGSUSED */ static int -dmu_objset_create_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct oscarg *oa = arg; - dsl_dataset_t *ds; + dsl_dir_t *dd = arg1; + struct oscarg *oa = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; int err; + uint64_t ddobj; + + err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, + oa->lastname, sizeof (uint64_t), 1, &ddobj); + if (err != ENOENT) + return (err ? err : EEXIST); + + if (oa->clone_parent != NULL) { + /* + * You can't clone across pools. + */ + if (oa->clone_parent->ds_dir->dd_pool != dd->dd_pool) + return (EXDEV); + + /* + * You can only clone snapshots, not the head datasets. 
+ */ + if (oa->clone_parent->ds_phys->ds_num_children == 0) + return (EINVAL); + } + return (0); +} + +static void +dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct oscarg *oa = arg2; + dsl_dataset_t *ds; blkptr_t bp; + uint64_t dsobj; ASSERT(dmu_tx_is_syncing(tx)); - err = dsl_dataset_create_sync(dd, oa->fullname, oa->lastname, + dsobj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_parent, tx); - dprintf_dd(dd, "fn=%s ln=%s err=%d\n", - oa->fullname, oa->lastname, err); - if (err) - return (err); - VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, oa->fullname, + VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); dsl_dataset_get_blkptr(ds, &bp); if (BP_IS_HOLE(&bp)) { @@ -454,8 +482,6 @@ dmu_objset_create_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) oa->userfunc(&osi->os, oa->userarg, tx); } dsl_dataset_close(ds, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG); - - return (0); } int @@ -463,65 +489,39 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, objset_t *clone_parent, void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg) { - dsl_dir_t *pds; + dsl_dir_t *pdd; const char *tail; int err = 0; + struct oscarg oa = { 0 }; - err = dsl_dir_open(name, FTAG, &pds, &tail); + ASSERT(strchr(name, '@') == NULL); + err = dsl_dir_open(name, FTAG, &pdd, &tail); if (err) return (err); if (tail == NULL) { - dsl_dir_close(pds, FTAG); + dsl_dir_close(pdd, FTAG); return (EEXIST); } dprintf("name=%s\n", name); - if (tail[0] == '@') { + oa.userfunc = func; + oa.userarg = arg; + oa.lastname = tail; + oa.type = type; + if (clone_parent != NULL) { /* - * If we're creating a snapshot, make sure everything - * they might want is on disk. XXX Sketchy to know - * about snapshots here, better to put in DSL. + * You can't clone to a different type. 
*/ - objset_t *os; - size_t plen = strchr(name, '@') - name + 1; - char *pbuf = kmem_alloc(plen, KM_SLEEP); - bcopy(name, pbuf, plen - 1); - pbuf[plen - 1] = '\0'; - - err = dmu_objset_open(pbuf, DMU_OST_ANY, DS_MODE_STANDARD, &os); - if (err == 0) { - err = zil_suspend(dmu_objset_zil(os)); - if (err == 0) { - err = dsl_dir_sync_task(pds, - dsl_dataset_snapshot_sync, - (void*)(tail+1), 16*1024); - zil_resume(dmu_objset_zil(os)); - } - dmu_objset_close(os); + if (clone_parent->os->os_phys->os_type != type) { + dsl_dir_close(pdd, FTAG); + return (EINVAL); } - kmem_free(pbuf, plen); - } else { - struct oscarg oa = { 0 }; - oa.userfunc = func; - oa.userarg = arg; - oa.fullname = name; - oa.lastname = tail; - oa.type = type; - if (clone_parent != NULL) { - /* - * You can't clone to a different type. - */ - if (clone_parent->os->os_phys->os_type != type) { - dsl_dir_close(pds, FTAG); - return (EINVAL); - } - oa.clone_parent = clone_parent->os->os_dsl_dataset; - } - err = dsl_dir_sync_task(pds, dmu_objset_create_sync, &oa, - 256*1024); + oa.clone_parent = clone_parent->os->os_dsl_dataset; } - dsl_dir_close(pds, FTAG); + err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, + dmu_objset_create_sync, pdd, &oa, 5); + dsl_dir_close(pdd, FTAG); return (err); } @@ -543,7 +543,6 @@ dmu_objset_destroy(const char *name) dmu_objset_close(os); } - /* XXX uncache everything? */ return (dsl_dataset_destroy(name)); } @@ -553,20 +552,104 @@ dmu_objset_rollback(const char *name) int err; objset_t *os; - err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); + err = dmu_objset_open(name, DMU_OST_ANY, + DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); if (err == 0) { err = zil_suspend(dmu_objset_zil(os)); if (err == 0) zil_resume(dmu_objset_zil(os)); - dmu_objset_close(os); if (err == 0) { /* XXX uncache everything? 
*/ - err = dsl_dataset_rollback(name); + err = dsl_dataset_rollback(os->os->os_dsl_dataset); } + dmu_objset_close(os); } return (err); } +struct snaparg { + dsl_sync_task_group_t *dstg; + char *snapname; + char failed[MAXPATHLEN]; +}; + +static int +dmu_objset_snapshot_one(char *name, void *arg) +{ + struct snaparg *sn = arg; + objset_t *os; + int err; + + (void) strcpy(sn->failed, name); + + err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_STANDARD, &os); + if (err != 0) + return (err); + + /* + * NB: we need to wait for all in-flight changes to get to disk, + * so that we snapshot those changes. zil_suspend does this as + * a side effect. + */ + err = zil_suspend(dmu_objset_zil(os)); + if (err == 0) { + dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, + dsl_dataset_snapshot_sync, os, sn->snapname, 3); + } + return (err); +} + +int +dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) +{ + dsl_sync_task_t *dst; + struct snaparg sn = { 0 }; + char *cp; + spa_t *spa; + int err; + + (void) strcpy(sn.failed, fsname); + + cp = strchr(fsname, '/'); + if (cp) { + *cp = '\0'; + err = spa_open(fsname, &spa, FTAG); + *cp = '/'; + } else { + err = spa_open(fsname, &spa, FTAG); + } + if (err) + return (err); + + sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + sn.snapname = snapname; + + if (recursive) + err = dmu_objset_find(fsname, dmu_objset_snapshot_one, &sn, 0); + else + err = dmu_objset_snapshot_one(fsname, &sn); + + if (err) + goto out; + + err = dsl_sync_task_group_wait(sn.dstg); + + for (dst = list_head(&sn.dstg->dstg_tasks); dst; + dst = list_next(&sn.dstg->dstg_tasks, dst)) { + objset_t *os = dst->dst_arg1; + if (dst->dst_err) + dmu_objset_name(os, sn.failed); + zil_resume(dmu_objset_zil(os)); + dmu_objset_close(os); + } +out: + if (err) + (void) strcpy(fsname, sn.failed); + dsl_sync_task_group_destroy(sn.dstg); + spa_close(spa, FTAG); + return (err); +} + static void dmu_objset_sync_dnodes(objset_impl_t *os, list_t *list, 
dmu_tx_t *tx) { @@ -755,9 +838,6 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, zap_cursor_t cursor; zap_attribute_t attr; - if (dd->dd_phys->dd_child_dir_zapobj == 0) - return (ENOENT); - /* there is no next dir on a snapshot! */ if (os->os->os_dsl_dataset->ds_object != dd->dd_phys->dd_head_dataset_obj) @@ -790,8 +870,8 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, /* * Find all objsets under name, and for each, call 'func(child_name, arg)'. */ -void -dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) +int +dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags) { dsl_dir_t *dd; objset_t *os; @@ -803,33 +883,39 @@ dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) err = dsl_dir_open(name, FTAG, &dd, NULL); if (err) - return; + return (err); + /* NB: the $MOS dir doesn't have a head dataset */ do_self = (dd->dd_phys->dd_head_dataset_obj != 0); /* * Iterate over all children. */ - if (dd->dd_phys->dd_child_dir_zapobj != 0) { - for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, - dd->dd_phys->dd_child_dir_zapobj); - zap_cursor_retrieve(&zc, &attr) == 0; - (void) zap_cursor_advance(&zc)) { - ASSERT(attr.za_integer_length == sizeof (uint64_t)); - ASSERT(attr.za_num_integers == 1); + for (zap_cursor_init(&zc, dd->dd_pool->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, &attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT(attr.za_integer_length == sizeof (uint64_t)); + ASSERT(attr.za_num_integers == 1); - /* - * No separating '/' because parent's name ends in /. - */ - child = kmem_alloc(MAXPATHLEN, KM_SLEEP); - /* XXX could probably just use name here */ - dsl_dir_name(dd, child); - (void) strcat(child, "/"); - (void) strcat(child, attr.za_name); - dmu_objset_find(child, func, arg, flags); - kmem_free(child, MAXPATHLEN); - } - zap_cursor_fini(&zc); + /* + * No separating '/' because parent's name ends in /. 
+ */ + child = kmem_alloc(MAXPATHLEN, KM_SLEEP); + /* XXX could probably just use name here */ + dsl_dir_name(dd, child); + (void) strcat(child, "/"); + (void) strcat(child, attr.za_name); + err = dmu_objset_find(child, func, arg, flags); + kmem_free(child, MAXPATHLEN); + if (err) + break; + } + zap_cursor_fini(&zc); + + if (err) { + dsl_dir_close(dd, FTAG); + return (err); } /* @@ -853,17 +939,23 @@ dmu_objset_find(char *name, void func(char *, void *), void *arg, int flags) dsl_dir_name(dd, child); (void) strcat(child, "@"); (void) strcat(child, attr.za_name); - func(child, arg); + err = func(child, arg); kmem_free(child, MAXPATHLEN); + if (err) + break; } zap_cursor_fini(&zc); } dsl_dir_close(dd, FTAG); + if (err) + return (err); + /* * Apply to self if appropriate. */ if (do_self) - func(name, arg); + err = func(name, arg); + return (err); } diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index d9c232e112..79559d258d 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -42,7 +42,7 @@ typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn, dmu_tx_t * -dmu_tx_create_ds(dsl_dir_t *dd) +dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); tx->tx_dir = dd; @@ -60,7 +60,7 @@ dmu_tx_create_ds(dsl_dir_t *dd) dmu_tx_t * dmu_tx_create(objset_t *os) { - dmu_tx_t *tx = dmu_tx_create_ds(os->os->os_dsl_dataset->ds_dir); + dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir); tx->tx_objset = os; tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset); return (tx); @@ -69,7 +69,7 @@ dmu_tx_create(objset_t *os) dmu_tx_t * dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg) { - dmu_tx_t *tx = dmu_tx_create_ds(NULL); + dmu_tx_t *tx = dmu_tx_create_dd(NULL); ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg); tx->tx_pool = dp; diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 
a199aec8de..7eb9028189 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -29,6 +29,7 @@ #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> #include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> #include <sys/dmu_traverse.h> #include <sys/dmu_tx.h> #include <sys/arc.h> @@ -37,8 +38,12 @@ #include <sys/unique.h> #include <sys/zfs_context.h> -static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, - void *arg, dmu_tx_t *tx); +static dsl_checkfunc_t dsl_dataset_destroy_begin_check; +static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; +static dsl_checkfunc_t dsl_dataset_rollback_check; +static dsl_syncfunc_t dsl_dataset_rollback_sync; +static dsl_checkfunc_t dsl_dataset_destroy_check; +static dsl_syncfunc_t dsl_dataset_destroy_sync; #define DOS_REF_MAX (1ULL << 62) @@ -176,9 +181,6 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) { - uint64_t txg; - dsl_dir_t *dd; - if (ds == NULL) return (0); /* @@ -191,15 +193,7 @@ dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) * snapshot, because we could set the sync task in the quiescing * phase. So this should only be used as a guess. 
*/ - dd = ds->ds_dir; - mutex_enter(&dd->dd_lock); - if (dd->dd_sync_func == dsl_dataset_snapshot_sync) - txg = dd->dd_sync_txg; - else - txg = ds->ds_phys->ds_prev_snap_txg; - mutex_exit(&dd->dd_lock); - - return (txg); + return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg)); } int @@ -533,41 +527,25 @@ dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) dsl_dataset_close(ds, DS_MODE_NONE, FTAG); } -int -dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, +uint64_t +dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) { - int err; - dsl_pool_t *dp = pds->dd_pool; + dsl_pool_t *dp = pdd->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; - uint64_t dsobj; + uint64_t dsobj, ddobj; objset_t *mos = dp->dp_meta_objset; dsl_dir_t *dd; - if (clone_parent != NULL) { - /* - * You can't clone across pools. - */ - if (clone_parent->ds_dir->dd_pool != dp) - return (EXDEV); - - /* - * You can only clone snapshots, not the head datasets. 
- */ - if (clone_parent->ds_phys->ds_num_children == 0) - return (EINVAL); - } - + ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); + ASSERT(clone_parent == NULL || + clone_parent->ds_phys->ds_num_children > 0); ASSERT(lastname[0] != '@'); ASSERT(dmu_tx_is_syncing(tx)); - err = dsl_dir_create_sync(pds, lastname, tx); - if (err) - return (err); - VERIFY(0 == dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, &dd, NULL)); - - /* This is the point of no (unsuccessful) return */ + ddobj = dsl_dir_create_sync(pdd, lastname, tx); + VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); @@ -609,149 +587,209 @@ dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, dd->dd_phys->dd_head_dataset_obj = dsobj; dsl_dir_close(dd, FTAG); - return (0); + return (dsobj); } -int -dsl_dataset_destroy(const char *name) +struct destroyarg { + dsl_sync_task_group_t *dstg; + char *snapname; + void *tag; + char *failed; +}; + +static int +dsl_snapshot_destroy_one(char *name, void *arg) { + struct destroyarg *da = arg; + dsl_dataset_t *ds; + char *cp; int err; - dsl_pool_t *dp; - dsl_dir_t *dd; - const char *tail; - err = dsl_dir_open(name, FTAG, &dd, &tail); - if (err) + (void) strcat(name, "@"); + (void) strcat(name, da->snapname); + err = dsl_dataset_open(name, + DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, + da->tag, &ds); + cp = strchr(name, '@'); + *cp = '\0'; + if (err == ENOENT) + return (0); + if (err) { + (void) strcpy(da->failed, name); return (err); + } - dp = dd->dd_pool; - if (tail != NULL) { - if (tail[0] != '@') { - dsl_dir_close(dd, FTAG); - return (ENOENT); - } - tail++; - /* Just blow away the snapshot */ - do { - txg_wait_synced(dp, 0); - err = dsl_dir_sync_task(dd, - dsl_dataset_destroy_sync, (void*)tail, 0); - } while (err == EAGAIN); - dsl_dir_close(dd, FTAG); - } else { - char buf[MAXNAMELEN]; - char *cp; - objset_t 
*os; - uint64_t obj; - dsl_dir_t *pds; - - if (dd->dd_phys->dd_parent_obj == 0) { - dsl_dir_close(dd, FTAG); - return (EINVAL); - } - - err = dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); - if (err) { - dsl_dir_close(dd, FTAG); - return (err); - } - - /* - * Check for errors and mark this ds as inconsistent, in - * case we crash while freeing the objects. - */ - err = dsl_dir_sync_task(os->os->os_dsl_dataset->ds_dir, - dsl_dataset_destroy_begin_sync, os->os->os_dsl_dataset, 0); - if (err) { - dmu_objset_close(os); - dsl_dir_close(dd, FTAG); - return (err); - } - - /* - * remove the objects in open context, so that we won't - * have too much to do in syncing context. - */ - for (obj = 0; err == 0; - err = dmu_object_next(os, &obj, FALSE)) { - dmu_tx_t *tx = dmu_tx_create(os); - dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); - dmu_tx_hold_bonus(tx, obj); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - /* - * Perhaps there is not enough disk - * space. Just deal with it from - * dsl_dataset_destroy_sync(). - */ - dmu_tx_abort(tx); - continue; - } - VERIFY(0 == dmu_object_free(os, obj, tx)); - dmu_tx_commit(tx); - } - /* Make sure it's not dirty before we finish destroying it. */ - txg_wait_synced(dd->dd_pool, 0); + dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, ds, da->tag, 0); + return (0); +} - dmu_objset_close(os); - if (err != ESRCH) { - dsl_dir_close(dd, FTAG); - return (err); +/* + * Destroy 'snapname' in all descendants of 'fsname'. 
+ */ +#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy +int +dsl_snapshots_destroy(char *fsname, char *snapname) +{ + int err; + struct destroyarg da; + dsl_sync_task_t *dst; + spa_t *spa; + char *cp; + + cp = strchr(fsname, '/'); + if (cp) { + *cp = '\0'; + err = spa_open(fsname, &spa, FTAG); + *cp = '/'; + } else { + err = spa_open(fsname, &spa, FTAG); + } + if (err) + return (err); + da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + da.snapname = snapname; + da.tag = FTAG; + da.failed = fsname; + + err = dmu_objset_find(fsname, + dsl_snapshot_destroy_one, &da, 0); + + if (err == 0) + err = dsl_sync_task_group_wait(da.dstg); + + for (dst = list_head(&da.dstg->dstg_tasks); dst; + dst = list_next(&da.dstg->dstg_tasks, dst)) { + dsl_dataset_t *ds = dst->dst_arg1; + if (dst->dst_err) { + dsl_dataset_name(ds, fsname); + cp = strchr(fsname, '@'); + *cp = '\0'; } - /* - * Blow away the dsl_dir + head dataset. - * dsl_dir_destroy_sync() will call - * dsl_dataset_destroy_sync() to destroy the head dataset. 
+ * If it was successful, destroy_sync would have + * closed the ds */ - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dir_open_obj(dd->dd_pool, - dd->dd_phys->dd_parent_obj, NULL, FTAG, &pds); - dsl_dir_close(dd, FTAG); - rw_exit(&dp->dp_config_rwlock); if (err) - return (err); - - (void) strcpy(buf, name); - cp = strrchr(buf, '/') + 1; - ASSERT(cp[0] != '\0'); - do { - txg_wait_synced(dp, 0); - err = dsl_dir_sync_task(pds, - dsl_dir_destroy_sync, cp, 0); - } while (err == EAGAIN); - dsl_dir_close(pds, FTAG); + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); } + dsl_sync_task_group_destroy(da.dstg); + spa_close(spa, FTAG); return (err); } int -dsl_dataset_rollback(const char *name) +dsl_dataset_destroy(const char *name) { int err; + dsl_sync_task_group_t *dstg; + objset_t *os; + dsl_dataset_t *ds; dsl_dir_t *dd; - const char *tail; + uint64_t obj; + + if (strchr(name, '@')) { + /* Destroying a snapshot is simpler */ + err = dsl_dataset_open(name, + DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, + FTAG, &ds); + if (err) + return (err); + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_destroy_check, dsl_dataset_destroy_sync, + ds, FTAG, 0); + if (err) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + return (err); + } - err = dsl_dir_open(name, FTAG, &dd, &tail); + err = dmu_objset_open(name, DMU_OST_ANY, + DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); if (err) return (err); + ds = os->os->os_dsl_dataset; + dd = ds->ds_dir; - if (tail != NULL) { - dsl_dir_close(dd, FTAG); - return (EINVAL); + /* + * Check for errors and mark this ds as inconsistent, in + * case we crash while freeing the objects. 
+ */ + err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, + dsl_dataset_destroy_begin_sync, ds, NULL, 0); + if (err) { + dmu_objset_close(os); + return (err); } - do { - txg_wait_synced(dd->dd_pool, 0); - err = dsl_dir_sync_task(dd, - dsl_dataset_rollback_sync, NULL, 0); - } while (err == EAGAIN); - dsl_dir_close(dd, FTAG); + /* + * remove the objects in open context, so that we won't + * have too much to do in syncing context. + */ + for (obj = 0; err == 0; + err = dmu_object_next(os, &obj, FALSE)) { + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); + dmu_tx_hold_bonus(tx, obj); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + /* + * Perhaps there is not enough disk + * space. Just deal with it from + * dsl_dataset_destroy_sync(). + */ + dmu_tx_abort(tx); + continue; + } + VERIFY(0 == dmu_object_free(os, obj, tx)); + dmu_tx_commit(tx); + } + /* Make sure it's not dirty before we finish destroying it. */ + txg_wait_synced(dd->dd_pool, 0); + + dmu_objset_close(os); + if (err != ESRCH) + return (err); + + err = dsl_dataset_open(name, + DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, + FTAG, &ds); + if (err) + return (err); + + err = dsl_dir_open(name, FTAG, &dd, NULL); + if (err) { + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + return (err); + } + + /* + * Blow away the dsl_dir + head dataset. 
+ */ + dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); + dsl_sync_task_create(dstg, dsl_dataset_destroy_check, + dsl_dataset_destroy_sync, ds, FTAG, 0); + dsl_sync_task_create(dstg, dsl_dir_destroy_check, + dsl_dir_destroy_sync, dd, FTAG, 0); + err = dsl_sync_task_group_wait(dstg); + dsl_sync_task_group_destroy(dstg); + /* if it is successful, *destroy_sync will close the ds+dd */ + if (err) { + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + dsl_dir_close(dd, FTAG); + } return (err); } +int +dsl_dataset_rollback(dsl_dataset_t *ds) +{ + ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); + return (dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_rollback_check, dsl_dataset_rollback_sync, + ds, NULL, 0)); +} + void * dsl_dataset_set_user_ptr(dsl_dataset_t *ds, void *p, dsl_dataset_evict_func_t func) @@ -849,63 +887,52 @@ kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) } /* ARGSUSED */ -int -dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +static int +dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) { - objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_dataset_t *ds; - int err; + dsl_dataset_t *ds = arg1; - if (dd->dd_phys->dd_head_dataset_obj == 0) + /* + * There must be a previous snapshot. I suppose we could roll + * it back to being empty (and re-initialize the upper (ZPL) + * layer). But for now there's no way to do this via the user + * interface. + */ + if (ds->ds_phys->ds_prev_snap_txg == 0) return (EINVAL); - err = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds); - if (err) - return (err); - if (ds->ds_phys->ds_prev_snap_txg == 0) { - /* - * There's no previous snapshot. I suppose we could - * roll it back to being empty (and re-initialize the - * upper (ZPL) layer). But for now there's no way to do - * this via the user interface. - */ - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); + /* + * This must not be a snapshot. 
+ */ + if (ds->ds_phys->ds_next_snap_obj != 0) return (EINVAL); - } - - mutex_enter(&ds->ds_lock); - if (ds->ds_open_refcount > 0) { - mutex_exit(&ds->ds_lock); - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - return (EBUSY); - } /* * If we made changes this txg, traverse_dsl_dataset won't find * them. Try again. */ - if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { - mutex_exit(&ds->ds_lock); - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); + if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) return (EAGAIN); - } - /* THE POINT OF NO (unsuccessful) RETURN */ - ds->ds_open_refcount = DOS_REF_MAX; - mutex_exit(&ds->ds_lock); + return (0); +} + +/* ARGSUSED */ +static void +dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; dmu_buf_will_dirty(ds->ds_dbuf, tx); /* Zero out the deadlist. */ - dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); bplist_close(&ds->ds_deadlist); bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj)); - dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); { /* Free blkptrs that we gave birth to */ @@ -924,11 +951,11 @@ dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ADVANCE_POST, kill_blkptr, &ka); (void) zio_wait(zio); - dsl_dir_diduse_space(dd, + dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); } - /* Change our contents to that of the prev snapshot (finally!) 
*/ + /* Change our contents to that of the prev snapshot */ ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; @@ -941,19 +968,13 @@ dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ds->ds_prev->ds_phys->ds_unique_bytes = 0; - - dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj); - ds->ds_open_refcount = 0; - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - - return (0); } /* ARGSUSED */ static int -dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg; + dsl_dataset_t *ds = arg1; /* * Can't delete a head dataset if there are snapshots of it. @@ -964,64 +985,29 @@ dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EINVAL); - /* Mark it as inconsistent on-disk, in case we crash */ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - return (0); } -int -dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +/* ARGSUSED */ +static void +dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { - const char *snapname = arg; - uint64_t used = 0, compressed = 0, uncompressed = 0; - blkptr_t bp; - zio_t *zio; - int err; - int after_branch_point = FALSE; - int drop_lock = FALSE; - dsl_pool_t *dp = dd->dd_pool; - objset_t *mos = dp->dp_meta_objset; - dsl_dataset_t *ds, *ds_prev = NULL; - uint64_t obj; - - if (dd->dd_phys->dd_head_dataset_obj == 0) - return (EINVAL); - - if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) { - rw_enter(&dp->dp_config_rwlock, RW_WRITER); - drop_lock = TRUE; - } - - err = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, NULL, - snapname ? 
DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG, &ds); + dsl_dataset_t *ds = arg1; - if (err == 0 && snapname) { - err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, - snapname, 8, 1, &obj); - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - if (err == 0) { - err = dsl_dataset_open_obj(dd->dd_pool, obj, NULL, - DS_MODE_EXCLUSIVE, FTAG, &ds); - } - } - if (err) { - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); - return (err); - } + /* Mark it as inconsistent on-disk, in case we crash */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; +} - obj = ds->ds_object; +/* ARGSUSED */ +static int +dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; /* Can't delete a branch point. */ - if (ds->ds_phys->ds_num_children > 1) { - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); - return (EINVAL); - } + if (ds->ds_phys->ds_num_children > 1) + return (EEXIST); /* * Can't delete a head dataset if there are snapshots of it. @@ -1029,37 +1015,50 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) * from.) */ if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == obj) { - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); + ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EINVAL); - } /* * If we made changes this txg, traverse_dsl_dataset won't find * them. Try again. */ - if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) { - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); + if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) return (EAGAIN); - } + + /* XXX we should do some i/o error checking... 
*/ + return (0); +} + +static void +dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + uint64_t used = 0, compressed = 0, uncompressed = 0; + zio_t *zio; + int err; + int after_branch_point = FALSE; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds_prev = NULL; + uint64_t obj; + + ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX); + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT(ds->ds_prev == NULL || + ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + + ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); + + obj = ds->ds_object; if (ds->ds_phys->ds_prev_snap_obj != 0) { if (ds->ds_prev) { ds_prev = ds->ds_prev; } else { - err = dsl_dataset_open_obj(dd->dd_pool, + VERIFY(0 == dsl_dataset_open_obj(dp, ds->ds_phys->ds_prev_snap_obj, NULL, - DS_MODE_NONE, FTAG, &ds_prev); - if (err) { - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); - return (err); - } + DS_MODE_NONE, FTAG, &ds_prev)); } after_branch_point = (ds_prev->ds_phys->ds_next_snap_obj != obj); @@ -1076,18 +1075,16 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) } } - /* THE POINT OF NO (unsuccessful) RETURN */ - - ASSERT3P(tx->tx_pool, ==, dd->dd_pool); zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); if (ds->ds_phys->ds_next_snap_obj != 0) { + blkptr_t bp; dsl_dataset_t *ds_next; uint64_t itor = 0; spa_scrub_restart(dp->dp_spa, tx->tx_txg); - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + VERIFY(0 == dsl_dataset_open_obj(dp, ds->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG, &ds_next)); ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); @@ -1155,7 +1152,7 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) */ dsl_dataset_t *ds_after_next; - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + VERIFY(0 == dsl_dataset_open_obj(dp, 
ds_next->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG, &ds_after_next)); itor = 0; @@ -1184,7 +1181,7 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, ds_next); if (ds_prev) { - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + VERIFY(0 == dsl_dataset_open_obj(dp, ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, ds_next, &ds_next->ds_prev)); } else { @@ -1232,17 +1229,17 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) err = zio_wait(zio); ASSERT3U(err, ==, 0); - dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx); + dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); if (ds->ds_phys->ds_snapnames_zapobj) { err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); ASSERT(err == 0); } - if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) { + if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { /* Erase the link in the dataset */ - dmu_buf_will_dirty(dd->dd_dbuf, tx); - dd->dd_phys->dd_head_dataset_obj = 0; + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; /* * dsl_dir_sync_destroy() called us, they'll destroy * the dataset. 
@@ -1250,21 +1247,21 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) } else { /* remove from snapshot namespace */ dsl_dataset_t *ds_head; - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, NULL, + VERIFY(0 == dsl_dataset_open_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds_head)); #ifdef ZFS_DEBUG { uint64_t val; err = zap_lookup(mos, ds_head->ds_phys->ds_snapnames_zapobj, - snapname, 8, 1, &val); + ds->ds_snapname, 8, 1, &val); ASSERT3U(err, ==, 0); ASSERT3U(val, ==, obj); } #endif err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, - snapname, tx); + ds->ds_snapname, tx); ASSERT(err == 0); dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); } @@ -1272,64 +1269,64 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) if (ds_prev && ds->ds_prev != ds_prev) dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); - err = dmu_object_free(mos, obj, tx); - ASSERT(err == 0); - - /* - * Close the objset with mode NONE, thus leaving it with - * DOS_REF_MAX set, so that noone can access it. 
- */ - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - - if (drop_lock) - rw_exit(&dp->dp_config_rwlock); - return (0); + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); + VERIFY(0 == dmu_object_free(mos, obj, tx)); } +/* ARGSUSED */ int -dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { - const char *snapname = arg; - dsl_pool_t *dp = dd->dd_pool; - dmu_buf_t *dbuf; - dsl_dataset_phys_t *dsphys; - uint64_t dsobj, value; - objset_t *mos = dp->dp_meta_objset; - dsl_dataset_t *ds; + objset_t *os = arg1; + dsl_dataset_t *ds = os->os->os_dsl_dataset; + const char *snapname = arg2; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int err; + uint64_t value; - ASSERT(dmu_tx_is_syncing(tx)); - - if (dd->dd_phys->dd_head_dataset_obj == 0) - return (EINVAL); - err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL, - DS_MODE_NONE, FTAG, &ds); - if (err) - return (err); + /* + * We don't allow multiple snapshots of the same txg. If there + * is already one, try again. + */ + if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) + return (EAGAIN); + /* + * Check for conflicting name snapshot name. 
+ */ err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &value); - if (err == 0) { - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); + if (err == 0) return (EEXIST); - } - ASSERT(err == ENOENT); + if (err != ENOENT) + return (err); - /* The point of no (unsuccessful) return */ + ds->ds_trysnap_txg = tx->tx_txg; + return (0); +} - dprintf_dd(dd, "taking snapshot %s in txg %llu\n", - snapname, tx->tx_txg); +void +dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + objset_t *os = arg1; + dsl_dataset_t *ds = os->os->os_dsl_dataset; + const char *snapname = arg2; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dmu_buf_t *dbuf; + dsl_dataset_phys_t *dsphys; + uint64_t dsobj; + objset_t *mos = dp->dp_meta_objset; + int err; spa_scrub_restart(dp->dp_spa, tx->tx_txg); - - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; - dsphys->ds_dir_obj = dd->dd_object; + dsphys->ds_dir_obj = ds->ds_dir->dd_object; dsphys->ds_fsid_guid = unique_create(); unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, @@ -1348,24 +1345,17 @@ dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); - if (ds->ds_phys->ds_prev_snap_obj != 0) { - dsl_dataset_t *ds_prev; - - VERIFY(0 == dsl_dataset_open_obj(dp, - ds->ds_phys->ds_prev_snap_obj, NULL, - DS_MODE_NONE, FTAG, &ds_prev)); - ASSERT(ds_prev->ds_phys->ds_next_snap_obj == + ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); + if (ds->ds_prev) { + ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object || - ds_prev->ds_phys->ds_num_children > 1); - if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { - 
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_num_children > 1); + if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds_prev->ds_phys->ds_creation_txg); - ds_prev->ds_phys->ds_next_snap_obj = dsobj; + ds->ds_prev->ds_phys->ds_creation_txg); + ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; } - dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); - } else { - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0); } bplist_close(&ds->ds_deadlist); @@ -1389,11 +1379,6 @@ dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) VERIFY(0 == dsl_dataset_open_obj(dp, ds->ds_phys->ds_prev_snap_obj, snapname, DS_MODE_NONE, ds, &ds->ds_prev)); - - rw_exit(&dp->dp_config_rwlock); - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - - return (0); } void @@ -1452,115 +1437,78 @@ dsl_dataset_pool(dsl_dataset_t *ds) return (ds->ds_dir->dd_pool); } -struct osrenamearg { - const char *oldname; - const char *newname; -}; - +/* ARGSUSED */ static int -dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct osrenamearg *ora = arg; + dsl_dataset_t *ds = arg1; + char *newsnapname = arg2; + dsl_dir_t *dd = ds->ds_dir; objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_dir_t *nds; - const char *tail; - int err; - dsl_dataset_t *snds, *fsds; + dsl_dataset_t *hds; uint64_t val; + int err; - err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname, - DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds); + err = dsl_dataset_open_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); if (err) return (err); - if (snds->ds_dir != dd) { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (EINVAL); - } - - /* better be changing a snapshot */ - if (snds->ds_phys->ds_next_snap_obj == 0) { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - 
return (EINVAL); - } - - /* new fs better exist */ - err = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, - FTAG, &nds, &tail); - if (err) { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (err); - } - - dsl_dir_close(nds, FTAG); - - /* new name better be in same fs */ - if (nds != dd) { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (EINVAL); - } - - /* new name better be a snapshot */ - if (tail == NULL || tail[0] != '@') { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (EINVAL); - } - - tail++; + /* new name better not be in use */ + err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, + newsnapname, 8, 1, &val); + dsl_dataset_close(hds, DS_MODE_NONE, FTAG); + + if (err == 0) + err = EEXIST; + else if (err == ENOENT) + err = 0; + return (err); +} - err = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &fsds); - if (err) { - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (err); - } +static void +dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + char *newsnapname = arg2; + dsl_dir_t *dd = ds->ds_dir; + objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_dataset_t *hds; + int err; - /* new name better not be in use */ - err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj, - tail, 8, 1, &val); - if (err != ENOENT) { - if (err == 0) - err = EEXIST; - dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (EEXIST); - } + ASSERT(ds->ds_phys->ds_next_snap_obj != 0); - /* The point of no (unsuccessful) return */ + VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); - VERIFY(0 == dsl_dataset_get_snapname(snds)); - err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj, - snds->ds_snapname, tx); + VERIFY(0 == dsl_dataset_get_snapname(ds)); + 
err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, + ds->ds_snapname, tx); ASSERT3U(err, ==, 0); - mutex_enter(&snds->ds_lock); - (void) strcpy(snds->ds_snapname, tail); - mutex_exit(&snds->ds_lock); - err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj, - snds->ds_snapname, 8, 1, &snds->ds_object, tx); + mutex_enter(&ds->ds_lock); + (void) strcpy(ds->ds_snapname, newsnapname); + mutex_exit(&ds->ds_lock); + err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, + ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT3U(err, ==, 0); - rw_exit(&dd->dd_pool->dp_config_rwlock); - dsl_dataset_close(fsds, DS_MODE_NONE, FTAG); - dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG); - return (0); + dsl_dataset_close(hds, DS_MODE_NONE, FTAG); } #pragma weak dmu_objset_rename = dsl_dataset_rename int -dsl_dataset_rename(const char *osname, const char *newname) +dsl_dataset_rename(const char *oldname, const char *newname) { dsl_dir_t *dd; + dsl_dataset_t *ds; const char *tail; - struct osrenamearg ora; int err; - err = dsl_dir_open(osname, FTAG, &dd, &tail); + err = dsl_dir_open(oldname, FTAG, &dd, &tail); if (err) return (err); if (tail == NULL) { - err = dsl_dir_sync_task(dd, - dsl_dir_rename_sync, (void*)newname, 1<<12); + err = dsl_dir_rename(dd, newname); dsl_dir_close(dd, FTAG); return (err); } @@ -1570,46 +1518,76 @@ dsl_dataset_rename(const char *osname, const char *newname) return (ENOENT); } - ora.oldname = osname; - ora.newname = newname; - - err = dsl_dir_sync_task(dd, - dsl_dataset_snapshot_rename_sync, &ora, 1<<12); dsl_dir_close(dd, FTAG); + + /* new name must be snapshot in same filesystem */ + tail = strchr(newname, '@'); + if (tail == NULL) + return (EINVAL); + tail++; + if (strncmp(oldname, newname, tail - newname) != 0) + return (EXDEV); + + err = dsl_dataset_open(oldname, + DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); + if (err) + return (err); + + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_snapshot_rename_check, + 
dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); + + dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); + return (err); } -/* ARGSUSED */ +struct promotearg { + uint64_t used, comp, uncomp, unique; + uint64_t newnext_obj, snapnames_obj; +}; + static int -dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) { + dsl_dataset_t *hds = arg1; + struct promotearg *pa = arg2; + dsl_dir_t *dd = hds->ds_dir; + dsl_pool_t *dp = hds->ds_dir->dd_pool; dsl_dir_t *pdd = NULL; dsl_dataset_t *ds = NULL; - dsl_dataset_t *hds = NULL; - dsl_dataset_t *phds = NULL; dsl_dataset_t *pivot_ds = NULL; dsl_dataset_t *newnext_ds = NULL; int err; char *name = NULL; - uint64_t used = 0, comp = 0, uncomp = 0, unique = 0, itor = 0; + uint64_t itor = 0; blkptr_t bp; + bzero(pa, sizeof (*pa)); + /* Check that it is a clone */ if (dd->dd_phys->dd_clone_parent_obj == 0) return (EINVAL); - /* Open everyone */ - if (err = dsl_dataset_open_obj(dd->dd_pool, + /* Since this is so expensive, don't do the preliminary check */ + if (!dmu_tx_is_syncing(tx)) + return (0); + + if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_clone_parent_obj, NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) goto out; pdd = pivot_ds->ds_dir; - if (err = dsl_dataset_open_obj(dd->dd_pool, - pdd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds)) - goto out; - if (err = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)) - goto out; + + { + dsl_dataset_t *phds; + if (err = dsl_dataset_open_obj(dd->dd_pool, + pdd->dd_phys->dd_head_dataset_obj, + NULL, DS_MODE_NONE, FTAG, &phds)) + goto out; + pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; + dsl_dataset_close(phds, DS_MODE_NONE, FTAG); + } if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { err = EXDEV; @@ -1623,25 +1601,23 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsl_dataset_t *prev; if (err = 
dsl_dataset_open_obj(dd->dd_pool, - newnext_ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, - FTAG, &prev)) + newnext_ds->ds_phys->ds_prev_snap_obj, + NULL, DS_MODE_NONE, FTAG, &prev)) goto out; dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); newnext_ds = prev; } + pa->newnext_obj = newnext_ds->ds_object; /* compute pivot point's new unique space */ while ((err = bplist_iterate(&newnext_ds->ds_deadlist, &itor, &bp)) == 0) { if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) - unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); + pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); } if (err != ENOENT) goto out; - /* need the config lock to ensure that the snapshots are not open */ - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); - /* Walk the snapshots that we are moving */ name = kmem_alloc(MAXPATHLEN, KM_SLEEP); ds = pivot_ds; @@ -1665,9 +1641,9 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) * compute space to transfer. Each snapshot gave birth to: * (my used) - (prev's used) + (deadlist's used) */ - used += ds->ds_phys->ds_used_bytes; - comp += ds->ds_phys->ds_compressed_bytes; - uncomp += ds->ds_phys->ds_uncompressed_bytes; + pa->used += ds->ds_phys->ds_used_bytes; + pa->comp += ds->ds_phys->ds_compressed_bytes; + pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; /* If we reach the first snapshot, we're done. */ if (ds->ds_phys->ds_prev_snap_obj == 0) @@ -1680,9 +1656,9 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, FTAG, &prev)) goto out; - used += dlused - prev->ds_phys->ds_used_bytes; - comp += dlcomp - prev->ds_phys->ds_compressed_bytes; - uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; + pa->used += dlused - prev->ds_phys->ds_used_bytes; + pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; + pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; /* * We could be a clone of a clone. 
If we reach our @@ -1696,17 +1672,43 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); ds = prev; } - if (ds != pivot_ds) - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - ds = NULL; /* Check that there is enough space here */ - if (err = dsl_dir_transfer_possible(pdd, dd, used)) - goto out; + err = dsl_dir_transfer_possible(pdd, dd, pa->used); + +out: + if (ds && ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + if (pivot_ds) + dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); + if (newnext_ds) + dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); + if (name) + kmem_free(name, MAXPATHLEN); + return (err); +} - /* The point of no (unsuccessful) return */ +static void +dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *hds = arg1; + struct promotearg *pa = arg2; + dsl_dir_t *dd = hds->ds_dir; + dsl_pool_t *dp = hds->ds_dir->dd_pool; + dsl_dir_t *pdd = NULL; + dsl_dataset_t *ds, *pivot_ds; + char *name; + + ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); + ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); + + VERIFY(0 == dsl_dataset_open_obj(dp, + dd->dd_phys->dd_clone_parent_obj, + NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); + pdd = pivot_ds->ds_dir; /* move snapshots to this dir */ + name = kmem_alloc(MAXPATHLEN, KM_SLEEP); ds = pivot_ds; /* CONSTCOND */ while (TRUE) { @@ -1714,9 +1716,9 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) /* move snap name entry */ dsl_dataset_name(ds, name); - VERIFY(0 == zap_remove(dd->dd_pool->dp_meta_objset, - phds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx)); - VERIFY(0 == zap_add(dd->dd_pool->dp_meta_objset, + VERIFY(0 == zap_remove(dp->dp_meta_objset, + pa->snapnames_obj, ds->ds_snapname, tx)); + VERIFY(0 == zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); @@ -1726,7 +1728,7 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, 
void *arg, dmu_tx_t *tx) ds->ds_phys->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, pdd); dsl_dir_close(ds->ds_dir, ds); - VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object, + VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); ASSERT3U(dsl_prop_numcb(ds), ==, 0); @@ -1734,7 +1736,7 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) if (ds->ds_phys->ds_prev_snap_obj == 0) break; - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, + VERIFY(0 == dsl_dataset_open_obj(dp, ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, FTAG, &prev)); @@ -1746,10 +1748,12 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); ds = prev; } + if (ds != pivot_ds) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); /* change pivot point's next snap */ dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); - pivot_ds->ds_phys->ds_next_snap_obj = newnext_ds->ds_object; + pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; /* change clone_parent-age */ dmu_buf_will_dirty(dd->dd_dbuf, tx); @@ -1759,28 +1763,12 @@ dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; /* change space accounting */ - dsl_dir_diduse_space(pdd, -used, -comp, -uncomp, tx); - dsl_dir_diduse_space(dd, used, comp, uncomp, tx); - pivot_ds->ds_phys->ds_unique_bytes = unique; + dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); + dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); + pivot_ds->ds_phys->ds_unique_bytes = pa->unique; - err = 0; - -out: - if (RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)) - rw_exit(&dd->dd_pool->dp_config_rwlock); - if (hds) - dsl_dataset_close(hds, DS_MODE_NONE, FTAG); - if (phds) - dsl_dataset_close(phds, DS_MODE_NONE, FTAG); - if (ds && ds != pivot_ds) - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - if (pivot_ds) - dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); - if (newnext_ds) - 
dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); - if (name) - kmem_free(name, MAXPATHLEN); - return (err); + dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); + kmem_free(name, MAXPATHLEN); } int @@ -1789,6 +1777,7 @@ dsl_dataset_promote(const char *name) dsl_dataset_t *ds; int err; dmu_object_info_t doi; + struct promotearg pa; err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); if (err) @@ -1806,8 +1795,9 @@ dsl_dataset_promote(const char *name) * a bunch of snapnames to the promoted ds, and dirtying their * bonus buffers. */ - err = dsl_dir_sync_task(ds->ds_dir, dsl_dataset_promote_sync, NULL, - (1<<20) + (doi.doi_physical_blks << (SPA_MINBLOCKSHIFT + 7))); + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + dsl_dataset_promote_check, + dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); dsl_dataset_close(ds, DS_MODE_NONE, FTAG); return (err); } diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index d7095cb0d3..be9cd8ebc2 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -30,6 +30,7 @@ #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> #include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> #include <sys/spa.h> #include <sys/zap.h> #include <sys/zio.h> @@ -38,10 +39,9 @@ static uint64_t dsl_dir_space_accounted(dsl_dir_t *dd); static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); -static int dsl_dir_set_reservation_sync(dsl_dir_t *dd, - void *arg, dmu_tx_t *tx); static uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly); +static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); /* ARGSUSED */ @@ -60,8 +60,6 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); - ASSERT(dd->dd_sync_txg == 0); - if (dd->dd_parent) dsl_dir_close(dd->dd_parent, dd); @@ -323,8 +321,6 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, ASSERT(next[0] 
!= '\0'); if (next[0] == '@') break; - if (dd->dd_phys->dd_child_dir_zapobj == 0) - break; dprintf("looking up %s in obj%lld\n", buf, dd->dd_phys->dd_child_dir_zapobj); @@ -384,39 +380,18 @@ dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); } -int +uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) { objset_t *mos = pds->dd_pool->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *dsphys; dmu_buf_t *dbuf; - int err; - - ASSERT(dmu_tx_is_syncing(tx)); - - if (pds->dd_phys->dd_child_dir_zapobj == 0) { - dmu_buf_will_dirty(pds->dd_dbuf, tx); - pds->dd_phys->dd_child_dir_zapobj = zap_create(mos, - DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); - } - - rw_enter(&pds->dd_pool->dp_config_rwlock, RW_WRITER); - err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, - name, sizeof (uint64_t), 1, &ddobj); - if (err != ENOENT) { - rw_exit(&pds->dd_pool->dp_config_rwlock); - return (err ? err : EEXIST); - } ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); - err = zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, - name, sizeof (uint64_t), 1, &ddobj, tx); - ASSERT3U(err, ==, 0); - dprintf("dataset_create: zap_add %s->%lld to %lld returned %d\n", - name, ddobj, pds->dd_phys->dd_child_dir_zapobj, err); - + VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, + name, sizeof (uint64_t), 1, &ddobj, tx)); VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; @@ -429,95 +404,64 @@ dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); dmu_buf_rele(dbuf, FTAG); - rw_exit(&pds->dd_pool->dp_config_rwlock); - - return (0); + return (ddobj); } +/* ARGSUSED */ int -dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx) +dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { - const char *name = arg; - 
dsl_dir_t *dd = NULL; - dsl_pool_t *dp = pds->dd_pool; + dsl_dir_t *dd = arg1; + dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; - uint64_t val, obj, child_zapobj, props_zapobj; - int t, err; - - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + int err; + uint64_t count; - err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, name, - 8, 1, &obj); - if (err) - goto out; + /* + * There should be exactly two holds, both from + * dsl_dataset_destroy: one on the dd directory, and one on its + * head ds. Otherwise, someone is trying to lookup something + * inside this dir while we want to destroy it. The + * config_rwlock ensures that nobody else opens it after we + * check. + */ + ASSERT(dmu_buf_refcount(dd->dd_dbuf) == 2); + if (dmu_buf_refcount(dd->dd_dbuf) > 2) + return (EBUSY); - err = dsl_dir_open_obj(dp, obj, name, FTAG, &dd); + err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); if (err) - goto out; - ASSERT3U(dd->dd_phys->dd_parent_obj, ==, pds->dd_object); - - if (dmu_buf_refcount(dd->dd_dbuf) > 1) { - err = EBUSY; - goto out; - } - - for (t = 0; t < TXG_SIZE; t++) { - /* - * if they were dirty, they'd also be open. - * dp_config_rwlock ensures that it stays that way. 
- */ - ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); - } + return (err); + if (count != 0) + return (EEXIST); - child_zapobj = dd->dd_phys->dd_child_dir_zapobj; - props_zapobj = dd->dd_phys->dd_props_zapobj; + return (0); +} - if (child_zapobj != 0) { - uint64_t count; - err = EEXIST; - (void) zap_count(mos, child_zapobj, &count); - if (count != 0) - goto out; - } +void +dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + objset_t *mos = dd->dd_pool->dp_meta_objset; + uint64_t val, obj; - if (dd->dd_phys->dd_head_dataset_obj != 0) { - err = dsl_dataset_destroy_sync(dd, NULL, tx); - if (err) - goto out; - } + ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - /* The point of no (unsuccessful) return */ - - /* Make sure parent's used gets updated */ + /* Remove our reservation. */ val = 0; - err = dsl_dir_set_reservation_sync(dd, &val, tx); - ASSERT(err == 0); + dsl_dir_set_reservation_sync(dd, &val, tx); ASSERT3U(dd->dd_used_bytes, ==, 0); ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); - dsl_dir_close(dd, FTAG); - dd = NULL; - - err = dmu_object_free(mos, obj, tx); - ASSERT(err == 0); - if (child_zapobj) - err = zap_destroy(mos, child_zapobj, tx); - ASSERT(err == 0); + VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); + VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); + VERIFY(0 == zap_remove(mos, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); - if (props_zapobj) - err = zap_destroy(mos, props_zapobj, tx); - ASSERT(err == 0); - - err = zap_remove(mos, pds->dd_phys->dd_child_dir_zapobj, name, tx); - ASSERT(err == 0); - -out: - rw_exit(&dp->dp_config_rwlock); - if (dd) - dsl_dir_close(dd, FTAG); - - return (err); + obj = dd->dd_object; + dsl_dir_close(dd, tag); + VERIFY(0 == dmu_object_free(mos, obj, tx)); } void @@ -577,71 +521,6 @@ dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) } } -int -dsl_dir_sync_task(dsl_dir_t 
*dd, - int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space) -{ - dmu_tx_t *tx; - dsl_pool_t *dp = dd->dd_pool; - int err = 0; - uint64_t txg; - - dprintf_dd(dd, "func=%p space=%llu\n", func, space); - -again: - tx = dmu_tx_create_ds(dd); - dmu_tx_hold_space(tx, space); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err == ENOSPC || err == EDQUOT) { - dsl_dir_t *rds; - /* - * They can get their space from either this dd, or the - * root dd. - */ - for (rds = dd; rds->dd_parent; rds = rds->dd_parent) - continue; - dmu_tx_abort(tx); - tx = dmu_tx_create_ds(rds); - dmu_tx_hold_space(tx, space); - err = dmu_tx_assign(tx, TXG_WAIT); - } - if (err) { - dmu_tx_abort(tx); - return (err); - } - - txg = dmu_tx_get_txg(tx); - mutex_enter(&dd->dd_lock); - if (dd->dd_sync_txg != 0) { - mutex_exit(&dd->dd_lock); - dmu_tx_commit(tx); - txg_wait_synced(dp, 0); - goto again; - } - - /* We're good to go */ - - dd->dd_sync_txg = txg; - dd->dd_sync_func = func; - dd->dd_sync_arg = arg; - - mutex_exit(&dd->dd_lock); - - dsl_dir_dirty(dd, tx); - dmu_tx_commit(tx); - - txg_wait_synced(dp, txg); - - mutex_enter(&dd->dd_lock); - ASSERT(dd->dd_sync_txg == txg); - ASSERT(dd->dd_sync_func == NULL); - err = dd->dd_sync_err; - dd->dd_sync_txg = 0; - mutex_exit(&dd->dd_lock); - - return (err); -} - void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) { @@ -666,11 +545,6 @@ parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) { - if (dd->dd_sync_txg == tx->tx_txg && dd->dd_sync_func) { - dd->dd_sync_err = dd->dd_sync_func(dd, dd->dd_sync_arg, tx); - dd->dd_sync_func = NULL; - } - ASSERT(dmu_tx_is_syncing(tx)); dmu_buf_will_dirty(dd->dd_dbuf, tx); @@ -1004,26 +878,51 @@ dsl_dir_diduse_space(dsl_dir_t *dd, } } +/* ARGSUSED */ static int -dsl_dir_set_quota_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) { - uint64_t *quotap = arg; + dsl_dir_t *dd = arg1; + uint64_t *quotap = arg2; 
uint64_t new_quota = *quotap; int err = 0; + uint64_t towrite; - dmu_buf_will_dirty(dd->dd_dbuf, tx); + if (new_quota == 0) + return (0); mutex_enter(&dd->dd_lock); - if (new_quota != 0 && (new_quota < dd->dd_phys->dd_reserved || + /* + * If we are doing the preliminary check in open context, and + * there are pending changes, then don't fail it, since the + * pending changes could under-estimat the amount of space to be + * freed up. + */ + towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] + + dd->dd_space_towrite[2] + dd->dd_space_towrite[3]; + if ((dmu_tx_is_syncing(tx) || towrite == 0) && + (new_quota < dd->dd_phys->dd_reserved || new_quota < dsl_dir_estimated_space(dd))) { err = ENOSPC; - } else { - dd->dd_phys->dd_quota = new_quota; } mutex_exit(&dd->dd_lock); return (err); } +static void +dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + uint64_t *quotap = arg2; + uint64_t new_quota = *quotap; + + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + mutex_enter(&dd->dd_lock); + dd->dd_phys->dd_quota = new_quota; + mutex_exit(&dd->dd_lock); +} + int dsl_dir_set_quota(const char *ddname, uint64_t quota) { @@ -1039,15 +938,18 @@ dsl_dir_set_quota(const char *ddname, uint64_t quota) */ txg_wait_open(dd->dd_pool, 0); - err = dsl_dir_sync_task(dd, dsl_dir_set_quota_sync, &quota, 0); + err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, + dsl_dir_set_quota_sync, dd, &quota, 0); dsl_dir_close(dd, FTAG); return (err); } +/* ARGSUSED */ static int -dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) { - uint64_t *reservationp = arg; + dsl_dir_t *dd = arg1; + uint64_t *reservationp = arg2; uint64_t new_reservation = *reservationp; uint64_t used, avail; int64_t delta; @@ -1055,6 +957,13 @@ dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) if (new_reservation > INT64_MAX) return (EOVERFLOW); + /* + * If we are doing the 
preliminary check in open context, the + * space estimates may be inaccurate. + */ + if (!dmu_tx_is_syncing(tx)) + return (0); + mutex_enter(&dd->dd_lock); used = dd->dd_used_bytes; delta = MAX(used, new_reservation) - @@ -1073,6 +982,23 @@ dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) if (delta > 0 && dd->dd_phys->dd_quota > 0 && new_reservation > dd->dd_phys->dd_quota) return (ENOSPC); + return (0); +} + +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + uint64_t *reservationp = arg2; + uint64_t new_reservation = *reservationp; + uint64_t used; + int64_t delta; + + mutex_enter(&dd->dd_lock); + used = dd->dd_used_bytes; + delta = MAX(used, new_reservation) - + MAX(used, dd->dd_phys->dd_reserved); + mutex_exit(&dd->dd_lock); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_reserved = new_reservation; @@ -1081,7 +1007,6 @@ dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) /* Roll up this additional usage into our ancestors */ dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); } - return (0); } int @@ -1093,8 +1018,8 @@ dsl_dir_set_reservation(const char *ddname, uint64_t reservation) err = dsl_dir_open(ddname, FTAG, &dd, NULL); if (err) return (err); - err = dsl_dir_sync_task(dd, - dsl_dir_set_reservation_sync, &reservation, 0); + err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, + dsl_dir_set_reservation_sync, dd, &reservation, 0); dsl_dir_close(dd, FTAG); return (err); } @@ -1128,74 +1053,74 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) return (would_change(dd->dd_parent, delta, ancestor)); } -int -dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) +struct renamearg { + dsl_dir_t *newparent; + const char *mynewname; +}; + +/* ARGSUSED */ +static int +dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { - const char *newname = arg; + dsl_dir_t *dd = arg1; + struct renamearg *ra = arg2; dsl_pool_t *dp = 
dd->dd_pool; objset_t *mos = dp->dp_meta_objset; - dsl_dir_t *newpds; - const char *tail; - int err, len; - - /* can't rename to different pool */ - len = strlen(dp->dp_root_dir->dd_myname); - if (strncmp(dp->dp_root_dir->dd_myname, newname, len != 0) || - newname[len] != '/') { - return (ENXIO); - } - - /* new parent should exist */ - err = dsl_dir_open_spa(dp->dp_spa, newname, FTAG, &newpds, &tail); - if (err) - return (err); - - /* new name should not already exist */ - if (tail == NULL) { - dsl_dir_close(newpds, FTAG); - return (EEXIST); - } - - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + int err; + uint64_t val; /* There should be 2 references: the open and the dirty */ - if (dmu_buf_refcount(dd->dd_dbuf) > 2) { - rw_exit(&dp->dp_config_rwlock); - dsl_dir_close(newpds, FTAG); + if (dmu_buf_refcount(dd->dd_dbuf) > 2) return (EBUSY); - } - if (newpds != dd->dd_parent) { + /* check for existing name */ + err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, + ra->mynewname, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + + if (ra->newparent != dd->dd_parent) { /* is there enough space? 
*/ uint64_t myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); - /* no rename into our descendent */ - if (closest_common_ancestor(dd, newpds) == dd) { - dsl_dir_close(newpds, FTAG); - rw_exit(&dp->dp_config_rwlock); + /* no rename into our descendant */ + if (closest_common_ancestor(dd, ra->newparent) == dd) return (EINVAL); - } - if (err = dsl_dir_transfer_possible(dd->dd_parent, newpds, - myspace)) { - dsl_dir_close(newpds, FTAG); - rw_exit(&dp->dp_config_rwlock); + if (err = dsl_dir_transfer_possible(dd->dd_parent, + ra->newparent, myspace)) return (err); - } + } - /* The point of no (unsuccessful) return */ + return (0); +} + +static void +dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + struct renamearg *ra = arg2; + dsl_pool_t *dp = dd->dd_pool; + objset_t *mos = dp->dp_meta_objset; + int err; + + ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + + if (ra->newparent != dd->dd_parent) { + uint64_t myspace = + MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); dsl_dir_diduse_space(dd->dd_parent, -myspace, -dd->dd_phys->dd_compressed_bytes, -dd->dd_phys->dd_uncompressed_bytes, tx); - dsl_dir_diduse_space(newpds, myspace, + dsl_dir_diduse_space(ra->newparent, myspace, dd->dd_phys->dd_compressed_bytes, dd->dd_phys->dd_uncompressed_bytes, tx); } - /* The point of no (unsuccessful) return */ - dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ @@ -1203,20 +1128,48 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) dd->dd_myname, tx); ASSERT3U(err, ==, 0); - (void) strcpy(dd->dd_myname, tail); + (void) strcpy(dd->dd_myname, ra->mynewname); dsl_dir_close(dd->dd_parent, dd); - dd->dd_phys->dd_parent_obj = newpds->dd_object; + dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, - newpds->dd_object, NULL, dd, &dd->dd_parent)); + ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ - err = zap_add(mos, 
newpds->dd_phys->dd_child_dir_zapobj, + err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT3U(err, ==, 0); +} - dsl_dir_close(newpds, FTAG); - rw_exit(&dp->dp_config_rwlock); - return (0); +int +dsl_dir_rename(dsl_dir_t *dd, const char *newname) +{ + struct renamearg ra; + int err; + + /* new parent should exist */ + err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); + if (err) + return (err); + + /* can't rename to different pool */ + if (dd->dd_pool != ra.newparent->dd_pool) { + err = ENXIO; + goto out; + } + + /* new name should not already exist */ + if (ra.mynewname == NULL) { + err = EEXIST; + goto out; + } + + + err = dsl_sync_task_do(dd->dd_pool, + dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); + +out: + dsl_dir_close(ra.newparent, FTAG); + return (err); } int diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index d12e1acfeb..407a6849cc 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -28,6 +28,7 @@ #include <sys/dsl_pool.h> #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> +#include <sys/dsl_synctask.h> #include <sys/dmu_tx.h> #include <sys/dmu_objset.h> #include <sys/arc.h> @@ -68,6 +69,8 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) offsetof(dsl_dataset_t, ds_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); + txg_list_create(&dp->dp_sync_tasks, + offsetof(dsl_sync_task_group_t, dstg_node)); list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); @@ -154,8 +157,7 @@ dsl_pool_create(spa_t *spa, uint64_t txg) NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ - VERIFY(0 == dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME, tx)); - ASSERT3U(err, ==, 0); + (void) dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY(0 == dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir)); 
dmu_tx_commit(tx); @@ -174,17 +176,20 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) do { dsl_dir_t *dd; dsl_dataset_t *ds; + dsl_sync_task_group_t *dstg; while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { if (!list_link_active(&ds->ds_synced_link)) list_insert_tail(&dp->dp_synced_objsets, ds); dsl_dataset_sync(ds, tx); } + while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) + dsl_sync_task_group_sync(dstg, tx); while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) dsl_dir_sync(dd, tx); /* - * We need to loop since dsl_dir_sync() could create a - * new (dirty) objset. + * We need to loop since dsl_sync_task_group_sync() + * could create a new (dirty) objset. * XXX - isn't this taken care of by the spa's sync to * convergence loop? */ diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index 0bb55f8b95..6583d7944a 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -31,6 +31,7 @@ #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> #include <sys/dsl_prop.h> +#include <sys/dsl_synctask.h> #include <sys/spa.h> #include <sys/zio_checksum.h> /* for the default checksum value */ #include <sys/zap.h> @@ -106,8 +107,11 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, uint64_t value; dsl_prop_cb_record_t *cbr; int err; + int need_rwlock; - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + need_rwlock = !RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock); + if (need_rwlock) + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); err = dsl_prop_get_impl(dd, propname, 8, 1, &value, NULL); if (err != 0) { @@ -129,7 +133,8 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, cbr, &dd)); - rw_exit(&dd->dd_pool->dp_config_rwlock); + if (need_rwlock) + rw_exit(&dd->dd_pool->dp_config_rwlock); /* Leave dataset open until this callback is unregistered */ return (0); } @@ -266,6 +271,8 @@ 
dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_t *dd; dsl_prop_cb_record_t *cbr; objset_t *mos = dp->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; int err; ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); @@ -296,20 +303,15 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, } mutex_exit(&dd->dd_lock); - if (dd->dd_phys->dd_child_dir_zapobj) { - zap_cursor_t zc; - zap_attribute_t za; - - for (zap_cursor_init(&zc, mos, - dd->dd_phys->dd_child_dir_zapobj); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - /* XXX recursion could blow stack; esp. za! */ - dsl_prop_changed_notify(dp, za.za_first_integer, - propname, value, FALSE); - } - zap_cursor_fini(&zc); + for (zap_cursor_init(&zc, mos, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + /* XXX recursion could blow stack; esp. za! */ + dsl_prop_changed_notify(dp, za.za_first_integer, + propname, value, FALSE); } + zap_cursor_fini(&zc); dsl_dir_close(dd, FTAG); } @@ -320,41 +322,37 @@ struct prop_set_arg { const void *buf; }; -static int -dsl_prop_set_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) + +static void +dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) { - struct prop_set_arg *psa = arg; + dsl_dir_t *dd = arg1; + struct prop_set_arg *psa = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t zapobj = dd->dd_phys->dd_props_zapobj; uint64_t intval; - int err, isint; - - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER); + int isint; isint = (dodefault(psa->name, 8, 1, &intval) == 0); if (psa->numints == 0) { - err = zap_remove(mos, zapobj, psa->name, tx); - if (err == ENOENT) /* that's fine. 
*/ - err = 0; - if (err == 0 && isint) { - err = dsl_prop_get_impl(dd->dd_parent, - psa->name, 8, 1, &intval, NULL); + int err = zap_remove(mos, zapobj, psa->name, tx); + ASSERT(err == 0 || err == ENOENT); + if (isint) { + VERIFY(0 == dsl_prop_get_impl(dd->dd_parent, + psa->name, 8, 1, &intval, NULL)); } } else { - err = zap_update(mos, zapobj, psa->name, - psa->intsz, psa->numints, psa->buf, tx); + VERIFY(0 == zap_update(mos, zapobj, psa->name, + psa->intsz, psa->numints, psa->buf, tx)); if (isint) intval = *(uint64_t *)psa->buf; } - if (err == 0 && isint) { + if (isint) { dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, psa->name, intval, TRUE); } - rw_exit(&dd->dd_pool->dp_config_rwlock); - - return (err); } int @@ -373,7 +371,8 @@ dsl_prop_set(const char *ddname, const char *propname, psa.intsz = intsz; psa.numints = numints; psa.buf = buf; - err = dsl_dir_sync_task(dd, dsl_prop_set_sync, &psa, 1<<20); + err = dsl_sync_task_do(dd->dd_pool, + NULL, dsl_prop_set_sync, dd, &psa, 2); dsl_dir_close(dd, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dsl_synctask.c b/usr/src/uts/common/fs/zfs/dsl_synctask.c new file mode 100644 index 0000000000..45cc56685b --- /dev/null +++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c @@ -0,0 +1,196 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/dmu.h> +#include <sys/dmu_tx.h> +#include <sys/dsl_pool.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_synctask.h> + +#define DST_AVG_BLKSHIFT 14 + +/* ARGSUSED */ +static int +dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx) +{ + return (0); +} + +dsl_sync_task_group_t * +dsl_sync_task_group_create(dsl_pool_t *dp) +{ + dsl_sync_task_group_t *dstg; + + dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP); + list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t), + offsetof(dsl_sync_task_t, dst_node)); + dstg->dstg_pool = dp; + + return (dstg); +} + +void +dsl_sync_task_create(dsl_sync_task_group_t *dstg, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified) +{ + dsl_sync_task_t *dst; + + if (checkfunc == NULL) + checkfunc = dsl_null_checkfunc; + dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP); + dst->dst_checkfunc = checkfunc; + dst->dst_syncfunc = syncfunc; + dst->dst_arg1 = arg1; + dst->dst_arg2 = arg2; + list_insert_tail(&dstg->dstg_tasks, dst); + + dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT; +} + +int +dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) +{ + dmu_tx_t *tx; + uint64_t txg; + dsl_sync_task_t *dst; + +top: + tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir); + VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); + + txg = dmu_tx_get_txg(tx); + + /* Do a preliminary error check. 
*/ + dstg->dstg_err = 0; + rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + continue; +#endif + dst->dst_err = + dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); + if (dst->dst_err) + dstg->dstg_err = dst->dst_err; + } + rw_exit(&dstg->dstg_pool->dp_config_rwlock); + + if (dstg->dstg_err) { + dmu_tx_commit(tx); + return (dstg->dstg_err); + } + + VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg)); + + dmu_tx_commit(tx); + + txg_wait_synced(dstg->dstg_pool, txg); + + if (dstg->dstg_err == EAGAIN) + goto top; + + return (dstg->dstg_err); +} + +void +dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg) +{ + dsl_sync_task_t *dst; + + while (dst = list_head(&dstg->dstg_tasks)) { + list_remove(&dstg->dstg_tasks, dst); + kmem_free(dst, sizeof (dsl_sync_task_t)); + } + kmem_free(dstg, sizeof (dsl_sync_task_group_t)); +} + +void +dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +{ + dsl_sync_task_t *dst; + void *tr_cookie; + + ASSERT3U(dstg->dstg_err, ==, 0); + + /* + * Check for sufficient space. + */ + dstg->dstg_err = dsl_dir_tempreserve_space(dstg->dstg_pool->dp_mos_dir, + dstg->dstg_space, dstg->dstg_space * 3, 0, &tr_cookie, tx); + /* don't bother trying again */ + if (dstg->dstg_err == ERESTART) + dstg->dstg_err = ENOSPC; + if (dstg->dstg_err) + return; + + /* + * Check for errors by calling checkfuncs. + */ + rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_WRITER); + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { + dst->dst_err = + dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); + if (dst->dst_err) + dstg->dstg_err = dst->dst_err; + } + + if (dstg->dstg_err == 0) { + /* + * Execute sync tasks. 
+ */ + for (dst = list_head(&dstg->dstg_tasks); dst; + dst = list_next(&dstg->dstg_tasks, dst)) { + dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx); + } + } + rw_exit(&dstg->dstg_pool->dp_config_rwlock); + + dsl_dir_tempreserve_clear(tr_cookie, tx); +} + +int +dsl_sync_task_do(dsl_pool_t *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified) +{ + dsl_sync_task_group_t *dstg; + int err; + + dstg = dsl_sync_task_group_create(dp); + dsl_sync_task_create(dstg, checkfunc, syncfunc, + arg1, arg2, blocks_modified); + err = dsl_sync_task_group_wait(dstg); + dsl_sync_task_group_destroy(dstg); + return (err); +} diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 5b618d8377..1a7adf68a0 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -647,7 +647,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) */ tx = dmu_tx_create_assigned(spa_get_dsl(spa), spa_first_txg(spa)); - dmu_objset_find(spa->spa_name, zil_claim, tx, 0); + (void) dmu_objset_find(spa->spa_name, zil_claim, tx, 0); dmu_tx_commit(tx); spa->spa_sync_on = B_TRUE; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index b24c7132e2..6fd2c8f19b 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -154,12 +154,14 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, objset_t *clone_parent, void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg); int dmu_objset_destroy(const char *name); +int dmu_snapshots_destroy(char *fsname, char *snapname); int dmu_objset_rollback(const char *name); +int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive); int dmu_objset_rename(const char *name, const char *newname); void dmu_objset_set_quota(objset_t *os, uint64_t quota); uint64_t dmu_objset_get_quota(objset_t *os); int dmu_objset_request_reservation(objset_t *os, 
uint64_t reservation); -void dmu_objset_find(char *name, void func(char *, void *), void *arg, +int dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index 45cc091448..2fc4a672c5 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -96,8 +96,9 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, void (*func)(objset_t *os, void *arg, dmu_tx_t *tx), void *arg); int dmu_objset_destroy(const char *name); int dmu_objset_rollback(const char *name); +int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive); void dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds); -void dmu_objset_find(char *name, void func(char *, void *), void *arg, +int dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); int dmu_objset_evict_dbufs(objset_t *os, int try); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h index 422d9d3ffb..872fb70b44 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h @@ -113,7 +113,7 @@ extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg); /* * These routines are only called by the DMU. 
*/ -dmu_tx_t *dmu_tx_create_ds(dsl_dir_t *dd); +dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd); int dmu_tx_is_syncing(dmu_tx_t *tx); int dmu_tx_private_ok(dmu_tx_t *tx); void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index 912445b160..6760bd6067 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -32,6 +32,7 @@ #include <sys/spa.h> #include <sys/txg.h> #include <sys/bplist.h> +#include <sys/dsl_synctask.h> #include <sys/zfs_context.h> #ifdef __cplusplus @@ -105,6 +106,9 @@ typedef struct dsl_dataset { dsl_dataset_evict_func_t *ds_user_evict_func; uint64_t ds_open_refcount; + /* no locking; only for making guesses */ + uint64_t ds_trysnap_txg; + /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; @@ -120,13 +124,13 @@ int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj, const char *tail, int mode, void *tag, dsl_dataset_t **); void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag); -int dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname, +uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx); -int dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); int dsl_dataset_destroy(const char *name); -int dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); -int dsl_dataset_rollback(const char *name); -int dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); +int dsl_snapshots_destroy(char *fsname, char *snapname); +dsl_checkfunc_t dsl_dataset_snapshot_check; +dsl_syncfunc_t dsl_dataset_snapshot_sync; +int dsl_dataset_rollback(dsl_dataset_t *ds); int dsl_dataset_rename(const char *name, const char *newname); int dsl_dataset_promote(const char *name); 
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index 123d6d128f..7be3c2167c 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -30,6 +30,7 @@ #include <sys/dmu.h> #include <sys/dsl_pool.h> +#include <sys/dsl_synctask.h> #include <sys/refcount.h> #include <sys/zfs_context.h> @@ -76,17 +77,10 @@ struct dsl_dir { /* Protected by dd_lock */ kmutex_t dd_lock; list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */ - /* Thing to do when we sync */ - uint64_t dd_sync_txg; - int (*dd_sync_func)(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); - void *dd_sync_arg; - int dd_sync_err; /* Accounting */ /* reflects any changes to dd_phys->dd_used_bytes made this syncing */ int64_t dd_used_bytes; - /* int64_t dd_compressed_bytes; */ - /* int64_t dd_uncompressed_bytes; */ /* gross estimate of space used by in-flight tx's */ uint64_t dd_tempreserved[TXG_SIZE]; /* amount of space we expect to write; == amount of dirty data */ @@ -104,9 +98,10 @@ int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **); void dsl_dir_name(dsl_dir_t *dd, char *buf); int dsl_dir_is_private(dsl_dir_t *dd); -int dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx); +uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx); void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx); -int dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx); +dsl_checkfunc_t dsl_dir_destroy_check; +dsl_syncfunc_t dsl_dir_destroy_sync; void dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds); void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx); void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx); @@ -116,11 +111,9 @@ void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx); void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx); void dsl_dir_diduse_space(dsl_dir_t *dd, int64_t used, int64_t compressed, int64_t uncompressed, 
dmu_tx_t *tx); -int dsl_dir_sync_task(dsl_dir_t *dd, - int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space); int dsl_dir_set_quota(const char *ddname, uint64_t quota); int dsl_dir_set_reservation(const char *ddname, uint64_t reservation); -int dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx); +int dsl_dir_rename(dsl_dir_t *dd, const char *newname); int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); #ifdef ZFS_DEBUG diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index 2eab6ae945..f7ec67a0e0 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -56,6 +56,7 @@ typedef struct dsl_pool { tx_state_t dp_tx; txg_list_t dp_dirty_datasets; txg_list_t dp_dirty_dirs; + txg_list_t dp_sync_tasks; /* * Protects administrative changes (properties, namespace) diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h b/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h new file mode 100644 index 0000000000..e695b182f7 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DSL_SYNCTASK_H +#define _SYS_DSL_SYNCTASK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/txg.h> +#include <sys/zfs_context.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_pool; + +typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *); +typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *); + +typedef struct dsl_sync_task { + list_node_t dst_node; + dsl_checkfunc_t *dst_checkfunc; + dsl_syncfunc_t *dst_syncfunc; + void *dst_arg1; + void *dst_arg2; + int dst_err; +} dsl_sync_task_t; + +typedef struct dsl_sync_task_group { + txg_node_t dstg_node; + list_t dstg_tasks; + struct dsl_pool *dstg_pool; + uint64_t dstg_txg; + int dstg_err; + int dstg_space; +} dsl_sync_task_group_t; + +dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp); +void dsl_sync_task_create(dsl_sync_task_group_t *dstg, + dsl_checkfunc_t *, dsl_syncfunc_t *, + void *arg1, void *arg2, int blocks_modified); +int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg); +void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg); +void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); + +int dsl_sync_task_do(struct dsl_pool *dp, + dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, + void *arg1, void *arg2, int blocks_modified); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_SYNCTASK_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index 2e5c5809c2..74cac6aaaf 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -238,7 +238,7 @@ extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, 
dmu_tx_t *tx); extern void zil_commit(zilog_t *zilog, uint64_t seq, int ioflag); -extern void zil_claim(char *osname, void *txarg); +extern int zil_claim(char *osname, void *txarg); extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); extern void zil_clean(zilog_t *zilog); extern int zil_is_committed(zilog_t *zilog); diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 137a402538..c58f9ddca0 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -662,7 +662,6 @@ zfs_ioc_vdev_setpath(zfs_cmd_t *zc) return (error); } - static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { @@ -912,8 +911,10 @@ zfs_ioc_create(zfs_cmd_t *zc) break; default: - return (EINVAL); + cbfunc = NULL; } + if (strchr(zc->zc_name, '@')) + return (EINVAL); if (zc->zc_filename[0] != '\0') { /* @@ -929,12 +930,9 @@ zfs_ioc_create(zfs_cmd_t *zc) return (error); error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL); dmu_objset_close(clone); - } else if (strchr(zc->zc_name, '@') != 0) { - /* - * We're taking a snapshot of an existing dataset. - */ - error = dmu_objset_create(zc->zc_name, type, NULL, NULL, NULL); } else { + if (cbfunc == NULL) + return (EINVAL); /* * We're creating a new dataset. */ @@ -953,31 +951,75 @@ zfs_ioc_create(zfs_cmd_t *zc) } static int -zfs_ioc_destroy(zfs_cmd_t *zc) +zfs_ioc_snapshot(zfs_cmd_t *zc) { - if (strchr(zc->zc_name, '@') != NULL && - zc->zc_objset_type == DMU_OST_ZFS) { - vfs_t *vfsp; - int err; + if (snapshot_namecheck(zc->zc_prop_value, NULL, NULL) != 0) + return (EINVAL); + return (dmu_objset_snapshot(zc->zc_name, + zc->zc_prop_value, zc->zc_cookie)); +} +static int +zfs_unmount_snap(char *name, void *arg) +{ + char *snapname = arg; + char *cp; + vfs_t *vfsp; + + /* + * Snapshots (which are under .zfs control) must be unmounted + * before they can be destroyed. 
+ */ + + if (snapname) { + (void) strcat(name, "@"); + (void) strcat(name, snapname); + vfsp = zfs_get_vfs(name); + cp = strchr(name, '@'); + *cp = '\0'; + } else { + vfsp = zfs_get_vfs(name); + } + + if (vfsp) { /* - * Snapshots under .zfs control must be unmounted - * before they can be destroyed. + * Always force the unmount for snapshots. */ - if ((vfsp = zfs_get_vfs(zc->zc_name)) != NULL) { - /* - * Always force the unmount for snapshots. - */ - int flag = MS_FORCE; - - if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { - VFS_RELE(vfsp); - return (err); - } + int flag = MS_FORCE; + int err; + + if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { VFS_RELE(vfsp); - if ((err = dounmount(vfsp, flag, kcred)) != 0) - return (err); + return (err); } + VFS_RELE(vfsp); + if ((err = dounmount(vfsp, flag, kcred)) != 0) + return (err); + } + return (0); +} + +static int +zfs_ioc_destroy_snaps(zfs_cmd_t *zc) +{ + int err; + + if (snapshot_namecheck(zc->zc_prop_value, NULL, NULL) != 0) + return (EINVAL); + err = dmu_objset_find(zc->zc_name, + zfs_unmount_snap, zc->zc_prop_value, 0); + if (err) + return (err); + return (dmu_snapshots_destroy(zc->zc_name, zc->zc_prop_value)); +} + +static int +zfs_ioc_destroy(zfs_cmd_t *zc) +{ + if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { + int err = zfs_unmount_snap(zc->zc_name, NULL); + if (err) + return (err); } return (dmu_objset_destroy(zc->zc_name)); @@ -998,27 +1040,9 @@ zfs_ioc_rename(zfs_cmd_t *zc) if (strchr(zc->zc_name, '@') != NULL && zc->zc_objset_type == DMU_OST_ZFS) { - vfs_t *vfsp; - int err; - - /* - * Snapshots under .zfs control must be unmounted - * before they can be renamed. - */ - if ((vfsp = zfs_get_vfs(zc->zc_name)) != NULL) { - /* - * Always force the unmount for snapshots. 
- */ - int flag = MS_FORCE; - - if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { - VFS_RELE(vfsp); - return (err); - } - VFS_RELE(vfsp); - if ((err = dounmount(vfsp, flag, kcred)) != 0) - return (err); - } + int err = zfs_unmount_snap(zc->zc_name, NULL); + if (err) + return (err); } return (dmu_objset_rename(zc->zc_name, zc->zc_prop_value)); @@ -1229,7 +1253,9 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_error_log, zfs_secpolicy_inject, pool_name }, { zfs_ioc_clear, zfs_secpolicy_config, pool_name }, { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name }, - { zfs_ioc_promote, zfs_secpolicy_write, dataset_name } + { zfs_ioc_promote, zfs_secpolicy_write, dataset_name }, + { zfs_ioc_destroy_snaps, zfs_secpolicy_write, dataset_name }, + { zfs_ioc_snapshot, zfs_secpolicy_write, dataset_name } }; static int @@ -1237,7 +1263,7 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) { zfs_cmd_t *zc; uint_t vec; - int error; + int error, rc; if (getminor(dev) != 0) return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); @@ -1280,11 +1306,9 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) if (error == 0) error = zfs_ioc_vec[vec].zvec_func(zc); - if (error == 0 || error == ENOMEM) { - int rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t)); - if (error == 0) - error = rc; - } + rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t)); + if (error == 0) + error = rc; kmem_free(zc, sizeof (zfs_cmd_t)); return (error); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 8ee194d5f9..7441d3271f 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -448,7 +448,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) ASSERT(BP_IS_HOLE(&zh->zh_log)); } -void +int zil_claim(char *osname, void *txarg) { dmu_tx_t *tx = txarg; @@ -461,7 +461,7 @@ zil_claim(char *osname, void *txarg) error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_STANDARD, &os); if 
(error) { cmn_err(CE_WARN, "can't process intent log for %s", osname); - return; + return (0); } zilog = dmu_objset_zil(os); @@ -484,6 +484,7 @@ zil_claim(char *osname, void *txarg) ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); dmu_objset_close(os); + return (0); } void diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 07ada9c30e..2af99527fd 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -350,7 +350,9 @@ typedef enum zfs_ioc { ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, ZFS_IOC_BOOKMARK_NAME, - ZFS_IOC_PROMOTE + ZFS_IOC_PROMOTE, + ZFS_IOC_DESTROY_SNAPS, + ZFS_IOC_SNAPSHOT } zfs_ioc_t; /* |