diff options
| field | value | date |
|---|---|---|
| author | ahrens <none@none> | 2007-10-29 17:12:17 -0700 |
| committer | ahrens <none@none> | 2007-10-29 17:12:17 -0700 |
| commit | 3cb34c601f3ef3016f638574f5982e80c3735c71 (patch) | |
| tree | bbaa202cdc73b80f8c5169f479ba79234553d4ba | |
| parent | 7451ee9355b4d9cafcf1bb6055bb01fc7bdaa1a1 (diff) | |
| download | illumos-joyent-3cb34c601f3ef3016f638574f5982e80c3735c71.tar.gz | |
PSARC/2007/574 zfs send -R
6358519 'zfs restore' can't restore full backup into topmost filesystem
6421958 want recursive zfs send ('zfs send -r')
6465969 zfs receive error message could be a little more friendly
6482331 assertion failed: ra.err == 0 (0x10 == 0x0)
6577548 nvlist_next_nvpair() can not iterate recursively
6579048 zfs send -i "" fs@snap can succeed
6580447 "zfs list -t filesystem" slowly iterates over all snapshots
6581508 zfs issues confusing error message when doing an incremental send
6585612 'zfs recv -d' cannot receive the top-level filesystem backups
6589317 create-time permissions not granted on filesystems created by "zfs recv"
6596160 zfs create -p -b 1092 <filesystem> should fail.
6619393 help message for ::dbufs is slightly wrong
6620906 zfs_rename() gives incorrect error message
6621295 dsl_deleg_set_sync() should be broken up
30 files changed, 3521 insertions, 1327 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 0e094c2472..134f6d37b2 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -1892,7 +1892,8 @@ static const mdb_dcmd_t dcmds[] = { { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, { "dbufs", - "\t[-O objset_t*] [-n objset_name | \"mos\"] [-o object | \"mdn\"] \n" + "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] " + "[-o object | \"mdn\"] \n" "\t[-l level] [-b blkid | \"bonus\"]", "find dmu_buf_impl_t's that match specified criteria", dbufs }, { "abuf_find", "dva_word[0] dva_word[1]", diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 1ebc2f7fe0..b3c3d23352 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -897,9 +897,9 @@ const struct ioc { "zfs_cmd_t" }, { (uint_t)ZFS_IOC_RENAME, "ZFS_IOC_RENAME", "zfs_cmd_t" }, - { (uint_t)ZFS_IOC_RECVBACKUP, "ZFS_IOC_RECVBACKUP", + { (uint_t)ZFS_IOC_RECV, "ZFS_IOC_RECV", "zfs_cmd_t" }, - { (uint_t)ZFS_IOC_SENDBACKUP, "ZFS_IOC_SENDBACKUP", + { (uint_t)ZFS_IOC_SEND, "ZFS_IOC_SEND", "zfs_cmd_t" }, { (uint_t)ZFS_IOC_INJECT_FAULT, "ZFS_IOC_INJECT_FAULT", "zfs_cmd_t" }, diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 055c283c1f..426e275080 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -734,8 +734,8 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) (u_longlong_t)dd->dd_head_dataset_obj); (void) printf("\t\tparent_dir_obj = %llu\n", (u_longlong_t)dd->dd_parent_obj); - (void) printf("\t\tclone_parent_obj = %llu\n", - (u_longlong_t)dd->dd_clone_parent_obj); + (void) printf("\t\torigin_obj = %llu\n", + (u_longlong_t)dd->dd_origin_obj); (void) printf("\t\tchild_dir_zapobj = %llu\n", (u_longlong_t)dd->dd_child_dir_zapobj); (void) printf("\t\tused_bytes = %s\n", used); diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c 
index abf0d72c66..c888bbafde 100644 --- a/usr/src/cmd/zfs/zfs_iter.c +++ b/usr/src/cmd/zfs/zfs_iter.c @@ -68,7 +68,7 @@ uu_avl_pool_t *avl_pool; * Called for each dataset. If the object the object is of an appropriate type, * add it to the avl tree and recurse over any children as necessary. */ -int +static int zfs_callback(zfs_handle_t *zhp, void *data) { callback_data_t *cb = data; @@ -100,10 +100,13 @@ zfs_callback(zfs_handle_t *zhp, void *data) /* * Recurse if necessary. */ - if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM || - (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types & - ZFS_TYPE_SNAPSHOT)))) - (void) zfs_iter_children(zhp, zfs_callback, data); + if (cb->cb_recurse) { + if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) + (void) zfs_iter_filesystems(zhp, zfs_callback, data); + if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT && + (cb->cb_types & ZFS_TYPE_SNAPSHOT)) + (void) zfs_iter_snapshots(zhp, zfs_callback, data); + } if (!dontclose) zfs_close(zhp); diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index e7ea903595..8a8fa3b0da 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -216,7 +216,7 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] <snapshot>\n")); case HELP_SEND: - return (gettext("\tsend [-i snapshot] <snapshot>\n")); + return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n")); case HELP_SET: return (gettext("\tset <property=value> " "<filesystem|volume> ...\n")); @@ -490,6 +490,7 @@ zfs_do_create(int argc, char **argv) uint64_t volsize; int c; boolean_t noreserve = B_FALSE; + boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; int ret = 1; nvlist_t *props = NULL; @@ -529,6 +530,7 @@ zfs_do_create(int argc, char **argv) parents = B_TRUE; break; case 'b': + bflag = B_TRUE; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "block size '%s': %s\n"), optarg, @@ -580,9 +582,9 @@ zfs_do_create(int 
argc, char **argv) } } - if (noreserve && type != ZFS_TYPE_VOLUME) { - (void) fprintf(stderr, gettext("'-s' can only be used when " - "creating a volume\n")); + if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { + (void) fprintf(stderr, gettext("'-s' and '-b' can only be " + "used when creating a volume\n")); goto badusage; } @@ -1316,7 +1318,7 @@ upgrade_list_callback(zfs_handle_t *zhp, void *data) /* list if it's old/new */ if ((!cb->cb_newer && version < ZPL_VERSION) || - (cb->cb_newer && version > SPA_VERSION)) { + (cb->cb_newer && version > ZPL_VERSION)) { char *str; if (cb->cb_newer) { str = gettext("The following filesystems are " @@ -2196,7 +2198,8 @@ zfs_do_snapshot(int argc, char **argv) } /* - * zfs send [-i <@snap>] <fs@snap> + * zfs send [-v] -R [-i|-I <@snap>] <fs@snap> + * zfs send [-v] [-i|-I <@snap>] <fs@snap> * * Send a backup stream to stdout. */ @@ -2204,18 +2207,35 @@ static int zfs_do_send(int argc, char **argv) { char *fromname = NULL; + char *toname = NULL; char *cp; zfs_handle_t *zhp; + boolean_t doall = B_FALSE; + boolean_t replicate = B_FALSE; + boolean_t fromorigin = B_FALSE; + boolean_t verbose = B_FALSE; int c, err; /* check options */ - while ((c = getopt(argc, argv, ":i:")) != -1) { + while ((c = getopt(argc, argv, ":i:I:Rv")) != -1) { switch (c) { case 'i': if (fromname) usage(B_FALSE); fromname = optarg; break; + case 'I': + if (fromname) + usage(B_FALSE); + fromname = optarg; + doall = B_TRUE; + break; + case 'R': + replicate = B_TRUE; + break; + case 'v': + verbose = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2248,37 +2268,62 @@ zfs_do_send(int argc, char **argv) return (1); } - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) + cp = strchr(argv[0], '@'); + if (cp == NULL) { + (void) fprintf(stderr, + gettext("argument must be a snapshot\n")); + usage(B_FALSE); + } + *cp = '\0'; + toname = cp + 1; + zhp = zfs_open(g_zfs, argv[0], 
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) return (1); /* * If they specified the full path to the snapshot, chop off - * everything except the short name of the snapshot. + * everything except the short name of the snapshot, but special + * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { - if (cp != fromname && - strncmp(argv[0], fromname, cp - fromname + 1)) { - (void) fprintf(stderr, - gettext("incremental source must be " - "in same filesystem\n")); - usage(B_FALSE); - } - fromname = cp + 1; - if (strchr(fromname, '@') || strchr(fromname, '/')) { - (void) fprintf(stderr, - gettext("invalid incremental source\n")); - usage(B_FALSE); + char origin[ZFS_MAXNAMELEN]; + zprop_source_t src; + + (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, + origin, sizeof (origin), &src, NULL, 0, B_FALSE); + + if (strcmp(origin, fromname) == 0) { + fromname = NULL; + fromorigin = B_TRUE; + } else { + *cp = '\0'; + if (cp != fromname && strcmp(argv[0], fromname)) { + (void) fprintf(stderr, + gettext("incremental source must be " + "in same filesystem\n")); + usage(B_FALSE); + } + fromname = cp + 1; + if (strchr(fromname, '@') || strchr(fromname, '/')) { + (void) fprintf(stderr, + gettext("invalid incremental source\n")); + usage(B_FALSE); + } } } - err = zfs_send(zhp, fromname, STDOUT_FILENO); + if (replicate && fromname == NULL) + doall = B_TRUE; + + err = zfs_send(zhp, fromname, toname, replicate, doall, fromorigin, + verbose, STDOUT_FILENO); zfs_close(zhp); return (err != 0); } /* - * zfs receive <fs@snap> + * zfs receive [-dnvF] <fs@snap> * * Restore a backup stream from stdin. 
*/ @@ -2286,25 +2331,23 @@ static int zfs_do_receive(int argc, char **argv) { int c, err; - boolean_t isprefix = B_FALSE; - boolean_t dryrun = B_FALSE; - boolean_t verbose = B_FALSE; - boolean_t force = B_FALSE; + recvflags_t flags; + bzero(&flags, sizeof (recvflags_t)); /* check options */ while ((c = getopt(argc, argv, ":dnvF")) != -1) { switch (c) { case 'd': - isprefix = B_TRUE; + flags.isprefix = B_TRUE; break; case 'n': - dryrun = B_TRUE; + flags.dryrun = B_TRUE; break; case 'v': - verbose = B_TRUE; + flags.verbose = B_TRUE; break; case 'F': - force = B_TRUE; + flags.force = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " @@ -2339,8 +2382,7 @@ zfs_do_receive(int argc, char **argv) return (1); } - err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun, force, - STDIN_FILENO); + err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL); return (err != 0); } @@ -2939,9 +2981,8 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, (void) fprintf(stderr, gettext("cannot %s '%s': " "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use %s to " - "%s this filesystem\n"), op == OP_SHARE ? 
- "share(1M)" : "mount(1M)", cmdname); + (void) fprintf(stderr, gettext("use %s(1M) to " + "%s this filesystem\n"), cmdname, cmdname); return (1); } @@ -3093,8 +3134,10 @@ report_mount_progress(int current, int total) if (current == 1) { (void) printf(gettext("Mounting ZFS filesystems: ")); len = 0; - } else if (current != total && last_progress_time + MOUNT_TIME >= now) - return; /* too soon to report again */ + } else if (current != total && last_progress_time + MOUNT_TIME >= now) { + /* too soon to report again */ + return; + } last_progress_time = now; diff --git a/usr/src/common/nvpair/nvpair.c b/usr/src/common/nvpair/nvpair.c index 5f66864d6f..3d1f3972af 100644 --- a/usr/src/common/nvpair/nvpair.c +++ b/usr/src/common/nvpair/nvpair.c @@ -1126,13 +1126,15 @@ nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp) curr = NVPAIR2I_NVP(nvp); /* - * Ensure that nvp is an valid pointer. + * Ensure that nvp is a valid nvpair on this nvlist. + * NB: nvp_curr is used only as a hint so that we don't always + * have to walk the list to determine if nvp is still on the list. 
*/ if (nvp == NULL) curr = priv->nvp_list; - else if (priv->nvp_curr == curr) + else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) curr = curr->nvi_next; - else if (nvlist_contains_nvp(nvl, nvp) == 0) + else curr = NULL; priv->nvp_curr = curr; diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com index 94accfce61..908a6e981d 100644 --- a/usr/src/lib/libzfs/Makefile.com +++ b/usr/src/lib/libzfs/Makefile.com @@ -31,7 +31,7 @@ VERS= .1 OBJS_SHARED= zfs_namecheck.o zprop_common.o zfs_prop.o zpool_prop.o zfs_deleg.o OBJS_COMMON= libzfs_dataset.o libzfs_util.o libzfs_graph.o libzfs_mount.o \ libzfs_pool.o libzfs_changelist.o libzfs_config.o libzfs_import.o \ - libzfs_status.o + libzfs_status.o libzfs_sendrecv.o OBJECTS= $(OBJS_COMMON) $(OBJS_SHARED) include ../../Makefile.lib diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index dd46a680e7..b70f89a6c7 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -417,11 +417,22 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int); extern int zfs_rename(zfs_handle_t *, const char *, boolean_t); -extern int zfs_send(zfs_handle_t *, const char *, int); -extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int, - boolean_t, int); +extern int zfs_send(zfs_handle_t *, const char *, const char *, + boolean_t, boolean_t, boolean_t, boolean_t, int); extern int zfs_promote(zfs_handle_t *); +typedef struct recvflags { + boolean_t verbose : 1; + boolean_t isprefix : 1; + boolean_t dryrun : 1; + boolean_t force : 1; + boolean_t canmountoff : 1; + boolean_t byteswap : 1; +} recvflags_t; + +extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t, + int, avl_tree_t *); + /* * Miscellaneous functions. 
*/ diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c index 2b53f7d983..6d690e8f78 100644 --- a/usr/src/lib/libzfs/common/libzfs_changelist.c +++ b/usr/src/lib/libzfs/common/libzfs_changelist.c @@ -350,14 +350,14 @@ changelist_haszonedchild(prop_changelist_t *clp) * Remove a node from a gathered list. */ void -changelist_remove(zfs_handle_t *zhp, prop_changelist_t *clp) +changelist_remove(prop_changelist_t *clp, const char *name) { prop_changenode_t *cn; for (cn = uu_list_first(clp->cl_list); cn != NULL; cn = uu_list_next(clp->cl_list, cn)) { - if (strcmp(cn->cn_handle->zfs_name, zhp->zfs_name) == 0) { + if (strcmp(cn->cn_handle->zfs_name, name) == 0) { uu_list_remove(clp->cl_list, cn); zfs_close(cn->cn_handle); free(cn); diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index db912e7f20..1fc002c39c 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -36,6 +36,7 @@ #include <stdlib.h> #include <strings.h> #include <unistd.h> +#include <stddef.h> #include <zone.h> #include <fcntl.h> #include <sys/mntent.h> @@ -49,7 +50,6 @@ #include <ucred.h> #include <sys/spa.h> -#include <sys/zio.h> #include <sys/zap.h> #include <sys/zfs_i18n.h> #include <libzfs.h> @@ -59,7 +59,6 @@ #include "libzfs_impl.h" #include "zfs_deleg.h" -static int create_parents(libzfs_handle_t *, char *, int); static int zvol_create_link_common(libzfs_handle_t *, const char *, int); /* @@ -373,9 +372,6 @@ top: zc.zc_objset_type = DMU_OST_ZFS; } - /* If we can successfully roll it back, reget the stats */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0) - goto top; /* * If we can successfully destroy it, pretend that it * never existed. 
@@ -386,6 +382,9 @@ top: errno = ENOENT; return (NULL); } + /* If we can successfully roll it back, reget the stats */ + if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0) + goto top; } /* @@ -467,7 +466,6 @@ zfs_close(zfs_handle_t *zhp) free(zhp); } - /* * Given an nvlist of properties to set, validates that they are correct, and * parses any numeric properties (index, boolean, etc) if they are specified as @@ -1181,29 +1179,26 @@ static void zfs_destroy_perm_tree(avl_tree_t *tree) { zfs_perm_node_t *permnode; - void *cookie; + void *cookie = NULL; - cookie = NULL; - while ((permnode = avl_destroy_nodes(tree, &cookie)) != NULL) { - avl_remove(tree, permnode); + while ((permnode = avl_destroy_nodes(tree, &cookie)) != NULL) free(permnode); - } + avl_destroy(tree); } static void zfs_destroy_tree(avl_tree_t *tree) { zfs_allow_node_t *allownode; - void *cookie; + void *cookie = NULL; - cookie = NULL; while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) { zfs_destroy_perm_tree(&allownode->z_localdescend); zfs_destroy_perm_tree(&allownode->z_local); zfs_destroy_perm_tree(&allownode->z_descend); - avl_remove(tree, allownode); free(allownode); } + avl_destroy(tree); } void @@ -2229,10 +2224,9 @@ uint64_t zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; - zprop_source_t sourcetype = ZPROP_SRC_NONE; uint64_t val; - (void) get_numeric_property(zhp, prop, &sourcetype, &source, &val); + (void) get_numeric_property(zhp, prop, NULL, &source, &val); return (val); } @@ -2294,6 +2288,9 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) zfs_handle_t *nzhp; int ret; + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) + return (0); + for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) { @@ -2337,6 +2334,9 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data) zfs_handle_t 
*nzhp; int ret; + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) + return (0); + for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; @@ -2501,6 +2501,86 @@ zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) } /* + * Given a path to 'target', create all the ancestors between + * the prefixlen portion of the path, and the target itself. + * Fail if the initial prefixlen-ancestor does not already exist. + */ +int +create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) +{ + zfs_handle_t *h; + char *cp; + const char *opname; + + /* make sure prefix exists */ + cp = target + prefixlen; + if (*cp != '/') { + assert(strchr(cp, '/') == NULL); + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + } else { + *cp = '\0'; + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + *cp = '/'; + } + if (h == NULL) + return (-1); + zfs_close(h); + + /* + * Attempt to create, mount, and share any ancestor filesystems, + * up to the prefixlen-long one. 
+ */ + for (cp = target + prefixlen + 1; + cp = strchr(cp, '/'); *cp = '/', cp++) { + char *logstr; + + *cp = '\0'; + + h = make_dataset_handle(hdl, target); + if (h) { + /* it already exists, nothing to do here */ + zfs_close(h); + continue; + } + + logstr = hdl->libzfs_log_str; + hdl->libzfs_log_str = NULL; + if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, + NULL) != 0) { + hdl->libzfs_log_str = logstr; + opname = dgettext(TEXT_DOMAIN, "create"); + goto ancestorerr; + } + + hdl->libzfs_log_str = logstr; + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + if (h == NULL) { + opname = dgettext(TEXT_DOMAIN, "open"); + goto ancestorerr; + } + + if (zfs_mount(h, NULL, 0) != 0) { + opname = dgettext(TEXT_DOMAIN, "mount"); + goto ancestorerr; + } + + if (zfs_share(h) != 0) { + opname = dgettext(TEXT_DOMAIN, "share"); + goto ancestorerr; + } + + zfs_close(h); + } + + return (0); + +ancestorerr: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to %s ancestor '%s'"), opname, target); + return (-1); +} + +/* * Creates non-existing ancestors of the given path. 
*/ int @@ -2985,7 +3065,7 @@ zfs_promote(zfs_handle_t *zhp) return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } - (void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent)); + (void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent)); if (parent[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not a cloned filesystem")); @@ -2995,7 +3075,7 @@ zfs_promote(zfs_handle_t *zhp) *cp = '\0'; /* Walk the snapshots we will be moving */ - pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT); + pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (pzhp == NULL) return (-1); pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); @@ -3014,7 +3094,7 @@ zfs_promote(zfs_handle_t *zhp) } /* issue the ioctl */ - (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of, + (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); @@ -3161,386 +3241,6 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive) } /* - * Dumps a backup of the given snapshot (incremental from fromsnap if it's not - * NULL) to the file descriptor specified by outfd. 
- */ -int -zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd) -{ - zfs_cmd_t zc = { 0 }; - char errbuf[1024]; - libzfs_handle_t *hdl = zhp->zfs_hdl; - - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (fromsnap) - (void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name)); - zc.zc_cookie = outfd; - - if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) { - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot send '%s'"), zhp->zfs_name); - - switch (errno) { - - case EXDEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "not an earlier snapshot from the same fs")); - return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); - - case EDQUOT: - case EFBIG: - case EIO: - case ENOLINK: - case ENOSPC: - case ENOSTR: - case ENXIO: - case EPIPE: - case ERANGE: - case EFAULT: - case EROFS: - zfs_error_aux(hdl, strerror(errno)); - return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); - - default: - return (zfs_standard_error(hdl, errno, errbuf)); - } - } - - return (0); -} - -/* - * Create ancestors of 'target', but not target itself, and not - * ancestors whose names are shorter than prefixlen. Die if - * prefixlen-ancestor does not exist. - */ -static int -create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) -{ - zfs_handle_t *h; - char *cp; - - /* make sure prefix exists */ - cp = strchr(target + prefixlen, '/'); - if (cp == NULL) { - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - } else { - *cp = '\0'; - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - *cp = '/'; - } - if (h == NULL) - return (-1); - zfs_close(h); - - /* - * Attempt to create, mount, and share any ancestor filesystems, - * up to the prefixlen-long one. 
- */ - for (cp = target + prefixlen + 1; - cp = strchr(cp, '/'); *cp = '/', cp++) { - const char *opname; - char *logstr; - - *cp = '\0'; - - h = make_dataset_handle(hdl, target); - if (h) { - /* it already exists, nothing to do here */ - zfs_close(h); - continue; - } - - opname = dgettext(TEXT_DOMAIN, "create"); - logstr = hdl->libzfs_log_str; - hdl->libzfs_log_str = NULL; - if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, - NULL) != 0) { - hdl->libzfs_log_str = logstr; - goto ancestorerr; - } - - hdl->libzfs_log_str = logstr; - opname = dgettext(TEXT_DOMAIN, "open"); - h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); - if (h == NULL) - goto ancestorerr; - - opname = dgettext(TEXT_DOMAIN, "mount"); - if (zfs_mount(h, NULL, 0) != 0) - goto ancestorerr; - - opname = dgettext(TEXT_DOMAIN, "share"); - if (zfs_share(h) != 0) - goto ancestorerr; - - zfs_close(h); - - continue; -ancestorerr: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "failed to %s ancestor '%s'"), opname, target); - return (-1); - } - - return (0); -} - -/* - * Restores a backup of tosnap from the file descriptor specified by infd. 
- */ -int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix, - int verbose, int dryrun, boolean_t force, int infd) -{ - zfs_cmd_t zc = { 0 }; - time_t begin_time; - int ioctl_err, err, bytes, size, choplen; - char *cp; - dmu_replay_record_t drr; - struct drr_begin *drrb = &zc.zc_begin_record; - char errbuf[1024]; - char chopprefix[ZFS_MAXNAMELEN]; - - begin_time = time(NULL); - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive")); - - /* read in the BEGIN record */ - cp = (char *)&drr; - bytes = 0; - do { - size = read(infd, cp, sizeof (drr) - bytes); - cp += size; - bytes += size; - } while (size > 0); - - if (size < 0 || bytes != sizeof (drr)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (failed to read first record)")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - zc.zc_begin_record = drr.drr_u.drr_begin; - - if (drrb->drr_magic != DMU_BACKUP_MAGIC && - drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (bad magic number)")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - if (drrb->drr_version != DMU_BACKUP_VERSION && - drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version " - "0x%llx is supported (stream is version 0x%llx)"), - DMU_BACKUP_VERSION, drrb->drr_version); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - - if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "stream (bad snapshot name)")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - /* - * Determine how much of the snapshot name stored in the stream - * we are going to tack on to the name they specified on the - * command line, and how much we are going to chop off. - * - * If they specified a snapshot, chop the entire name stored in - * the stream. 
- */ - (void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname); - if (isprefix) { - /* - * They specified a fs with -d, we want to tack on - * everything but the pool name stored in the stream - */ - if (strchr(tosnap, '@')) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " - "argument - snapshot not allowed with -d")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - } - cp = strchr(chopprefix, '/'); - if (cp == NULL) - cp = strchr(chopprefix, '@'); - *cp = '\0'; - } else if (strchr(tosnap, '@') == NULL) { - /* - * If they specified a filesystem without -d, we want to - * tack on everything after the fs specified in the - * first name from the stream. - */ - cp = strchr(chopprefix, '@'); - *cp = '\0'; - } - choplen = strlen(chopprefix); - - /* - * Determine name of destination snapshot, store in zc_value. - */ - (void) strcpy(zc.zc_value, tosnap); - (void) strncat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen, - sizeof (zc.zc_value)); - if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - - (void) strcpy(zc.zc_name, zc.zc_value); - if (drrb->drr_fromguid) { - /* incremental backup stream */ - zfs_handle_t *h; - - /* do the recvbackup ioctl to the containing fs */ - *strchr(zc.zc_name, '@') = '\0'; - - /* make sure destination fs exists */ - h = zfs_open(hdl, zc.zc_name, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (h == NULL) - return (-1); - if (!dryrun && h->zfs_type == ZFS_TYPE_VOLUME) { - if (zvol_remove_link(hdl, h->zfs_name) != 0) { - zfs_close(h); - return (-1); - } - } - zfs_close(h); - } else { - /* full backup stream */ - - /* Make sure destination fs does not exist */ - *strchr(zc.zc_name, '@') = '\0'; - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' exists"), zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - - if (strchr(zc.zc_name, '/') == NULL) { - /* - * they're 
trying to do a recv into a - * nonexistant topmost filesystem. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination does not exist"), zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); - } - - /* Do the recvbackup ioctl to the fs's parent. */ - *strrchr(zc.zc_name, '/') = '\0'; - - if (isprefix && (err = create_parents(hdl, - zc.zc_value, strlen(tosnap))) != 0) { - return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); - } - - } - - zc.zc_cookie = infd; - zc.zc_guid = force; - if (verbose) { - (void) printf("%s %s stream of %s into %s\n", - dryrun ? "would receive" : "receiving", - drrb->drr_fromguid ? "incremental" : "full", - drr.drr_u.drr_begin.drr_toname, - zc.zc_value); - (void) fflush(stdout); - } - if (dryrun) - return (0); - err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECVBACKUP, &zc); - if (ioctl_err != 0) { - switch (errno) { - case ENODEV: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "most recent snapshot does not match incremental " - "source")); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); - break; - case ETXTBSY: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination has been modified since most recent " - "snapshot")); - (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); - break; - case EEXIST: - if (drrb->drr_fromguid == 0) { - /* it's the containing fs that exists */ - cp = strchr(zc.zc_value, '@'); - *cp = '\0'; - } - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination already exists")); - (void) zfs_error_fmt(hdl, EZFS_EXISTS, - dgettext(TEXT_DOMAIN, "cannot restore to %s"), - zc.zc_value); - break; - case EINVAL: - (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); - break; - case ECKSUM: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid stream (checksum mismatch)")); - (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); - break; - default: - (void) zfs_standard_error(hdl, errno, errbuf); - } - } - - /* - * Mount or recreate the /dev links for the target filesystem - * (if created, or if we tore them down to do an incremental - * 
restore), and the /dev links for the new snapshot (if - * created). Also mount any children of the target filesystem - * if we did an incremental receive. - */ - cp = strchr(zc.zc_value, '@'); - if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) { - zfs_handle_t *h; - - *cp = '\0'; - h = zfs_open(hdl, zc.zc_value, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - *cp = '@'; - if (h) { - if (h->zfs_type == ZFS_TYPE_VOLUME) { - err = zvol_create_link(hdl, h->zfs_name); - if (err == 0 && ioctl_err == 0) - err = zvol_create_link(hdl, - zc.zc_value); - } else if (!drrb->drr_fromguid) { - err = zfs_mount(h, NULL, 0); - } - zfs_close(h); - } - } - - if (err || ioctl_err) - return (-1); - - if (verbose) { - char buf1[64]; - char buf2[64]; - uint64_t bytes = zc.zc_cookie; - time_t delta = time(NULL) - begin_time; - if (delta == 0) - delta = 1; - zfs_nicenum(bytes, buf1, sizeof (buf1)); - zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); - - (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", - buf1, delta, buf2); - } - - return (0); -} - -/* * Destroy any more recent snapshots. We invoke this callback on any dependents * of the snapshot first. 
If the 'cb_dependent' member is non-zero, then this * is a dependent and we should just destroy it without checking the transaction @@ -3577,14 +3277,14 @@ rollback_destroy(zfs_handle_t *zhp, void *data) if (zfs_destroy(zhp) != 0) cbp->cb_error = 1; else - changelist_remove(zhp, cbp->cb_clp); + changelist_remove(cbp->cb_clp, zhp->zfs_name); zhp->zfs_hdl->libzfs_log_str = logstr; } } else { if (zfs_destroy(zhp) != 0) cbp->cb_error = 1; else - changelist_remove(zhp, cbp->cb_clp); + changelist_remove(cbp->cb_clp, zhp->zfs_name); } zfs_close(zhp); @@ -3889,7 +3589,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) * be in zc.zc_name */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot rename to '%s'"), zc.zc_name); + "cannot rename '%s'"), zc.zc_name); if (recursive && errno == EEXIST) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, diff --git a/usr/src/lib/libzfs/common/libzfs_graph.c b/usr/src/lib/libzfs/common/libzfs_graph.c index c283016df7..c6383a3654 100644 --- a/usr/src/lib/libzfs/common/libzfs_graph.c +++ b/usr/src/lib/libzfs/common/libzfs_graph.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -396,8 +396,8 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) */ (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 && - zc.zc_objset_stats.dds_clone_of[0] != '\0') { - if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of, + zc.zc_objset_stats.dds_origin[0] != '\0') { + if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_origin, zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) return (-1); } diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h index cfc03791dd..631a2260ae 100644 --- a/usr/src/lib/libzfs/common/libzfs_impl.h +++ b/usr/src/lib/libzfs/common/libzfs_impl.h @@ -141,13 +141,14 @@ void zcmd_free_nvlists(zfs_cmd_t *); int changelist_prefix(prop_changelist_t *); int changelist_postfix(prop_changelist_t *); void changelist_rename(prop_changelist_t *, const char *, const char *); -void changelist_remove(zfs_handle_t *, prop_changelist_t *); +void changelist_remove(prop_changelist_t *, const char *); void changelist_free(prop_changelist_t *); prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int); int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); int changelist_haszonedchild(prop_changelist_t *); void remove_mountpoint(zfs_handle_t *); +int create_parents(libzfs_handle_t *, char *, int); zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index 6810f7efdc..9206021782 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -543,7 +543,6 @@ static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *); * values to be used later. This is triggered by the runtime loader. * Make sure the correct ISA version is loaded. 
*/ - #pragma init(_zfs_init_libshare) static void _zfs_init_libshare(void) @@ -596,7 +595,6 @@ _zfs_init_libshare(void) * service value is which part(s) of the API to initialize and is a * direct map to the libshare sa_init(service) interface. */ - int zfs_init_libshare(libzfs_handle_t *zhandle, int service) { @@ -620,11 +618,9 @@ zfs_init_libshare(libzfs_handle_t *zhandle, int service) * Uninitialize the libshare API if it hasn't already been * uninitialized. It is OK to call multiple times. */ - void zfs_uninit_libshare(libzfs_handle_t *zhandle) { - if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) { if (_sa_fini != NULL) _sa_fini(zhandle->libzfs_sharehdl); @@ -638,18 +634,14 @@ zfs_uninit_libshare(libzfs_handle_t *zhandle) * Call the legacy parse interface to get the protocol specific * options using the NULL arg to indicate that this is a "parse" only. */ - int zfs_parse_options(char *options, zfs_share_proto_t proto) { - int ret; - - if (_sa_parse_legacy_options != NULL) - ret = _sa_parse_legacy_options(NULL, options, - proto_table[proto].p_name); - else - ret = SA_CONFIG_ERR; - return (ret); + if (_sa_parse_legacy_options != NULL) { + return (_sa_parse_legacy_options(NULL, options, + proto_table[proto].p_name)); + } + return (SA_CONFIG_ERR); } /* @@ -658,7 +650,6 @@ zfs_parse_options(char *options, zfs_share_proto_t proto) * wrapper around sa_find_share to find a share path in the * configuration. */ - static sa_share_t zfs_sa_find_share(sa_handle_t handle, char *path) { @@ -673,7 +664,6 @@ zfs_sa_find_share(sa_handle_t handle, char *path) * Wrapper for sa_enable_share which enables a share for a specified * protocol. */ - static int zfs_sa_enable_share(sa_share_t share, char *proto) { @@ -688,7 +678,6 @@ zfs_sa_enable_share(sa_share_t share, char *proto) * Wrapper for sa_enable_share which disables a share for a specified * protocol. 
*/ - static int zfs_sa_disable_share(sa_share_t share, char *proto) { @@ -702,7 +691,6 @@ zfs_sa_disable_share(sa_share_t share, char *proto) * protocol specific properties (sharenfs, sharesmb). We rely * on "libshare" to the dirty work for us. */ - static int zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) { diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c new file mode 100644 index 0000000000..15f8a22b6e --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c @@ -0,0 +1,1943 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <libdevinfo.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <fcntl.h> +#include <sys/mount.h> +#include <sys/mntent.h> +#include <sys/mnttab.h> +#include <sys/avl.h> +#include <stddef.h> + +#include <libzfs.h> + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "libzfs_impl.h" + +#include <fletcher.c> /* XXX */ + +/* + * Routines for dealing with the AVL tree of fs-nvlists + */ +typedef struct fsavl_node { + avl_node_t fn_node; + nvlist_t *fn_nvfs; + char *fn_snapname; + uint64_t fn_guid; +} fsavl_node_t; + +static int +fsavl_compare(const void *arg1, const void *arg2) +{ + const fsavl_node_t *fn1 = arg1; + const fsavl_node_t *fn2 = arg2; + + if (fn1->fn_guid > fn2->fn_guid) + return (+1); + else if (fn1->fn_guid < fn2->fn_guid) + return (-1); + else + return (0); +} + +/* + * Given the GUID of a snapshot, find its containing filesystem and + * (optionally) name. 
+ */ +static nvlist_t * +fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) +{ + fsavl_node_t fn_find; + fsavl_node_t *fn; + + fn_find.fn_guid = snapguid; + + fn = avl_find(avl, &fn_find, NULL); + if (fn) { + if (snapname) + *snapname = fn->fn_snapname; + return (fn->fn_nvfs); + } + return (NULL); +} + +static avl_tree_t * +fsavl_create(nvlist_t *fss) +{ + avl_tree_t *fsavl; + nvpair_t *fselem = NULL; + + fsavl = malloc(sizeof (avl_tree_t)); + avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), + offsetof(fsavl_node_t, fn_node)); + + while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { + nvlist_t *nvfs, *snaps; + nvpair_t *snapelem = NULL; + + VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); + + while ((snapelem = + nvlist_next_nvpair(snaps, snapelem)) != NULL) { + fsavl_node_t *fn; + uint64_t guid; + + VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); + fn = malloc(sizeof (fsavl_node_t)); + fn->fn_nvfs = nvfs; + fn->fn_snapname = nvpair_name(snapelem); + fn->fn_guid = guid; + + /* + * Note: if there are multiple snaps with the + * same GUID, we ignore all but one. + */ + if (avl_find(fsavl, fn, NULL) == NULL) + avl_add(fsavl, fn); + else + free(fn); + } + } + + return (fsavl); +} + +static void +fsavl_destroy(avl_tree_t *avl) +{ + fsavl_node_t *fn; + void *cookie; + + if (avl == NULL) + return; + + cookie = NULL; + while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) + free(fn); + avl_destroy(avl); +} + +/* + * Routines for dealing with the giant nvlist of fs-nvlists, etc. 
+ */ +typedef struct send_data { + uint64_t parent_fromsnap_guid; + nvlist_t *parent_snaps; + nvlist_t *fss; + const char *fromsnap; + const char *tosnap; + + /* + * The header nvlist is of the following format: + * { + * "tosnap" -> string + * "fromsnap" -> string (if incremental) + * "fss" -> { + * id -> { + * + * "name" -> string (full name; for debugging) + * "parentfromsnap" -> number (guid of fromsnap in parent) + * + * "props" -> { name -> value (only if set here) } + * "snaps" -> { name (lastname) -> number (guid) } + * + * "origin" -> number (guid) (if clone) + * "sent" -> boolean (not on-disk) + * } + * } + * } + * + */ +} send_data_t; + +static int +send_iterate_snap(zfs_handle_t *zhp, void *arg) +{ + send_data_t *sd = arg; + uint64_t guid = zhp->zfs_dmustats.dds_guid; + char *snapname; + + snapname = strrchr(zhp->zfs_name, '@')+1; + + VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); + /* + * NB: if there is no fromsnap here (it's a newly created fs in + * an incremental replication), we will substitute the tosnap. 
+ */ + if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || + (sd->parent_fromsnap_guid == 0 && sd->tosnap && + strcmp(snapname, sd->tosnap) == 0)) { + sd->parent_fromsnap_guid = guid; + } + + zfs_close(zhp); + return (0); +} + +static void +send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { + char *propname = nvpair_name(elem); + zfs_prop_t prop = zfs_name_to_prop(propname); + nvlist_t *propnv; + + if (!zfs_prop_user(propname) && zfs_prop_readonly(prop)) + continue; + + verify(nvpair_value_nvlist(elem, &propnv) == 0); + if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) { + /* these guys are modifyable, but have no source */ + uint64_t value; + verify(nvlist_lookup_uint64(propnv, + ZPROP_VALUE, &value) == 0); + } else { + char *source; + if (nvlist_lookup_string(propnv, + ZPROP_SOURCE, &source) != 0) + continue; + if (strcmp(source, zhp->zfs_name) != 0) + continue; + } + + if (zfs_prop_user(propname) || + zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + char *value; + verify(nvlist_lookup_string(propnv, + ZPROP_VALUE, &value) == 0); + VERIFY(0 == nvlist_add_string(nv, propname, value)); + } else { + uint64_t value; + verify(nvlist_lookup_uint64(propnv, + ZPROP_VALUE, &value) == 0); + VERIFY(0 == nvlist_add_uint64(nv, propname, value)); + } + } +} + +static int +send_iterate_fs(zfs_handle_t *zhp, void *arg) +{ + send_data_t *sd = arg; + nvlist_t *nvfs, *nv; + int rv; + uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; + uint64_t guid = zhp->zfs_dmustats.dds_guid; + char guidstring[64]; + + VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); + VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); + VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", + sd->parent_fromsnap_guid)); + + if (zhp->zfs_dmustats.dds_origin[0]) { + zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, + zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); + if 
(origin == NULL) + return (-1); + VERIFY(0 == nvlist_add_uint64(nvfs, "origin", + origin->zfs_dmustats.dds_guid)); + } + + /* iterate over props */ + VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); + send_iterate_prop(zhp, nv); + VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); + nvlist_free(nv); + + /* iterate over snaps, and set sd->parent_fromsnap_guid */ + sd->parent_fromsnap_guid = 0; + VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); + (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd); + VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); + nvlist_free(sd->parent_snaps); + + /* add this fs to nvlist */ + (void) snprintf(guidstring, sizeof (guidstring), + "0x%llx", (longlong_t)guid); + VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); + nvlist_free(nvfs); + + /* iterate over children */ + rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); + + sd->parent_fromsnap_guid = parent_fromsnap_guid_save; + + zfs_close(zhp); + return (rv); +} + +static int +gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, + const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp) +{ + zfs_handle_t *zhp; + send_data_t sd = { 0 }; + int error; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (EZFS_BADTYPE); + + VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); + sd.fromsnap = fromsnap; + sd.tosnap = tosnap; + error = send_iterate_fs(zhp, &sd); + + *nvlp = sd.fss; + if (avlp) + *avlp = fsavl_create(sd.fss); + return (error); +} + +/* + * Routines for dealing with the sorted snapshot functionality + */ +typedef struct zfs_node { + zfs_handle_t *zn_handle; + avl_node_t zn_avlnode; +} zfs_node_t; + +static int +zfs_sort_snaps(zfs_handle_t *zhp, void *data) +{ + avl_tree_t *avl = data; + zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); + + node->zn_handle = zhp; + avl_add(avl, node); + return (0); +} + +/* ARGSUSED */ +static int 
+zfs_snapshot_compare(const void *larg, const void *rarg) +{ + zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; + zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; + uint64_t lcreate, rcreate; + + /* + * Sort them according to creation time. We use the hidden + * CREATETXG property to get an absolute ordering of snapshots. + */ + lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); + rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); + + if (lcreate < rcreate) + return (-1); + else if (lcreate > rcreate) + return (+1); + else + return (0); +} + +static int +zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) +{ + int ret = 0; + zfs_node_t *node; + avl_tree_t avl; + void *cookie = NULL; + + avl_create(&avl, zfs_snapshot_compare, + sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); + + ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl); + + for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) + ret |= callback(node->zn_handle, data); + + while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) + free(node); + + avl_destroy(&avl); + + return (ret); +} + +/* + * Routines specific to "zfs send" + */ +typedef struct send_dump_data { + /* these are all just the short snapname (the part after the @) */ + const char *fromsnap; + const char *tosnap; + char lastsnap[ZFS_MAXNAMELEN]; + boolean_t seenfrom, seento, replicate, doall, fromorigin; + boolean_t verbose; + int outfd; + boolean_t err; + nvlist_t *fss; + avl_tree_t *fsavl; +} send_dump_data_t; + +/* + * Dumps a backup of the given snapshot (incremental from fromsnap if it's not + * NULL) to the file descriptor specified by outfd. 
+ */ +static int +dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, + int outfd) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + if (fromsnap) + (void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name)); + zc.zc_cookie = outfd; + zc.zc_obj = fromorigin; + + if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + + switch (errno) { + + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + if (zfs_dataset_exists(hdl, zc.zc_name, + ZFS_TYPE_SNAPSHOT)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (@%s) does not exist"), + zc.zc_value); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENOSTR: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + return (0); +} + +static int +dump_snapshot(zfs_handle_t *zhp, void *arg) +{ + send_dump_data_t *sdd = arg; + const char *thissnap; + int err; + + thissnap = strchr(zhp->zfs_name, '@') + 1; + + if (sdd->fromsnap && !sdd->seenfrom && + strcmp(sdd->fromsnap, thissnap) == 0) { + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->lastsnap, thissnap); + zfs_close(zhp); + return (0); + } + + if (sdd->seento || !sdd->seenfrom) { + zfs_close(zhp); + return (0); + } + + /* send it */ + if (sdd->verbose) { + (void) fprintf(stderr, "sending from @%s to %s\n", + sdd->lastsnap, 
zhp->zfs_name); + } + + err = dump_ioctl(zhp, sdd->lastsnap, + sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), + sdd->outfd); + + if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0) + sdd->seento = B_TRUE; + + (void) strcpy(sdd->lastsnap, thissnap); + zfs_close(zhp); + return (err); +} + +static int +dump_filesystem(zfs_handle_t *zhp, void *arg) +{ + int rv = 0; + send_dump_data_t *sdd = arg; + boolean_t missingfrom = B_FALSE; + zfs_cmd_t zc = { 0 }; + + (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", + zhp->zfs_name, sdd->tosnap); + if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { + (void) fprintf(stderr, "WARNING: " + "could not send %s@%s: does not exist\n", + zhp->zfs_name, sdd->tosnap); + sdd->err = B_TRUE; + return (0); + } + + if (sdd->replicate && sdd->fromsnap) { + /* + * If this fs does not have fromsnap, and we're doing + * recursive, we need to send a full stream from the + * beginning (or an incremental from the origin if this + * is a clone). If we're doing non-recursive, then let + * them get the error. 
+ */ + (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", + zhp->zfs_name, sdd->fromsnap); + if (ioctl(zhp->zfs_hdl->libzfs_fd, + ZFS_IOC_OBJSET_STATS, &zc) != 0) { + missingfrom = B_TRUE; + } + } + + if (sdd->doall) { + sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0; + if (sdd->fromsnap == NULL || missingfrom) + sdd->seenfrom = B_TRUE; + + rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); + if (!sdd->seenfrom) { + (void) fprintf(stderr, + "WARNING: could not send %s@%s:\n" + "incremental source (%s@%s) does not exist\n", + zhp->zfs_name, sdd->tosnap, + zhp->zfs_name, sdd->fromsnap); + sdd->err = B_TRUE; + } else if (!sdd->seento) { + (void) fprintf(stderr, + "WARNING: could not send %s@%s:\n" + "incremental source (%s@%s) " + "is not earlier than it\n", + zhp->zfs_name, sdd->tosnap, + zhp->zfs_name, sdd->fromsnap); + sdd->err = B_TRUE; + } + } else { + zfs_handle_t *snapzhp; + char snapname[ZFS_MAXNAMELEN]; + + (void) snprintf(snapname, sizeof (snapname), "%s@%s", + zfs_get_name(zhp), sdd->tosnap); + snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT); + rv = dump_ioctl(snapzhp, + missingfrom ? 
NULL : sdd->fromsnap, + sdd->fromorigin || missingfrom, + sdd->outfd); + sdd->seento = B_TRUE; + zfs_close(snapzhp); + } + + return (rv); +} + +static int +dump_filesystems(zfs_handle_t *rzhp, void *arg) +{ + send_dump_data_t *sdd = arg; + nvpair_t *fspair; + boolean_t needagain, progress; + + if (!sdd->replicate) + return (dump_filesystem(rzhp, sdd)); + +again: + needagain = progress = B_FALSE; + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *fslist; + char *fsname; + zfs_handle_t *zhp; + int err; + uint64_t origin_guid = 0; + nvlist_t *origin_nv; + + VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); + if (nvlist_lookup_boolean(fslist, "sent") == 0) + continue; + + VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); + (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); + + origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); + if (origin_nv && + nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) { + /* + * origin has not been sent yet; + * skip this clone. + */ + needagain = B_TRUE; + continue; + } + + zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); + err = dump_filesystem(zhp, sdd); + VERIFY(nvlist_add_boolean(fslist, "sent") == 0); + progress = B_TRUE; + zfs_close(zhp); + if (err) + return (err); + } + if (needagain) { + assert(progress); + goto again; + } + return (0); +} + +/* + * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL. + * If 'doall', dump all intermediate snaps. + * If 'replicate', dump special header and do recursively. 
+ */ +int +zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, + boolean_t replicate, boolean_t doall, boolean_t fromorigin, + boolean_t verbose, int outfd) +{ + char errbuf[1024]; + send_dump_data_t sdd = { 0 }; + int err; + nvlist_t *fss = NULL; + avl_tree_t *fsavl = NULL; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot send '%s'"), zhp->zfs_name); + + if (fromsnap && fromsnap[0] == '\0') { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "zero-length incremental source")); + return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); + } + + if (replicate || doall) { + dmu_replay_record_t drr = { 0 }; + char *packbuf = NULL; + size_t buflen = 0; + zio_cksum_t zc = { 0 }; + + assert(fromsnap || doall); + + if (replicate) { + nvlist_t *hdrnv; + + VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); + if (fromsnap) { + VERIFY(0 == nvlist_add_string(hdrnv, + "fromsnap", fromsnap)); + } + VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); + + err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, + fromsnap, tosnap, &fss, &fsavl); + if (err) + return (err); + VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); + err = nvlist_pack(hdrnv, &packbuf, &buflen, + NV_ENCODE_XDR, 0); + nvlist_free(hdrnv); + if (err) { + fsavl_destroy(fsavl); + nvlist_free(fss); + return (zfs_standard_error(zhp->zfs_hdl, + err, errbuf)); + } + } + + /* write first begin record */ + drr.drr_type = DRR_BEGIN; + drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION; + (void) snprintf(drr.drr_u.drr_begin.drr_toname, + sizeof (drr.drr_u.drr_begin.drr_toname), + "%s@%s", zhp->zfs_name, tosnap); + drr.drr_payloadlen = buflen; + fletcher_4_incremental_native(&drr, sizeof (drr), &zc); + err = write(outfd, &drr, sizeof (drr)); + + /* write header nvlist */ + if (err != -1) { + fletcher_4_incremental_native(packbuf, buflen, &zc); + err = write(outfd, packbuf, buflen); + } + free(packbuf); + if (err == 
-1) { + fsavl_destroy(fsavl); + nvlist_free(fss); + return (zfs_standard_error(zhp->zfs_hdl, + errno, errbuf)); + } + + /* write end record */ + if (err != -1) { + bzero(&drr, sizeof (drr)); + drr.drr_type = DRR_END; + drr.drr_u.drr_end.drr_checksum = zc; + err = write(outfd, &drr, sizeof (drr)); + if (err == -1) { + fsavl_destroy(fsavl); + nvlist_free(fss); + return (zfs_standard_error(zhp->zfs_hdl, + errno, errbuf)); + } + } + } + + /* dump each stream */ + sdd.fromsnap = fromsnap; + sdd.tosnap = tosnap; + sdd.outfd = outfd; + sdd.replicate = replicate; + sdd.doall = doall; + sdd.fromorigin = fromorigin; + sdd.fss = fss; + sdd.fsavl = fsavl; + sdd.verbose = verbose; + err = dump_filesystems(zhp, &sdd); + fsavl_destroy(fsavl); + nvlist_free(fss); + + if (replicate || doall) { + /* + * write final end record. NB: want to do this even if + * there was some error, because it might not be totally + * failed. + */ + dmu_replay_record_t drr = { 0 }; + drr.drr_type = DRR_END; + if (write(outfd, &drr, sizeof (drr)) == -1) { + return (zfs_standard_error(zhp->zfs_hdl, + errno, errbuf)); + } + } + + return (err || sdd.err); +} + +/* + * Routines specific to "zfs recv" + */ + +static int +recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, + boolean_t byteswap, zio_cksum_t *zc) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to read from stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, + "cannot receive"))); + } + + if (zc) { + if (byteswap) + fletcher_4_incremental_byteswap(buf, ilen, zc); + else + fletcher_4_incremental_native(buf, ilen, zc); + } + return (0); +} + +static int +recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, + boolean_t byteswap, zio_cksum_t *zc) +{ + char *buf; + int err; + + buf = zfs_alloc(hdl, len); + if (buf == NULL) + return 
(ENOMEM); + + err = recv_read(hdl, fd, buf, len, byteswap, zc); + if (err != 0) { + free(buf); + return (err); + } + + err = nvlist_unpack(buf, len, nvp, 0); + free(buf); + if (err != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (malformed nvlist)")); + return (EINVAL); + } + return (0); +} + +static int +recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, + int baselen, char *newname, recvflags_t flags) +{ + static int seq; + zfs_cmd_t zc = { 0 }; + int err; + prop_changelist_t *clp = NULL; + + if (strchr(name, '@') == NULL) { + zfs_handle_t *zhp = zfs_open(hdl, name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (-1); + clp = changelist_gather(zhp, ZFS_PROP_NAME, + flags.force ? MS_FORCE : 0); + zfs_close(zhp); + if (clp == NULL) + return (-1); + err = changelist_prefix(clp); + if (err) + return (err); + } + + if (tryname) { + (void) strcpy(newname, tryname); + + zc.zc_objset_type = DMU_OST_ZFS; + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); + + if (flags.verbose) { + (void) printf("attempting rename %s to %s\n", + zc.zc_name, zc.zc_value); + } + err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + if (err == 0 && clp) + changelist_rename(clp, name, tryname); + } else { + err = ENOENT; + } + + if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { + seq++; + + (void) strncpy(newname, name, baselen); + (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, + "recv-%u-%u", getpid(), seq); + (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); + + if (flags.verbose) { + (void) printf("failed - trying rename %s to %s\n", + zc.zc_name, zc.zc_value); + } + err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + if (err == 0 && clp) + changelist_rename(clp, name, newname); + if (err && flags.verbose) { + (void) printf("failed (%u) - " + "will try again on next pass\n", errno); + } + err = EAGAIN; + } else if (flags.verbose) 
{ + if (err == 0) + (void) printf("success\n"); + else + (void) printf("failed (%u)\n", errno); + } + + if (clp) { + (void) changelist_postfix(clp); + changelist_free(clp); + } + + + return (err); +} + +static int +recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, + char *newname, recvflags_t flags) +{ + zfs_cmd_t zc = { 0 }; + int err; + zfs_handle_t *zhp = NULL; + + zc.zc_objset_type = DMU_OST_ZFS; + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + /* unmount it */ + if (strchr(name, '@') == NULL) { + zhp = zfs_open(hdl, name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (-1); + err = zfs_unmount(zhp, NULL, flags.force ? MS_FORCE : 0); + if (err) { + zfs_close(zhp); + return (err); + } + } + + if (flags.verbose) + (void) printf("attempting destroy %s\n", zc.zc_name); + err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); + + if (err != 0) { + (void) zfs_mount(zhp, NULL, 0); + err = recv_rename(hdl, name, NULL, baselen, newname, flags); + } + if (zhp) + zfs_close(zhp); + + if (flags.verbose) { + if (err == 0) + (void) printf("success\n"); + else + (void) printf("failed (%u)\n", errno); + } + + return (err); +} + +typedef struct guid_to_name_data { + uint64_t guid; + char *name; +} guid_to_name_data_t; + +static int +guid_to_name_cb(zfs_handle_t *zhp, void *arg) +{ + guid_to_name_data_t *gtnd = arg; + int err; + + if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { + (void) strcpy(gtnd->name, zhp->zfs_name); + return (EEXIST); + } + err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); + zfs_close(zhp); + return (err); +} + +static int +guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, + char *name) +{ + /* exhaustive search all local snapshots */ + guid_to_name_data_t gtnd; + int err = 0; + zfs_handle_t *zhp; + char *cp; + + gtnd.guid = guid; + gtnd.name = name; + + if (strchr(parent, '@') == NULL) { + zhp = make_dataset_handle(hdl, parent); + if (zhp != NULL) { + err = zfs_iter_children(zhp, 
guid_to_name_cb, >nd); + zfs_close(zhp); + if (err == EEXIST) + return (0); + } + } + + cp = strchr(parent, '/'); + if (cp) + *cp = '\0'; + zhp = make_dataset_handle(hdl, parent); + if (cp) + *cp = '/'; + + if (zhp) { + err = zfs_iter_children(zhp, guid_to_name_cb, >nd); + zfs_close(zhp); + } + + return (err == EEXIST ? 0 : ENOENT); + +} + +/* + * Return true if dataset guid1 is created before guid2. + */ +static boolean_t +created_before(libzfs_handle_t *hdl, avl_tree_t *avl, + uint64_t guid1, uint64_t guid2) +{ + nvlist_t *nvfs; + char *fsname, *snapname; + char buf[ZFS_MAXNAMELEN]; + boolean_t rv; + zfs_node_t zn1, zn2; + + if (guid2 == 0) + return (B_FALSE); + if (guid1 == 0) + return (B_TRUE); + + nvfs = fsavl_find(avl, guid1, &snapname); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); + zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + + nvfs = fsavl_find(avl, guid2, &snapname); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); + zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + + rv = (zfs_snapshot_compare(&zn1, &zn2) == -1); + + zfs_close(zn1.zn_handle); + zfs_close(zn2.zn_handle); + + return (rv); +} + +static int +recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, + recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl) +{ + nvlist_t *local_nv; + avl_tree_t *local_avl; + nvpair_t *fselem, *nextfselem; + char *tosnap, *fromsnap; + char newname[ZFS_MAXNAMELEN]; + int error; + boolean_t needagain, progress; + + VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); + VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap)); + + if (flags.dryrun) + return (0); + +again: + needagain = progress = B_FALSE; + + if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, + &local_nv, &local_avl)) != 0) + return (error); + + /* + * Process deletes and renames + 
*/ + for (fselem = nvlist_next_nvpair(local_nv, NULL); + fselem; fselem = nextfselem) { + nvlist_t *nvfs, *snaps; + nvlist_t *stream_nvfs = NULL; + nvpair_t *snapelem, *nextsnapelem; + uint64_t fromguid = 0; + uint64_t originguid = 0; + uint64_t stream_originguid = 0; + uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; + char *fsname, *stream_fsname; + + nextfselem = nvlist_next_nvpair(local_nv, fselem); + + VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", + &parent_fromsnap_guid)); + (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); + + /* + * First find the stream's fs, so we can check for + * a different origin (due to "zfs promote") + */ + for (snapelem = nvlist_next_nvpair(snaps, NULL); + snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { + uint64_t thisguid; + + VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); + stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); + + if (stream_nvfs != NULL) + break; + } + + /* check for promote */ + (void) nvlist_lookup_uint64(stream_nvfs, "origin", + &stream_originguid); + if (stream_nvfs && originguid != stream_originguid) { + if (created_before(hdl, local_avl, stream_originguid, + originguid)) { + /* promote it! */ + zfs_cmd_t zc = { 0 }; + nvlist_t *origin_nvfs; + char *origin_fsname; + + if (flags.verbose) + (void) printf("promoting %s\n", fsname); + + origin_nvfs = fsavl_find(local_avl, originguid, + NULL); + VERIFY(0 == nvlist_lookup_string(origin_nvfs, + "name", &origin_fsname)); + (void) strlcpy(zc.zc_value, origin_fsname, + sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_name, fsname, + sizeof (zc.zc_name)); + error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + if (error == 0) + progress = B_TRUE; + } + /* + * We had/have the wrong origin, therefore our + * list of snapshots is wrong. 
Need to handle + * them on the next pass. + */ + needagain = B_TRUE; + continue; + } + + for (snapelem = nvlist_next_nvpair(snaps, NULL); + snapelem; snapelem = nextsnapelem) { + uint64_t thisguid; + char *stream_snapname; + nvlist_t *found; + + nextsnapelem = nvlist_next_nvpair(snaps, snapelem); + + VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); + found = fsavl_find(stream_avl, thisguid, + &stream_snapname); + + /* check for delete */ + if (found == NULL) { + char name[ZFS_MAXNAMELEN]; + + if (!flags.force) + continue; + + (void) snprintf(name, sizeof (name), "%s@%s", + fsname, nvpair_name(snapelem)); + + error = recv_destroy(hdl, name, + strlen(fsname)+1, newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + continue; + } + + stream_nvfs = found; + + /* check for different snapname */ + if (strcmp(nvpair_name(snapelem), + stream_snapname) != 0) { + char name[ZFS_MAXNAMELEN]; + char tryname[ZFS_MAXNAMELEN]; + + (void) snprintf(name, sizeof (name), "%s@%s", + fsname, nvpair_name(snapelem)); + (void) snprintf(tryname, sizeof (name), "%s@%s", + fsname, stream_snapname); + + error = recv_rename(hdl, name, tryname, + strlen(fsname)+1, newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + } + + if (strcmp(stream_snapname, fromsnap) == 0) + fromguid = thisguid; + } + + /* check for delete */ + if (stream_nvfs == NULL) { + if (!flags.force) + continue; + + error = recv_destroy(hdl, fsname, strlen(tofs)+1, + newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + continue; + } + + if (fromguid == 0 && flags.verbose) { + (void) printf("local fs %s does not have fromsnap " + "(%s in stream); must have been deleted locally; " + "ignoring\n", fsname, fromsnap); + continue; + } + + VERIFY(0 == nvlist_lookup_string(stream_nvfs, + "name", &stream_fsname)); + VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, + "parentfromsnap", &stream_parent_fromsnap_guid)); + + /* check for rename */ + if 
((stream_parent_fromsnap_guid != 0 && + stream_parent_fromsnap_guid != parent_fromsnap_guid) || + strcmp(strrchr(fsname, '/'), + strrchr(stream_fsname, '/')) != 0) { + nvlist_t *parent; + char tryname[ZFS_MAXNAMELEN]; + + parent = fsavl_find(local_avl, + stream_parent_fromsnap_guid, NULL); + /* + * NB: parent might not be found if we used the + * tosnap for stream_parent_fromsnap_guid, + * because the parent is a newly-created fs; + * we'll be able to rename it after we recv the + * new fs. + */ + if (parent != NULL) { + char *pname; + + VERIFY(0 == nvlist_lookup_string(parent, "name", + &pname)); + (void) snprintf(tryname, sizeof (tryname), + "%s%s", pname, strrchr(stream_fsname, '/')); + } else { + tryname[0] = '\0'; + if (flags.verbose) { + (void) printf("local fs %s new parent " + "not found\n", fsname); + } + } + + error = recv_rename(hdl, fsname, tryname, + strlen(tofs)+1, newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + } + } + + fsavl_destroy(local_avl); + nvlist_free(local_nv); + + if (needagain && progress) { + /* do another pass to fix up temporary names */ + if (flags.verbose) + (void) printf("another pass:\n"); + goto again; + } + + return (needagain); +} + +static int +zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, + recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc) +{ + nvlist_t *stream_nv = NULL; + avl_tree_t *stream_avl = NULL; + char *fromsnap = NULL; + char tofs[ZFS_MAXNAMELEN]; + char errbuf[1024]; + dmu_replay_record_t drre; + int error; + boolean_t anyerr = B_FALSE; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + if (strchr(destname, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "can not specify snapshot name for multi-snapshot stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + assert(drr->drr_type == DRR_BEGIN); + assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); + 
assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION); + + /* + * Read in the nvlist from the stream. + */ + if (drr->drr_payloadlen != 0) { + if (!flags.isprefix) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "must use -d to receive replication " + "(send -R) stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, + &stream_nv, flags.byteswap, zc); + if (error) { + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + } + + /* + * Read in the end record and verify checksum. + */ + if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), + flags.byteswap, NULL))) + goto out; + if (flags.byteswap) { + drre.drr_type = BSWAP_32(drre.drr_type); + drre.drr_u.drr_end.drr_checksum.zc_word[0] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); + drre.drr_u.drr_end.drr_checksum.zc_word[1] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); + drre.drr_u.drr_end.drr_checksum.zc_word[2] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); + drre.drr_u.drr_end.drr_checksum.zc_word[3] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); + } + if (drre.drr_type != DRR_END) { + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incorrect header checksum")); + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + + (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); + + if (drr->drr_payloadlen != 0) { + nvlist_t *stream_fss; + + VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", + &stream_fss)); + stream_avl = fsavl_create(stream_fss); + + if (fromsnap != NULL) { + (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); + if (flags.isprefix) { + int i = strcspn(drr->drr_u.drr_begin.drr_toname, + "/@"); + /* zfs_receive_one() will create_parents() */ + (void) strlcat(tofs, + &drr->drr_u.drr_begin.drr_toname[i], + 
ZFS_MAXNAMELEN); + *strchr(tofs, '@') = '\0'; + } + anyerr |= recv_incremental_replication(hdl, tofs, + flags, stream_nv, stream_avl); + } + } + + + /* Finally, receive each contained stream */ + do { + /* + * we should figure out if it has a recoverable + * error, in which case do a recv_skip() and drive on. + * Note, if we fail due to already having this guid, + * zfs_receive_one() will take care of it (ie, + * recv_skip() and return 0). + */ + error = zfs_receive(hdl, destname, flags, fd, stream_avl); + if (error == ENODATA) { + error = 0; + break; + } + anyerr |= error; + } while (error == 0); + + if (drr->drr_payloadlen != 0 && fromsnap != NULL) { + /* + * Now that we have the fs's they sent us, try the + * renames again. + */ + anyerr |= recv_incremental_replication(hdl, tofs, flags, + stream_nv, stream_avl); + } + +out: + fsavl_destroy(stream_avl); + if (stream_nv) + nvlist_free(stream_nv); + if (anyerr) + error = -1; + return (error); +} + +static int +recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) +{ + dmu_replay_record_t *drr; + void *buf = malloc(1<<20); + + /* XXX would be great to use lseek if possible... */ + drr = buf; + + while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), + byteswap, NULL) == 0) { + if (byteswap) + drr->drr_type = BSWAP_32(drr->drr_type); + + switch (drr->drr_type) { + case DRR_BEGIN: + /* NB: not to be used on v2 stream packages */ + assert(drr->drr_payloadlen == 0); + break; + + case DRR_END: + free(buf); + return (0); + + case DRR_OBJECT: + if (byteswap) { + drr->drr_u.drr_object.drr_bonuslen = + BSWAP_32(drr->drr_u.drr_object. 
+ drr_bonuslen); + } + (void) recv_read(hdl, fd, buf, + P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), + B_FALSE, NULL); + break; + + case DRR_WRITE: + if (byteswap) { + drr->drr_u.drr_write.drr_length = + BSWAP_32(drr->drr_u.drr_write.drr_length); + } + (void) recv_read(hdl, fd, buf, + drr->drr_u.drr_write.drr_length, B_FALSE, NULL); + break; + + case DRR_FREEOBJECTS: + case DRR_FREE: + break; + + default: + assert(!"invalid record type"); + } + } + + free(buf); + return (-1); +} + +/* + * Restores a backup of tosnap from the file descriptor specified by infd. + */ +static int +zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, + recvflags_t flags, dmu_replay_record_t *drr, + dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl) +{ + zfs_cmd_t zc = { 0 }; + time_t begin_time; + int ioctl_err, ioctl_errno, err, choplen; + char *cp; + struct drr_begin *drrb = &drr->drr_u.drr_begin; + char errbuf[1024]; + char chopprefix[ZFS_MAXNAMELEN]; + boolean_t newfs = B_FALSE; + boolean_t stream_wantsnewfs; + uint64_t parent_snapguid = 0; + prop_changelist_t *clp = NULL; + + begin_time = time(NULL); + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + if (stream_avl != NULL) { + nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, NULL); + nvlist_t *props; + + (void) nvlist_lookup_uint64(fs, "parentfromsnap", + &parent_snapguid); + err = nvlist_lookup_nvlist(fs, "props", &props); + if (err) + VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + if (flags.canmountoff) { + VERIFY(0 == nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); + } + if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) + return (-1); + if (err) + nvlist_free(props); + } + + /* + * Determine how much of the snapshot name stored in the stream + * we are going to tack on to the name they specified on the + * command line, and how much we are going to chop off. 
+ * + * If they specified a snapshot, chop the entire name stored in + * the stream. + */ + (void) strcpy(chopprefix, drrb->drr_toname); + if (flags.isprefix) { + /* + * They specified a fs with -d, we want to tack on + * everything but the pool name stored in the stream + */ + if (strchr(tosnap, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "argument - snapshot not allowed with -d")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + cp = strchr(chopprefix, '/'); + if (cp == NULL) + cp = strchr(chopprefix, '@'); + *cp = '\0'; + } else if (strchr(tosnap, '@') == NULL) { + /* + * If they specified a filesystem without -d, we want to + * tack on everything after the fs specified in the + * first name from the stream. + */ + cp = strchr(chopprefix, '@'); + *cp = '\0'; + } + choplen = strlen(chopprefix); + + /* + * Determine name of destination snapshot, store in zc_value. + */ + (void) strcpy(zc.zc_value, tosnap); + (void) strncat(zc.zc_value, drrb->drr_toname+choplen, + sizeof (zc.zc_value)); + if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + /* + * Determine the name of the origin snapshot, store in zc_string. 
+ */ + if (drrb->drr_flags & DRR_FLAG_CLONE) { + if (guid_to_name(hdl, tosnap, + drrb->drr_fromguid, zc.zc_string) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "local origin for clone %s does not exist"), + zc.zc_value); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + if (flags.verbose) + (void) printf("found clone origin %s\n", zc.zc_string); + } + + stream_wantsnewfs = (drrb->drr_fromguid == NULL || + (drrb->drr_flags & DRR_FLAG_CLONE)); + + if (stream_wantsnewfs) { + /* + * if the parent fs does not exist, look for it based on + * the parent snap GUID + */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive new filesystem stream")); + + (void) strcpy(zc.zc_name, zc.zc_value); + cp = strrchr(zc.zc_name, '/'); + if (cp) + *cp = '\0'; + if (cp && + !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + char suffix[ZFS_MAXNAMELEN]; + (void) strcpy(suffix, strrchr(zc.zc_value, '/')); + if (guid_to_name(hdl, tosnap, parent_snapguid, + zc.zc_value) == 0) { + *strchr(zc.zc_value, '@') = '\0'; + (void) strcat(zc.zc_value, suffix); + } + } + } else { + /* + * if the fs does not exist, look for it based on the + * fromsnap GUID + */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive incremental stream")); + + (void) strcpy(zc.zc_name, zc.zc_value); + *strchr(zc.zc_name, '@') = '\0'; + + if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + char snap[ZFS_MAXNAMELEN]; + (void) strcpy(snap, strchr(zc.zc_value, '@')); + if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, + zc.zc_value) == 0) { + *strchr(zc.zc_value, '@') = '\0'; + (void) strcat(zc.zc_value, snap); + } + } + } + + (void) strcpy(zc.zc_name, zc.zc_value); + *strchr(zc.zc_name, '@') = '\0'; + + if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + zfs_handle_t *zhp; + /* + * Destination fs exists. 
Therefore this should either + * be an incremental, or the stream specifies a new fs + * (full stream or clone) and they want us to blow it + * away (and have therefore specified -F and removed any + * snapshots). + */ + + if (stream_wantsnewfs) { + if (!flags.force) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' exists\n" + "must specify -F to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination has snapshots (eg. %s)\n" + "must destroy them to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + } + + zhp = zfs_open(hdl, zc.zc_name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (-1); + if (stream_wantsnewfs && + zhp->zfs_dmustats.dds_origin[0]) { + zfs_close(zhp); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' is a clone\n" + "must destroy it to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + + if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && + stream_wantsnewfs) { + /* We can't do online recv in this case */ + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0); + if (clp == NULL) + return (-1); + if (changelist_prefix(clp) != 0) { + changelist_free(clp); + return (-1); + } + } + if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME) { + if (zvol_remove_link(hdl, zhp->zfs_name) != 0) { + zfs_close(zhp); + return (-1); + } + } + zfs_close(zhp); + } else { + /* + * Destination FS does not exist. Therefore we better + * be creating a new filesystem (either from a full + * backup, or a clone) + */ + + if (!stream_wantsnewfs) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' does not exist"), zc.zc_name); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + + /* Do the recvbackup ioctl to the fs's parent. 
*/ + *strrchr(zc.zc_name, '/') = '\0'; + + if (flags.isprefix && !flags.dryrun) { + err = create_parents(hdl, zc.zc_value, strlen(tosnap)); + if (err != 0) { + return (zfs_error(hdl, + EZFS_BADRESTORE, errbuf)); + } + } + + newfs = B_TRUE; + } + + zc.zc_begin_record = drr_noswap->drr_u.drr_begin; + zc.zc_cookie = infd; + zc.zc_guid = flags.force; + if (flags.verbose) { + (void) printf("%s %s stream of %s into %s\n", + flags.dryrun ? "would receive" : "receiving", + drrb->drr_fromguid ? "incremental" : "full", + drrb->drr_toname, zc.zc_value); + (void) fflush(stdout); + } + + if (flags.dryrun) + return (recv_skip(hdl, infd, flags.byteswap)); + + err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); + ioctl_errno = errno; + if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) { + /* + * It may be that this snapshot already exists, + * in which case we want to consume & ignore it + * rather than failing. + */ + avl_tree_t *local_avl; + nvlist_t *local_nv, *fs; + char *cp = strchr(zc.zc_value, '@'); + + /* + * XXX Do this faster by just iterating over snaps in + * this fs. Also if zc_value does not exist, we will + * get a strange "does not exist" error message. 
+ */ + *cp = '\0'; + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, + &local_nv, &local_avl) == 0) { + *cp = '@'; + fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); + fsavl_destroy(local_avl); + nvlist_free(local_nv); + + if (fs != NULL) { + if (flags.verbose) { + (void) printf("snap %s already exists; " + "ignoring\n", zc.zc_value); + } + ioctl_err = recv_skip(hdl, infd, + flags.byteswap); + } + } + *cp = '@'; + } + + if (ioctl_err != 0) { + switch (ioctl_errno) { + case ENODEV: + cp = strchr(zc.zc_value, '@'); + *cp = '\0'; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "most recent snapshot of %s does not\n" + "match incremental source"), zc.zc_value); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + *cp = '@'; + break; + case ETXTBSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination %s has been modified\n" + "since most recent snapshot"), zc.zc_name); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + break; + case EEXIST: + cp = strchr(zc.zc_value, '@'); + if (newfs) { + /* it's the containing fs that exists */ + *cp = '\0'; + } + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination already exists")); + (void) zfs_error_fmt(hdl, EZFS_EXISTS, + dgettext(TEXT_DOMAIN, "cannot restore to %s"), + zc.zc_value); + *cp = '@'; + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; + case ECKSUM: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid stream (checksum mismatch)")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; + default: + (void) zfs_standard_error(hdl, ioctl_errno, errbuf); + } + } + + /* + * Mount or recreate the /dev links for the target filesystem + * (if created, or if we tore them down to do an incremental + * restore), and the /dev links for the new snapshot (if + * created). Also mount any children of the target filesystem + * if we did an incremental receive. 
+ */ + cp = strchr(zc.zc_value, '@'); + if (cp && (ioctl_err == 0 || !newfs)) { + zfs_handle_t *h; + + *cp = '\0'; + h = zfs_open(hdl, zc.zc_value, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + *cp = '@'; + if (h) { + if (h->zfs_type == ZFS_TYPE_VOLUME) { + err = zvol_create_link(hdl, h->zfs_name); + if (err == 0 && ioctl_err == 0) + err = zvol_create_link(hdl, + zc.zc_value); + } else if (newfs) { + err = zfs_mount(h, NULL, 0); + } + zfs_close(h); + } + } + + if (clp) { + err |= changelist_postfix(clp); + changelist_free(clp); + } + + if (err || ioctl_err) + return (-1); + + if (flags.verbose) { + char buf1[64]; + char buf2[64]; + uint64_t bytes = zc.zc_cookie; + time_t delta = time(NULL) - begin_time; + if (delta == 0) + delta = 1; + zfs_nicenum(bytes, buf1, sizeof (buf1)); + zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); + + (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", + buf1, delta, buf2); + } + + return (0); +} + +/* + * Restores a backup of tosnap from the file descriptor specified by infd. 
+ */ +int +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, + int infd, avl_tree_t *stream_avl) +{ + int err; + dmu_replay_record_t drr, drr_noswap; + struct drr_begin *drrb = &drr.drr_u.drr_begin; + char errbuf[1024]; + zio_cksum_t zcksum = { 0 }; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + if (flags.isprefix && + !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " + "(%s) does not exist"), tosnap); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + + /* read in the BEGIN record */ + if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, + &zcksum))) + return (err); + + if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { + /* It's the double end record at the end of a package */ + return (ENODATA); + } + + /* the kernel needs the non-byteswapped begin record */ + drr_noswap = drr; + + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { + /* + * We computed the checksum in the wrong byteorder in + * recv_read() above; do it again correctly. 
+ */ + bzero(&zcksum, sizeof (zio_cksum_t)); + fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); + flags.byteswap = B_TRUE; + + drr.drr_type = BSWAP_32(drr.drr_type); + drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_version = BSWAP_64(drrb->drr_version); + drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_flags = BSWAP_32(drrb->drr_flags); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); + } + + if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (bad magic number)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + if (strchr(drrb->drr_toname, '@') == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (bad snapshot name)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) { + return (zfs_receive_one(hdl, infd, tosnap, flags, + &drr, &drr_noswap, stream_avl)); + } else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) { + return (zfs_receive_package(hdl, infd, tosnap, flags, + &drr, &zcksum)); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "stream is unsupported version %llu"), + drrb->drr_version); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } +} diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c index ecab27dd1c..e24c88045b 100644 --- a/usr/src/uts/common/fs/zfs/bplist.c +++ b/usr/src/uts/common/fs/zfs/bplist.c @@ -278,9 +278,7 @@ bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) int bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { - uint64_t itor = 0, comp = 0, uncomp = 0; int err; - blkptr_t bp; mutex_enter(&bpl->bpl_lock); @@ -298,6 +296,9 @@ bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t 
*uncompp) mutex_exit(&bpl->bpl_lock); if (!bpl->bpl_havecomp) { + uint64_t itor = 0, comp = 0, uncomp = 0; + blkptr_t bp; + while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { comp += BP_GET_PSIZE(&bp); uncomp += BP_GET_UCSIZE(&bp); diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 0f687ff66d..c249d5e20e 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -261,6 +261,45 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, return (0); } +static int +dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type) +{ + objset_impl_t *osi; + int err; + + mutex_enter(&ds->ds_opening_lock); + osi = dsl_dataset_get_user_ptr(ds); + if (osi == NULL) { + err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), + ds, &ds->ds_phys->ds_bp, &osi); + if (err) + return (err); + } + mutex_exit(&ds->ds_opening_lock); + + os->os = osi; + os->os_mode = DS_MODE_NONE; + + if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) + return (EINVAL); + return (0); +} + +int +dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp) +{ + objset_t *os; + int err; + + os = kmem_alloc(sizeof (objset_t), KM_SLEEP); + err = dmu_objset_open_ds_os(ds, os, type); + if (err) + kmem_free(os, sizeof (objset_t)); + else + *osp = os; + return (err); +} + /* called from zpl */ int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, @@ -268,9 +307,10 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, { objset_t *os; dsl_dataset_t *ds; - objset_impl_t *osi; int err; + ASSERT(mode != DS_MODE_NONE); + os = kmem_alloc(sizeof (objset_t), KM_SLEEP); err = dsl_dataset_open(name, mode, os, &ds); if (err) { @@ -278,34 +318,22 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, return (err); } - mutex_enter(&ds->ds_opening_lock); - osi = dsl_dataset_get_user_ptr(ds); - if (osi == NULL) { - err = 
dmu_objset_open_impl(dsl_dataset_get_spa(ds), - ds, &ds->ds_phys->ds_bp, &osi); - if (err) { - dsl_dataset_close(ds, mode, os); - kmem_free(os, sizeof (objset_t)); - return (err); - } - } - mutex_exit(&ds->ds_opening_lock); - - os->os = osi; + err = dmu_objset_open_ds_os(ds, os, type); os->os_mode = mode; - - if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) { - dmu_objset_close(os); - return (EINVAL); + if (err) { + kmem_free(os, sizeof (objset_t)); + dsl_dataset_close(ds, mode, os); + } else { + *osp = os; } - *osp = os; - return (0); + return (err); } void dmu_objset_close(objset_t *os) { - dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); + if (os->os_mode != DS_MODE_NONE) + dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os); kmem_free(os, sizeof (objset_t)); } @@ -499,7 +527,7 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); dsobj = dsl_dataset_create_sync(dd, oa->lastname, - oa->clone_parent, tx); + oa->clone_parent, cr, tx); VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds)); @@ -515,11 +543,6 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) oa->userfunc(&osi->os, oa->userarg, cr, tx); } - /* - * Create create time permission if any? - */ - dsl_deleg_set_create_perms(ds->ds_dir, tx, cr); - spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa, tx, cr, "dataset = %llu", dsobj); @@ -580,13 +603,21 @@ dmu_objset_destroy(const char *name) * It would be nicer to do this in dsl_dataset_destroy_sync(), * but the replay log objset is modified in open context. */ - error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os); + error = dmu_objset_open(name, DMU_OST_ANY, + DS_MODE_EXCLUSIVE|DS_MODE_READONLY, &os); if (error == 0) { + dsl_dataset_t *ds = os->os->os_dsl_dataset; zil_destroy(dmu_objset_zil(os), B_FALSE); - dmu_objset_close(os); + + /* + * dsl_dataset_destroy() closes the ds. 
+ * os is just used as the tag after it's freed. + */ + kmem_free(os, sizeof (objset_t)); + error = dsl_dataset_destroy(ds, os); } - return (dsl_dataset_destroy(name)); + return (error); } int @@ -594,16 +625,23 @@ dmu_objset_rollback(const char *name) { int err; objset_t *os; + dsl_dataset_t *ds; err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); if (err) return (err); - /* XXX uncache everything? */ - err = dsl_dataset_rollback(os->os->os_dsl_dataset); + ds = os->os->os_dsl_dataset; + err = dsl_dataset_rollback(ds, os->os->os_phys->os_type); - dmu_objset_close(os); + /* + * NB: we close the objset manually because the rollback + * actually implicitly called dmu_objset_evict(), thus freeing + * the objset_impl_t. + */ + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, os); + kmem_free(os, sizeof (objset_t)); return (err); } @@ -612,6 +650,12 @@ struct snaparg { char *snapname; char failed[MAXPATHLEN]; boolean_t checkperms; + list_t objsets; +}; + +struct osnode { + list_node_t node; + objset_t *os; }; static int @@ -653,8 +697,13 @@ dmu_objset_snapshot_one(char *name, void *arg) */ err = zil_suspend(dmu_objset_zil(os)); if (err == 0) { + struct osnode *osn; dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, - dsl_dataset_snapshot_sync, os, sn->snapname, 3); + dsl_dataset_snapshot_sync, os->os->os_dsl_dataset, + sn->snapname, 3); + osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP); + osn->os = os; + list_insert_tail(&sn->objsets, osn); } else { dmu_objset_close(os); } @@ -666,6 +715,7 @@ int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) { dsl_sync_task_t *dst; + struct osnode *osn; struct snaparg sn = { 0 }; spa_t *spa; int err; @@ -678,6 +728,8 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); sn.snapname = snapname; + list_create(&sn.objsets, sizeof (struct osnode), + offsetof(struct osnode, node)); if (recursive) { 
sn.checkperms = B_TRUE; @@ -695,12 +747,18 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive) for (dst = list_head(&sn.dstg->dstg_tasks); dst; dst = list_next(&sn.dstg->dstg_tasks, dst)) { - objset_t *os = dst->dst_arg1; + dsl_dataset_t *ds = dst->dst_arg1; if (dst->dst_err) - dmu_objset_name(os, sn.failed); - zil_resume(dmu_objset_zil(os)); - dmu_objset_close(os); + dsl_dataset_name(ds, sn.failed); + } + + while (osn = list_head(&sn.objsets)) { + list_remove(&sn.objsets, osn); + zil_resume(dmu_objset_zil(osn->os)); + dmu_objset_close(osn->os); + kmem_free(osn, sizeof (struct osnode)); } + list_destroy(&sn.objsets); out: if (err) (void) strcpy(fsname, sn.failed); diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index 812abd0265..19009b4415 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -41,9 +41,12 @@ #include <sys/zap.h> #include <sys/zio_checksum.h> +static char *dmu_recv_tag = "dmu_recv_tag"; + struct backuparg { dmu_replay_record_t *drr; vnode_t *vp; + offset_t *off; objset_t *os; zio_cksum_t zc; int err; @@ -59,6 +62,7 @@ dump_bytes(struct backuparg *ba, void *buf, int len) ba->err = vn_rdwr(UIO_WRITE, ba->vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); + *ba->off += len; return (ba->err); } @@ -217,13 +221,15 @@ backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) } int -dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) +dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + vnode_t *vp, offset_t *off) { dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? 
fromsnap->os->os_dsl_dataset : NULL; dmu_replay_record_t *drr; struct backuparg ba; int err; + uint64_t fromtxg = 0; /* tosnap must be a snapshot */ if (ds->ds_phys->ds_next_snap_obj == 0) @@ -231,25 +237,51 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) /* fromsnap must be an earlier snapshot from the same fs as tosnap */ if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= - ds->ds_phys->ds_creation_txg)) + fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) return (EXDEV); + if (fromorigin) { + if (fromsnap) + return (EINVAL); + + if (ds->ds_dir->dd_phys->dd_origin_obj != NULL) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + rw_enter(&dp->dp_config_rwlock, RW_READER); + err = dsl_dataset_open_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, NULL, + DS_MODE_NONE, FTAG, &fromds); + rw_exit(&dp->dp_config_rwlock); + if (err) + return (err); + } else { + fromorigin = B_FALSE; + } + } + + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION; + drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION; drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; + if (fromorigin) + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (fromds) drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); + if (fromds) + fromtxg = fromds->ds_phys->ds_creation_txg; + if (fromorigin) + dsl_dataset_close(fromds, DS_MODE_NONE, FTAG); + ba.drr = drr; ba.vp = vp; ba.os = tosnap; + ba.off = off; ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { @@ -257,8 +289,7 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) return (ba.err); } - 
err = traverse_dsl_dataset(ds, - fromds ? fromds->ds_phys->ds_creation_txg : 0, + err = traverse_dsl_dataset(ds, fromtxg, ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, backup_cb, &ba); @@ -283,271 +314,410 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) return (0); } -struct restorearg { - int err; - int byteswap; - vnode_t *vp; - char *buf; - uint64_t voff; - int buflen; /* number of valid bytes in buf */ - int bufoff; /* next offset to read */ - int bufsize; /* amount of memory allocated for buf */ - zio_cksum_t zc; +struct recvbeginsyncarg { + const char *tofs; + const char *tosnap; + dsl_dataset_t *origin; + uint64_t fromguid; + dmu_objset_type_t type; + void *tag; + boolean_t force; + char clonelastname[MAXNAMELEN]; + dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ }; -static int -replay_incremental_check(dsl_dataset_t *ds, struct drr_begin *drrb) -{ - const char *snapname; - int err; - uint64_t val; - - /* must already be a snapshot of this fs */ - if (ds->ds_phys->ds_prev_snap_obj == 0) - return (ENODEV); - - /* most recent snapshot must match fromguid */ - if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) - return (ENODEV); - - /* new snapshot name must not exist */ - snapname = strrchr(drrb->drr_toname, '@'); - if (snapname == NULL) - return (EEXIST); - - snapname++; - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); - - return (0); -} - -/* ARGSUSED */ -static int -replay_offline_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) +static dsl_dataset_t * +recv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type, + cred_t *cr, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - struct drr_begin *drrb = arg2; + dsl_dataset_t *ds; - /* must not have any changes since most recent snapshot */ - if (dsl_dataset_modified_since_lastsnap(ds)) - 
return (ETXTBSY); + VERIFY(0 == dsl_dataset_open_obj(dp, dsobj, NULL, + DS_MODE_EXCLUSIVE, dmu_recv_tag, &ds)); - return (replay_incremental_check(ds, drrb)); -} + if (type != DMU_OST_NONE) { + (void) dmu_objset_create_impl(dp->dp_spa, + ds, &ds->ds_phys->ds_bp, type, tx); + } -/* ARGSUSED */ -static void -replay_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, - dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, + spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld", ds->ds_phys->ds_dir_obj); + + return (ds); } /* ARGSUSED */ static int -replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; - struct drr_begin *drrb = arg2; + struct recvbeginsyncarg *rbsa = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; - char *cp; uint64_t val; int err; - cp = strchr(drrb->drr_toname, '@'); - *cp = '\0'; err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, - strrchr(drrb->drr_toname, '/') + 1, - sizeof (uint64_t), 1, &val); - *cp = '@'; + strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); if (err != ENOENT) return (err ? 
err : EEXIST); + if (rbsa->origin) { + /* make sure it's a snap in the same pool */ + if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) + return (EXDEV); + if (rbsa->origin->ds_phys->ds_num_children == 0) + return (EINVAL); + if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) + return (ENODEV); + } + return (0); } static void -replay_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +recv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; - struct drr_begin *drrb = arg2; - char *cp; - dsl_dataset_t *ds; + struct recvbeginsyncarg *rbsa = arg2; uint64_t dsobj; - cp = strchr(drrb->drr_toname, '@'); - *cp = '\0'; - dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1, - NULL, tx); + dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, + rbsa->origin, cr, tx); - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, - DS_MODE_EXCLUSIVE, FTAG, &ds)); + rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, + rbsa->origin ? 
DMU_OST_NONE : rbsa->type, cr, tx); +} - (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), - ds, &ds->ds_phys->ds_bp, drrb->drr_type, tx); +static int +recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + int err; - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + /* must be a head ds */ + if (ds->ds_phys->ds_next_snap_obj != 0) + return (EINVAL); - spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, - ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld", - ds->ds_phys->ds_dir_obj); + /* must not be a clone ds */ + if (ds->ds_prev != NULL) + return (EINVAL); + + err = dsl_dataset_destroy_check(ds, rbsa->tag, tx); + if (err) + return (err); - *cp = '@'; + if (rbsa->origin) { + /* make sure it's a snap in the same pool */ + if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool) + return (EXDEV); + if (rbsa->origin->ds_phys->ds_num_children == 0) + return (EINVAL); + if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) + return (ENODEV); + } - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + return (0); } -struct onlineincarg { - dsl_dir_t *dd; - dsl_dataset_t *ohds; - boolean_t force; - const char *cosname; -}; +static void +recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + dsl_dir_t *dd = ds->ds_dir; + uint64_t dsobj; + + /* + * NB: caller must provide an extra hold on the dsl_dir_t, so it + * won't go away when dsl_dataset_destroy_sync() closes the + * dataset. + */ + dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx); + + dsobj = dsl_dataset_create_sync_impl(dd, rbsa->origin, tx); + + rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, + rbsa->origin ? 
DMU_OST_NONE : rbsa->type, cr, tx); +} /* ARGSUSED */ static int -replay_online_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) { - struct onlineincarg *oia = arg1; + dsl_dataset_t *ds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + int err; + uint64_t val; - if (dsl_dataset_modified_since_lastsnap(oia->ohds) && !oia->force) + /* must not have any changes since most recent snapshot */ + if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) return (ETXTBSY); - return (replay_incremental_check(oia->ohds, arg2)); + /* must already be a snapshot of this fs */ + if (ds->ds_phys->ds_prev_snap_obj == 0) + return (ENODEV); + + /* most recent snapshot must match fromguid */ + if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) + return (ENODEV); + + /* new snapshot name must not exist */ + err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); + if (err == 0) + return (EEXIST); + if (err != ENOENT) + return (err); + return (0); } /* ARGSUSED */ static void -replay_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +recv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { - struct onlineincarg *oia = arg1; - dsl_dataset_t *ohds = oia->ohds; - dsl_dir_t *dd = oia->dd; - dsl_dataset_t *ods, *ds; + dsl_dataset_t *ohds = arg1; + struct recvbeginsyncarg *rbsa = arg2; + dsl_pool_t *dp = ohds->ds_dir->dd_pool; + dsl_dataset_t *ods, *cds; uint64_t dsobj; - VERIFY(0 == dsl_dataset_open_obj(ohds->ds_dir->dd_pool, - ohds->ds_phys->ds_prev_snap_obj, NULL, - DS_MODE_STANDARD, FTAG, &ods)); - - dsobj = dsl_dataset_create_sync(dd, strrchr(oia->cosname, '/') + 1, - ods, tx); + /* create the temporary clone */ + VERIFY(0 == dsl_dataset_open_obj(dp, ohds->ds_phys->ds_prev_snap_obj, + NULL, DS_MODE_STANDARD, FTAG, &ods)); + dsobj = dsl_dataset_create_sync(ohds->ds_dir, + rbsa->clonelastname, ods, cr, tx); + 
dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); /* open the temporary clone */ - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, - DS_MODE_EXCLUSIVE, FTAG, &ds)); + VERIFY(0 == dsl_dataset_open_obj(dp, dsobj, NULL, + DS_MODE_EXCLUSIVE, dmu_recv_tag, &cds)); + + dmu_buf_will_dirty(cds->ds_dbuf, tx); + cds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + rbsa->ds = cds; + + spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, + dp->dp_spa, tx, cr, "dataset = %lld", + cds->ds_phys->ds_dir_obj); +} + +/* ARGSUSED */ +static void +recv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld", ds->ds_phys->ds_dir_obj); - - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); } -static int -replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx) +/* + * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() + * succeeds; otherwise we will leak the holds on the datasets. + */ +int +dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, + boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *drc) { - objset_t *os = arg1; - struct drr_begin *drrb = arg2; - char *snapname; + int err = 0; + boolean_t byteswap; + struct recvbeginsyncarg rbsa; + uint64_t version; + int flags; + dsl_dataset_t *ds; - /* XXX verify that drr_toname is in dd */ + if (drrb->drr_magic == DMU_BACKUP_MAGIC) + byteswap = FALSE; + else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + byteswap = TRUE; + else + return (EINVAL); + + rbsa.tofs = tofs; + rbsa.tosnap = tosnap; + rbsa.origin = origin ? 
origin->os->os_dsl_dataset : NULL; + rbsa.fromguid = drrb->drr_fromguid; + rbsa.type = drrb->drr_type; + rbsa.tag = FTAG; + version = drrb->drr_version; + flags = drrb->drr_flags; + + if (byteswap) { + rbsa.type = BSWAP_32(rbsa.type); + rbsa.fromguid = BSWAP_64(rbsa.fromguid); + version = BSWAP_64(version); + flags = BSWAP_32(flags); + } - snapname = strchr(drrb->drr_toname, '@'); - if (snapname == NULL) + if (version != DMU_BACKUP_STREAM_VERSION || + rbsa.type >= DMU_OST_NUMTYPES || + ((flags & DRR_FLAG_CLONE) && origin == NULL)) return (EINVAL); - snapname++; - return (dsl_dataset_snapshot_check(os, snapname, tx)); -} + bzero(drc, sizeof (dmu_recv_cookie_t)); + drc->drc_drrb = drrb; + drc->drc_tosnap = tosnap; + drc->drc_force = force; -static void -replay_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) -{ - objset_t *os = arg1; - struct drr_begin *drrb = arg2; - char *snapname; - dsl_dataset_t *ds, *hds; + /* + * Process the begin in syncing context. + */ + if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE) && !online) { + /* offline incremental receive */ + err = dsl_dataset_open(tofs, + DS_MODE_EXCLUSIVE, dmu_recv_tag, &ds); + if (err) + return (err); - snapname = strchr(drrb->drr_toname, '@') + 1; + /* + * Only do the rollback if the most recent snapshot + * matches the incremental source + */ + if (force) { + if (ds->ds_prev == NULL || + ds->ds_prev->ds_phys->ds_guid != + rbsa.fromguid) { + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, + dmu_recv_tag); + return (ENODEV); + } + (void) dsl_dataset_rollback(ds, DMU_OST_NONE); + } + rbsa.force = B_FALSE; + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_incremental_check, + recv_offline_incremental_sync, + ds, &rbsa, 1); + if (err) { + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, dmu_recv_tag); + return (err); + } + drc->drc_logical_ds = drc->drc_real_ds = ds; + } else if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) { + /* online incremental receive */ - dsl_dataset_snapshot_sync(os, snapname, cr, tx); + /* tmp 
clone name is: tofs/%tosnap" */ + (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), + "%%%s", tosnap); - /* set snapshot's creation time and guid */ - hds = os->os->os_dsl_dataset; - VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool, - hds->ds_phys->ds_prev_snap_obj, NULL, - DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT, - FTAG, &ds)); + /* open the dataset we are logically receiving into */ + err = dsl_dataset_open(tofs, + DS_MODE_STANDARD, dmu_recv_tag, &ds); + if (err) + return (err); - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_creation_time = drrb->drr_creation_time; - ds->ds_phys->ds_guid = drrb->drr_toguid; - ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + rbsa.force = force; + err = dsl_sync_task_do(ds->ds_dir->dd_pool, + recv_incremental_check, + recv_online_incremental_sync, ds, &rbsa, 5); + if (err) { + dsl_dataset_close(ds, DS_MODE_STANDARD, dmu_recv_tag); + return (err); + } + drc->drc_logical_ds = ds; + drc->drc_real_ds = rbsa.ds; + } else { + /* create new fs -- full backup or clone */ + dsl_dir_t *dd = NULL; + const char *tail; - /* log the end of the receive */ - spa_history_internal_log(LOG_DS_RECEIVE, ds->ds_dir->dd_pool->dp_spa, - tx, cr, "dataset = %llu", ds->ds_phys->ds_dir_obj); + err = dsl_dir_open(tofs, FTAG, &dd, &tail); + if (err) + return (err); + if (tail == NULL) { + if (!force) { + dsl_dir_close(dd, FTAG); + return (EEXIST); + } - dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + err = dsl_dataset_open_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, NULL, + DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, + FTAG, &ds); + rw_exit(&dd->dd_pool->dp_config_rwlock); + if (err) { + dsl_dir_close(dd, FTAG); + return (err); + } - dmu_buf_will_dirty(hds->ds_dbuf, tx); - hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + err = dsl_sync_task_do(dd->dd_pool, + recv_full_existing_check, + recv_full_existing_sync, ds, &rbsa, 5); + /* if successful, sync 
task closes the ds for us */ + if (err) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + } else { + err = dsl_sync_task_do(dd->dd_pool, recv_full_check, + recv_full_sync, dd, &rbsa, 5); + if (err) + return (err); + } + dsl_dir_close(dd, FTAG); + if (err) + return (err); + drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; + drc->drc_newfs = B_TRUE; + } + + /* downgrade our hold on the ds from EXCLUSIVE to PRIMARY */ + dsl_dataset_downgrade(drc->drc_real_ds, + DS_MODE_EXCLUSIVE, DS_MODE_PRIMARY); + + return (0); } +struct restorearg { + int err; + int byteswap; + vnode_t *vp; + char *buf; + uint64_t voff; + int bufsize; /* amount of memory allocated for buf */ + zio_cksum_t cksum; +}; + static void * restore_read(struct restorearg *ra, int len) { void *rv; + int done = 0; /* some things will require 8-byte alignment, so everything must */ ASSERT3U(len % 8, ==, 0); - while (ra->buflen - ra->bufoff < len) { + while (done < len) { ssize_t resid; - int leftover = ra->buflen - ra->bufoff; - (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover); ra->err = vn_rdwr(UIO_READ, ra->vp, - (caddr_t)ra->buf + leftover, ra->bufsize - leftover, + (caddr_t)ra->buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - ra->voff += ra->bufsize - leftover - resid; - ra->buflen = ra->bufsize - resid; - ra->bufoff = 0; - if (resid == ra->bufsize - leftover) + if (resid == len - done) ra->err = EINVAL; + ra->voff += len - done - resid; + done = len - resid; if (ra->err) return (NULL); - /* Could compute checksum here? 
*/ } - ASSERT3U(ra->bufoff % 8, ==, 0); - ASSERT3U(ra->buflen - ra->bufoff, >=, len); - rv = ra->buf + ra->bufoff; - ra->bufoff += len; + ASSERT3U(done, ==, len); + rv = ra->buf; if (ra->byteswap) - fletcher_4_incremental_byteswap(rv, len, &ra->zc); + fletcher_4_incremental_byteswap(rv, len, &ra->cksum); else - fletcher_4_incremental_native(rv, len, &ra->zc); + fletcher_4_incremental_native(rv, len, &ra->cksum); return (rv); } @@ -557,12 +727,14 @@ backup_byteswap(dmu_replay_record_t *drr) #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) drr->drr_type = BSWAP_32(drr->drr_type); + drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); switch (drr->drr_type) { case DRR_BEGIN: DO64(drr_begin.drr_magic); DO64(drr_begin.drr_version); DO64(drr_begin.drr_creation_time); DO32(drr_begin.drr_type); + DO32(drr_begin.drr_flags); DO64(drr_begin.drr_toguid); DO64(drr_begin.drr_fromguid); break; @@ -786,52 +958,67 @@ restore_free(struct restorearg *ra, objset_t *os, return (err); } +static void +recv_abort_cleanup(dmu_recv_cookie_t *drc) +{ + if (drc->drc_newfs || drc->drc_real_ds != drc->drc_logical_ds) { + /* + * online incremental or new fs: destroy the fs (which + * may be a clone) that we created + */ + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag); + if (drc->drc_real_ds != drc->drc_logical_ds) { + dsl_dataset_close(drc->drc_logical_ds, + DS_MODE_STANDARD, dmu_recv_tag); + } + } else { + /* + * offline incremental: rollback to most recent snapshot. + */ + int lmode = DS_MODE_PRIMARY; + if (dsl_dataset_tryupgrade(drc->drc_real_ds, + DS_MODE_PRIMARY, DS_MODE_EXCLUSIVE)) { + lmode = DS_MODE_EXCLUSIVE; + (void) dsl_dataset_rollback(drc->drc_real_ds, + DMU_OST_NONE); + } + dsl_dataset_close(drc->drc_real_ds, lmode, FTAG); + } +} + +/* + * NB: callers *must* call dmu_recv_end() if this succeeds. 
+ */ int -dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, - boolean_t force, boolean_t online, vnode_t *vp, uint64_t voffset, - char *cosname) +dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) { - struct restorearg ra; + struct restorearg ra = { 0 }; dmu_replay_record_t *drr; - char *cp; - objset_t *os = NULL; - zio_cksum_t pzc; - char *clonebuf = NULL; - size_t len; - - bzero(&ra, sizeof (ra)); - ra.vp = vp; - ra.voff = voffset; - ra.bufsize = 1<<20; - ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); + objset_t *os; + zio_cksum_t pcksum; - if (drrb->drr_magic == DMU_BACKUP_MAGIC) { - ra.byteswap = FALSE; - } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { + if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) ra.byteswap = TRUE; - } else { - ra.err = EINVAL; - goto out; - } - /* - * NB: this assumes that struct drr_begin will be the largest in - * dmu_replay_record_t's drr_u, and thus we don't need to pad it - * with zeros to make it the same length as we wrote out. 
- */ - ((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN; - ((dmu_replay_record_t *)ra.buf)->drr_pad = 0; - ((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb; - if (ra.byteswap) { - fletcher_4_incremental_byteswap(ra.buf, - sizeof (dmu_replay_record_t), &ra.zc); - } else { - fletcher_4_incremental_native(ra.buf, - sizeof (dmu_replay_record_t), &ra.zc); + { + /* compute checksum of drr_begin record */ + dmu_replay_record_t *drr; + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); + + drr->drr_type = DRR_BEGIN; + drr->drr_u.drr_begin = *drc->drc_drrb; + if (ra.byteswap) { + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &ra.cksum); + } else { + fletcher_4_incremental_native(drr, + sizeof (dmu_replay_record_t), &ra.cksum); + } + kmem_free(drr, sizeof (dmu_replay_record_t)); } - (void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */ if (ra.byteswap) { + struct drr_begin *drrb = drc->drc_drrb; drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_version = BSWAP_64(drrb->drr_version); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); @@ -840,133 +1027,26 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } - ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); - - if (drrb->drr_version != DMU_BACKUP_VERSION || - drrb->drr_type >= DMU_OST_NUMTYPES || - strchr(drrb->drr_toname, '@') == NULL) { - ra.err = EINVAL; - goto out; - } - - /* - * Process the begin in syncing context. 
- */ - if (drrb->drr_fromguid && !online) { - /* offline incremental receive */ - - dsl_dataset_t *ds = NULL; - - cp = strchr(tosnap, '@'); - *cp = '\0'; - ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds); - *cp = '@'; - if (ra.err) - goto out; - - /* - * Only do the rollback if the most recent snapshot - * matches the incremental source - */ - if (force) { - if (ds->ds_prev == NULL || - ds->ds_prev->ds_phys->ds_guid != - drrb->drr_fromguid) { - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - kmem_free(ra.buf, ra.bufsize); - return (ENODEV); - } - (void) dsl_dataset_rollback(ds); - } - ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool, - replay_offline_incremental_check, - replay_offline_incremental_sync, ds, drrb, 1); - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - } else if (drrb->drr_fromguid && online) { - /* online incremental receive */ - - const char *tail; - struct onlineincarg oia = { 0 }; - - /* - * Get the dsl_dir for the parent of the - * temporary clone. - */ - cp = strchr(tosnap, '@'); - *cp = '\0'; - - /* tmp clone is: tonsap + '/' + '%' + "snapX" */ - len = strlen(tosnap) + 2 + strlen(cp + 1) + 1; - clonebuf = kmem_alloc(len, KM_SLEEP); - (void) snprintf(clonebuf, len, "%s%c%c%s%c", - tosnap, '/', '%', cp + 1, '\0'); - ra.err = dsl_dir_open(tosnap, FTAG, &oia.dd, &tail); - *cp = '@'; - if (ra.err) - goto out; - - /* open the dataset we are logically receiving into */ - *cp = '\0'; - ra.err = dsl_dataset_open(tosnap, DS_MODE_STANDARD, - FTAG, &oia.ohds); - *cp = '@'; - if (ra.err) { - dsl_dir_close(oia.dd, FTAG); - goto out; - } - - oia.force = force; - oia.cosname = clonebuf; - ra.err = dsl_sync_task_do(oia.dd->dd_pool, - replay_online_incremental_check, - replay_online_incremental_sync, &oia, drrb, 5); - dsl_dataset_close(oia.ohds, DS_MODE_STANDARD, FTAG); - dsl_dir_close(oia.dd, FTAG); - } else { - /* full backup */ - - dsl_dir_t *dd = NULL; - const char *tail; - - /* can't restore full backup into topmost fs, for now */ - if 
(strrchr(drrb->drr_toname, '/') == NULL) { - ra.err = EINVAL; - goto out; - } - - cp = strchr(tosnap, '@'); - *cp = '\0'; - ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail); - *cp = '@'; - if (ra.err) - goto out; - if (tail == NULL) { - ra.err = EEXIST; - goto out; - } + ra.vp = vp; + ra.voff = *voffp; + ra.bufsize = 1<<20; + ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); - ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check, - replay_full_sync, dd, drrb, 5); - dsl_dir_close(dd, FTAG); - } - if (ra.err) - goto out; + /* these were verified in dmu_recv_begin */ + ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION); + ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); /* * Open the objset we are modifying. */ + VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0); - cp = strchr(tosnap, '@'); - *cp = '\0'; - ra.err = dmu_objset_open(clonebuf == NULL ? tosnap : clonebuf, - DMU_OST_ANY, DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); - *cp = '@'; - ASSERT3U(ra.err, ==, 0); + ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); /* * Read records and process them. */ - pzc = ra.zc; + pcksum = ra.cksum; while (ra.err == 0 && NULL != (drr = restore_read(&ra, sizeof (*drr)))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { @@ -1017,99 +1097,130 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, * everything before the DRR_END record. 
*/ if (drre.drr_checksum.zc_word[0] != 0 && - !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pzc)) { + !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) { ra.err = ECKSUM; goto out; } - - if (clonebuf == NULL) { - ra.err = dsl_sync_task_do(dmu_objset_ds(os)-> - ds_dir->dd_pool, replay_end_check, - replay_end_sync, os, drrb, 3); - } goto out; } default: ra.err = EINVAL; goto out; } - pzc = ra.zc; + pcksum = ra.cksum; } out: - if (os) { - if (drrb->drr_fromguid && online && !ra.err) - dmu_objset_name(os, cosname); - dmu_objset_close(os); - } + dmu_objset_close(os); - /* - * Make sure we don't rollback/destroy unless we actually - * processed the begin properly. 'os' will only be set if this - * is the case. - */ - if (ra.err && os && tosnap && strchr(tosnap, '@')) { + if (ra.err != 0) { /* * rollback or destroy what we created, so we don't * leave it in the restoring state. */ - dsl_dataset_t *ds; - int err; - - cp = strchr(tosnap, '@'); - *cp = '\0'; - err = dsl_dataset_open(clonebuf == NULL ? tosnap : clonebuf, - DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, - FTAG, &ds); - if (err == 0) { - txg_wait_synced(ds->ds_dir->dd_pool, 0); - if (drrb->drr_fromguid) { - if (clonebuf != NULL) { - /* - * online incremental: destroy - * the temporarily created clone. - */ - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, - FTAG); - (void) dmu_objset_destroy(clonebuf); - } else { - /* - * offline incremental: rollback to - * most recent snapshot. 
- */ - (void) dsl_dataset_rollback(ds); - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, - FTAG); - } - } else { - /* full: destroy whole fs */ - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - (void) dsl_dataset_destroy(tosnap); - } - } - *cp = '@'; + txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); + recv_abort_cleanup(drc); } - if (clonebuf != NULL) - kmem_free(clonebuf, len); kmem_free(ra.buf, ra.bufsize); - if (sizep) - *sizep = ra.voff; + *voffp = ra.voff; return (ra.err); } +struct recvendsyncarg { + char *tosnap; + uint64_t creation_time; + uint64_t toguid; +}; + +static int +recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvendsyncarg *resa = arg2; + + return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); +} + +static void +recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + struct recvendsyncarg *resa = arg2; + + dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx); + + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; + ds->ds_prev->ds_phys->ds_guid = resa->toguid; + ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; +} + int -dmu_replay_end_snapshot(char *name, struct drr_begin *drrb) +dmu_recv_end(dmu_recv_cookie_t *drc) { - objset_t *os; - int err; + int err = 0; + int lmode; - err = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_STANDARD, &os); - if (err) - return (err); + /* + * XXX hack; seems the ds is still dirty and + * dsl_pool_zil_clean() expects it to have a ds_user_ptr (and + * zil), but clone_swap() can close it. 
+ */ + txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); - err = dsl_sync_task_do(dmu_objset_ds(os)->ds_dir->dd_pool, - replay_end_check, replay_end_sync, os, drrb, 3); - dmu_objset_close(os); + if (dsl_dataset_tryupgrade(drc->drc_real_ds, + DS_MODE_PRIMARY, DS_MODE_EXCLUSIVE)) { + lmode = DS_MODE_EXCLUSIVE; + } else { + recv_abort_cleanup(drc); + return (EBUSY); + } + + if (drc->drc_logical_ds != drc->drc_real_ds) { + if (err == 0 && dsl_dataset_tryupgrade(drc->drc_logical_ds, + DS_MODE_STANDARD, DS_MODE_EXCLUSIVE)) { + lmode = DS_MODE_EXCLUSIVE; + err = dsl_dataset_clone_swap(drc->drc_real_ds, + drc->drc_logical_ds, drc->drc_force); + } else { + lmode = DS_MODE_STANDARD; + err = EBUSY; + } + } + + if (err == 0) { + struct recvendsyncarg resa; + + resa.creation_time = drc->drc_drrb->drr_creation_time; + resa.toguid = drc->drc_drrb->drr_toguid; + resa.tosnap = drc->drc_tosnap; + + err = dsl_sync_task_do(drc->drc_real_ds->ds_dir->dd_pool, + recv_end_check, recv_end_sync, + drc->drc_logical_ds, &resa, 3); + if (err) { + if (drc->drc_newfs) { + ASSERT(drc->drc_logical_ds == drc->drc_real_ds); + (void) dsl_dataset_destroy(drc->drc_real_ds, + dmu_recv_tag); + return (err); + } else { + (void) dsl_dataset_rollback(drc->drc_logical_ds, + DMU_OST_NONE); + } + } + } + + if (drc->drc_logical_ds != drc->drc_real_ds) { + /* dsl_dataset_destroy() will close the ds */ + (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag); + } + /* close the hold from dmu_recv_begin */ + dsl_dataset_close(drc->drc_logical_ds, lmode, dmu_recv_tag); return (err); } diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index 1cba47175a..8c62cd9cef 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -45,8 +45,6 @@ static dsl_checkfunc_t dsl_dataset_destroy_begin_check; static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; static dsl_checkfunc_t dsl_dataset_rollback_check; static dsl_syncfunc_t 
dsl_dataset_rollback_sync; -static dsl_checkfunc_t dsl_dataset_destroy_check; -static dsl_syncfunc_t dsl_dataset_destroy_sync; #define DS_REF_MAX (1ULL << 62) @@ -533,6 +531,39 @@ dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) } void +dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) +{ + uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; + uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; + mutex_enter(&ds->ds_lock); + ASSERT3U(ds->ds_open_refcount, >=, oldweight); + ASSERT3U(oldweight, >=, newweight); + ds->ds_open_refcount -= oldweight; + ds->ds_open_refcount += newweight; + mutex_exit(&ds->ds_lock); +} + +boolean_t +dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) +{ + boolean_t rv; + uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; + uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; + mutex_enter(&ds->ds_lock); + ASSERT3U(ds->ds_open_refcount, >=, oldweight); + ASSERT3U(newweight, >=, oldweight); + if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { + rv = B_FALSE; + } else { + ds->ds_open_refcount -= oldweight; + ds->ds_open_refcount += newweight; + rv = B_TRUE; + } + mutex_exit(&ds->ds_lock); + return (rv); +} + +void dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; @@ -574,24 +605,18 @@ dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) } uint64_t -dsl_dataset_create_sync(dsl_dir_t *pdd, - const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) +dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx) { - dsl_pool_t *dp = pdd->dd_pool; + dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; - uint64_t dsobj, ddobj; + uint64_t dsobj; objset_t *mos = dp->dp_meta_objset; - dsl_dir_t *dd; - ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); - ASSERT(clone_parent == NULL || - 
clone_parent->ds_phys->ds_num_children > 0); - ASSERT(lastname[0] != '@'); + ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); + ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); - - ddobj = dsl_dir_create_sync(pdd, lastname, tx); - VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); + ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); @@ -608,28 +633,49 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, dsphys->ds_creation_txg = tx->tx_txg; dsphys->ds_deadlist_obj = bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); - if (clone_parent) { - dsphys->ds_prev_snap_obj = clone_parent->ds_object; + if (origin) { + dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = - clone_parent->ds_phys->ds_creation_txg; + origin->ds_phys->ds_creation_txg; dsphys->ds_used_bytes = - clone_parent->ds_phys->ds_used_bytes; + origin->ds_phys->ds_used_bytes; dsphys->ds_compressed_bytes = - clone_parent->ds_phys->ds_compressed_bytes; + origin->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = - clone_parent->ds_phys->ds_uncompressed_bytes; - dsphys->ds_bp = clone_parent->ds_phys->ds_bp; + origin->ds_phys->ds_uncompressed_bytes; + dsphys->ds_bp = origin->ds_phys->ds_bp; - dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); - clone_parent->ds_phys->ds_num_children++; + dmu_buf_will_dirty(origin->ds_dbuf, tx); + origin->ds_phys->ds_num_children++; dmu_buf_will_dirty(dd->dd_dbuf, tx); - dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; + dd->dd_phys->dd_origin_obj = origin->ds_object; } dmu_buf_rele(dbuf, FTAG); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_head_dataset_obj = dsobj; + + return (dsobj); +} + +uint64_t +dsl_dataset_create_sync(dsl_dir_t *pdd, + const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx) +{ + dsl_pool_t *dp = pdd->dd_pool; + uint64_t dsobj, ddobj; + 
dsl_dir_t *dd; + + ASSERT(lastname[0] != '@'); + + ddobj = dsl_dir_create_sync(pdd, lastname, tx); + VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); + + dsobj = dsl_dataset_create_sync_impl(dd, origin, tx); + + dsl_deleg_set_create_perms(dd, tx, cr); + dsl_dir_close(dd, FTAG); return (dsobj); @@ -713,36 +759,36 @@ dsl_snapshots_destroy(char *fsname, char *snapname) return (err); } +/* + * ds must be opened EXCLUSIVE or PRIMARY. on return (whether + * successful or not), ds will be closed and caller can no longer + * dereference it. + */ int -dsl_dataset_destroy(const char *name) +dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) { int err; dsl_sync_task_group_t *dstg; objset_t *os; - dsl_dataset_t *ds; dsl_dir_t *dd; uint64_t obj; - if (strchr(name, '@')) { + if (ds->ds_open_refcount != DS_REF_MAX) { + if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, + DS_MODE_EXCLUSIVE) == 0) { + dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); + return (EBUSY); + } + } + + if (dsl_dataset_is_snapshot(ds)) { /* Destroying a snapshot is simpler */ - err = dsl_dataset_open(name, - DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, - FTAG, &ds); - if (err) - return (err); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, - ds, FTAG, 0); - if (err) - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (err); + ds, tag, 0); + goto out; } - err = dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); - if (err) - return (err); - ds = os->os->os_dsl_dataset; dd = ds->ds_dir; /* @@ -751,10 +797,12 @@ dsl_dataset_destroy(const char *name) */ err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); - if (err) { - dmu_objset_close(os); - return (err); - } + if (err) + goto out; + + err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); + if (err) + goto out; /* * remove the objects in open context, so that we won't @@ -783,45 +831,47 @@ 
dsl_dataset_destroy(const char *name) dmu_objset_close(os); if (err != ESRCH) - return (err); - - err = dsl_dataset_open(name, - DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, - FTAG, &ds); - if (err) - return (err); + goto out; - err = dsl_dir_open(name, FTAG, &dd, NULL); - if (err) { - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (err); + if (ds->ds_user_ptr) { + ds->ds_user_evict_func(ds, ds->ds_user_ptr); + ds->ds_user_ptr = NULL; } + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); + rw_exit(&dd->dd_pool->dp_config_rwlock); + + if (err) + goto out; + /* * Blow away the dsl_dir + head dataset. */ dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, ds, FTAG, 0); + dsl_dataset_destroy_sync, ds, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); /* if it is successful, *destroy_sync will close the ds+dd */ - if (err) { - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); + if (err) dsl_dir_close(dd, FTAG); - } +out: + if (err) + dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); return (err); } int -dsl_dataset_rollback(dsl_dataset_t *ds) +dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) { ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); + return (dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_rollback_check, dsl_dataset_rollback_sync, - ds, NULL, 0)); + ds, &ost, 0)); } void * @@ -927,14 +977,12 @@ static int dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; + dmu_objset_type_t *ost = arg2; /* - * There must be a previous snapshot. I suppose we could roll - * it back to being empty (and re-initialize the upper (ZPL) - * layer). But for now there's no way to do this via the user - * interface. 
+ * We can only roll back to emptyness if it is a ZPL objset. */ - if (ds->ds_phys->ds_prev_snap_txg == 0) + if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) return (EINVAL); /* @@ -958,17 +1006,29 @@ static void dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; + dmu_objset_type_t *ost = arg2; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; dmu_buf_will_dirty(ds->ds_dbuf, tx); /* * Before the roll back destroy the zil. - * Note, ds_user_ptr can be null if we are doing a "zfs receive -F" */ if (ds->ds_user_ptr != NULL) { zil_rollback_destroy( ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); + + /* + * We need to make sure that the objset_impl_t is reopened after + * we do the rollback, otherwise it will have the wrong + * objset_phys_t. Normally this would happen when this + * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the + * dataset to be immediately evicted. But when doing "zfs recv + * -F", we reopen the objset before that, so that there is no + * window where the dataset is closed and inconsistent. + */ + ds->ds_user_evict_func(ds, ds->ds_user_ptr); + ds->ds_user_ptr = NULL; } /* Zero out the deadlist. 
*/ @@ -1000,20 +1060,34 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) -used, -compressed, -uncompressed, tx); } - /* Change our contents to that of the prev snapshot */ - ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); - ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; - ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; - ds->ds_phys->ds_compressed_bytes = - ds->ds_prev->ds_phys->ds_compressed_bytes; - ds->ds_phys->ds_uncompressed_bytes = - ds->ds_prev->ds_phys->ds_uncompressed_bytes; - ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; - ds->ds_phys->ds_unique_bytes = 0; + if (ds->ds_prev) { + /* Change our contents to that of the prev snapshot */ + ASSERT3U(ds->ds_prev->ds_object, ==, + ds->ds_phys->ds_prev_snap_obj); + ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; + ds->ds_phys->ds_used_bytes = + ds->ds_prev->ds_phys->ds_used_bytes; + ds->ds_phys->ds_compressed_bytes = + ds->ds_prev->ds_phys->ds_compressed_bytes; + ds->ds_phys->ds_uncompressed_bytes = + ds->ds_prev->ds_phys->ds_uncompressed_bytes; + ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; + ds->ds_phys->ds_unique_bytes = 0; - if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { - dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ds->ds_prev->ds_phys->ds_unique_bytes = 0; + if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_unique_bytes = 0; + } + } else { + /* Zero out our contents, recreate objset */ + bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); + ds->ds_phys->ds_used_bytes = 0; + ds->ds_phys->ds_compressed_bytes = 0; + ds->ds_phys->ds_uncompressed_bytes = 0; + ds->ds_phys->ds_flags = 0; + ds->ds_phys->ds_unique_bytes = 0; + (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, + &ds->ds_phys->ds_bp, *ost, tx); } spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, @@ -1025,6 +1099,9 @@ static 
int dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t count; + int err; /* * Can't delete a head dataset if there are snapshots of it. @@ -1035,6 +1112,17 @@ dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EINVAL); + /* + * This is really a dsl_dir thing, but check it here so that + * we'll be less likely to leave this dataset inconsistent & + * nearly destroyed. + */ + err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); + if (err) + return (err); + if (count != 0) + return (EEXIST); + return (0); } @@ -1054,7 +1142,7 @@ dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) } /* ARGSUSED */ -static int +int dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; @@ -1083,7 +1171,7 @@ dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) return (0); } -static void +void dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; @@ -1337,8 +1425,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) int dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { - objset_t *os = arg1; - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = arg1; const char *snapname = arg2; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int err; @@ -1375,8 +1462,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) void dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { - objset_t *os = arg1; - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = arg1; const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; @@ -1499,20 +1585,21 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { stat->dds_creation_txg = 
ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; + stat->dds_guid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_next_snap_obj) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } /* clone origin is really a dsl_dir thing... */ - if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { + if (ds->ds_dir->dd_phys->dd_origin_obj) { dsl_dataset_t *ods; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_clone_parent_obj, + ds->ds_dir->dd_phys->dd_origin_obj, NULL, DS_MODE_NONE, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_clone_of); + dsl_dataset_name(ods, stat->dds_origin); dsl_dataset_close(ods, DS_MODE_NONE, FTAG); rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } @@ -1808,9 +1895,9 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) struct promotearg *pa = arg2; dsl_dir_t *dd = hds->ds_dir; dsl_pool_t *dp = hds->ds_dir->dd_pool; - dsl_dir_t *pdd = NULL; + dsl_dir_t *odd = NULL; dsl_dataset_t *ds = NULL; - dsl_dataset_t *pivot_ds = NULL; + dsl_dataset_t *origin_ds = NULL; dsl_dataset_t *newnext_ds = NULL; int err; char *name = NULL; @@ -1820,23 +1907,22 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) bzero(pa, sizeof (*pa)); /* Check that it is a clone */ - if (dd->dd_phys->dd_clone_parent_obj == 0) + if (dd->dd_phys->dd_origin_obj == 0) return (EINVAL); /* Since this is so expensive, don't do the preliminary check */ if (!dmu_tx_is_syncing(tx)) return (0); - if (err = dsl_dataset_open_obj(dp, - dd->dd_phys->dd_clone_parent_obj, - NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) + if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, + NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)) goto out; - pdd = pivot_ds->ds_dir; + odd = origin_ds->ds_dir; { dsl_dataset_t *phds; if (err = dsl_dataset_open_obj(dd->dd_pool, - pdd->dd_phys->dd_head_dataset_obj, + 
odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds)) goto out; pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; @@ -1848,10 +1934,10 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) goto out; } - /* find pivot point's new next ds */ + /* find origin's new next ds */ VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, NULL, DS_MODE_NONE, FTAG, &newnext_ds)); - while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { + while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) { dsl_dataset_t *prev; if (err = dsl_dataset_open_obj(dd->dd_pool, @@ -1863,10 +1949,10 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) } pa->newnext_obj = newnext_ds->ds_object; - /* compute pivot point's new unique space */ + /* compute origin's new unique space */ while ((err = bplist_iterate(&newnext_ds->ds_deadlist, &itor, &bp)) == 0) { - if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) + if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg) pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); } if (err != ENOENT) @@ -1874,7 +1960,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) /* Walk the snapshots that we are moving */ name = kmem_alloc(MAXPATHLEN, KM_SLEEP); - ds = pivot_ds; + ds = origin_ds; /* CONSTCOND */ while (TRUE) { uint64_t val, dlused, dlcomp, dluncomp; @@ -1922,19 +2008,19 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); break; } - if (ds != pivot_ds) + if (ds != origin_ds) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); ds = prev; } /* Check that there is enough space here */ - err = dsl_dir_transfer_possible(pdd, dd, pa->used); + err = dsl_dir_transfer_possible(odd, dd, pa->used); out: - if (ds && ds != pivot_ds) + if (ds && ds != origin_ds) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - if (pivot_ds) - dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); + if (origin_ds) + 
dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); if (newnext_ds) dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); if (name) @@ -1949,26 +2035,25 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) struct promotearg *pa = arg2; dsl_dir_t *dd = hds->ds_dir; dsl_pool_t *dp = hds->ds_dir->dd_pool; - dsl_dir_t *pdd = NULL; - dsl_dataset_t *ds, *pivot_ds; + dsl_dir_t *odd = NULL; + dsl_dataset_t *ds, *origin_ds; char *name; - ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); + ASSERT(dd->dd_phys->dd_origin_obj != 0); ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); - VERIFY(0 == dsl_dataset_open_obj(dp, - dd->dd_phys->dd_clone_parent_obj, - NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); + VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, + NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)); /* - * We need to explicitly open pdd, since pivot_ds's pdd will be + * We need to explicitly open odd, since origin_ds's dd will be * changing. */ - VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, - NULL, FTAG, &pdd)); + VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, + NULL, FTAG, &odd)); /* move snapshots to this dir */ name = kmem_alloc(MAXPATHLEN, KM_SLEEP); - ds = pivot_ds; + ds = origin_ds; /* CONSTCOND */ while (TRUE) { dsl_dataset_t *prev; @@ -1983,9 +2068,9 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); + ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object; - ASSERT3P(ds->ds_dir, ==, pdd); + ASSERT3P(ds->ds_dir, ==, odd); dsl_dir_close(ds->ds_dir, ds); VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); @@ -2003,35 +2088,35 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); break; } - if (ds != pivot_ds) + if 
(ds != origin_ds) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); ds = prev; } - if (ds != pivot_ds) + if (ds != origin_ds) dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - /* change pivot point's next snap */ - dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); - pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; + /* change origin's next snap */ + dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); + origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; - /* change clone_parent-age */ + /* change origin */ dmu_buf_will_dirty(dd->dd_dbuf, tx); - ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); - dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; - dmu_buf_will_dirty(pdd->dd_dbuf, tx); - pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; + ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); + dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; + dmu_buf_will_dirty(odd->dd_dbuf, tx); + odd->dd_phys->dd_origin_obj = origin_ds->ds_object; /* change space accounting */ - dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); + dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); - pivot_ds->ds_phys->ds_unique_bytes = pa->unique; + origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, cr, "dataset = %llu", ds->ds_object); - dsl_dir_close(pdd, FTAG); - dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); + dsl_dir_close(odd, FTAG); + dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); kmem_free(name, MAXPATHLEN); } @@ -2066,122 +2151,85 @@ dsl_dataset_promote(const char *name) return (err); } -#define SWITCH64(x, y) \ - { \ - uint64_t __tmp = (x); \ - (x) = (y); \ - (y) = __tmp; \ - } +struct cloneswaparg { + dsl_dataset_t *cds; /* clone dataset */ + dsl_dataset_t *ohds; /* origin's head dataset */ + boolean_t force; +}; /* ARGSUSED */ static 
int dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dataset_t *cds = arg1; /* clone to become new head */ - boolean_t *forcep = arg2; - dsl_dir_t *cdd = cds->ds_dir; - dsl_pool_t *dp = cds->ds_dir->dd_pool; - dsl_dataset_t *ods; /* the snapshot cds is cloned off of */ - dsl_dataset_t *ohds = NULL; - dsl_dir_t *odd; - int err; + struct cloneswaparg *csa = arg1; - /* check that it is a clone */ - if (cdd->dd_phys->dd_clone_parent_obj == 0) + /* they should both be heads */ + if (dsl_dataset_is_snapshot(csa->cds) || + dsl_dataset_is_snapshot(csa->ohds)) return (EINVAL); - /* check that cds is not a snapshot */ - if (dsl_dataset_is_snapshot(cds)) + /* the branch point should be just before them */ + if (csa->cds->ds_prev != csa->ohds->ds_prev) return (EINVAL); - /* open the origin */ - if (err = dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj, - NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods)) - return (err); - odd = ods->ds_dir; - - /* make sure the clone is descendant of origin */ - if (cdd->dd_parent != odd) { - err = EINVAL; - goto out; - } + /* cds should be the clone */ + if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != + csa->ohds->ds_object) + return (EINVAL); - /* check that there are no snapshots after the origin */ - if (cds->ds_phys->ds_prev_snap_obj != ods->ds_object || - ods->ds_phys->ds_next_snap_obj != - odd->dd_phys->dd_head_dataset_obj) { - err = EINVAL; - goto out; - } + /* the clone should be a child of the origin */ + if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) + return (EINVAL); - /* - * Verify origin head dataset hasn't been modified or - * 'force' has been passed down. 
- */ - if (!(*forcep) && - (err = dsl_dataset_open_obj(cdd->dd_pool, - odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE, - FTAG, &ohds)) == 0) { - if (dsl_dataset_modified_since_lastsnap(ohds)) - err = ETXTBSY; - dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG); - } -out: - dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); - return (err); + /* ohds shouldn't be modified unless 'force' */ + if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) + return (ETXTBSY); + return (0); } /* ARGSUSED */ static void dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { - dsl_dataset_t *cds = arg1; /* clone to become new head */ - dsl_dir_t *cdd = cds->ds_dir; - dsl_pool_t *dp = cds->ds_dir->dd_pool; - dsl_dataset_t *ods, *ohds; - dsl_dir_t *odd; + struct cloneswaparg *csa = arg1; + dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; uint64_t itor = 0; blkptr_t bp; uint64_t unique = 0; int err; - ASSERT(cdd->dd_phys->dd_clone_parent_obj != 0); - ASSERT(dsl_dataset_is_snapshot(cds) == 0); - - /* open the origin */ - VERIFY(0 == dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj, - NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods)); - odd = ods->ds_dir; - ASSERT(cds->ds_phys->ds_prev_snap_obj == ods->ds_object); - ASSERT(ods->ds_phys->ds_next_snap_obj == - odd->dd_phys->dd_head_dataset_obj); + dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); + dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); + dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); - /* open the origin head */ - VERIFY(0 == dsl_dataset_open_obj(cdd->dd_pool, - odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE, - FTAG, &ohds)); - ASSERT(odd == ohds->ds_dir); + if (csa->cds->ds_user_ptr != NULL) { + csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); + csa->cds->ds_user_ptr = NULL; + } - dmu_buf_will_dirty(cds->ds_dbuf, tx); - dmu_buf_will_dirty(ohds->ds_dbuf, tx); - dmu_buf_will_dirty(ods->ds_dbuf, tx); + if (csa->ohds->ds_user_ptr != NULL) { + 
csa->ohds->ds_user_evict_func(csa->ohds, + csa->ohds->ds_user_ptr); + csa->ohds->ds_user_ptr = NULL; + } /* compute unique space */ - while ((err = bplist_iterate(&cds->ds_deadlist, &itor, &bp)) == 0) { - if (bp.blk_birth > ods->ds_phys->ds_prev_snap_txg) - unique += bp_get_dasize(cdd->dd_pool->dp_spa, &bp); + while ((err = bplist_iterate(&csa->cds->ds_deadlist, + &itor, &bp)) == 0) { + if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) + unique += bp_get_dasize(dp->dp_spa, &bp); } VERIFY(err == ENOENT); /* reset origin's unique bytes */ - ods->ds_phys->ds_unique_bytes = unique; + csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; /* swap blkptrs */ { blkptr_t tmp; - tmp = ohds->ds_phys->ds_bp; - ohds->ds_phys->ds_bp = cds->ds_phys->ds_bp; - cds->ds_phys->ds_bp = tmp; + tmp = csa->ohds->ds_phys->ds_bp; + csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; + csa->cds->ds_phys->ds_bp = tmp; } /* set dd_*_bytes */ @@ -2190,60 +2238,68 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) uint64_t cdl_used, cdl_comp, cdl_uncomp; uint64_t odl_used, odl_comp, odl_uncomp; - VERIFY(0 == bplist_space(&cds->ds_deadlist, &cdl_used, + VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, &cdl_comp, &cdl_uncomp)); - VERIFY(0 == bplist_space(&ohds->ds_deadlist, &odl_used, + VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp)); - dused = cds->ds_phys->ds_used_bytes + cdl_used - - (ohds->ds_phys->ds_used_bytes + odl_used); - dcomp = cds->ds_phys->ds_compressed_bytes + cdl_comp - - (ohds->ds_phys->ds_compressed_bytes + odl_comp); - duncomp = cds->ds_phys->ds_uncompressed_bytes + cdl_uncomp - - (ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); - - dsl_dir_diduse_space(odd, dused, dcomp, duncomp, tx); - dsl_dir_diduse_space(cdd, -dused, -dcomp, -duncomp, tx); + dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - + (csa->ohds->ds_phys->ds_used_bytes + odl_used); + dcomp = 
csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - + (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); + duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + + cdl_uncomp - + (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); + + dsl_dir_diduse_space(csa->ohds->ds_dir, + dused, dcomp, duncomp, tx); + dsl_dir_diduse_space(csa->cds->ds_dir, + -dused, -dcomp, -duncomp, tx); + } + +#define SWITCH64(x, y) \ + { \ + uint64_t __tmp = (x); \ + (x) = (y); \ + (y) = __tmp; \ } /* swap ds_*_bytes */ - SWITCH64(ohds->ds_phys->ds_used_bytes, cds->ds_phys->ds_used_bytes); - SWITCH64(ohds->ds_phys->ds_compressed_bytes, - cds->ds_phys->ds_compressed_bytes); - SWITCH64(ohds->ds_phys->ds_uncompressed_bytes, - cds->ds_phys->ds_uncompressed_bytes); + SWITCH64(csa->ohds->ds_phys->ds_used_bytes, + csa->cds->ds_phys->ds_used_bytes); + SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, + csa->cds->ds_phys->ds_compressed_bytes); + SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, + csa->cds->ds_phys->ds_uncompressed_bytes); /* swap deadlists */ - bplist_close(&cds->ds_deadlist); - bplist_close(&ohds->ds_deadlist); - SWITCH64(ohds->ds_phys->ds_deadlist_obj, cds->ds_phys->ds_deadlist_obj); - VERIFY(0 == bplist_open(&cds->ds_deadlist, dp->dp_meta_objset, - cds->ds_phys->ds_deadlist_obj)); - VERIFY(0 == bplist_open(&ohds->ds_deadlist, dp->dp_meta_objset, - ohds->ds_phys->ds_deadlist_obj)); - - dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG); - dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG); + bplist_close(&csa->cds->ds_deadlist); + bplist_close(&csa->ohds->ds_deadlist); + SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, + csa->cds->ds_phys->ds_deadlist_obj); + VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, + csa->cds->ds_phys->ds_deadlist_obj)); + VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, + csa->ohds->ds_phys->ds_deadlist_obj)); } /* * Swap the clone "cosname" with its origin head file system. 
*/ int -dsl_dataset_clone_swap(const char *cosname, boolean_t force) +dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, + boolean_t force) { - dsl_dataset_t *ds; - int err; + struct cloneswaparg csa; - err = dsl_dataset_open(cosname, - DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, FTAG, &ds); - if (err) - return (err); + ASSERT(clone->ds_open_refcount == DS_REF_MAX); + ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, + csa.cds = clone; + csa.ohds = origin_head; + csa.force = force; + return (dsl_sync_task_do(clone->ds_dir->dd_pool, dsl_dataset_clone_swap_check, - dsl_dataset_clone_swap_sync, ds, &force, 9); - dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); - return (err); + dsl_dataset_clone_swap_sync, &csa, NULL, 9)); } /* diff --git a/usr/src/uts/common/fs/zfs/dsl_deleg.c b/usr/src/uts/common/fs/zfs/dsl_deleg.c index 3a9ffa430d..e5d32bd5fc 100644 --- a/usr/src/uts/common/fs/zfs/dsl_deleg.c +++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c @@ -151,36 +151,69 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) return (0); } -typedef struct { - nvlist_t *p_nvp; - boolean_t p_unset; -} perm_args_t; - static void dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; - perm_args_t *pa = arg2; + nvlist_t *nvp = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; nvpair_t *whopair = NULL; uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) { - if (pa->p_unset) - return; dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } - while (whopair = nvlist_next_nvpair(pa->p_nvp, whopair)) { + while (whopair = nvlist_next_nvpair(nvp, whopair)) { + const char *whokey = nvpair_name(whopair); + nvlist_t *perms; + nvpair_t *permpair = NULL; + uint64_t jumpobj; + + VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + + if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { + 
jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, + DMU_OT_NONE, 0, tx); + VERIFY(zap_update(mos, zapobj, + whokey, 8, 1, &jumpobj, tx) == 0); + } + + while (permpair = nvlist_next_nvpair(perms, permpair)) { + const char *perm = nvpair_name(permpair); + uint64_t n = 0; + + VERIFY(zap_update(mos, jumpobj, + perm, 8, 1, &n, tx) == 0); + spa_history_internal_log(LOG_DS_PERM_UPDATE, + dd->dd_pool->dp_spa, tx, cr, + "%s %s dataset = %llu", whokey, perm, + dd->dd_phys->dd_head_dataset_obj); + } + } +} + +static void +dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) +{ + dsl_dir_t *dd = arg1; + nvlist_t *nvp = arg2; + objset_t *mos = dd->dd_pool->dp_meta_objset; + nvpair_t *whopair = NULL; + uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + + if (zapobj == 0) + return; + + while (whopair = nvlist_next_nvpair(nvp, whopair)) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; uint64_t jumpobj; if (nvpair_value_nvlist(whopair, &perms) != 0) { - ASSERT(pa->p_unset); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == 0) { (void) zap_remove(mos, zapobj, whokey, tx); @@ -193,37 +226,21 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) continue; } - if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { - /* - * If object doesn't exist and we are removing - * it, then just continue to next item in nvlist - */ - if (pa->p_unset) - continue; - jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, - DMU_OT_NONE, 0, tx); - VERIFY(zap_update(mos, zapobj, - whokey, 8, 1, &jumpobj, tx) == 0); - } + if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) + continue; while (permpair = nvlist_next_nvpair(perms, permpair)) { const char *perm = nvpair_name(permpair); uint64_t n = 0; - if (pa->p_unset) { - (void) zap_remove(mos, jumpobj, perm, tx); - if (zap_count(mos, jumpobj, &n) == 0 && !n) { - (void) zap_remove(mos, zapobj, - whokey, tx); - VERIFY(0 == zap_destroy(mos, - jumpobj, tx)); - } - } else { - 
VERIFY(zap_update(mos, jumpobj, - perm, 8, 1, &n, tx) == 0); + (void) zap_remove(mos, jumpobj, perm, tx); + if (zap_count(mos, jumpobj, &n) == 0 && n == 0) { + (void) zap_remove(mos, zapobj, + whokey, tx); + VERIFY(0 == zap_destroy(mos, + jumpobj, tx)); } - spa_history_internal_log((pa->p_unset == B_FALSE) ? - LOG_DS_PERM_UPDATE : LOG_DS_PERM_REMOVE, + spa_history_internal_log(LOG_DS_PERM_REMOVE, dd->dd_pool->dp_spa, tx, cr, "%s %s dataset = %llu", whokey, perm, dd->dd_phys->dd_head_dataset_obj); @@ -236,7 +253,6 @@ dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) { dsl_dir_t *dd; int error; - perm_args_t pa; nvpair_t *whopair = NULL; int blocks_modified = 0; @@ -253,11 +269,9 @@ dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) while (whopair = nvlist_next_nvpair(nvp, whopair)) blocks_modified++; - pa.p_nvp = nvp; - pa.p_unset = unset; - - error = dsl_sync_task_do(dd->dd_pool, NULL, dsl_deleg_set_sync, - dd, &pa, blocks_modified); + error = dsl_sync_task_do(dd->dd_pool, NULL, + unset ? 
dsl_deleg_unset_sync : dsl_deleg_set_sync, + dd, nvp, blocks_modified); dsl_dir_close(dd, FTAG); return (error); diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index 90c6ca4e15..d5e168e3b0 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -533,13 +533,13 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) dd->dd_phys->dd_compressed_bytes)); mutex_exit(&dd->dd_lock); - if (dd->dd_phys->dd_clone_parent_obj) { + if (dd->dd_phys->dd_origin_obj) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_clone_parent_obj, + dd->dd_phys->dd_origin_obj, NULL, DS_MODE_NONE, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_close(ds, DS_MODE_NONE, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index 7046254db8..6c615fa94c 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -70,7 +70,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_group_t, dstg_node)); - list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t), + list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); return (dp); @@ -129,7 +129,7 @@ dsl_pool_close(dsl_pool_t *dp) txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_dirty_dirs); - list_destroy(&dp->dp_synced_objsets); + list_destroy(&dp->dp_synced_datasets); arc_flush(); txg_fini(dp); @@ -181,7 +181,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { if (!list_link_active(&ds->ds_synced_link)) - list_insert_tail(&dp->dp_synced_objsets, ds); + list_insert_tail(&dp->dp_synced_datasets, ds); else 
dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); @@ -212,8 +212,8 @@ dsl_pool_zil_clean(dsl_pool_t *dp) { dsl_dataset_t *ds; - while (ds = list_head(&dp->dp_synced_objsets)) { - list_remove(&dp->dp_synced_objsets, ds); + while (ds = list_head(&dp->dp_synced_datasets)) { + list_remove(&dp->dp_synced_datasets, ds); ASSERT(ds->ds_user_ptr != NULL); zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil); dmu_buf_rele(ds->ds_dbuf, ds); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 3300e901a1..ee50db4b0e 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -161,6 +161,8 @@ void zfs_znode_byteswap(void *buf, size_t size); */ int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, objset_t **osp); +int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type, + objset_t **osp); void dmu_objset_close(objset_t *os); int dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, @@ -486,10 +488,11 @@ void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, typedef struct dmu_objset_stats { uint64_t dds_num_clones; /* number of clones of this */ uint64_t dds_creation_txg; + uint64_t dds_guid; dmu_objset_type_t dds_type; uint8_t dds_is_snapshot; uint8_t dds_inconsistent; - char dds_clone_of[MAXNAMELEN]; + char dds_origin[MAXNAMELEN]; } dmu_objset_stats_t; /* @@ -578,11 +581,29 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp); -int dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, - boolean_t force, boolean_t online, struct vnode *vp, uint64_t voffset, - char *cosname); -int dmu_replay_end_snapshot(char *name, struct drr_begin *drrb); +int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, 
boolean_t fromorigin, + struct vnode *vp, offset_t *off); + +typedef struct dmu_recv_cookie { + /* + * This structure is opaque! + * + * If logical and real are different, we are recving the stream + * into the "real" temporary clone, and then switching it with + * the "logical" target. + */ + struct dsl_dataset *drc_logical_ds; + struct dsl_dataset *drc_real_ds; + struct drr_begin *drc_drrb; + char *drc_tosnap; + boolean_t drc_newfs; + boolean_t drc_force; +} dmu_recv_cookie_t; + +int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *, + boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *); +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp); +int dmu_recv_end(dmu_recv_cookie_t *drc); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index d02eba1ce7..53cee115e3 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -129,16 +129,23 @@ int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj, const char *tail, int mode, void *tag, dsl_dataset_t **); void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag); +void dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode); +boolean_t dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode); +uint64_t dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, + dmu_tx_t *tx); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, - const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx); -int dsl_dataset_destroy(const char *name); + const char *lastname, dsl_dataset_t *origin, cred_t *, dmu_tx_t *); +int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag); int dsl_snapshots_destroy(char *fsname, char *snapname); +dsl_checkfunc_t dsl_dataset_destroy_check; +dsl_syncfunc_t 
dsl_dataset_destroy_sync; dsl_checkfunc_t dsl_dataset_snapshot_check; dsl_syncfunc_t dsl_dataset_snapshot_sync; -int dsl_dataset_rollback(dsl_dataset_t *ds); +int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost); int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name); -int dsl_dataset_clone_swap(const char *name, boolean_t force); +int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, + boolean_t force); void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds, void *p, dsl_dataset_evict_func_t func); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index bcab488f3b..d5db4c1d8d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -44,7 +44,7 @@ typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; uint64_t dd_parent_obj; - uint64_t dd_clone_parent_obj; + uint64_t dd_origin_obj; uint64_t dd_child_dir_zapobj; /* * how much space our children are accounting for; for leaf diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index f7ec67a0e0..44adeea7c9 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -50,7 +50,7 @@ typedef struct dsl_pool { /* No lock needed - sync context only */ blkptr_t dp_meta_rootbp; - list_t dp_synced_objsets; + list_t dp_synced_datasets; /* Has its own locking */ tx_state_t dp_tx; diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 93c8d76bc0..26f696f21b 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -47,9 +47,12 @@ extern "C" { #define ZFS_SNAPDIR_HIDDEN 0 #define ZFS_SNAPDIR_VISIBLE 1 -#define DMU_BACKUP_VERSION (1ULL) +#define DMU_BACKUP_STREAM_VERSION (1ULL) +#define DMU_BACKUP_HEADER_VERSION (2ULL) #define DMU_BACKUP_MAGIC 0x2F5bacbacULL + +#define DRR_FLAG_CLONE (1<<0) + /* * zfs ioctl command structure */ @@ -58,14 +61,14 @@ typedef struct dmu_replay_record { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, } drr_type; - uint32_t drr_pad; + uint32_t drr_payloadlen; union { struct drr_begin { uint64_t drr_magic; uint64_t drr_version; uint64_t drr_creation_time; dmu_objset_type_t drr_type; - uint32_t drr_pad; + uint32_t drr_flags; uint64_t drr_toguid; uint64_t drr_fromguid; char drr_toname[MAXNAMELEN]; @@ -131,6 +134,7 @@ typedef struct zfs_share { typedef struct zfs_cmd { char zc_name[MAXPATHLEN]; char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h index b9e0c95290..fcd8574876 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h @@ -236,7 +236,7 @@ typedef struct znode { /* * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation. * ZFS_EXIT() must be called before exiting the vop. - * ZFS_ENTER_VERIFY_ZP() does ZFS_ENTER plus verifies the znode is valid. + * ZFS_VERIFY_ZP() verifies the znode is valid.
*/ #define ZFS_ENTER(zfsvfs) \ { \ @@ -249,14 +249,11 @@ typedef struct znode { #define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG) -#define ZFS_ENTER_VERIFY_ZP(zfsvfs, zp) \ - { \ - ZFS_ENTER((zfsvfs)); \ - if (!(zp)->z_dbuf_held) { \ - ZFS_EXIT(zfsvfs); \ - return (EIO); \ - } \ - } +#define ZFS_VERIFY_ZP(zp) \ + if (!(zp)->z_dbuf_held) { \ + ZFS_EXIT((zp)->z_zfsvfs); \ + return (EIO); \ + } \ /* * Macros for dealing with dmu_buf_hold diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 72e2524646..674e73406a 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -345,7 +345,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) if (!INGLOBALZONE(curproc)) return (EPERM); - if (secpolicy_nfs(CRED()) == 0) { + if (secpolicy_nfs(cr) == 0) { return (0); } else { vnode_t *vp; @@ -477,7 +477,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_clone_parent_obj, NULL, + dd->dd_phys->dd_origin_obj, NULL, DS_MODE_NONE, FTAG, &pclone); rw_exit(&dd->dd_pool->dp_config_rwlock); if (error) { @@ -1083,6 +1083,17 @@ zfs_ioc_vdev_setpath(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + * zc_value alternate root + */ static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { @@ -1133,6 +1144,19 @@ retry: return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_name name of next filesystem + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + * zc_value alternate root + */ static int 
zfs_ioc_objset_version(zfs_cmd_t *zc) { @@ -1226,6 +1250,19 @@ retry: return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_nvlist_dst_size size of buffer for property nvlist + * + * outputs: + * zc_name name of next snapshot + * zc_objset_stats stats + * zc_nvlist_dst property nvlist + * zc_nvlist_dst_size size of property nvlist + * zc_value alternate root + */ static int zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) { @@ -1270,6 +1307,10 @@ retry: if (error == 0) error = zfs_ioc_objset_stats(zc); /* fill in the stats */ + /* if we failed, undo the @ that we tacked on to zc_name */ + if (error != 0) + *strchr(zc->zc_name, '@') = '\0'; + dmu_objset_close(os); return (error); } @@ -1435,6 +1476,14 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) return (0); } +/* + * inputs: + * zc_name name of filesystem + * zc_value name of property to inherit + * zc_nvlist_src{_size} nvlist of properties to apply + * + * outputs: none + */ static int zfs_ioc_set_prop(zfs_cmd_t *zc) { @@ -1451,6 +1500,13 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_value name of property to inherit + * + * outputs: none + */ static int zfs_ioc_inherit_prop(zfs_cmd_t *zc) { @@ -1553,6 +1609,14 @@ zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_nvlist_src{_size} nvlist of delegated permissions + * zc_perm_action allow/unallow flag + * + * outputs: none + */ static int zfs_ioc_set_fsacl(zfs_cmd_t *zc) { @@ -1595,6 +1659,13 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of filesystem + * + * outputs: + * zc_nvlist_src{_size} nvlist of delegated permissions + */ static int zfs_ioc_get_fsacl(zfs_cmd_t *zc) { @@ -1609,12 +1680,24 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of volume + * + * outputs: none + */ static int zfs_ioc_create_minor(zfs_cmd_t 
*zc) { return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip))); } +/* + * inputs: + * zc_name name of volume + * + * outputs: none + */ static int zfs_ioc_remove_minor(zfs_cmd_t *zc) { @@ -1809,6 +1892,15 @@ zfs_normalization_get(const char *dataset, nvlist_t *proplist, int *norm, return (0); } +/* + * inputs: + * zc_objset_type type of objset to create (fs vs zvol) + * zc_name name of new objset + * zc_value name of snapshot to clone from (may be empty) + * zc_nvlist_src{_size} nvlist of properties to apply + * + * outputs: none + */ static int zfs_ioc_create(zfs_cmd_t *zc) { @@ -1973,6 +2065,14 @@ zfs_ioc_create(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * zc_cookie recursive flag + * + * outputs: none + */ static int zfs_ioc_snapshot(zfs_cmd_t *zc) { @@ -2022,6 +2122,13 @@ zfs_unmount_snap(char *name, void *arg) return (0); } +/* + * inputs: + * zc_name name of filesystem + * zc_value short name of snapshot + * + * outputs: none + */ static int zfs_ioc_destroy_snaps(zfs_cmd_t *zc) { @@ -2036,6 +2143,13 @@ zfs_ioc_destroy_snaps(zfs_cmd_t *zc) return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value)); } +/* + * inputs: + * zc_name name of dataset to destroy + * zc_objset_type type of objset + * + * outputs: none + */ static int zfs_ioc_destroy(zfs_cmd_t *zc) { @@ -2048,12 +2162,26 @@ zfs_ioc_destroy(zfs_cmd_t *zc) return (dmu_objset_destroy(zc->zc_name)); } +/* + * inputs: + * zc_name name of snapshot to roll back to + * + * outputs: none + */ static int zfs_ioc_rollback(zfs_cmd_t *zc) { return (dmu_objset_rollback(zc->zc_name)); } +/* + * inputs: + * zc_name old name of dataset + * zc_value new name of dataset + * zc_cookie recursive flag (only valid for snapshots) + * + * outputs: none + */ static int zfs_ioc_rename(zfs_cmd_t *zc) { @@ -2079,38 +2207,64 @@ zfs_ioc_rename(zfs_cmd_t *zc) return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive)); } +/* + * inputs: + * 
zc_name name of containing filesystem + * zc_nvlist_src{_size} nvlist of properties to apply + * zc_value name of snapshot to create + * zc_string name of clone origin (if DRR_FLAG_CLONE) + * zc_cookie file descriptor to recv from + * zc_begin_record the BEGIN record of the stream (not byteswapped) + * zc_guid force flag + * + * outputs: + * zc_cookie number of bytes read + */ static int -zfs_ioc_recvbackup(zfs_cmd_t *zc) +zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; - offset_t new_off; objset_t *os; + dmu_recv_cookie_t drc; zfsvfs_t *zfsvfs = NULL; - char *cp; - char cosname[MAXNAMELEN]; boolean_t force = (boolean_t)zc->zc_guid; int error, fd; + offset_t off; + nvlist_t *props = NULL; + objset_t *origin = NULL; + char *tosnap; + char tofs[ZFS_MAXNAMELEN]; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) return (EINVAL); + (void) strcpy(tofs, zc->zc_value); + tosnap = strchr(tofs, '@'); + *tosnap = '\0'; + tosnap++; + + if (zc->zc_nvlist_src != NULL && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + &props)) != 0) + return (error); + fd = zc->zc_cookie; fp = getf(fd); - if (fp == NULL) + if (fp == NULL) { + nvlist_free(props); return (EBADF); + } /* * Get the zfsvfs for the receiving objset. There * won't be one if we're operating on a zvol, if the * objset doesn't exist yet, or is not mounted. 
*/ - cp = strchr(zc->zc_value, '@'); - *cp = '\0'; - error = dmu_objset_open(zc->zc_value, DMU_OST_ANY, + + error = dmu_objset_open(tofs, DMU_OST_ANY, DS_MODE_STANDARD | DS_MODE_READONLY, &os); - *cp = '@'; if (!error) { if (dmu_objset_type(os) == DMU_OST_ZFS) { mutex_enter(&os->os->os_user_ptr_lock); @@ -2122,60 +2276,111 @@ zfs_ioc_recvbackup(zfs_cmd_t *zc) dmu_objset_close(os); } - error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record, - &zc->zc_cookie, force, zfsvfs != NULL, fp->f_vnode, - fp->f_offset, cosname); + if (zc->zc_string[0]) { + error = dmu_objset_open(zc->zc_string, DMU_OST_ANY, + DS_MODE_STANDARD | DS_MODE_READONLY, &origin); + if (error) { + if (zfsvfs != NULL) + VFS_RELE(zfsvfs->z_vfs); + nvlist_free(props); + releasef(fd); + return (error); + } + } + + error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record, + force, origin, zfsvfs != NULL, &drc); + if (origin) + dmu_objset_close(origin); + if (error) { + if (zfsvfs != NULL) + VFS_RELE(zfsvfs->z_vfs); + nvlist_free(props); + releasef(fd); + return (error); + } /* - * For incremental snapshots where we created a - * temporary clone, we now swap zfsvfs::z_os with - * the newly created and received "cosname". + * If properties are supplied, they are to completely replace + * the existing ones; "inherit" any existing properties. 
*/ - if (!error && zfsvfs != NULL) { - char osname[MAXNAMELEN]; - int mode; - - error = zfs_suspend_fs(zfsvfs, osname, &mode); - if (!error) { - int swap_err; - int snap_err = 0; - - swap_err = dsl_dataset_clone_swap(cosname, force); - if (!swap_err) { - char *cp = strrchr(zc->zc_value, '@'); - - *cp = '\0'; - snap_err = dmu_replay_end_snapshot(zc->zc_value, - &zc->zc_begin_record); - *cp = '@'; + if (props) { + objset_t *os; + nvlist_t *nv = NULL; + + error = dmu_objset_open(tofs, DMU_OST_ANY, + DS_MODE_STANDARD | DS_MODE_READONLY | DS_MODE_INCONSISTENT, + &os); + if (error == 0) { + error = dsl_prop_get_all(os, &nv); + dmu_objset_close(os); + } + if (error == 0) { + nvpair_t *elem; + zfs_cmd_t zc2 = { 0 }; + + (void) strcpy(zc2.zc_name, tofs); + for (elem = nvlist_next_nvpair(nv, NULL); elem; + elem = nvlist_next_nvpair(nv, elem)) { + (void) strcpy(zc2.zc_value, nvpair_name(elem)); + if (zfs_secpolicy_inherit(&zc2, CRED()) == 0) + (void) zfs_ioc_inherit_prop(&zc2); } - error = zfs_resume_fs(zfsvfs, osname, mode); - if (!error) - error = swap_err; - if (!error) - error = snap_err; } + if (nv) + nvlist_free(nv); + } + + /* + * Set properties. Note, we ignore errors. Would be better to + * do best-effort in zfs_set_prop_nvlist, too. 
+ */ + (void) zfs_set_prop_nvlist(tofs, props); + nvlist_free(props); + + off = fp->f_offset; + error = dmu_recv_stream(&drc, fp->f_vnode, &off); - /* destroy the clone we created */ - (void) dmu_objset_destroy(cosname); + if (error == 0) { + if (zfsvfs != NULL) { + char osname[MAXNAMELEN]; + int mode; + + (void) zfs_suspend_fs(zfsvfs, osname, &mode); + error = dmu_recv_end(&drc); + error |= zfs_resume_fs(zfsvfs, osname, mode); + } else { + error = dmu_recv_end(&drc); + } } if (zfsvfs != NULL) VFS_RELE(zfsvfs->z_vfs); - new_off = fp->f_offset + zc->zc_cookie; - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &new_off, NULL) == 0) - fp->f_offset = new_off; + + zc->zc_cookie = off - fp->f_offset; + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; releasef(fd); return (error); } +/* + * inputs: + * zc_name name of snapshot to send + * zc_value short name of incremental fromsnap (may be empty) + * zc_cookie file descriptor to send stream to + * zc_obj fromorigin flag (mutually exclusive with zc_value) + * + * outputs: none + */ static int -zfs_ioc_sendbackup(zfs_cmd_t *zc) +zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; file_t *fp; int error; + offset_t off; error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap); @@ -2207,8 +2412,11 @@ zfs_ioc_sendbackup(zfs_cmd_t *zc) return (EBADF); } - error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode); + off = fp->f_offset; + error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off); + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; releasef(zc->zc_cookie); if (fromsnap) dmu_objset_close(fromsnap); @@ -2313,6 +2521,13 @@ zfs_ioc_clear(zfs_cmd_t *zc) return (0); } +/* + * inputs: + * zc_name name of filesystem + * zc_value name of origin snapshot + * + * outputs: none + */ static int zfs_ioc_promote(zfs_cmd_t *zc) { @@ -2500,8 +2715,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { { zfs_ioc_destroy, 
zfs_secpolicy_destroy, DATASET_NAME, B_TRUE }, { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE }, { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE }, - { zfs_ioc_recvbackup, zfs_secpolicy_receive, DATASET_NAME, B_TRUE }, - { zfs_ioc_sendbackup, zfs_secpolicy_send, DATASET_NAME, B_TRUE }, + { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE }, + { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE }, { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE }, { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE }, { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE }, diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 573f746e72..9e94ef0560 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -88,9 +88,10 @@ * to freed memory. The example below illustrates the following Big Rules: * * (1) A check must be made in each zfs thread for a mounted file system. - * This is done avoiding races using ZFS_ENTER(zfsvfs) or - * ZFS_ENTER_VERIFY(zfsvfs, zp). A ZFS_EXIT(zfsvfs) is needed before - * all returns. + * This is done avoiding races using ZFS_ENTER(zfsvfs). + * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes + * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros + * can return EIO from the calling function. * * (2) VN_RELE() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). 
This is for 3 reasons: @@ -163,6 +164,7 @@ * ZFS_EXIT(zfsvfs); // finished in zfs * return (error); // done, report error */ + /* ARGSUSED */ static int zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) @@ -286,7 +288,8 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); /* offset parameter is in/out */ error = zfs_holey(vp, com, &off); @@ -432,7 +435,8 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) int error; rl_t *rl; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); os = zfsvfs->z_os; /* @@ -625,7 +629,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) limit = MAXOFFSET_T; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); zilog = zfsvfs->z_log; /* @@ -951,7 +956,8 @@ zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); if (flag & V_ACE_MASK) error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); @@ -994,7 +1000,8 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, zfsvfs_t *zfsvfs = zdp->z_zfsvfs; int error; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zdp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zdp); *vpp = NULL; @@ -1128,7 +1135,8 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr)))) return (EINVAL); - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); os = zfsvfs->z_os; zilog = zfsvfs->z_log; @@ -1371,7 +1379,8 @@ zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, int error; int zflg = ZEXISTS; - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); zilog = 
zfsvfs->z_log; if (flags & FIGNORECASE) { @@ -1575,7 +1584,8 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, IS_EPHEMERAL(crgetgid(cr)))) return (EINVAL); - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (dzp->z_phys->zp_flags & ZFS_XATTR) { @@ -1721,7 +1731,8 @@ zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, int error; int zflg = ZEXISTS; - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (flags & FIGNORECASE) @@ -1858,7 +1869,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int error; uint8_t prefetch; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); /* * If we are not given an eof variable, @@ -2080,7 +2092,8 @@ zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); ZFS_EXIT(zfsvfs); return (0); @@ -2116,7 +2129,8 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, xoptattr_t *xoap = NULL; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); pzp = zp->z_phys; mutex_enter(&zp->z_lock); @@ -2324,7 +2338,8 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, if (mask & AT_NOSET) return (EINVAL); - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); pzp = zp->z_phys; zilog = zfsvfs->z_log; @@ -2811,7 +2826,8 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, int error = 0; int zflg = 0; - ZFS_ENTER_VERIFY_ZP(zfsvfs, sdzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(sdzp); zilog = zfsvfs->z_log; /* @@ -2826,10 +2842,7 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, } tdzp = VTOZ(tdvp); - if (!tdzp->z_dbuf_held) { - ZFS_EXIT(zfsvfs); - return (EIO); - } + ZFS_VERIFY_ZP(tdzp); if (zfsvfs->z_case & ZFS_UTF8_ONLY && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); @@ -3108,7 +3121,8 @@ zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, ASSERT(vap->va_type == VLNK); - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (zfsvfs->z_case & ZFS_UTF8_ONLY && u8_validate(name, strlen(name), @@ -3248,7 +3262,8 @@ zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) size_t bufsz; int error; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); bufsz = (size_t)zp->z_phys->zp_size; if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { @@ -3305,7 +3320,8 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, ASSERT(tdvp->v_type == VDIR); - ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; if (VOP_REALVP(svp, &realvp, ct) == 0) @@ -3315,6 +3331,8 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, ZFS_EXIT(zfsvfs); return (EXDEV); } + szp = VTOZ(svp); + ZFS_VERIFY_ZP(szp); if (zfsvfs->z_case & ZFS_UTF8_ONLY && 
u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { @@ -3324,11 +3342,6 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, if (flags & FIGNORECASE) zf |= ZCILOOK; - szp = VTOZ(svp); - if (!szp->z_dbuf_held) { - ZFS_EXIT(zfsvfs); - return (EIO); - } top: /* * We do not support links between attributes and non-attributes @@ -3571,9 +3584,8 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, uint64_t filesz; int error = 0; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); - - ASSERT(zp->z_dbuf_held && zp->z_phys); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); if (len == 0) { /* @@ -3712,7 +3724,8 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, zfsvfs_t *zfsvfs = zp->z_zfsvfs; int error; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); /* * We are following the UFS semantics with respect to mapcnt @@ -3865,13 +3878,12 @@ zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, int need_unlock = 0, err = 0; offset_t orig_off; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); if (protp) *protp = PROT_ALL; - ASSERT(zp->z_dbuf_held && zp->z_phys); - /* no faultahead (for now) */ if (pl == NULL) { ZFS_EXIT(zfsvfs); @@ -4004,7 +4016,8 @@ zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, ZFS_APPENDONLY))) return (EPERM); - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); if (vp->v_flag & VNOMAP) { ZFS_EXIT(zfsvfs); @@ -4143,7 +4156,8 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, uint64_t off, len; int error; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); top: if (cmd != F_FREESP) { @@ -4184,7 +4198,8 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) zfid_short_t *zfid; int size, i; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); gen = (uint32_t)zp->z_gen; size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; @@ -4246,7 +4261,8 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, case _PC_XATTR_EXISTS: zp = VTOZ(vp); zfsvfs = zp->z_zfsvfs; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); *valp = 0; error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); @@ -4295,7 +4311,8 @@ zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); error = zfs_getacl(zp, vsecp, skipaclchk, cr); ZFS_EXIT(zfsvfs); @@ -4312,7 +4329,8 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - ZFS_ENTER_VERIFY_ZP(zfsvfs, zp); + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); error = zfs_setacl(zp, vsecp, skipaclchk, cr); ZFS_EXIT(zfsvfs); return (error); diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 608376ad7e..107224b5cd 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -496,8 +496,8 @@ typedef enum zfs_ioc { ZFS_IOC_DESTROY, ZFS_IOC_ROLLBACK, ZFS_IOC_RENAME, - ZFS_IOC_RECVBACKUP, - ZFS_IOC_SENDBACKUP, + ZFS_IOC_RECV, + ZFS_IOC_SEND, ZFS_IOC_INJECT_FAULT, ZFS_IOC_CLEAR_FAULT, ZFS_IOC_INJECT_LIST_NEXT, |