diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2018-05-16 11:14:46 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2018-05-16 11:14:46 +0000 |
commit | c884631e9c751a35384a284fea0975fe5174262d (patch) | |
tree | 4d47f3e8853bf814a777e423c1eeb5ae997ec2dd | |
parent | c596bb2c28271ba1ba0b6af4ef4a3244b32bbfe1 (diff) | |
parent | 0b2e8253986c5c761129b58cfdac46d204903de1 (diff) | |
download | illumos-joyent-c884631e9c751a35384a284fea0975fe5174262d.tar.gz |
[illumos-gate merge]
commit 0b2e8253986c5c761129b58cfdac46d204903de1
9512 zfs remap poolname@snapname coredumps
commit 591e0e133f9980083db5d64ac33a30bcc3382ff7
8115 parallel zfs mount
commit b4bf0cf0458759c67920a031021a9d96cd683cfe
9426 metaslab size can exceed offset addressable by spacemap
commit b1da084b97cda9a2d087205b95c45a54ad654453
9309 mdb: this statement may fall through
Conflicts:
usr/src/lib/Makefile
28 files changed, 1431 insertions, 195 deletions
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_io.c b/usr/src/cmd/mdb/common/mdb/mdb_io.c index 12608a89d3..b8c04bcd06 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_io.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_io.c @@ -908,6 +908,7 @@ iob_bytes2str(varglist_t *ap, intsize_t size) case SZ_SHORT: n = (ushort_t)VA_ARG(ap, uint_t); + break; default: n = (uint_t)VA_ARG(ap, uint_t); diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c index a30ee45b7e..ab8ffb80cd 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_main.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c @@ -111,7 +111,7 @@ ucontext_t _mdb_abort_ctx; /* context fatal signal interrupted */ int _mdb_abort_rcount; /* number of times resume requested */ int _mdb_self_fd = -1; /* fd for self as for valid_frame */ -static void +__NORETURN static void terminate(int status) { (void) mdb_signal_blockall(); diff --git a/usr/src/cmd/mdb/common/modules/idm/idm.c b/usr/src/cmd/mdb/common/modules/idm/idm.c index c465a9b8fb..4e4ad832c5 100644 --- a/usr/src/cmd/mdb/common/modules/idm/idm.c +++ b/usr/src/cmd/mdb/common/modules/idm/idm.c @@ -683,7 +683,8 @@ iscsi_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) static int -iscsi_ini_hba_impl(uintptr_t addr, iscsi_dcmd_ctrl_t *idc) { +iscsi_ini_hba_impl(uintptr_t addr, iscsi_dcmd_ctrl_t *idc) +{ iscsi_hba_t ih; if (mdb_vread(&ih, sizeof (ih), addr) != sizeof (ih)) { @@ -1003,8 +1004,8 @@ iscsi_svc_walk_cb(uintptr_t addr, const void *list_walker_data, /*ARGSUSED*/ static int -iscsi_ini_hba_walk_cb(uintptr_t addr, const void *vhba, - void *idc_void) { +iscsi_ini_hba_walk_cb(uintptr_t addr, const void *vhba, void *idc_void) +{ iscsi_dcmd_ctrl_t *idc = idc_void; int rc; @@ -2392,7 +2393,8 @@ iscsi_print_ini_lun(uintptr_t addr, const iscsi_lun_t *lun, static int iscsi_print_ini_cmd(uintptr_t addr, const iscsi_cmd_t *cmd, - iscsi_dcmd_ctrl_t *idc) { + iscsi_dcmd_ctrl_t *idc) +{ uintptr_t states_addr; @@ -2666,13 +2668,15 @@ 
iscsi_sm_audit_impl(uintptr_t addr) iscsi_iscsi_login_state(sar->sar_new_state); break; default: + state_name = new_state_name = "N/A"; break; } mdb_printf("%s|%s (%d)\n\t%9s %s (%d)\n", ts_string, state_name, sar->sar_state, "New State", new_state_name, sar->sar_new_state); + + break; default: - state_name = new_state_name = "N/A"; break; } @@ -3222,9 +3226,8 @@ iscsi_isns(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } static int -iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp) { - - +iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp) +{ if (wsp->walk_addr == NULL) { mdb_warn("<iscsi_sess_t addr>::walk iscsi_ini_sess"); return (WALK_ERR); @@ -3240,7 +3243,8 @@ iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp) { } static int -iscsi_ini_sess_step(mdb_walk_state_t *wsp) { +iscsi_ini_sess_step(mdb_walk_state_t *wsp) +{ int status; if (wsp->walk_addr == NULL) { @@ -3263,8 +3267,8 @@ iscsi_ini_sess_step(mdb_walk_state_t *wsp) { } static int -iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp) { - +iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp) +{ if (wsp->walk_addr == NULL) { mdb_warn("<iscsi_conn_t addr>::walk iscsi_ini_conn"); return (WALK_DONE); @@ -3280,7 +3284,8 @@ iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp) { } static int -iscsi_ini_conn_step(mdb_walk_state_t *wsp) { +iscsi_ini_conn_step(mdb_walk_state_t *wsp) +{ int status; if (wsp->walk_addr == NULL) { @@ -3304,8 +3309,8 @@ iscsi_ini_conn_step(mdb_walk_state_t *wsp) { } static int -iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp) { - +iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp) +{ if (wsp->walk_addr == NULL) { mdb_warn("<iscsi_lun_t addr>::walk iscsi_ini_lun"); return (WALK_DONE); @@ -3320,7 +3325,8 @@ iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp) { } static int -iscsi_ini_lun_step(mdb_walk_state_t *wsp) { +iscsi_ini_lun_step(mdb_walk_state_t *wsp) +{ int status; if (wsp->walk_addr == NULL) { @@ -3343,8 +3349,8 @@ iscsi_ini_lun_step(mdb_walk_state_t *wsp) { } static int 
-iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp) { - +iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp) +{ if (wsp->walk_addr == NULL) { mdb_warn("<iscsi_cmd_t addr>::walk iscsi_ini_cmd"); return (WALK_DONE); @@ -3359,7 +3365,8 @@ iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp) { } static int -iscsi_ini_cmd_step(mdb_walk_state_t *wsp) { +iscsi_ini_cmd_step(mdb_walk_state_t *wsp) +{ int status; if (wsp->walk_addr == NULL) { @@ -3382,9 +3389,8 @@ iscsi_ini_cmd_step(mdb_walk_state_t *wsp) { } static int -iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd, - void *vidc) { - +iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd, void *vidc) +{ const iscsi_cmd_t *cmd = vcmd; iscsi_dcmd_ctrl_t *idc = vidc; int rc; @@ -3400,7 +3406,8 @@ iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd, } static int -iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) { +iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) +{ uintptr_t state_addr, array_addr; int array_size; struct i_ddi_soft_state *ss; @@ -3408,7 +3415,7 @@ iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) { hwi = (idm_hba_walk_info_t *)mdb_zalloc( - sizeof (idm_hba_walk_info_t), UM_SLEEP|UM_GC); + sizeof (idm_hba_walk_info_t), UM_SLEEP|UM_GC); if (!hwi) { mdb_warn("unable to allocate storage for iscsi_ini_hba walk"); @@ -3461,7 +3468,8 @@ iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) { } static int -iscsi_ini_hba_step(mdb_walk_state_t *wsp) { +iscsi_ini_hba_step(mdb_walk_state_t *wsp) +{ int status; idm_hba_walk_info_t *hwi = (idm_hba_walk_info_t *)wsp->walk_data; diff --git a/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c b/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c index 100cbe4be1..56630dd8a6 100644 --- a/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c +++ b/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c @@ -51,6 +51,7 @@ kmdb_dpi_handle_fault(kreg_t trapno, kreg_t pc, kreg_t sp, int cpuid) switch (trapno) { case T_GPFLT: errno = EACCES; + break; default: errno = EMDB_NOMAP; } diff --git a/usr/src/cmd/zfs/zfs_main.c 
b/usr/src/cmd/zfs/zfs_main.c index 7dac2f2237..d9f253fbf8 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -60,6 +60,7 @@ #include <sys/fs/zfs.h> #include <sys/types.h> #include <time.h> +#include <synch.h> #include <libzfs.h> #include <libzfs_core.h> @@ -5839,7 +5840,12 @@ zfs_do_holds(int argc, char **argv) #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ -#define MOUNT_TIME 5 /* seconds */ +#define MOUNT_TIME 1 /* seconds */ + +typedef struct get_all_state { + boolean_t ga_verbose; + get_all_cb_t *ga_cbp; +} get_all_state_t; static int get_one_dataset(zfs_handle_t *zhp, void *data) @@ -5848,10 +5854,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data) static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; - get_all_cb_t *cbp = data; + get_all_state_t *state = data; zfs_type_t type = zfs_get_type(zhp); - if (cbp->cb_verbose) { + if (state->ga_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { @@ -5877,25 +5883,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data) zfs_close(zhp); return (0); } - libzfs_add_handle(cbp, zhp); - assert(cbp->cb_used <= cbp->cb_alloc); + libzfs_add_handle(state->ga_cbp, zhp); + assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc); return (0); } static void -get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) +get_all_datasets(get_all_cb_t *cbp, boolean_t verbose) { - get_all_cb_t cb = { 0 }; - cb.cb_verbose = verbose; - cb.cb_getone = get_one_dataset; + get_all_state_t state = { + .ga_verbose = verbose, + .ga_cbp = cbp + }; if (verbose) set_progress_header(gettext("Reading ZFS config")); - (void) zfs_iter_root(g_zfs, get_one_dataset, &cb); - - *dslist = cb.cb_handles; - *count = cb.cb_used; + (void) zfs_iter_root(g_zfs, get_one_dataset, &state); if (verbose) finish_progress(gettext("done.")); @@ -5906,8 +5910,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t 
verbose) * similar, we have a common function with an extra parameter to determine which * mode we are using. */ -#define OP_SHARE 0x1 -#define OP_MOUNT 0x2 +typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t; + +typedef struct share_mount_state { + share_mount_op_t sm_op; + boolean_t sm_verbose; + int sm_flags; + char *sm_options; + char *sm_proto; /* only valid for OP_SHARE */ + mutex_t sm_lock; /* protects the remaining fields */ + uint_t sm_total; /* number of filesystems to process */ + uint_t sm_done; /* number of filesystems processed */ + int sm_status; /* -1 if any of the share/mount operations failed */ +} share_mount_state_t; /* * Share or mount a dataset. @@ -6149,6 +6164,29 @@ report_mount_progress(int current, int total) update_progress(info); } +/* + * zfs_foreach_mountpoint() callback that mounts or shares one filesystem and + * updates the progress meter. + */ +static int +share_mount_one_cb(zfs_handle_t *zhp, void *arg) +{ + share_mount_state_t *sms = arg; + int ret; + + ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto, + B_FALSE, sms->sm_options); + + mutex_enter(&sms->sm_lock); + if (ret != 0) + sms->sm_status = ret; + sms->sm_done++; + if (sms->sm_verbose) + report_mount_progress(sms->sm_done, sms->sm_total); + mutex_exit(&sms->sm_lock); + return (ret); +} + static void append_options(char *mntopts, char *newopts) { @@ -6221,8 +6259,6 @@ share_mount(int op, int argc, char **argv) /* check number of arguments */ if (do_all) { - zfs_handle_t **dslist = NULL; - size_t i, count = 0; char *protocol = NULL; if (op == OP_SHARE && argc > 0) { @@ -6243,33 +6279,44 @@ share_mount(int op, int argc, char **argv) } start_progress_timer(); - get_all_datasets(&dslist, &count, verbose); + get_all_cb_t cb = { 0 }; + get_all_datasets(&cb, verbose); - if (count == 0) + if (cb.cb_used == 0) return (0); - qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); - sa_init_selective_arg_t sharearg; - sharearg.zhandle_arr = dslist; - 
sharearg.zhandle_len = count; - if ((ret = zfs_init_libshare_arg(zfs_get_handle(dslist[0]), - SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) { - (void) fprintf(stderr, - gettext("Could not initialize libshare, %d"), ret); - return (ret); + if (op == OP_SHARE) { + sa_init_selective_arg_t sharearg; + sharearg.zhandle_arr = cb.cb_handles; + sharearg.zhandle_len = cb.cb_used; + if ((ret = zfs_init_libshare_arg(g_zfs, + SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) { + (void) fprintf(stderr, gettext( + "Could not initialize libshare, %d"), ret); + return (ret); + } } - for (i = 0; i < count; i++) { - if (verbose) - report_mount_progress(i, count); - - if (share_mount_one(dslist[i], op, flags, protocol, - B_FALSE, options) != 0) - ret = 1; - zfs_close(dslist[i]); - } + share_mount_state_t share_mount_state = { 0 }; + share_mount_state.sm_op = op; + share_mount_state.sm_verbose = verbose; + share_mount_state.sm_flags = flags; + share_mount_state.sm_options = options; + share_mount_state.sm_proto = protocol; + share_mount_state.sm_total = cb.cb_used; + (void) mutex_init(&share_mount_state.sm_lock, + LOCK_NORMAL | LOCK_ERRORCHECK, NULL); + /* + * libshare isn't mt-safe, so only do the operation in parallel + * if we're mounting. + */ + zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, + share_mount_one_cb, &share_mount_state, op == OP_MOUNT); + ret = share_mount_state.sm_status; - free(dslist); + for (int i = 0; i < cb.cb_used; i++) + zfs_close(cb.cb_handles[i]); + free(cb.cb_handles); } else if (argc == 0) { struct mnttab entry; @@ -6984,11 +7031,28 @@ zfs_do_diff(int argc, char **argv) return (err != 0); } +/* + * zfs remap <filesystem | volume> + * + * Remap the indirect blocks in the given filesystem or volume. 
+ */ static int zfs_do_remap(int argc, char **argv) { const char *fsname; int err = 0; + int c; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + if (argc != 2) { (void) fprintf(stderr, gettext("wrong number of arguments\n")); usage(B_FALSE); diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index ee1855d850..3bdaeda439 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -680,8 +680,8 @@ libsmbfs: libkrb5 libsec libidmap pkcs11 libsmbios: libdevinfo libsrpt: libstmf libstmf: libscf -libstmfproxy: libstmf libpthread -libsum: libast +libstmfproxy: libstmf libpthread +libsum: libast libsun_ima: libdevinfo libsysevent libsysevent: libsecdb libtecla: libcurses @@ -697,7 +697,7 @@ libvrrpadm: libdladm libscf libvscan: libscf libsecdb libzdoor: libc libzonecfg libcontract libzfs: libdevid libgen libuutil libadm libavl libefi libidmap \ - libumem libtsol libzfs_core libcmdutils + libumem libtsol libzfs_core libzfs_jni: libdiskmgt libzfs libzonecfg: libuuid libsysevent libsec libbrand libpool libscf libproc \ libuutil libbsm libsecdb @@ -713,7 +713,7 @@ passwdutil: libsldap pkcs11: libcryptoutil libgen libuuid policykit: dbusdeps print: libldap5 libmd5 libsendfile -pylibbe: libbe libzfs +pylibbe: libbe libzfs pysolaris: libsec libidmap pyzfs: libzfs raidcfg_plugins: libraidcfg librcm libcfgadm libpicl libpicltree diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com index c4a8af38b8..581adf9120 100644 --- a/usr/src/lib/libzfs/Makefile.com +++ b/usr/src/lib/libzfs/Makefile.com @@ -21,7 +21,7 @@ # # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> -# Copyright (c) 2011, 2016 by Delphix. All rights reserved. +# Copyright (c) 2011, 2017 by Delphix. All rights reserved. 
# LIBRARY= libzfs.a @@ -49,7 +49,8 @@ OBJS_COMMON= \ libzfs_pool.o \ libzfs_sendrecv.o \ libzfs_status.o \ - libzfs_util.o + libzfs_util.o \ + libzfs_taskq.o OBJECTS= $(OBJS_COMMON) $(OBJS_SHARED) diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 8fc19ba61e..92594c59a0 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -578,12 +578,11 @@ typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; - boolean_t cb_verbose; - int (*cb_getone)(zfs_handle_t *, void *); } get_all_cb_t; +void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, + zfs_iter_f, void *, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); -int libzfs_dataset_cmp(const void *, const void *); /* * Functions to create and destroy datasets. diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 79df1aa994..54018af2c6 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -54,6 +54,7 @@ #include <idmap.h> #include <aclutils.h> #include <directory.h> +#include <time.h> #include <sys/dnode.h> #include <sys/spa.h> @@ -789,6 +790,8 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) void libzfs_mnttab_init(libzfs_handle_t *hdl) { + (void) mutex_init(&hdl->libzfs_mnttab_cache_lock, + LOCK_NORMAL | LOCK_ERRORCHECK, NULL); assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); @@ -829,6 +832,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl) free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); + (void) mutex_destroy(&hdl->libzfs_mnttab_cache_lock); } void @@ -843,6 +847,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, { mnttab_node_t find; mnttab_node_t *mtn; + int ret = ENOENT; if (!hdl->libzfs_mnttab_enable) { 
struct mnttab srch = { 0 }; @@ -858,6 +863,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, return (ENOENT); } + mutex_enter(&hdl->libzfs_mnttab_cache_lock); if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) libzfs_mnttab_update(hdl); @@ -865,9 +871,10 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); if (mtn) { *entry = mtn->mtn_mt; - return (0); + ret = 0; } - return (ENOENT); + mutex_exit(&hdl->libzfs_mnttab_cache_lock); + return (ret); } void @@ -876,14 +883,16 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, { mnttab_node_t *mtn; - if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) - return; - mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); - mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); - mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); - mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); - mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); - avl_add(&hdl->libzfs_mnttab_cache, mtn); + mutex_enter(&hdl->libzfs_mnttab_cache_lock); + if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) { + mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); + mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); + mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); + mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); + mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); + avl_add(&hdl->libzfs_mnttab_cache, mtn); + } + mutex_exit(&hdl->libzfs_mnttab_cache_lock); } void @@ -892,6 +901,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) mnttab_node_t find; mnttab_node_t *ret; + mutex_enter(&hdl->libzfs_mnttab_cache_lock); find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) != NULL) { @@ -902,6 +912,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) free(ret->mtn_mt.mnt_mntopts); free(ret); } + mutex_exit(&hdl->libzfs_mnttab_cache_lock); } int @@ -3886,12 +3897,24 @@ 
zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs) char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot remap filesystem '%s' "), fs); + "cannot remap dataset '%s'"), fs); err = lzc_remap(fs); if (err != 0) { - (void) zfs_standard_error(hdl, err, errbuf); + switch (err) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + default: + (void) zfs_standard_error(hdl, err, errbuf); + break; + } } return (err); diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h index 9e5641ec46..4c0c89e989 100644 --- a/usr/src/lib/libzfs/common/libzfs_impl.h +++ b/usr/src/lib/libzfs/common/libzfs_impl.h @@ -23,7 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2017 by Delphix. All rights reserved. */ #ifndef _LIBZFS_IMPL_H @@ -34,6 +34,7 @@ #include <sys/nvpair.h> #include <sys/dmu.h> #include <sys/zfs_ioctl.h> +#include <synch.h> #include <libuutil.h> #include <libzfs.h> @@ -74,6 +75,13 @@ struct libzfs_handle { int libzfs_storeerr; /* stuff error messages into buffer */ void *libzfs_sharehdl; /* libshare handle */ boolean_t libzfs_mnttab_enable; + /* + * We need a lock to handle the case where parallel mount + * threads are populating the mnttab cache simultaneously. The + * lock only protects the integrity of the avl tree, and does + * not protect the contents of the mnttab entries themselves. 
+ */ + mutex_t libzfs_mnttab_cache_lock; avl_tree_t libzfs_mnttab_cache; int libzfs_pool_iter; topo_hdl_t *libzfs_topo_hdl; diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index 9fd37825a3..cf15735f3f 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -22,7 +22,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2016 by Delphix. All rights reserved. + * Copyright (c) 2014, 2017 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2017 Joyent, Inc. * Copyright 2017 RackTop Systems. @@ -34,25 +34,25 @@ * they are used by mount and unmount and when changing a filesystem's * mountpoint. * - * zfs_is_mounted() - * zfs_mount() - * zfs_unmount() - * zfs_unmountall() + * zfs_is_mounted() + * zfs_mount() + * zfs_unmount() + * zfs_unmountall() * * This file also contains the functions used to manage sharing filesystems via * NFS and iSCSI: * - * zfs_is_shared() - * zfs_share() - * zfs_unshare() + * zfs_is_shared() + * zfs_share() + * zfs_unshare() * - * zfs_is_shared_nfs() - * zfs_is_shared_smb() - * zfs_share_proto() - * zfs_shareall(); - * zfs_unshare_nfs() - * zfs_unshare_smb() - * zfs_unshareall_nfs() + * zfs_is_shared_nfs() + * zfs_is_shared_smb() + * zfs_share_proto() + * zfs_shareall(); + * zfs_unshare_nfs() + * zfs_unshare_smb() + * zfs_unshareall_nfs() * zfs_unshareall_smb() * zfs_unshareall() * zfs_unshareall_bypath() @@ -60,8 +60,8 @@ * The following functions are available for pool consumers, and will * mount/unmount and share/unshare all datasets within pool: * - * zpool_enable_datasets() - * zpool_disable_datasets() + * zpool_enable_datasets() + * zpool_disable_datasets() */ #include <dirent.h> @@ -83,11 +83,15 @@ #include <libzfs.h> #include "libzfs_impl.h" +#include "libzfs_taskq.h" #include 
<libshare.h> #include <sys/systeminfo.h> #define MAXISALEN 257 /* based on sysinfo(2) man page */ +static int mount_tq_nthr = 512; /* taskq threads for multi-threaded mounting */ + +static void zfs_mount_task(void *); static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); @@ -1077,25 +1081,32 @@ remove_mountpoint(zfs_handle_t *zhp) } } +/* + * Add the given zfs handle to the cb_handles array, dynamically reallocating + * the array if it is out of space. + */ void libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) { if (cbp->cb_alloc == cbp->cb_used) { size_t newsz; - void *ptr; + zfs_handle_t **newhandles; - newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64; - ptr = zfs_realloc(zhp->zfs_hdl, - cbp->cb_handles, cbp->cb_alloc * sizeof (void *), - newsz * sizeof (void *)); - cbp->cb_handles = ptr; + newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64; + newhandles = zfs_realloc(zhp->zfs_hdl, + cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *), + newsz * sizeof (zfs_handle_t *)); + cbp->cb_handles = newhandles; cbp->cb_alloc = newsz; } cbp->cb_handles[cbp->cb_used++] = zhp; } +/* + * Recursive helper function used during file system enumeration + */ static int -mount_cb(zfs_handle_t *zhp, void *data) +zfs_iter_cb(zfs_handle_t *zhp, void *data) { get_all_cb_t *cbp = data; @@ -1121,104 +1132,350 @@ mount_cb(zfs_handle_t *zhp, void *data) } libzfs_add_handle(cbp, zhp); - if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { + if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) { zfs_close(zhp); return (-1); } return (0); } +/* + * Sort comparator that compares two mountpoint paths. We sort these paths so + * that subdirectories immediately follow their parents. This means that we + * effectively treat the '/' character as the lowest value non-nul char. 
An + * example sorted list using this comparator would look like: + * + * /foo + * /foo/bar + * /foo/bar/baz + * /foo/baz + * /foo.bar + * + * The mounting code depends on this ordering to deterministically iterate + * over filesystems in order to spawn parallel mount tasks. + */ int -libzfs_dataset_cmp(const void *a, const void *b) +mountpoint_cmp(const void *arga, const void *argb) { - zfs_handle_t **za = (zfs_handle_t **)a; - zfs_handle_t **zb = (zfs_handle_t **)b; + zfs_handle_t *const *zap = arga; + zfs_handle_t *za = *zap; + zfs_handle_t *const *zbp = argb; + zfs_handle_t *zb = *zbp; char mounta[MAXPATHLEN]; char mountb[MAXPATHLEN]; + const char *a = mounta; + const char *b = mountb; boolean_t gota, gotb; - if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, + gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM); + if (gota) { + verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta, sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); - if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, + } + gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM); + if (gotb) { + verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb, sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); + } - if (gota && gotb) - return (strcmp(mounta, mountb)); + if (gota && gotb) { + while (*a != '\0' && (*a == *b)) { + a++; + b++; + } + if (*a == *b) + return (0); + if (*a == '\0') + return (-1); + if (*b == '\0') + return (1); + if (*a == '/') + return (-1); + if (*b == '/') + return (1); + return (*a < *b ? -1 : *a > *b); + } if (gota) return (-1); if (gotb) return (1); - return (strcmp(zfs_get_name(a), zfs_get_name(b))); + /* + * If neither filesystem has a mountpoint, revert to sorting by + * dataset name. + */ + return (strcmp(zfs_get_name(za), zfs_get_name(zb))); +} + +/* + * Return true if path2 is a child of path1. 
+ */ +static boolean_t +libzfs_path_contains(const char *path1, const char *path2) +{ + return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'); +} + +/* + * Given a mountpoint specified by idx in the handles array, find the first + * non-descendent of that mountpoint and return its index. Descendant paths + * start with the parent's path. This function relies on the ordering + * enforced by mountpoint_cmp(). + */ +static int +non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx) +{ + char parent[ZFS_MAXPROPLEN]; + char child[ZFS_MAXPROPLEN]; + int i; + + verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent, + sizeof (parent), NULL, NULL, 0, B_FALSE) == 0); + + for (i = idx + 1; i < num_handles; i++) { + verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child, + sizeof (child), NULL, NULL, 0, B_FALSE) == 0); + if (!libzfs_path_contains(parent, child)) + break; + } + return (i); +} + +typedef struct mnt_param { + libzfs_handle_t *mnt_hdl; + zfs_taskq_t *mnt_tq; + zfs_handle_t **mnt_zhps; /* filesystems to mount */ + size_t mnt_num_handles; + int mnt_idx; /* Index of selected entry to mount */ + zfs_iter_f mnt_func; + void *mnt_data; +} mnt_param_t; + +/* + * Allocate and populate the parameter struct for mount function, and + * schedule mounting of the entry selected by idx. 
+ */ +static void +zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles, + size_t num_handles, int idx, zfs_iter_f func, void *data, zfs_taskq_t *tq) +{ + mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t)); + + mnt_param->mnt_hdl = hdl; + mnt_param->mnt_tq = tq; + mnt_param->mnt_zhps = handles; + mnt_param->mnt_num_handles = num_handles; + mnt_param->mnt_idx = idx; + mnt_param->mnt_func = func; + mnt_param->mnt_data = data; + + (void) zfs_taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param, + ZFS_TQ_SLEEP); +} + +/* + * This is the structure used to keep state of mounting or sharing operations + * during a call to zpool_enable_datasets(). + */ +typedef struct mount_state { + /* + * ms_mntstatus is set to -1 if any mount fails. While multiple threads + * could update this variable concurrently, no synchronization is + * needed as it's only ever set to -1. + */ + int ms_mntstatus; + int ms_mntflags; + const char *ms_mntopts; +} mount_state_t; + +static int +zfs_mount_one(zfs_handle_t *zhp, void *arg) +{ + mount_state_t *ms = arg; + int ret = 0; + + if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0) + ret = ms->ms_mntstatus = -1; + return (ret); +} + +static int +zfs_share_one(zfs_handle_t *zhp, void *arg) +{ + mount_state_t *ms = arg; + int ret = 0; + + if (zfs_share(zhp) != 0) + ret = ms->ms_mntstatus = -1; + return (ret); +} + +/* + * Task queue function to mount one file system. On completion, it finds and + * schedules its children to be mounted. This depends on the sorting done in + * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries + * each descending from the previous) will have no parallelism since we always + * have to wait for the parent to finish mounting before we can schedule + * its children. 
+ */ +static void +zfs_mount_task(void *arg) +{ + mnt_param_t *mp = arg; + int idx = mp->mnt_idx; + zfs_handle_t **handles = mp->mnt_zhps; + size_t num_handles = mp->mnt_num_handles; + char mountpoint[ZFS_MAXPROPLEN]; + + verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); + + if (mp->mnt_func(handles[idx], mp->mnt_data) != 0) + return; + + /* + * We dispatch tasks to mount filesystems with mountpoints underneath + * this one. We do this by dispatching the next filesystem with a + * descendant mountpoint of the one we just mounted, then skip all of + * its descendants, dispatch the next descendant mountpoint, and so on. + * The non_descendant_idx() function skips over filesystems that are + * descendants of the filesystem we just dispatched. + */ + for (int i = idx + 1; i < num_handles; + i = non_descendant_idx(handles, num_handles, i)) { + char child[ZFS_MAXPROPLEN]; + verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, + child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0); + + if (!libzfs_path_contains(mountpoint, child)) + break; /* not a descendant, return */ + zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i, + mp->mnt_func, mp->mnt_data, mp->mnt_tq); + } + free(mp); +} + +/* + * Issue the func callback for each ZFS handle contained in the handles + * array. This function is used to mount all datasets, and so this function + * guarantees that filesystems for parent mountpoints are called before their + * children. As such, before issuing any callbacks, we first sort the array + * of handles by mountpoint. + * + * Callbacks are issued in one of two ways: + * + * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT + * environment variable is set, then we issue callbacks sequentially. + * + * 2. 
In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT + * environment variable is not set, then we use a taskq to dispatch threads + * to mount filesystems in parallel. This function dispatches tasks to mount + * the filesystems at the top-level mountpoints, and these tasks in turn + * are responsible for recursively mounting filesystems in their children + * mountpoints. + */ +void +zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, + size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) +{ + /* + * The ZFS_SERIAL_MOUNT environment variable is an undocumented + * variable that can be used as a convenience to do a/b comparison + * of serial vs. parallel mounting. + */ + boolean_t serial_mount = !parallel || + (getenv("ZFS_SERIAL_MOUNT") != NULL); + + /* + * Sort the datasets by mountpoint. See mountpoint_cmp for details + * of how these are sorted. + */ + qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp); + + if (serial_mount) { + for (int i = 0; i < num_handles; i++) { + func(handles[i], data); + } + return; + } + + /* + * Issue the callback function for each dataset using a parallel + * algorithm that uses a taskq to manage threads. + */ + zfs_taskq_t *tq = zfs_taskq_create("mount_taskq", mount_tq_nthr, 0, + mount_tq_nthr, mount_tq_nthr, ZFS_TASKQ_PREPOPULATE); + + /* + * There may be multiple "top level" mountpoints outside of the pool's + * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of + * these. + */ + for (int i = 0; i < num_handles; + i = non_descendant_idx(handles, num_handles, i)) { + zfs_dispatch_mount(hdl, handles, num_handles, i, func, data, + tq); + } + + zfs_taskq_wait(tq); /* wait for all scheduled mounts to complete */ + zfs_taskq_destroy(tq); } /* * Mount and share all datasets within the given pool. This assumes that no - * datasets within the pool are currently mounted. 
Because users can create - * complicated nested hierarchies of mountpoints, we first gather all the - * datasets and mountpoints within the pool, and sort them by mountpoint. Once - * we have the list of all filesystems, we iterate over them in order and mount - * and/or share each one. + * datasets within the pool are currently mounted. */ #pragma weak zpool_mount_datasets = zpool_enable_datasets int zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) { get_all_cb_t cb = { 0 }; - libzfs_handle_t *hdl = zhp->zpool_hdl; + mount_state_t ms = { 0 }; zfs_handle_t *zfsp; - int i, ret = -1; - int *good; + sa_init_selective_arg_t sharearg; + int ret = 0; - /* - * Gather all non-snap datasets within the pool. - */ - if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL) + if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, + ZFS_TYPE_DATASET)) == NULL) goto out; - libzfs_add_handle(&cb, zfsp); - if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0) - goto out; - /* - * Sort the datasets by mountpoint. - */ - qsort(cb.cb_handles, cb.cb_used, sizeof (void *), - libzfs_dataset_cmp); /* - * And mount all the datasets, keeping track of which ones - * succeeded or failed. + * Gather all non-snapshot datasets within the pool. Start by adding + * the root filesystem for this pool to the list, and then iterate + * over all child filesystems. */ - if ((good = zfs_alloc(zhp->zpool_hdl, - cb.cb_used * sizeof (int))) == NULL) + libzfs_add_handle(&cb, zfsp); + if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0) goto out; - ret = 0; - for (i = 0; i < cb.cb_used; i++) { - if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) - ret = -1; - else - good[i] = 1; - } + ms.ms_mntopts = mntopts; + ms.ms_mntflags = flags; + zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, + zfs_mount_one, &ms, B_TRUE); + if (ms.ms_mntstatus != 0) + ret = ms.ms_mntstatus; /* - * Then share all the ones that need to be shared. 
This needs - * to be a separate pass in order to avoid excessive reloading - * of the configuration. Good should never be NULL since - * zfs_alloc is supposed to exit if memory isn't available. + * Share all filesystems that need to be shared. This needs to be + * a separate pass because libshare is not mt-safe, and so we need + * to share serially. */ - for (i = 0; i < cb.cb_used; i++) { - if (good[i] && zfs_share(cb.cb_handles[i]) != 0) - ret = -1; - } + sharearg.zhandle_arr = cb.cb_handles; + sharearg.zhandle_len = cb.cb_used; + if ((ret = zfs_init_libshare_arg(zhp->zpool_hdl, + SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != 0) + goto out; - free(good); + ms.ms_mntstatus = 0; + zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, + zfs_share_one, &ms, B_FALSE); + if (ms.ms_mntstatus != 0) + ret = ms.ms_mntstatus; out: - for (i = 0; i < cb.cb_used; i++) + for (int i = 0; i < cb.cb_used; i++) zfs_close(cb.cb_handles[i]); free(cb.cb_handles); diff --git a/usr/src/lib/libzfs/common/libzfs_taskq.c b/usr/src/lib/libzfs/common/libzfs_taskq.c new file mode 100644 index 0000000000..28bf649710 --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_taskq.c @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. + * Copyright (c) 2014, 2018 by Delphix. All rights reserved. + */ + +#include <thread.h> +#include <synch.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/debug.h> +#include <sys/sysmacros.h> + +#include "libzfs_taskq.h" + +#define ZFS_TASKQ_ACTIVE 0x00010000 +#define ZFS_TASKQ_NAMELEN 31 + +typedef struct zfs_taskq_ent { + struct zfs_taskq_ent *ztqent_next; + struct zfs_taskq_ent *ztqent_prev; + ztask_func_t *ztqent_func; + void *ztqent_arg; + uintptr_t ztqent_flags; +} zfs_taskq_ent_t; + +struct zfs_taskq { + char ztq_name[ZFS_TASKQ_NAMELEN + 1]; + mutex_t ztq_lock; + rwlock_t ztq_threadlock; + cond_t ztq_dispatch_cv; + cond_t ztq_wait_cv; + thread_t *ztq_threadlist; + int ztq_flags; + int ztq_active; + int ztq_nthreads; + int ztq_nalloc; + int ztq_minalloc; + int ztq_maxalloc; + cond_t ztq_maxalloc_cv; + int ztq_maxalloc_wait; + zfs_taskq_ent_t *ztq_freelist; + zfs_taskq_ent_t ztq_task; +}; + +static zfs_taskq_ent_t * +ztask_alloc(zfs_taskq_t *ztq, int ztqflags) +{ + zfs_taskq_ent_t *t; + timestruc_t ts; + int err; + +again: if ((t = ztq->ztq_freelist) != NULL && + ztq->ztq_nalloc >= ztq->ztq_minalloc) { + ztq->ztq_freelist = t->ztqent_next; + } else { + if (ztq->ztq_nalloc >= ztq->ztq_maxalloc) { + if (!(ztqflags & UMEM_NOFAIL)) + return (NULL); + + /* + * We don't want to exceed ztq_maxalloc, but we can't + * wait for other tasks to complete (and thus free up + * task structures) without risking deadlock with + * the caller. 
So, we just delay for one second
+			 * to throttle the allocation rate. If we have tasks
+			 * complete before one second timeout expires then
+			 * ztask_free will signal us and we will
+			 * immediately retry the allocation.
+			 */
+			ztq->ztq_maxalloc_wait++;
+
+			ts.tv_sec = 1;
+			ts.tv_nsec = 0;
+			err = cond_reltimedwait(&ztq->ztq_maxalloc_cv,
+			    &ztq->ztq_lock, &ts);
+
+			ztq->ztq_maxalloc_wait--;
+			if (err == 0)
+				goto again; /* signaled */
+		}
+		mutex_exit(&ztq->ztq_lock);
+
+		t = umem_alloc(sizeof (zfs_taskq_ent_t), ztqflags);
+
+		mutex_enter(&ztq->ztq_lock);
+		if (t != NULL)
+			ztq->ztq_nalloc++;
+	}
+	return (t);
+}
+
+static void
+ztask_free(zfs_taskq_t *ztq, zfs_taskq_ent_t *t)
+{
+	if (ztq->ztq_nalloc <= ztq->ztq_minalloc) {
+		t->ztqent_next = ztq->ztq_freelist;
+		ztq->ztq_freelist = t;
+	} else {
+		ztq->ztq_nalloc--;
+		mutex_exit(&ztq->ztq_lock);
+		umem_free(t, sizeof (zfs_taskq_ent_t));
+		mutex_enter(&ztq->ztq_lock);
+	}
+
+	if (ztq->ztq_maxalloc_wait)
+		VERIFY0(cond_signal(&ztq->ztq_maxalloc_cv));
+}
+
+zfs_taskqid_t
+zfs_taskq_dispatch(zfs_taskq_t *ztq, ztask_func_t func, void *arg,
+    uint_t ztqflags)
+{
+	zfs_taskq_ent_t *t;
+
+	mutex_enter(&ztq->ztq_lock);
+	ASSERT(ztq->ztq_flags & ZFS_TASKQ_ACTIVE);
+	if ((t = ztask_alloc(ztq, ztqflags)) == NULL) {
+		mutex_exit(&ztq->ztq_lock);
+		return (0);
+	}
+	if (ztqflags & ZFS_TQ_FRONT) {
+		t->ztqent_next = ztq->ztq_task.ztqent_next;
+		t->ztqent_prev = &ztq->ztq_task;
+	} else {
+		t->ztqent_next = &ztq->ztq_task;
+		t->ztqent_prev = ztq->ztq_task.ztqent_prev;
+	}
+	t->ztqent_next->ztqent_prev = t;
+	t->ztqent_prev->ztqent_next = t;
+	t->ztqent_func = func;
+	t->ztqent_arg = arg;
+	t->ztqent_flags = 0;
+	VERIFY0(cond_signal(&ztq->ztq_dispatch_cv));
+	mutex_exit(&ztq->ztq_lock);
+	return (1);
+}
+
+void
+zfs_taskq_wait(zfs_taskq_t *ztq)
+{
+	mutex_enter(&ztq->ztq_lock);
+	while (ztq->ztq_task.ztqent_next != &ztq->ztq_task ||
+	    ztq->ztq_active != 0) {
+		int ret = cond_wait(&ztq->ztq_wait_cv, &ztq->ztq_lock);
+		
VERIFY(ret == 0 || ret == EINTR);
+	}
+	mutex_exit(&ztq->ztq_lock);
+}
+
+static void *
+zfs_taskq_thread(void *arg)
+{
+	zfs_taskq_t *ztq = arg;
+	zfs_taskq_ent_t *t;
+	boolean_t prealloc;
+
+	mutex_enter(&ztq->ztq_lock);
+	while (ztq->ztq_flags & ZFS_TASKQ_ACTIVE) {
+		if ((t = ztq->ztq_task.ztqent_next) == &ztq->ztq_task) {
+			int ret;
+			if (--ztq->ztq_active == 0)
+				VERIFY0(cond_broadcast(&ztq->ztq_wait_cv));
+			ret = cond_wait(&ztq->ztq_dispatch_cv, &ztq->ztq_lock);
+			VERIFY(ret == 0 || ret == EINTR);
+			ztq->ztq_active++;
+			continue;
+		}
+		t->ztqent_prev->ztqent_next = t->ztqent_next;
+		t->ztqent_next->ztqent_prev = t->ztqent_prev;
+		t->ztqent_next = NULL;
+		t->ztqent_prev = NULL;
+		prealloc = t->ztqent_flags & ZFS_TQENT_FLAG_PREALLOC;
+		mutex_exit(&ztq->ztq_lock);
+
+		VERIFY0(rw_rdlock(&ztq->ztq_threadlock));
+		t->ztqent_func(t->ztqent_arg);
+		VERIFY0(rw_unlock(&ztq->ztq_threadlock));
+
+		mutex_enter(&ztq->ztq_lock);
+		if (!prealloc)
+			ztask_free(ztq, t);
+	}
+	ztq->ztq_nthreads--;
+	VERIFY0(cond_broadcast(&ztq->ztq_wait_cv));
+	mutex_exit(&ztq->ztq_lock);
+	return (NULL);
+}
+
+/*ARGSUSED*/
+zfs_taskq_t *
+zfs_taskq_create(const char *name, int nthreads, pri_t pri, int minalloc,
+    int maxalloc, uint_t flags)
+{
+	zfs_taskq_t *ztq = umem_zalloc(sizeof (zfs_taskq_t), UMEM_NOFAIL);
+	int t;
+
+	ASSERT3S(nthreads, >=, 1);
+
+	VERIFY0(rwlock_init(&ztq->ztq_threadlock, USYNC_THREAD, NULL));
+	VERIFY0(cond_init(&ztq->ztq_dispatch_cv, USYNC_THREAD, NULL));
+	VERIFY0(cond_init(&ztq->ztq_wait_cv, USYNC_THREAD, NULL));
+	VERIFY0(cond_init(&ztq->ztq_maxalloc_cv, USYNC_THREAD, NULL));
+	VERIFY0(mutex_init(
+	    &ztq->ztq_lock, LOCK_NORMAL | LOCK_ERRORCHECK, NULL));
+
+	/*
+	 * strlcpy always NUL-terminates; a name longer than
+	 * ZFS_TASKQ_NAMELEN is truncated rather than left unterminated.
+	 */
+	(void) strlcpy(ztq->ztq_name, name, sizeof (ztq->ztq_name));
+
+	ztq->ztq_flags = flags | ZFS_TASKQ_ACTIVE;
+	ztq->ztq_active = nthreads;
+	ztq->ztq_nthreads = nthreads;
+	ztq->ztq_minalloc = minalloc;
+	ztq->ztq_maxalloc = maxalloc;
+	ztq->ztq_task.ztqent_next = &ztq->ztq_task;
+	ztq->ztq_task.ztqent_prev = 
&ztq->ztq_task; + ztq->ztq_threadlist = + umem_alloc(nthreads * sizeof (thread_t), UMEM_NOFAIL); + + if (flags & ZFS_TASKQ_PREPOPULATE) { + mutex_enter(&ztq->ztq_lock); + while (minalloc-- > 0) + ztask_free(ztq, ztask_alloc(ztq, UMEM_NOFAIL)); + mutex_exit(&ztq->ztq_lock); + } + + for (t = 0; t < nthreads; t++) { + (void) thr_create(0, 0, zfs_taskq_thread, + ztq, THR_BOUND, &ztq->ztq_threadlist[t]); + } + + return (ztq); +} + +void +zfs_taskq_destroy(zfs_taskq_t *ztq) +{ + int t; + int nthreads = ztq->ztq_nthreads; + + zfs_taskq_wait(ztq); + + mutex_enter(&ztq->ztq_lock); + + ztq->ztq_flags &= ~ZFS_TASKQ_ACTIVE; + VERIFY0(cond_broadcast(&ztq->ztq_dispatch_cv)); + + while (ztq->ztq_nthreads != 0) { + int ret = cond_wait(&ztq->ztq_wait_cv, &ztq->ztq_lock); + VERIFY(ret == 0 || ret == EINTR); + } + + ztq->ztq_minalloc = 0; + while (ztq->ztq_nalloc != 0) { + ASSERT(ztq->ztq_freelist != NULL); + ztask_free(ztq, ztask_alloc(ztq, UMEM_NOFAIL)); + } + + mutex_exit(&ztq->ztq_lock); + + for (t = 0; t < nthreads; t++) + (void) thr_join(ztq->ztq_threadlist[t], NULL, NULL); + + umem_free(ztq->ztq_threadlist, nthreads * sizeof (thread_t)); + + VERIFY0(rwlock_destroy(&ztq->ztq_threadlock)); + VERIFY0(cond_destroy(&ztq->ztq_dispatch_cv)); + VERIFY0(cond_destroy(&ztq->ztq_wait_cv)); + VERIFY0(cond_destroy(&ztq->ztq_maxalloc_cv)); + VERIFY0(mutex_destroy(&ztq->ztq_lock)); + + umem_free(ztq, sizeof (zfs_taskq_t)); +} diff --git a/usr/src/lib/libzfs/common/libzfs_taskq.h b/usr/src/lib/libzfs/common/libzfs_taskq.h new file mode 100644 index 0000000000..7ac045738c --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_taskq.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _ZFS_TASKQ_H +#define _ZFS_TASKQ_H + +#include <stdint.h> +#include <umem.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct zfs_taskq zfs_taskq_t; +typedef uintptr_t zfs_taskqid_t; +typedef void (ztask_func_t)(void *); + +#define ZFS_TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */ + +#define ZFS_TASKQ_PREPOPULATE 0x0001 + +#define ZFS_TQ_SLEEP UMEM_NOFAIL /* Can block for memory */ +#define ZFS_TQ_NOSLEEP UMEM_DEFAULT /* cannot block for memory; may fail */ +#define ZFS_TQ_FRONT 0x08 /* Queue in front */ + +extern zfs_taskq_t *zfs_taskq_create(const char *, int, pri_t, int, + int, uint_t); +extern void zfs_taskq_destroy(zfs_taskq_t *); + +extern zfs_taskqid_t zfs_taskq_dispatch(zfs_taskq_t *, ztask_func_t, + void *, uint_t); + +extern void zfs_taskq_wait(zfs_taskq_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_TASKQ_H */ diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index 5b38fc3eae..17a60e830d 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -21,8 +21,8 @@ # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2011, 2015 by Delphix. 
All rights reserved. # Copyright (c) 2012, Joyent, Inc. All rights reserved. +# Copyright (c) 2011, 2017 by Delphix. All rights reserved. # Copyright 2016 Nexenta Systems, Inc. # @@ -51,7 +51,6 @@ SYMBOL_VERSION SUNWprivate_1.1 { fletcher_4_incremental_native; fletcher_4_incremental_byteswap; libzfs_add_handle; - libzfs_dataset_cmp; libzfs_errno; libzfs_error_action; libzfs_error_description; @@ -79,6 +78,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { zfs_destroy_snaps; zfs_destroy_snaps_nvl; zfs_expand_proplist; + zfs_foreach_mountpoint; zfs_get_handle; zfs_get_holds; zfs_get_hole_count; diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 47ed5a1f33..c10c421246 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -56,6 +56,7 @@ dir path=opt/zfs-tests/tests/functional/cli_root/zfs_mount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_promote dir path=opt/zfs-tests/tests/functional/cli_root/zfs_property dir path=opt/zfs-tests/tests/functional/cli_root/zfs_receive +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_remap dir path=opt/zfs-tests/tests/functional/cli_root/zfs_rename dir path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation dir path=opt/zfs-tests/tests/functional/cli_root/zfs_rollback @@ -873,6 +874,11 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_001_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/setup mode=0555 @@ -953,6 +959,13 @@ file \ file \ 
path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/setup mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.cfg \ diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index e37f606fe0..b5974f8476 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -145,7 +145,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', - 'zfs_mount_all_001_pos'] + 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', @@ -162,6 +162,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', 'zfs_receive_013_pos', 'zfs_receive_014_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_remap] +tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', diff --git 
a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index ebf446f61a..57a828c86f 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -162,6 +162,9 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_remap] +tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index 2d8af0bf69..4cefe8f228 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -162,6 +162,9 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', 'zfs_rename_013_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_remap] +tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib index 41cd9698cc..0e57115e0d 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib @@ -25,7 +25,7 @@ # # -# Copyright (c) 2016 by Delphix. All rights reserved. +# Copyright (c) 2017 by Delphix. All rights reserved. # . 
$STF_SUITE/include/libtest.shlib @@ -84,13 +84,11 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev fi case "$type" in - 'ctr') log_must zfs create $pool/$fs - log_must zfs set mountpoint=$mntpoint $pool/$fs + 'ctr') log_must zfs create -o mountpoint=$mntpoint $pool/$fs ;; 'vol') log_must zfs create -V $VOLSIZE $pool/$fs ;; - *) log_must zfs create $pool/$fs - log_must zfs set mountpoint=$mntpoint $pool/$fs + *) log_must zfs create -o mountpoint=$mntpoint $pool/$fs ;; esac diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh new file mode 100644 index 0000000000..d7fcd20afa --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh @@ -0,0 +1,96 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib + +# DESCRIPTION: +# Verify that if 'zfs mount -a' fails to mount one filesystem, +# the command fails with a non-zero error code, but all other +# filesystems are mounted. +# +# STRATEGY: +# 1. Create zfs filesystems +# 2. Unmount a leaf filesystem +# 3. Create a file in the above filesystem's mountpoint +# 4. Verify that 'zfs mount -a' fails to mount the above +# 5. 
Verify that all other filesystems were mounted +# + +verify_runnable "both" + +typeset -a filesystems +typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL +typeset fscount=10 + +function setup_all +{ + # Create $fscount filesystems at the top level of $path + for ((i=0; i<$fscount; i++)); do + setup_filesystem "$DISKS" "$TESTPOOL" $i "$path/$i" ctr + done + + zfs list -r $TESTPOOL + + return 0 +} + +function cleanup_all +{ + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \ + rm -rf ${TEST_BASE_DIR%%/}/testroot$$ +} + +log_onexit cleanup_all + +log_must setup_all + +# +# Unmount all of the above so that we can create the stray file +# in one of the mountpoint directories. +# +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +# All of our filesystems should be unmounted at this point +for ((i=0; i<$fscount; i++)); do + log_mustnot mounted "$TESTPOOL/$i" +done + +# Create a stray file in one filesystem's mountpoint +touch $path/0/strayfile + +# Verify that zfs mount -a fails +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_mustnot zfs $mountall +unset __ZFS_POOL_RESTRICT + +# All filesystems except for "0" should be mounted +log_mustnot mounted "$TESTPOOL/0" +for ((i=1; i<$fscount; i++)); do + log_must mounted "$TESTPOOL/$i" +done + +log_pass "'zfs $mountall' failed as expected." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh new file mode 100644 index 0000000000..3e6a24bbcd --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh @@ -0,0 +1,162 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib + +# DESCRIPTION: +# Verify that 'zfs mount -a' succeeds given a set of filesystems +# whose mountpoints have a parent/child relationship which is +# counter to the filesystem parent/child relationship. +# +# STRATEGY: +# 1. Create zfs filesystems within the given pool. +# 2. Unmount all the filesystems. +# 3. Verify that 'zfs mount -a' command succeed, +# and all available ZFS filesystems are mounted. +# 4. Verify that 'zfs mount' is identical with 'df -F zfs' +# + +verify_runnable "both" + +typeset -a filesystems + +function setup_all +{ + typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL + typeset fscount=10 + + # + # Generate an array of filesystem names that represent a deep + # hierarchy as such: + # + # 0 + # 0/1 + # 0/1/2 + # 0/1/2/3 + # 0/1/2/3/4 + # ... + # + fs=0 + for ((i=0; i<$fscount; i++)); do + if [[ $i -gt 0 ]]; then + fs=$fs/$i + fi + filesystems+=($fs) + done + + # Create all of the above filesystems + for ((i=0; i<$fscount; i++)); do + fs=${filesystems[$i]} + setup_filesystem "$DISKS" "$TESTPOOL" "$fs" "$path/$i" ctr + done + + zfs list -r $TESTPOOL + + # + # Unmount all of the above so that we can setup our convoluted + # mount paths. + # + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + # + # Configure the mount paths so that each mountpoint is contained + # in a child filesystem. 
We should end up with something like the + # following structure (modulo the number of filesystems): + # + # NAME MOUNTPOINT + # testpool /testpool + # testpool/0 /testroot25416/testpool/0/1/2/3/4/5/6 + # testpool/0/1 /testroot25416/testpool/0/1/2/3/4/5 + # testpool/0/1/2 /testroot25416/testpool/0/1/2/3/4 + # testpool/0/1/2/3 /testroot25416/testpool/0/1/2/3 + # testpool/0/1/2/3/4 /testroot25416/testpool/0/1/2 + # testpool/0/1/2/3/4/5 /testroot25416/testpool/0/1 + # testpool/0/1/2/3/4/5/6 /testroot25416/testpool/0 + # + for ((i=0; i<$fscount; i++)); do + fs=$TESTPOOL/${filesystems[$(($fscount - $i - 1))]} + mnt=$path/${filesystems[$i]} + zfs set mountpoint=$mnt $fs + done + + zfs list -r $TESTPOOL + + return 0 +} + +function cleanup_all +{ + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + for fs in ${filesystems[@]}; do + cleanup_filesystem "$TESTPOOL" "$fs" + done + [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \ + rm -rf ${TEST_BASE_DIR%%/}/testroot$$ +} + +# +# This function takes a single true/false argument. If true it will verify that +# all file systems are mounted. If false it will verify that they are not +# mounted. +# +function verify_all +{ + if $1; then + logfunc=log_must + else + logfunc=log_mustnot + fi + + for fs in ${filesystems[@]}; do + $logfunc mounted "$TESTPOOL/$fs" + done + + return 0 +} + +log_onexit cleanup_all + +log_must setup_all + +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +verify_all false + +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $mountall +unset __ZFS_POOL_RESTRICT + +verify_all true + +log_note "Verify that 'zfs $mountcmd' will display " \ + "all ZFS filesystems currently mounted." + +verify_mount_display + +log_pass "'zfs $mountall' succeeds as root, " \ + "and all available ZFS filesystems are mounted." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile new file mode 100644 index 0000000000..658776d0cd --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_remap + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh new file mode 100644 index 0000000000..e78deacd5b --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh @@ -0,0 +1,19 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh new file mode 100644 index 0000000000..4497dbd746 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh @@ -0,0 +1,17 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh new file mode 100644 index 0000000000..4e0d2bc442 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +# +# DESCRIPTION: +# 'zfs remap' should only work with supported parameters. +# +# STRATEGY: +# 1. 
Prepare a pool where a top-level VDEV has been removed +# 2. Verify every supported parameter to 'zfs remap' is accepted +# 3. Verify other unsupported parameters raise an error +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL + rm -f $DISK1 $DISK2 +} + +log_assert "'zfs remap' should only work with supported parameters" +log_onexit cleanup + +f="$TESTPOOL/fs" +v="$TESTPOOL/vol" +s="$TESTPOOL/fs@snap" +b="$TESTPOOL/fs#bmark" +c="$TESTPOOL/clone" + +typeset goodparams=("$f" "$v" "$c") +typeset badparams=("-H" "-p" "-?" "$s" "$b" "$f $f" "$f $v" "$f $s") + +DISK1="/var/tmp/zfs_remap-1" +DISK2="/var/tmp/zfs_remap-2" + +# 1. Prepare a pool where a top-level VDEV has been removed +log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1 +log_must zpool create $TESTPOOL $DISK1 +log_must zfs create $f +log_must zfs create -V 1M -s $v +log_must zfs snap $s +log_must zfs bookmark $s $b +log_must zfs clone $s $c +log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2 +log_must zpool add $TESTPOOL $DISK2 +log_must zpool remove $TESTPOOL $DISK1 +log_must wait_for_removal $TESTPOOL + +# 2. Verify every supported parameter to 'zfs remap' is accepted +for param in "${goodparams[@]}" +do + log_must zfs remap $param +done + +# 3. Verify other unsupported parameters raise an error +for param in "${badparams[@]}" +do + log_mustnot zfs remap $param +done + +log_pass "'zfs remap' only works with supported parameters" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh new file mode 100644 index 0000000000..d8b52b091e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +# +# DESCRIPTION: +# 'zfs remap' depends on 'feature@obsolete_counts' being active +# +# STRATEGY: +# 1. Prepare a pool where a top-level VDEV has been removed and with +# feature@obsolete_counts disabled +# 2. Verify any 'zfs remap' command cannot be executed +# 3. Verify the same commands complete successfully when +# feature@obsolete_counts is enabled +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL + rm -f $DISK1 $DISK2 +} + +log_assert "'zfs remap' depends on feature@obsolete_counts being active" +log_onexit cleanup + +f="$TESTPOOL/fs" +v="$TESTPOOL/vol" +s="$TESTPOOL/fs@snap" +c="$TESTPOOL/clone" + +DISK1="/var/tmp/zfs_remap-1" +DISK2="/var/tmp/zfs_remap-2" + +# 1. Prepare a pool where a top-level VDEV has been removed with +# feature@obsolete_counts disabled +log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1 +log_must zpool create -d -o feature@device_removal=enabled $TESTPOOL $DISK1 +log_must zfs create $f +log_must zfs create -V 1M -s $v +log_must zfs snap $s +log_must zfs clone $s $c +log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2 +log_must zpool add $TESTPOOL $DISK2 +log_must zpool remove $TESTPOOL $DISK1 +log_must wait_for_removal $TESTPOOL + +# 2. Verify any 'zfs remap' command cannot be executed +log_mustnot zfs remap $f +log_mustnot zfs remap $v +log_mustnot zfs remap $c + +# 3. 
Verify the same commands complete successfully when +# feature@obsolete_counts is enabled +log_must zpool set feature@obsolete_counts=enabled $TESTPOOL +log_must zfs remap $f +log_must zfs remap $v +log_must zfs remap $c + +log_pass "'zfs remap' correctly depends on feature@obsolete_counts being active" diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index 0f855d4f3d..2df0f21f98 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -89,7 +89,7 @@ typedef struct dsl_pool { struct dsl_dir *dp_leak_dir; struct dsl_dataset *dp_origin_snap; uint64_t dp_root_dir_obj; - struct taskq *dp_vnrele_taskq; + taskq_t *dp_vnrele_taskq; /* No lock needed - sync context only */ blkptr_t dp_meta_rootbp; diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 6fee8109e0..db2d12db96 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -72,15 +72,21 @@ static vdev_ops_t *vdev_ops_table[] = { /* maximum scrub/resilver I/O queue per leaf vdev */ int zfs_scrub_limit = 10; -/* maximum number of metaslabs per top-level vdev */ +/* target number of metaslabs per top-level vdev */ int vdev_max_ms_count = 200; -/* minimum amount of metaslabs per top-level vdev */ +/* minimum number of metaslabs per top-level vdev */ int vdev_min_ms_count = 16; -/* see comment in vdev_metaslab_set_size() */ +/* practical upper limit of total metaslabs per top-level vdev */ +int vdev_ms_count_limit = 1ULL << 17; + +/* lower limit for metaslab size (512M) */ int vdev_default_ms_shift = 29; +/* upper limit for metaslab size (256G) */ +int vdev_max_ms_shift = 38; + boolean_t vdev_validate_skip = B_FALSE; /* @@ -2028,34 +2034,53 @@ void vdev_metaslab_set_size(vdev_t *vd) { uint64_t asize = vd->vdev_asize; - uint64_t ms_shift = 0; + uint64_t ms_count = asize >> vdev_default_ms_shift; + uint64_t ms_shift; /* - * For vdevs that are bigger than 8G the 
metaslab size varies in - * a way that the number of metaslabs increases in powers of two, - * linearly in terms of vdev_asize, starting from 16 metaslabs. - * So for vdev_asize of 8G we get 16 metaslabs, for 16G, we get 32, - * and so on, until we hit the maximum metaslab count limit - * [vdev_max_ms_count] from which point the metaslab count stays - * the same. + * There are two dimensions to the metaslab sizing calculation: + * the size of the metaslab and the count of metaslabs per vdev. + * In general, we aim for vdev_max_ms_count (200) metaslabs. The + * ranges of the dimensions are as follows: + * + * 2^29 <= ms_size <= 2^38 + * 16 <= ms_count <= 131,072 + * + * On the lower end of vdev sizes, we aim for metaslab sizes of + * at least 512MB (2^29) to minimize fragmentation effects when + * testing with smaller devices. However, the count constraint + * of at least 16 metaslabs will override this minimum size goal. + * + * On the upper end of vdev sizes, we aim for a maximum metaslab + * size of 256GB. However, we will cap the total count to 2^17 + * metaslabs to keep our memory footprint in check. + * + * The net effect of applying above constraints is summarized below. + * + * vdev size metaslab count + * -------------|----------------- + * < 8GB ~16 + * 8GB - 100GB one per 512MB + * 100GB - 50TB ~200 + * 50TB - 32PB one per 256GB + * > 32PB ~131,072 + * ------------------------------- */ - ms_shift = vdev_default_ms_shift; - if ((asize >> ms_shift) < vdev_min_ms_count) { - /* - * For devices that are less than 8G we want to have - * exactly 16 metaslabs. We don't want less as integer - * division rounds down, so less metaslabs mean more - * wasted space. We don't want more as these vdevs are - * small and in the likely event that we are running - * out of space, the SPA will have a hard time finding - * space due to fragmentation.
- */ + if (ms_count < vdev_min_ms_count) ms_shift = highbit64(asize / vdev_min_ms_count); - ms_shift = MAX(ms_shift, SPA_MAXBLOCKSHIFT); - - } else if ((asize >> ms_shift) > vdev_max_ms_count) { + else if (ms_count > vdev_max_ms_count) ms_shift = highbit64(asize / vdev_max_ms_count); + else + ms_shift = vdev_default_ms_shift; + + if (ms_shift < SPA_MAXBLOCKSHIFT) { + ms_shift = SPA_MAXBLOCKSHIFT; + } else if (ms_shift > vdev_max_ms_shift) { + ms_shift = vdev_max_ms_shift; + /* cap the total count to constrain memory footprint */ + if ((asize >> ms_shift) > vdev_ms_count_limit) + ms_shift = highbit64(asize / vdev_ms_count_limit); } vd->vdev_ms_shift = ms_shift; |